]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/spiegel.py
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
15 from . spiegeltv
import SpiegeltvIE
18 class SpiegelIE ( InfoExtractor
):
19 _VALID_URL
= r
'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
21 'url' : 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html' ,
22 'md5' : '2c2754212136f35fb4b19767d242f66e' ,
26 'title' : 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv' ,
27 'description' : 'md5:8029d8310232196eb235d27575a8b9f4' ,
31 'url' : 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html' ,
32 'md5' : 'f2cdf638d7aa47654e251e1aee360af1' ,
36 'title' : 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers' ,
37 'description' : 'md5:c2322b65e58f385a820c10fa03b2d088' ,
41 'url' : 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html' ,
42 'md5' : 'd8eeca6bfc8f1cd6f490eb1f44695d51' ,
46 'description' : 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.' ,
47 'title' : 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"' ,
51 def _real_extract ( self
, url
):
52 video_id
= self
._ match
_ id
( url
)
53 webpage
, handle
= self
._ download
_ webpage
_ handle
( url
, video_id
)
55 # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
56 if SpiegeltvIE
. suitable ( handle
. geturl ()):
57 return self
. url_result ( handle
. geturl (), 'Spiegeltv' )
59 title
= re
. sub ( r
'\s+' , ' ' , self
._ html
_ search
_ regex
(
60 r
'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>' ,
62 description
= self
._ html
_ search
_ meta
( 'description' , webpage
, 'description' )
64 base_url
= self
._ search
_ regex
(
65 r
'var\s+server\s*=\s*"([^"]+)\"' , webpage
, 'server URL' )
67 xml_url
= base_url
+ video_id
+ '.xml'
68 idoc
= self
._ download
_ xml
( xml_url
, video_id
)
72 if n
. tag
. startswith ( 'type' ) and n
. tag
!= 'type6' :
73 format_id
= n
. tag
. rpartition ( 'type' )[ 2 ]
74 video_url
= base_url
+ n
. find ( './filename' ). text
75 # Test video URLs beforehand as some of them are invalid
77 self
._ request
_ webpage
(
78 HEADRequest ( video_url
), video_id
,
79 'Checking %s video URL' % format_id
)
80 except ExtractorError
as e
:
81 if isinstance ( e
. cause
, compat_HTTPError
) and e
. cause
. code
== 404 :
83 ' %s video URL is invalid, skipping' % format_id
, video_id
)
86 'format_id' : format_id
,
88 'width' : int ( n
. find ( './width' ). text
),
89 'height' : int ( n
. find ( './height' ). text
),
90 'abr' : int ( n
. find ( './audiobitrate' ). text
),
91 'vbr' : int ( n
. find ( './videobitrate' ). text
),
92 'vcodec' : n
. find ( './codec' ). text
,
95 duration
= float ( idoc
[ 0 ]. findall ( './duration' )[ 0 ]. text
)
97 self
._ sort
_ formats
( formats
)
102 'description' : description
,
103 'duration' : duration
,
108 class SpiegelArticleIE ( InfoExtractor
):
109 _VALID_URL
= 'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'
110 IE_NAME
= 'Spiegel:Article'
111 IE_DESC
= 'Articles on spiegel.de'
113 'url' : 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html' ,
117 'title' : 'Faszination Badminton: Nennt es bloß nicht Federball' ,
118 'description' : 're:^Patrick Kämnitz gehört.{100,}' ,
121 'url' : 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html' ,
128 def _real_extract ( self
, url
):
129 video_id
= self
._ match
_ id
( url
)
130 webpage
= self
._ download
_ webpage
( url
, video_id
)
132 # Single video on top of the page
133 video_link
= self
._ search
_ regex
(
134 r
'<a href="([^"]+)" onclick="return spOpenVideo\(this,' , webpage
,
135 'video page URL' , default
= None )
137 video_url
= compat_urlparse
. urljoin (
138 self
. http_scheme () + '//spiegel.de/' , video_link
)
139 return self
. url_result ( video_url
)
141 # Multiple embedded videos
143 r
'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"' ,
146 self
. url_result ( compat_urlparse
. urljoin (
147 self
. http_scheme () + '//spiegel.de/' , embed_path
))
148 for embed_path
in embeds
150 return self
. playlist_result ( entries
)