]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/spiegel.py
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
11 from . spiegeltv
import SpiegeltvIE
12 from .. compat
import compat_urlparse
16 get_element_by_attribute
,
20 class SpiegelIE ( InfoExtractor
):
21 _VALID_URL
= r
'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
23 'url' : 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html' ,
24 'md5' : '2c2754212136f35fb4b19767d242f66e' ,
28 'title' : 'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv' ,
29 'description' : 'md5:8029d8310232196eb235d27575a8b9f4' ,
31 'upload_date' : '20130311' ,
34 'url' : 'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html' ,
35 'md5' : 'f2cdf638d7aa47654e251e1aee360af1' ,
39 'title' : 'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers' ,
40 'description' : 'md5:c2322b65e58f385a820c10fa03b2d088' ,
42 'upload_date' : '20131115' ,
45 'url' : 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html' ,
46 'md5' : 'd8eeca6bfc8f1cd6f490eb1f44695d51' ,
50 'description' : 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.' ,
51 'title' : 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"' ,
52 'upload_date' : '20140904' ,
55 'url' : 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html' ,
56 'only_matching' : True ,
59 'url' : 'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html' ,
60 'only_matching' : True ,
63 def _real_extract ( self
, url
):
64 video_id
= self
._ match
_ id
( url
)
65 webpage
, handle
= self
._ download
_ webpage
_ handle
( url
, video_id
)
67 # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html
68 if SpiegeltvIE
. suitable ( handle
. geturl ()):
69 return self
. url_result ( handle
. geturl (), 'Spiegeltv' )
71 nexx_id
= self
._ search
_ regex
(
72 r
'nexxOmniaId\s*:\s*(\d+)' , webpage
, 'nexx id' , default
= None )
74 domain_id
= NexxIE
._ extract
_ domain
_ id
( webpage
) or '748'
75 return self
. url_result (
76 'nexx: %s : %s ' % ( domain_id
, nexx_id
), ie
= NexxIE
. ie_key (),
79 video_data
= extract_attributes ( self
._ search
_ regex
( r
'(<div[^>]+id="spVideoElements"[^>]+>)' , webpage
, 'video element' , default
= '' ))
81 title
= video_data
. get ( 'data-video-title' ) or get_element_by_attribute ( 'class' , 'module-title' , webpage
)
82 description
= video_data
. get ( 'data-video-teaser' ) or self
._ html
_ search
_ meta
( 'description' , webpage
, 'description' )
84 base_url
= self
._ search
_ regex
(
85 [ r
'server\s*:\s*(["\' ])( ?P
< url
>.+ ?
) \
1 ', r' var\s
+ server\s
*= \s
* "(?P<url>[^" ]+) \" '],
86 webpage, ' server URL
', group=' url
')
88 xml_url = base_url + video_id + ' . xml
'
89 idoc = self._download_xml(xml_url, video_id)
93 if n.tag.startswith(' type ') and n.tag != ' type6
':
94 format_id = n.tag.rpartition(' type ')[2]
95 video_url = base_url + n.find(' ./ filename
').text
97 ' format_id
': format_id,
99 ' width
': int(n.find(' ./ width
').text),
100 ' height
': int(n.find(' ./ height
').text),
101 ' abr
': int(n.find(' ./ audiobitrate
').text),
102 ' vbr
': int(n.find(' ./ videobitrate
').text),
103 ' vcodec
': n.find(' ./ codec
').text,
106 duration = float(idoc[0].findall(' ./ duration
')[0].text)
108 self._check_formats(formats, video_id)
109 self._sort_formats(formats)
114 ' description
': description.strip() if description else None,
115 ' duration
': duration,
116 ' upload_date
': unified_strdate(video_data.get(' data
- video
- date
')),
121 class SpiegelArticleIE(InfoExtractor):
122 _VALID_URL = r' https?
://( ?
: www\
.) ?spiegel\
. de
/( ?
! video
/)[ ^?
#]*?-(?P<id>[0-9]+)\.html'
123 IE_NAME
= 'Spiegel:Article'
124 IE_DESC
= 'Articles on spiegel.de'
126 'url' : 'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html' ,
130 'title' : 'Faszination Badminton: Nennt es bloß nicht Federball' ,
131 'description' : 're:^Patrick Kämnitz gehört.{100,}' ,
132 'upload_date' : '20140825' ,
135 'url' : 'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html' ,
142 'url' : 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html' ,
146 'title' : 'Nervenkitzel Achterbahn' ,
147 'alt_title' : 'Karussellbauer in Deutschland' ,
148 'description' : 'md5:ffe7b1cc59a01f585e0569949aef73cc' ,
149 'release_year' : 2005 ,
150 'creator' : 'SPIEGEL TV' ,
151 'thumbnail' : r
're:^https?://.*\.jpg$' ,
153 'timestamp' : 1394021479 ,
154 'upload_date' : '20140305' ,
157 'format' : 'bestvideo' ,
158 'skip_download' : True ,
162 def _real_extract ( self
, url
):
163 video_id
= self
._ match
_ id
( url
)
164 webpage
= self
._ download
_ webpage
( url
, video_id
)
166 # Single video on top of the page
167 video_link
= self
._ search
_ regex
(
168 r
'<a href="([^"]+)" onclick="return spOpenVideo\(this,' , webpage
,
169 'video page URL' , default
= None )
171 video_url
= compat_urlparse
. urljoin (
172 self
. http_scheme () + '//spiegel.de/' , video_link
)
173 return self
. url_result ( video_url
)
175 # Multiple embedded videos
177 r
'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"' ,
180 self
. url_result ( compat_urlparse
. urljoin (
181 self
. http_scheme () + '//spiegel.de/' , embed_path
))
182 for embed_path
in embeds
]
184 return self
. playlist_result ( entries
)
186 return self
. playlist_from_matches (
187 NexxEmbedIE
._ extract
_u rls
( webpage
), ie
= NexxEmbedIE
. ie_key ())