]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/spiegel.py 
 
 
 
 
 
 
 
 
fc995e8c14da760dc33c706bfadba532bd86b05d
   2  from  __future__ 
import  unicode_literals
 
   6  from  . common 
import  InfoExtractor
 
  11  from  . spiegeltv 
import  SpiegeltvIE
 
  12  from  .. compat 
import  compat_urlparse
 
  16      get_element_by_attribute
,  
  20  class  SpiegelIE ( InfoExtractor
):  
  21      _VALID_URL 
=  r
'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'  
  23          'url' :  'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html' ,  
  24          'md5' :  '2c2754212136f35fb4b19767d242f66e' ,  
  28              'title' :  'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv' ,  
  29              'description' :  'md5:8029d8310232196eb235d27575a8b9f4' ,  
  31              'upload_date' :  '20130311' ,  
  34          'url' :  'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html' ,  
  35          'md5' :  'f2cdf638d7aa47654e251e1aee360af1' ,  
  39              'title' :  'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers' ,  
  40              'description' :  'md5:c2322b65e58f385a820c10fa03b2d088' ,  
  42              'upload_date' :  '20131115' ,  
  45          'url' :  'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html' ,  
  46          'md5' :  'd8eeca6bfc8f1cd6f490eb1f44695d51' ,  
  50              'description' :  'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.' ,  
  51              'title' :  'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"' ,  
  52              'upload_date' :  '20140904' ,  
  55          'url' :  'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html' ,  
  56          'only_matching' :  True ,  
  59          'url' :  'http://www.spiegel.de/video/spiegel-tv-magazin-ueber-guellekrise-in-schleswig-holstein-video-99012776.html' ,  
  60          'only_matching' :  True ,  
  63      def  _real_extract ( self
,  url
):  
  64          video_id 
=  self
._ match
_ id
( url
)  
  65          webpage
,  handle 
=  self
._ download
_ webpage
_ handle
( url
,  video_id
)  
  67          # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html  
  68          if  SpiegeltvIE
. suitable ( handle
. geturl ()):  
  69              return  self
. url_result ( handle
. geturl (),  'Spiegeltv' )  
  71          nexx_id 
=  self
._ search
_ regex
(  
  72              r
'nexxOmniaId\s*:\s*(\d+)' ,  webpage
,  'nexx id' ,  default
= None )  
  74              domain_id 
=  NexxIE
._ extract
_ domain
_ id
( webpage
)  or  '748'  
  75              return  self
. url_result (  
  76                  'nexx: %s : %s '  % ( domain_id
,  nexx_id
),  ie
= NexxIE
. ie_key (),  
  79          video_data 
=  extract_attributes ( self
._ search
_ regex
( r
'(<div[^>]+id="spVideoElements"[^>]+>)' ,  webpage
,  'video element' ,  default
= '' ))  
  81          title 
=  video_data
. get ( 'data-video-title' )  or  get_element_by_attribute ( 'class' ,  'module-title' ,  webpage
)  
  82          description 
=  video_data
. get ( 'data-video-teaser' )  or  self
._ html
_ search
_ meta
( 'description' ,  webpage
,  'description' )  
  84          base_url 
=  self
._ search
_ regex
(  
  85              [ r
'server\s*:\s*(["\' ])( ?P
< url
>.+ ?
) \
1 ', r' var\s
+ server\s
*= \s
* "(?P<url>[^" ]+) \" '],  
  86              webpage, ' server URL
', group=' url
')  
  88          xml_url = base_url + video_id + ' . xml
'  
  89          idoc = self._download_xml(xml_url, video_id)  
  93              if n.tag.startswith(' type ') and n.tag != ' type6
':  
  94                  format_id = n.tag.rpartition(' type ')[2]  
  95                  video_url = base_url + n.find(' ./ filename
').text  
  97                      ' format_id
': format_id,  
  99                      ' width
': int(n.find(' ./ width
').text),  
 100                      ' height
': int(n.find(' ./ height
').text),  
 101                      ' abr
': int(n.find(' ./ audiobitrate
').text),  
 102                      ' vbr
': int(n.find(' ./ videobitrate
').text),  
 103                      ' vcodec
': n.find(' ./ codec
').text,  
 106          duration = float(idoc[0].findall(' ./ duration
')[0].text)  
 108          self._check_formats(formats, video_id)  
 109          self._sort_formats(formats)  
 114              ' description
': description.strip() if description else None,  
 115              ' duration
': duration,  
 116              ' upload_date
': unified_strdate(video_data.get(' data
- video
- date
')),  
 121  class SpiegelArticleIE(InfoExtractor):  
 122      _VALID_URL = r' https?
://( ?
: www\
.) ?spiegel\
. de
/( ?
! video
/)[ ^?
#]*?-(?P<id>[0-9]+)\.html'  
 123      IE_NAME 
=  'Spiegel:Article'  
 124      IE_DESC 
=  'Articles on spiegel.de'  
 126          'url' :  'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html' ,  
 130              'title' :  'Faszination Badminton: Nennt es bloß nicht Federball' ,  
 131              'description' :  're:^Patrick Kämnitz gehört.{100,}' ,  
 132              'upload_date' :  '20140825' ,  
 135          'url' :  'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html' ,  
 142          'url' :  'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html' ,  
 146              'title' :  'Nervenkitzel Achterbahn' ,  
 147              'alt_title' :  'Karussellbauer in Deutschland' ,  
 148              'description' :  'md5:ffe7b1cc59a01f585e0569949aef73cc' ,  
 149              'release_year' :  2005 ,  
 150              'creator' :  'SPIEGEL TV' ,  
 151              'thumbnail' :  r
're:^https?://.*\.jpg$' ,  
 153              'timestamp' :  1394021479 ,  
 154              'upload_date' :  '20140305' ,  
 157              'format' :  'bestvideo' ,  
 158              'skip_download' :  True ,  
 162      def  _real_extract ( self
,  url
):  
 163          video_id 
=  self
._ match
_ id
( url
)  
 164          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  
 166          # Single video on top of the page  
 167          video_link 
=  self
._ search
_ regex
(  
 168              r
'<a href="([^"]+)" onclick="return spOpenVideo\(this,' ,  webpage
,  
 169              'video page URL' ,  default
= None )  
 171              video_url 
=  compat_urlparse
. urljoin (  
 172                  self
. http_scheme () +  '//spiegel.de/' ,  video_link
)  
 173              return  self
. url_result ( video_url
)  
 175          # Multiple embedded videos  
 177              r
'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"' ,  
 180              self
. url_result ( compat_urlparse
. urljoin (  
 181                  self
. http_scheme () +  '//spiegel.de/' ,  embed_path
))  
 182              for  embed_path 
in  embeds
]  
 184              return  self
. playlist_result ( entries
)  
 186          return  self
. playlist_from_matches (  
 187              NexxEmbedIE
._ extract
_u rls
( webpage
),  ie
= NexxEmbedIE
. ie_key ())