]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/spiegel.py 
   2  from  __future__ 
import  unicode_literals
   6  from  . common 
import  InfoExtractor
  15  from  . spiegeltv 
import  SpiegeltvIE
  18  class  SpiegelIE ( InfoExtractor
):   19      _VALID_URL 
=  r
'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'   21          'url' :  'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html' ,   22          'md5' :  '2c2754212136f35fb4b19767d242f66e' ,   26              'title' :  'Vulkanausbruch in Ecuador: Der "Feuerschlund" ist wieder aktiv' ,   27              'description' :  'md5:8029d8310232196eb235d27575a8b9f4' ,   31          'url' :  'http://www.spiegel.de/video/schach-wm-videoanalyse-des-fuenften-spiels-video-1309159.html' ,   32          'md5' :  'f2cdf638d7aa47654e251e1aee360af1' ,   36              'title' :  'Schach-WM in der Videoanalyse: Carlsen nutzt die Fehlgriffe des Titelverteidigers' ,   37              'description' :  'md5:c2322b65e58f385a820c10fa03b2d088' ,   41          'url' :  'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-embed.html' ,   42          'md5' :  'd8eeca6bfc8f1cd6f490eb1f44695d51' ,   46              'description' :  'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.' ,   47              'title' :  'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"' ,   51      def  _real_extract ( self
,  url
):   52          video_id 
=  self
._ match
_ id
( url
)   53          webpage
,  handle 
=  self
._ download
_ webpage
_ handle
( url
,  video_id
)   55          # 302 to spiegel.tv, like http://www.spiegel.de/video/der-film-zum-wochenende-die-wahrheit-ueber-maenner-video-99003272.html   56          if  SpiegeltvIE
. suitable ( handle
. geturl ()):   57              return  self
. url_result ( handle
. geturl (),  'Spiegeltv' )   59          title 
=  re
. sub ( r
'\s+' ,  ' ' ,  self
._ html
_ search
_ regex
(   60              r
'(?s)<(?:h1|div) class="module-title"[^>]*>(.*?)</(?:h1|div)>' ,   62          description 
=  self
._ html
_ search
_ meta
( 'description' ,  webpage
,  'description' )   64          base_url 
=  self
._ search
_ regex
(   65              r
'var\s+server\s*=\s*"([^"]+)\"' ,  webpage
,  'server URL' )   67          xml_url 
=  base_url 
+  video_id 
+  '.xml'   68          idoc 
=  self
._ download
_ xml
( xml_url
,  video_id
)   72              if  n
. tag
. startswith ( 'type' )  and  n
. tag 
!=  'type6' :   73                  format_id 
=  n
. tag
. rpartition ( 'type' )[ 2 ]   74                  video_url 
=  base_url 
+  n
. find ( './filename' ). text
  75                  # Test video URLs beforehand as some of them are invalid   77                      self
._ request
_ webpage
(   78                          HEADRequest ( video_url
),  video_id
,   79                          'Checking  %s  video URL'  %  format_id
)   80                  except  ExtractorError 
as  e
:   81                      if  isinstance ( e
. cause
,  compat_HTTPError
)  and  e
. cause
. code 
==  404 :   83                              ' %s  video URL is invalid, skipping'  %  format_id
,  video_id
)   86                      'format_id' :  format_id
,   88                      'width' :  int ( n
. find ( './width' ). text
),   89                      'height' :  int ( n
. find ( './height' ). text
),   90                      'abr' :  int ( n
. find ( './audiobitrate' ). text
),   91                      'vbr' :  int ( n
. find ( './videobitrate' ). text
),   92                      'vcodec' :  n
. find ( './codec' ). text
,   95          duration 
=  float ( idoc
[ 0 ]. findall ( './duration' )[ 0 ]. text
)   97          self
._ sort
_ formats
( formats
)  102              'description' :  description
,  103              'duration' :  duration
,  108  class  SpiegelArticleIE ( InfoExtractor
):  109      _VALID_URL 
=  'https?://www\.spiegel\.de/(?!video/)[^?#]*?-(?P<id>[0-9]+)\.html'  110      IE_NAME 
=  'Spiegel:Article'  111      IE_DESC 
=  'Articles on spiegel.de'  113          'url' :  'http://www.spiegel.de/sport/sonst/badminton-wm-die-randsportart-soll-populaerer-werden-a-987092.html' ,  117              'title' :  'Faszination Badminton: Nennt es bloß nicht Federball' ,  118              'description' :  're:^Patrick Kämnitz gehört.{100,}' ,  121          'url' :  'http://www.spiegel.de/wissenschaft/weltall/astronaut-alexander-gerst-antwortet-spiegel-online-lesern-a-989876.html' ,  128      def  _real_extract ( self
,  url
):  129          video_id 
=  self
._ match
_ id
( url
)  130          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  132          # Single video on top of the page  133          video_link 
=  self
._ search
_ regex
(  134              r
'<a href="([^"]+)" onclick="return spOpenVideo\(this,' ,  webpage
,  135              'video page URL' ,  default
= None )  137              video_url 
=  compat_urlparse
. urljoin (  138                  self
. http_scheme () +  '//spiegel.de/' ,  video_link
)  139              return  self
. url_result ( video_url
)  141          # Multiple embedded videos  143              r
'<div class="vid_holder[0-9]+.*?</div>\s*.*?url\s*=\s*"([^"]+)"' ,  146              self
. url_result ( compat_urlparse
. urljoin (  147                  self
. http_scheme () +  '//spiegel.de/' ,  embed_path
))  148              for  embed_path 
in  embeds
 150          return  self
. playlist_result ( entries
)