]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/orf.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   9  from  . common 
import  InfoExtractor
 
  22  class  ORFTVthekIE ( InfoExtractor
):  
  23      IE_NAME 
=  'orf:tvthek'  
  24      IE_DESC 
=  'ORF TVthek'  
  25      _VALID_URL 
=  r
'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'  
  28          'url' :  'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389' ,  
  30              'md5' :  '2942210346ed779588f428a92db88712' ,  
  34                  'title' :  'Aufgetischt: Mit der Steirischen Tafelrunde' ,  
  35                  'description' :  'md5:c1272f0245537812d4e36419c207b67d' ,  
  37                  'upload_date' :  '20141208' ,  
  40          'skip' :  'Blocked outside of Austria / Germany' ,  
  42          'url' :  'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256' ,  
  44              'md5' :  '68f543909aea49d621dfc7703a11cfaf' ,  
  48                  'title' :  'Best of Ingrid Thurnher' ,  
  49                  'upload_date' :  '20140527' ,  
  50                  'description' :  'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".' ,  
  53          '_skip' :  'Blocked outside of Austria / Germany' ,  
  56      def  _real_extract ( self
,  url
):  
  57          playlist_id 
=  self
._ match
_ id
( url
)  
  58          webpage 
=  self
._ download
_ webpage
( url
,  playlist_id
)  
  60          data_json 
=  self
._ search
_ regex
(  
  61              r
'initializeAdworx\((.+?)\);\n' ,  webpage
,  'video info' )  
  62          all_data 
=  json
. loads ( data_json
)  
  64          def  get_segments ( all_data
):  
  67                          'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM' ,  
  68                          'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC' ):  
  69                      return  data
[ 'values' ][ 'segments' ]  
  71          sdata 
=  get_segments ( all_data
)  
  73              raise  ExtractorError ( 'Unable to extract segments' )  
  75          def  quality_to_int ( s
):  
  76              m 
=  re
. search ( '([0-9]+)' ,  s
)  
  79              return  int ( m
. group ( 1 ))  
  85                  'preference' : - 10  if  fd
[ 'delivery' ] ==  'hls'  else None ,  
  86                  'format_id' :  ' %s-%s-%s '  % (  
  87                      fd
[ 'delivery' ],  fd
[ 'quality' ],  fd
[ 'quality_string' ]),  
  89                  'protocol' :  fd
[ 'protocol' ],  
  90                  'quality' :  quality_to_int ( fd
[ 'quality' ]),  
  91              }  for  fd 
in  sd
[ 'playlist_item_array' ][ 'sources' ]]  
  93              # Check for geoblocking.  
  94              # There is a property is_geoprotection, but that's always false  
  95              geo_str 
=  sd
. get ( 'geoprotection_string' )  
 101                          if  re
. match ( r
'^https?://.*\.mp4$' ,  f
[ 'url' ]))  
 102                  except  StopIteration :  
 105                      req 
=  HEADRequest ( http_url
)  
 106                      self
._ request
_ webpage
(  
 108                          note
= 'Testing for geoblocking' ,  
 110                              'This video seems to be blocked outside of  %s . '  
 111                              'You may want to try the streaming-* formats.' )  
 115              self
._ sort
_ formats
( formats
)  
 117              upload_date 
=  unified_strdate ( sd
[ 'created_date' ])  
 121                  'title' :  sd
[ 'header' ],  
 123                  'description' :  sd
. get ( 'description' ),  
 124                  'duration' :  int ( sd
[ 'duration_in_seconds' ]),  
 125                  'upload_date' :  upload_date
,  
 126                  'thumbnail' :  sd
. get ( 'image_full_url' ),  
 136  class  ORFOE1IE ( InfoExtractor
):  
 138      IE_DESC 
=  'Radio Österreich 1'  
 139      _VALID_URL 
=  r
'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'  
 141      # Audios on ORF radio are only available for 7 days, so we can't add tests.  
 143          'url' :  'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211' ,  
 144          'only_matching' :  True ,  
 147      def  _real_extract ( self
,  url
):  
 148          show_id 
=  self
._ match
_ id
( url
)  
 149          data 
=  self
._ download
_ json
(  
 150              'http://oe1.orf.at/programm/ %s /konsole'  %  show_id
,  
 154          timestamp 
=  datetime
. datetime
. strptime ( ' %s %s '  % (  
 155              data
[ 'item' ][ 'day_label' ],  
 158          unix_timestamp 
=  calendar
. timegm ( timestamp
. utctimetuple ())  
 162              'title' :  data
[ 'item' ][ 'title' ],  
 163              'url' :  data
[ 'item' ][ 'url_stream' ],  
 165              'description' :  data
[ 'item' ]. get ( 'info' ),  
 166              'timestamp' :  unix_timestamp
 
 170  class  ORFFM4IE ( InfoExtractor
):  
 172      IE_DESC 
=  'radio FM4'  
 173      _VALID_URL 
=  r
'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)'  
 175      def  _real_extract ( self
,  url
):  
 176          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 177          show_date 
=  mobj
. group ( 'date' )  
 178          show_id 
=  mobj
. group ( 'show' )  
 180          data 
=  self
._ download
_ json
(  
 181              'http://audioapi.orf.at/fm4/json/2.0/broadcasts/ %s /4 %s '  % ( show_date
,  show_id
),  
 185          def  extract_entry_dict ( info
,  title
,  subtitle
):  
 187                  'id' :  info
[ 'loopStreamId' ]. replace ( '.mp3' ,  '' ),  
 188                  'url' :  'http://loopstream01.apa.at/?channel=fm4&id= %s '  %  info
[ 'loopStreamId' ],  
 190                  'description' :  subtitle
,  
 191                  'duration' : ( info
[ 'end' ] -  info
[ 'start' ]) /  1000 ,  
 192                  'timestamp' :  info
[ 'start' ] /  1000 ,  
 196          entries 
= [ extract_entry_dict ( t
,  data
[ 'title' ],  data
[ 'subtitle' ])  for  t 
in  data
[ 'streams' ]]  
 201              'title' :  data
[ 'title' ],  
 202              'description' :  data
[ 'subtitle' ],  
 207  class  ORFIPTVIE ( InfoExtractor
):  
 209      IE_DESC 
=  'iptv.ORF.at'  
 210      _VALID_URL 
=  r
'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'  
 213          'url' :  'http://iptv.orf.at/stories/2275236/' ,  
 214          'md5' :  'c8b22af4718a4b4af58342529453e3e5' ,  
 218              'title' :  'Weitere Evakuierungen um Vulkan Calbuco' ,  
 219              'description' :  'md5:d689c959bdbcf04efeddedbf2299d633' ,  
 221              'thumbnail' :  're:^https?://.*\.jpg$' ,  
 222              'upload_date' :  '20150425' ,  
 226      def  _real_extract ( self
,  url
):  
 227          story_id 
=  self
._ match
_ id
( url
)  
 229          webpage 
=  self
._ download
_ webpage
(  
 230              'http://iptv.orf.at/stories/ %s '  %  story_id
,  story_id
)  
 232          video_id 
=  self
._ search
_ regex
(  
 233              r
'data-video(?:id)?="(\d+)"' ,  webpage
,  'video id' )  
 235          data 
=  self
._ download
_ json
(  
 236              'http://bits.orf.at/filehandler/static-api/json/current/data.json?file= %s '  %  video_id
,  
 239          duration 
=  float_or_none ( data
[ 'duration' ],  1000 )  
 241          video 
=  data
[ 'sources' ][ 'default' ]  
 242          load_balancer_url 
=  video
[ 'loadBalancerUrl' ]  
 243          abr 
=  int_or_none ( video
. get ( 'audioBitrate' ))  
 244          vbr 
=  int_or_none ( video
. get ( 'bitrate' ))  
 245          fps 
=  int_or_none ( video
. get ( 'videoFps' ))  
 246          width 
=  int_or_none ( video
. get ( 'videoWidth' ))  
 247          height 
=  int_or_none ( video
. get ( 'videoHeight' ))  
 248          thumbnail 
=  video
. get ( 'preview' )  
 250          rendition 
=  self
._ download
_ json
(  
 251              load_balancer_url
,  video_id
,  transform_source
= strip_jsonp
)  
 262          for  format_id
,  format_url 
in  rendition
[ 'redirect' ]. items ():  
 263              if  format_id 
==  'rtmp' :  
 267                      'format_id' :  format_id
,  
 270              elif  determine_ext ( format_url
) ==  'f4m' :  
 271                  formats
. extend ( self
._ extract
_ f
4 m
_ formats
(  
 272                      format_url
,  video_id
,  f4m_id
= format_id
))  
 273              elif  determine_ext ( format_url
) ==  'm3u8' :  
 274                  formats
. extend ( self
._ extract
_ m
3u8_ formats
(  
 275                      format_url
,  video_id
,  'mp4' ,  m3u8_id
= format_id
))  
 278          self
._ sort
_ formats
( formats
)  
 280          title 
=  remove_end ( self
._ og
_ search
_ title
( webpage
),  ' - iptv.ORF.at' )  
 281          description 
=  self
._ og
_ search
_ description
( webpage
)  
 282          upload_date 
=  unified_strdate ( self
._ html
_ search
_ meta
(  
 283              'dc.date' ,  webpage
,  'upload date' ))  
 288              'description' :  description
,  
 289              'duration' :  duration
,  
 290              'thumbnail' :  thumbnail
,  
 291              'upload_date' :  upload_date
,