]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/orf.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   9  from  . common 
import  InfoExtractor
 
  22  class  ORFTVthekIE ( InfoExtractor
):  
  23      IE_NAME 
=  'orf:tvthek'  
  24      IE_DESC 
=  'ORF TVthek'  
  25      _VALID_URL 
=  r
'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics?/.+?|program/[^/]+)/(?P<id>\d+)'  
  28          'url' :  'http://tvthek.orf.at/program/Aufgetischt/2745173/Aufgetischt-Mit-der-Steirischen-Tafelrunde/8891389' ,  
  30              'md5' :  '2942210346ed779588f428a92db88712' ,  
  34                  'title' :  'Aufgetischt: Mit der Steirischen Tafelrunde' ,  
  35                  'description' :  'md5:c1272f0245537812d4e36419c207b67d' ,  
  37                  'upload_date' :  '20141208' ,  
  40          'skip' :  'Blocked outside of Austria / Germany' ,  
  42          'url' :  'http://tvthek.orf.at/topic/Im-Wandel-der-Zeit/8002126/Best-of-Ingrid-Thurnher/7982256' ,  
  44              'md5' :  '68f543909aea49d621dfc7703a11cfaf' ,  
  48                  'title' :  'Best of Ingrid Thurnher' ,  
  49                  'upload_date' :  '20140527' ,  
  50                  'description' :  'Viele Jahre war Ingrid Thurnher das "Gesicht" der ZIB 2. Vor ihrem Wechsel zur ZIB 2 im jahr 1995 moderierte sie unter anderem "Land und Leute", "Österreich-Bild" und "Niederösterreich heute".' ,  
  53          '_skip' :  'Blocked outside of Austria / Germany' ,  
  56      def  _real_extract ( self
,  url
):  
  57          playlist_id 
=  self
._ match
_ id
( url
)  
  58          webpage 
=  self
._ download
_ webpage
( url
,  playlist_id
)  
  60          data_json 
=  self
._ search
_ regex
(  
  61              r
'initializeAdworx\((.+?)\);\n' ,  webpage
,  'video info' )  
  62          all_data 
=  json
. loads ( data_json
)  
  64          def  get_segments ( all_data
):  
  67                          'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM' ,  
  68                          'Tracker::EPISODE_DETAIL_PAGE_OVER_TOPIC' ):  
  69                      return  data
[ 'values' ][ 'segments' ]  
  71          sdata 
=  get_segments ( all_data
)  
  73              raise  ExtractorError ( 'Unable to extract segments' )  
  75          def  quality_to_int ( s
):  
  76              m 
=  re
. search ( '([0-9]+)' ,  s
)  
  79              return  int ( m
. group ( 1 ))  
  85                  'preference' : - 10  if  fd
[ 'delivery' ] ==  'hls'  else None ,  
  86                  'format_id' :  ' %s-%s-%s '  % (  
  87                      fd
[ 'delivery' ],  fd
[ 'quality' ],  fd
[ 'quality_string' ]),  
  89                  'protocol' :  fd
[ 'protocol' ],  
  90                  'quality' :  quality_to_int ( fd
[ 'quality' ]),  
  91              }  for  fd 
in  sd
[ 'playlist_item_array' ][ 'sources' ]]  
  93              # Check for geoblocking.  
  94              # There is a property is_geoprotection, but that's always false  
  95              geo_str 
=  sd
. get ( 'geoprotection_string' )  
 101                          if  re
. match ( r
'^https?://.*\.mp4$' ,  f
[ 'url' ]))  
 102                  except  StopIteration :  
 105                      req 
=  HEADRequest ( http_url
)  
 106                      self
._ request
_ webpage
(  
 108                          note
= 'Testing for geoblocking' ,  
 110                              'This video seems to be blocked outside of  %s . '  
 111                              'You may want to try the streaming-* formats.' )  
 115              self
._ check
_ formats
( formats
,  video_id
)  
 116              self
._ sort
_ formats
( formats
)  
 118              upload_date 
=  unified_strdate ( sd
[ 'created_date' ])  
 122                  'title' :  sd
[ 'header' ],  
 124                  'description' :  sd
. get ( 'description' ),  
 125                  'duration' :  int ( sd
[ 'duration_in_seconds' ]),  
 126                  'upload_date' :  upload_date
,  
 127                  'thumbnail' :  sd
. get ( 'image_full_url' ),  
 137  class  ORFOE1IE ( InfoExtractor
):  
 139      IE_DESC 
=  'Radio Österreich 1'  
 140      _VALID_URL 
=  r
'http://oe1\.orf\.at/(?:programm/|konsole.*?#\?track_id=)(?P<id>[0-9]+)'  
 142      # Audios on ORF radio are only available for 7 days, so we can't add tests.  
 144          'url' :  'http://oe1.orf.at/konsole?show=on_demand#?track_id=394211' ,  
 145          'only_matching' :  True ,  
 148      def  _real_extract ( self
,  url
):  
 149          show_id 
=  self
._ match
_ id
( url
)  
 150          data 
=  self
._ download
_ json
(  
 151              'http://oe1.orf.at/programm/ %s /konsole'  %  show_id
,  
 155          timestamp 
=  datetime
. datetime
. strptime ( ' %s %s '  % (  
 156              data
[ 'item' ][ 'day_label' ],  
 159          unix_timestamp 
=  calendar
. timegm ( timestamp
. utctimetuple ())  
 163              'title' :  data
[ 'item' ][ 'title' ],  
 164              'url' :  data
[ 'item' ][ 'url_stream' ],  
 166              'description' :  data
[ 'item' ]. get ( 'info' ),  
 167              'timestamp' :  unix_timestamp
 
 171  class  ORFFM4IE ( InfoExtractor
):  
 173      IE_DESC 
=  'radio FM4'  
 174      _VALID_URL 
=  r
'http://fm4\.orf\.at/(?:7tage/?#|player/)(?P<date>[0-9]+)/(?P<show>\w+)'  
 177          'url' :  'http://fm4.orf.at/player/20160110/IS/' ,  
 178          'md5' :  '01e736e8f1cef7e13246e880a59ad298' ,  
 180              'id' :  '2016-01-10_2100_tl_54_7DaysSun13_11244' ,  
 183              'description' :  'md5:384c543f866c4e422a55f66a62d669cd' ,  
 185              'timestamp' :  1452456073 ,  
 186              'upload_date' :  '20160110' ,  
 190      def  _real_extract ( self
,  url
):  
 191          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 192          show_date 
=  mobj
. group ( 'date' )  
 193          show_id 
=  mobj
. group ( 'show' )  
 195          data 
=  self
._ download
_ json
(  
 196              'http://audioapi.orf.at/fm4/json/2.0/broadcasts/ %s /4 %s '  % ( show_date
,  show_id
),  
 200          def  extract_entry_dict ( info
,  title
,  subtitle
):  
 202                  'id' :  info
[ 'loopStreamId' ]. replace ( '.mp3' ,  '' ),  
 203                  'url' :  'http://loopstream01.apa.at/?channel=fm4&id= %s '  %  info
[ 'loopStreamId' ],  
 205                  'description' :  subtitle
,  
 206                  'duration' : ( info
[ 'end' ] -  info
[ 'start' ]) /  1000 ,  
 207                  'timestamp' :  info
[ 'start' ] /  1000 ,  
 211          entries 
= [ extract_entry_dict ( t
,  data
[ 'title' ],  data
[ 'subtitle' ])  for  t 
in  data
[ 'streams' ]]  
 216              'title' :  data
[ 'title' ],  
 217              'description' :  data
[ 'subtitle' ],  
 222  class  ORFIPTVIE ( InfoExtractor
):  
 224      IE_DESC 
=  'iptv.ORF.at'  
 225      _VALID_URL 
=  r
'http://iptv\.orf\.at/(?:#/)?stories/(?P<id>\d+)'  
 228          'url' :  'http://iptv.orf.at/stories/2275236/' ,  
 229          'md5' :  'c8b22af4718a4b4af58342529453e3e5' ,  
 233              'title' :  'Weitere Evakuierungen um Vulkan Calbuco' ,  
 234              'description' :  'md5:d689c959bdbcf04efeddedbf2299d633' ,  
 236              'thumbnail' :  're:^https?://.*\.jpg$' ,  
 237              'upload_date' :  '20150425' ,  
 241      def  _real_extract ( self
,  url
):  
 242          story_id 
=  self
._ match
_ id
( url
)  
 244          webpage 
=  self
._ download
_ webpage
(  
 245              'http://iptv.orf.at/stories/ %s '  %  story_id
,  story_id
)  
 247          video_id 
=  self
._ search
_ regex
(  
 248              r
'data-video(?:id)?="(\d+)"' ,  webpage
,  'video id' )  
 250          data 
=  self
._ download
_ json
(  
 251              'http://bits.orf.at/filehandler/static-api/json/current/data.json?file= %s '  %  video_id
,  
 254          duration 
=  float_or_none ( data
[ 'duration' ],  1000 )  
 256          video 
=  data
[ 'sources' ][ 'default' ]  
 257          load_balancer_url 
=  video
[ 'loadBalancerUrl' ]  
 258          abr 
=  int_or_none ( video
. get ( 'audioBitrate' ))  
 259          vbr 
=  int_or_none ( video
. get ( 'bitrate' ))  
 260          fps 
=  int_or_none ( video
. get ( 'videoFps' ))  
 261          width 
=  int_or_none ( video
. get ( 'videoWidth' ))  
 262          height 
=  int_or_none ( video
. get ( 'videoHeight' ))  
 263          thumbnail 
=  video
. get ( 'preview' )  
 265          rendition 
=  self
._ download
_ json
(  
 266              load_balancer_url
,  video_id
,  transform_source
= strip_jsonp
)  
 277          for  format_id
,  format_url 
in  rendition
[ 'redirect' ]. items ():  
 278              if  format_id 
==  'rtmp' :  
 282                      'format_id' :  format_id
,  
 285              elif  determine_ext ( format_url
) ==  'f4m' :  
 286                  formats
. extend ( self
._ extract
_ f
4 m
_ formats
(  
 287                      format_url
,  video_id
,  f4m_id
= format_id
))  
 288              elif  determine_ext ( format_url
) ==  'm3u8' :  
 289                  formats
. extend ( self
._ extract
_ m
3u8_ formats
(  
 290                      format_url
,  video_id
,  'mp4' ,  m3u8_id
= format_id
))  
 293          self
._ sort
_ formats
( formats
)  
 295          title 
=  remove_end ( self
._ og
_ search
_ title
( webpage
),  ' - iptv.ORF.at' )  
 296          description 
=  self
._ og
_ search
_ description
( webpage
)  
 297          upload_date 
=  unified_strdate ( self
._ html
_ search
_ meta
(  
 298              'dc.date' ,  webpage
,  'upload date' ))  
 303              'description' :  description
,  
 304              'duration' :  duration
,  
 305              'thumbnail' :  thumbnail
,  
 306              'upload_date' :  upload_date
,