]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/orf.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  15 class ORFIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)' 
  19         'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747', 
  20         'file': '7319747.mp4', 
  21         'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375', 
  23             'title': 'Was Sie schon immer über Klassik wissen wollten', 
  24             'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4', 
  26             'upload_date': '20140105', 
  28         'skip': 'Blocked outside of Austria', 
  31     def _real_extract(self
, url
): 
  32         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  33         playlist_id 
= mobj
.group('id') 
  34         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
  36         data_json 
= self
._search
_regex
( 
  37             r
'initializeAdworx\((.+?)\);\n', webpage
, 'video info') 
  38         all_data 
= json
.loads(data_json
) 
  def get_segments(all_data):
      """Return the segment list from the episode-detail tracker entry.

      Iterates over *all_data* (the decoded argument of the page's
      ``initializeAdworx(...)`` call) and returns ``values.segments`` of
      the first entry whose ``name`` is
      ``Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM``.

      Returns ``None`` when no entry matches, so the caller can decide
      how to report the failure.
      """
      for data in all_data:
          # Entries without a 'name' key are skipped instead of raising.
          if data.get('name') == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM':
              return data['values']['segments']
      return None
  45         sdata 
= get_segments(all_data
) 
  47             raise ExtractorError('Unable to extract segments') 
  def quality_to_int(s):
      """Convert a quality label to an integer rank.

      Returns the first run of decimal digits found in *s* as an int,
      or -1 when *s* contains no digits.
      """
      m = re.search(r'([0-9]+)', s)
      if m is None:
          # No numeric part: return -1 rather than failing on m.group().
          # NOTE(review): -1 is presumably the "default/unknown" quality
          # value for format sorting -- confirm against InfoExtractor docs.
          return -1
      return int(m.group(1))
  59                 'preference': -10 if fd
['delivery'] == 'hls' else None, 
  60                 'format_id': '%s-%s-%s' % ( 
  61                     fd
['delivery'], fd
['quality'], fd
['quality_string']), 
  63                 'protocol': fd
['protocol'], 
  64                 'quality': quality_to_int(fd
['quality']), 
  65             } for fd 
in sd
['playlist_item_array']['sources']] 
  67             # Check for geoblocking. 
  68             # There is a property is_geoprotection, but that's always false 
  69             geo_str 
= sd
.get('geoprotection_string') 
  75                         if re
.match(r
'^https?://.*\.mp4$', f
['url'])) 
  79                     req 
= HEADRequest(http_url
) 
  80                     self
._request
_webpage
( 
  82                         note
='Testing for geoblocking', 
  84                             'This video seems to be blocked outside of %s. ' 
  85                             'You may want to try the streaming-* formats.') 
  89             self
._sort
_formats
(formats
) 
  91             upload_date 
= unified_strdate(sd
['created_date']) 
  95                 'title': sd
['header'], 
  97                 'description': sd
.get('description'), 
  98                 'duration': int(sd
['duration_in_seconds']), 
  99                 'upload_date': upload_date
, 
 100                 'thumbnail': sd
.get('image_full_url'),