]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/orf.py
572a234ad8c2514e5704d936fb98a19035662f40
   2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  17 class ORFTVthekIE(InfoExtractor
): 
  18     IE_NAME 
= 'orf:tvthek' 
  19     IE_DESC 
= 'ORF TVthek' 
  20     _VALID_URL 
= r
'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)' 
  23         'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747', 
  24         'file': '7319747.mp4', 
  25         'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375', 
  27             'title': 'Was Sie schon immer über Klassik wissen wollten', 
  28             'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4', 
  30             'upload_date': '20140105', 
  32         'skip': 'Blocked outside of Austria', 
  35     def _real_extract(self
, url
): 
  36         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  37         playlist_id 
= mobj
.group('id') 
  38         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
  40         data_json 
= self
._search
_regex
( 
  41             r
'initializeAdworx\((.+?)\);\n', webpage
, 'video info') 
  42         all_data 
= json
.loads(data_json
) 
  44         def get_segments(all_data
): 
  46                 if data
['name'] == 'Tracker::EPISODE_DETAIL_PAGE_OVER_PROGRAM': 
  47                     return data
['values']['segments'] 
  49         sdata 
= get_segments(all_data
) 
  51             raise ExtractorError('Unable to extract segments') 
  53         def quality_to_int(s
): 
  54             m 
= re
.search('([0-9]+)', s
) 
  57             return int(m
.group(1)) 
  63                 'preference': -10 if fd
['delivery'] == 'hls' else None, 
  64                 'format_id': '%s-%s-%s' % ( 
  65                     fd
['delivery'], fd
['quality'], fd
['quality_string']), 
  67                 'protocol': fd
['protocol'], 
  68                 'quality': quality_to_int(fd
['quality']), 
  69             } for fd 
in sd
['playlist_item_array']['sources']] 
  71             # Check for geoblocking. 
  72             # There is a property is_geoprotection, but that's always false 
  73             geo_str 
= sd
.get('geoprotection_string') 
  79                         if re
.match(r
'^https?://.*\.mp4$', f
['url'])) 
  83                     req 
= HEADRequest(http_url
) 
  84                     self
._request
_webpage
( 
  86                         note
='Testing for geoblocking', 
  88                             'This video seems to be blocked outside of %s. ' 
  89                             'You may want to try the streaming-* formats.') 
  93             self
._sort
_formats
(formats
) 
  95             upload_date 
= unified_strdate(sd
['created_date']) 
  99                 'title': sd
['header'], 
 101                 'description': sd
.get('description'), 
 102                 'duration': int(sd
['duration_in_seconds']), 
 103                 'upload_date': upload_date
, 
 104                 'thumbnail': sd
.get('image_full_url'), 
 114 # Audios on ORF radio are only available for 7 days, so we can't add tests. 
 117 class ORFOE1IE(InfoExtractor
): 
 119     IE_DESC 
= 'Radio Österreich 1' 
 120     _VALID_URL 
= r
'http://oe1\.orf\.at/programm/(?P<id>[0-9]+)' 
 122     def _real_extract(self
, url
): 
 123         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 124         show_id 
= mobj
.group('id') 
 126         data 
= self
._download
_json
( 
 127             'http://oe1.orf.at/programm/%s/konsole' % show_id
, 
 131         timestamp 
= datetime
.datetime
.strptime('%s %s' % ( 
 132             data
['item']['day_label'], 
 135         unix_timestamp 
= calendar
.timegm(timestamp
.utctimetuple()) 
 139             'title': data
['item']['title'], 
 140             'url': data
['item']['url_stream'], 
 142             'description': data
['item'].get('info'), 
 143             'timestamp': unix_timestamp
 
 147 class ORFFM4IE(InfoExtractor
): 
 149     IE_DESC 
= 'radio FM4' 
 150     _VALID_URL 
= r
'http://fm4\.orf\.at/7tage/?#(?P<date>[0-9]+)/(?P<show>\w+)' 
 152     def _real_extract(self
, url
): 
 153         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 154         show_date 
= mobj
.group('date') 
 155         show_id 
= mobj
.group('show') 
 157         data 
= self
._download
_json
( 
 158             'http://audioapi.orf.at/fm4/json/2.0/broadcasts/%s/4%s' % (show_date
, show_id
), 
 162         def extract_entry_dict(info
, title
, subtitle
): 
 164                 'id': info
['loopStreamId'].replace('.mp3', ''), 
 165                 'url': 'http://loopstream01.apa.at/?channel=fm4&id=%s' % info
['loopStreamId'], 
 167                 'description': subtitle
, 
 168                 'duration': (info
['end'] - info
['start']) / 1000, 
 169                 'timestamp': info
['start'] / 1000, 
 173         entries 
= [extract_entry_dict(t
, data
['title'], data
['subtitle']) for t 
in data
['streams']] 
 178             'title': data
['title'], 
 179             'description': data
['subtitle'],