]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/orf.py
5f5694393765104b45b573c53155d447a45b1e50
2 from __future__
import unicode_literals
7 from .common
import InfoExtractor
14 class ORFIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://tvthek\.orf\.at/(?:programs/.+?/episodes|topics/.+?|program/[^/]+)/(?P<id>\d+)'
18 'url': 'http://tvthek.orf.at/program/matinee-Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7317210/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319746/Was-Sie-schon-immer-ueber-Klassik-wissen-wollten/7319747',
19 'file': '7319747.mp4',
20 'md5': 'bd803c5d8c32d3c64a0ea4b4eeddf375',
22 'title': 'Was Sie schon immer über Klassik wissen wollten',
23 'description': 'md5:0ddf0d5f0060bd53f744edaa5c2e04a4',
25 'upload_date': '20140105',
27 'skip': 'Blocked outside of Austria',
30 def _real_extract(self
, url
):
31 mobj
= re
.match(self
._VALID
_URL
, url
)
32 playlist_id
= mobj
.group('id')
33 webpage
= self
._download
_webpage
(url
, playlist_id
)
35 data_json
= self
._search
_regex
(
36 r
'initializeAdworx\((.+?)\);\n', webpage
, 'video info')
37 all_data
= json
.loads(data_json
)
38 sdata
= all_data
[0]['values']['segments']
40 def quality_to_int(s
):
41 m
= re
.search('([0-9]+)', s
)
44 return int(m
.group(1))
50 'preference': -10 if fd
['delivery'] == 'hls' else None,
51 'format_id': '%s-%s-%s' % (
52 fd
['delivery'], fd
['quality'], fd
['quality_string']),
54 'protocol': fd
['protocol'],
55 'quality': quality_to_int(fd
['quality']),
56 } for fd
in sd
['playlist_item_array']['sources']]
58 # Check for geoblocking.
59 # There is a property is_geoprotection, but that's always false
60 geo_str
= sd
.get('geoprotection_string')
66 if re
.match(r
'^https?://.*\.mp4$', f
['url']))
70 req
= HEADRequest(http_url
)
71 self
._request
_webpage
(
73 note
='Testing for geoblocking',
75 'This video seems to be blocked outside of %s. '
76 'You may want to try the streaming-* formats.')
80 self
._sort
_formats
(formats
)
82 upload_date
= unified_strdate(sd
['created_date'])
86 'title': sd
['header'],
88 'description': sd
.get('description'),
89 'duration': int(sd
['duration_in_seconds']),
90 'upload_date': upload_date
,
91 'thumbnail': sd
.get('image_full_url'),