1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
28 class RaiBaseIE(InfoExtractor
):
29 _UUID_RE
= r
'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
30 _GEO_COUNTRIES
= ['IT']
33 def _extract_relinker_info(self
, relinker_url
, video_id
):
39 for platform
in ('mon', 'flash', 'native'):
40 relinker
= self
._download
_xml
(
41 relinker_url
, video_id
,
42 note
='Downloading XML metadata for platform %s' % platform
,
43 transform_source
=fix_xml_ampersands
,
44 query
={'output': 45, 'pl': platform
},
45 headers
=self
.geo_verification_headers())
48 geoprotection
= xpath_text(
49 relinker
, './geoprotection', default
=None) == 'Y'
53 relinker
, './is_live', default
=None) == 'Y'
55 duration
= parse_duration(xpath_text(
56 relinker
, './duration', default
=None))
58 url_elem
= find_xpath_attr(relinker
, './url', 'type', 'content')
62 media_url
= url_elem
.text
64 # This does not imply geo restriction (e.g.
65 # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
66 if media_url
== 'http://download.rai.it/video_no_available.mp4':
69 ext
= determine_ext(media_url
)
70 if (ext
== 'm3u8' and platform
!= 'mon') or (ext
== 'f4m' and platform
!= 'flash'):
74 formats
.extend(self
._extract
_m
3u8_formats
(
75 media_url
, video_id
, 'mp4', 'm3u8_native',
76 m3u8_id
='hls', fatal
=False))
78 manifest_url
= update_url_query(
79 media_url
.replace('manifest#live_hds.f4m', 'manifest.f4m'),
80 {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
81 formats
.extend(self
._extract
_f
4m
_formats
(
82 manifest_url
, video_id
, f4m_id
='hds', fatal
=False))
84 bitrate
= int_or_none(xpath_text(relinker
, 'bitrate'))
87 'tbr': bitrate
if bitrate
> 0 else None,
88 'format_id': 'http-%d' % bitrate
if bitrate
> 0 else 'http',
91 if not formats
and geoprotection
is True:
92 self
.raise_geo_restricted(countries
=self
._GEO
_COUNTRIES
)
94 return dict((k
, v
) for k
, v
in {
98 }.items() if v
is not None)
101 def _extract_subtitles(url
, subtitle_url
):
103 if subtitle_url
and isinstance(subtitle_url
, compat_str
):
104 subtitle_url
= urljoin(url
, subtitle_url
)
111 if subtitle_url
.endswith(STL_EXT
):
112 srt_url
= subtitle_url
[:-len(STL_EXT
)] + SRT_EXT
113 subtitles
['it'].append({
120 class RaiPlayIE(RaiBaseIE
):
121 _VALID_URL
= r
'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE
._UUID
_RE
123 'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
124 'md5': '340aa3b7afb54bfd14a8c11786450d76',
126 'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
128 'title': 'La Casa Bianca',
129 'alt_title': 'S2016 - Puntata del 23/10/2016',
130 'description': 'md5:a09d45890850458077d1f68bb036e0a5',
131 'thumbnail': r
're:^https?://.*\.jpg$',
135 'timestamp': 1477764300,
136 'upload_date': '20161029',
137 'series': 'La Casa Bianca',
141 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
142 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
144 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
146 'title': 'Report del 07/04/2014',
147 'alt_title': 'S2013/14 - Puntata del 07/04/2014',
148 'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
149 'thumbnail': r
're:^https?://.*\.jpg$',
158 'skip_download': True,
161 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
162 'only_matching': True,
165 def _real_extract(self
, url
):
166 mobj
= re
.match(self
._VALID
_URL
, url
)
167 url
, video_id
= mobj
.group('url', 'id')
169 media
= self
._download
_json
(
170 '%s?json' % url
, video_id
, 'Downloading video JSON')
172 title
= media
['name']
174 video
= media
['video']
176 relinker_info
= self
._extract
_relinker
_info
(video
['contentUrl'], video_id
)
177 self
._sort
_formats
(relinker_info
['formats'])
180 if 'images' in media
:
181 for _
, value
in media
.get('images').items():
184 'url': value
.replace('[RESOLUTION]', '600x400')
187 timestamp
= unified_timestamp(try_get(
188 media
, lambda x
: x
['availabilities'][0]['start'], compat_str
))
190 subtitles
= self
._extract
_subtitles
(url
, video
.get('subtitles'))
194 'title': self
._live
_title
(title
) if relinker_info
.get(
195 'is_live') else title
,
196 'alt_title': media
.get('subtitle'),
197 'description': media
.get('description'),
198 'uploader': strip_or_none(media
.get('channel')),
199 'creator': strip_or_none(media
.get('editor')),
200 'duration': parse_duration(video
.get('duration')),
201 'timestamp': timestamp
,
202 'thumbnails': thumbnails
,
204 media
, lambda x
: x
['isPartOf']['name'], compat_str
),
205 'season_number': int_or_none(try_get(
206 media
, lambda x
: x
['isPartOf']['numeroStagioni'])),
207 'season': media
.get('stagione') or None,
208 'subtitles': subtitles
,
211 info
.update(relinker_info
)
215 class RaiPlayLiveIE(RaiBaseIE
):
216 _VALID_URL
= r
'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
218 'url': 'http://www.raiplay.it/dirette/rainews24',
220 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
221 'display_id': 'rainews24',
223 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
224 'description': 'md5:6eca31500550f9376819f174e5644754',
225 'uploader': 'Rai News 24',
226 'creator': 'Rai News 24',
230 'skip_download': True,
234 def _real_extract(self
, url
):
235 display_id
= self
._match
_id
(url
)
237 webpage
= self
._download
_webpage
(url
, display_id
)
239 video_id
= self
._search
_regex
(
240 r
'data-uniquename=["\']ContentItem
-(%s)' % RaiBaseIE._UUID_RE,
241 webpage, 'content
id')
244 '_type
': 'url_transparent
',
245 'ie_key
': RaiPlayIE.ie_key(),
246 'url
': 'http
://www
.raiplay
.it
/dirette
/ContentItem
-%s.html
' % video_id,
248 'display_id
': display_id,
252 class RaiIE(RaiBaseIE):
253 _VALID_URL = r'https?
://[^
/]+\
.(?
:rai\
.(?
:it|tv
)|rainews\
.it
)/dl
/.+?
-(?P
<id>%s)(?
:-.+?
)?\
.html
' % RaiBaseIE._UUID_RE
255 # var uniquename = "ContentItem-..."
256 # data-id="ContentItem-..."
257 'url
': 'http
://www
.raisport
.rai
.it
/dl
/raiSport
/media
/rassegna
-stampa
-04a9f4bd
-b563
-40cf
-82a6
-aad3529cb4a9
.html
',
259 'id': '04a9f4bd
-b563
-40cf
-82a6
-aad3529cb4a9
',
261 'title
': 'TG PRIMO TEMPO
',
262 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
264 'upload_date
': '20140612',
267 # with ContentItem in many metas
268 'url
': 'http
://www
.rainews
.it
/dl
/rainews
/media
/Weekend
-al
-cinema
-da
-Hollywood
-arriva
-il
-thriller
-di
-Tate
-Taylor
-La
-ragazza
-del-treno
-1632c009
-c843
-4836-bb65
-80c33084a64b
.html
',
270 'id': '1632c009
-c843
-4836-bb65
-80c33084a64b
',
272 'title
': 'Weekend al cinema
, da Hollywood arriva il thriller di Tate Taylor
"La ragazza del treno"',
273 'description
': 'I film
in uscita questa settimana
.',
274 'thumbnail
': r're
:^https?
://.*\
.png$
',
276 'upload_date
': '20161103',
279 # with ContentItem in og:url
280 'url
': 'http
://www
.rai
.it
/dl
/RaiTV
/programmi
/media
/ContentItem
-efb17665
-691c
-45d5
-a60c
-5301333cbb0c
.html
',
281 'md5
': '11959b4e44fa74de47011b5799490adf
',
283 'id': 'efb17665
-691c
-45d5
-a60c
-5301333cbb0c
',
285 'title
': 'TG1 ore
20:00 del 03/11/2016',
286 'description
': 'TG1 edizione integrale ore
20:00 del giorno
03/11/2016',
287 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
289 'upload_date
': '20161103',
292 # drawMediaRaiTV(...)
293 'url
': 'http
://www
.report
.rai
.it
/dl
/Report
/puntata
/ContentItem
-0c7a664b
-d0f4
-4b2c
-8835-3f82e46f433e
.html
',
294 'md5
': '2dd727e61114e1ee9c47f0da6914e178
',
296 'id': '59d69d28
-6bb6
-409d
-a4b5
-ed44096560af
',
299 'description
': 'md5
:4b1afae1364115ce5d78ed83cd2e5b3a
',
300 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
301 'upload_date
': '20141221',
304 # initEdizione('ContentItem
-...'
305 'url
': 'http
://www
.tg1
.rai
.it
/dl
/tg1
/2010/edizioni
/ContentSet
-9b6e0cba
-4bef
-4aef
-8cf0
-9f7f665b7dfb
-tg1
.html?item
=undefined
',
307 'id': 'c2187016
-8484-4e3a
-8ac8
-35e475b07303
',
309 'title
': r're
:TG1 ore \d{2}
:\d{2}
del \d{2}
/\d{2}
/\d{4}
',
311 'upload_date
': '20170401',
313 'skip
': 'Changes daily
',
315 # HDS live stream with only relinker URL
316 'url
': 'http
://www
.rai
.tv
/dl
/RaiTV
/dirette
/PublishingBlock
-1912dbbf
-3f96
-44c3
-b4cf
-523681fbacbc
.html?channel
=EuroNews
',
318 'id': '1912dbbf
-3f96
-44c3
-b4cf
-523681fbacbc
',
323 'skip_download
': True,
326 # HLS live stream with ContentItem in og:url
327 'url
': 'http
://www
.rainews
.it
/dl
/rainews
/live
/ContentItem
-3156f2f2
-dc70
-4953-8e2f
-70d7489d4ce9
.html
',
329 'id': '3156f2f2
-dc70
-4953-8e2f
-70d7489d4ce9
',
331 'title
': 'La diretta di Rainews24
',
334 'skip_download
': True,
338 def _extract_from_content_id(self, content_id, url):
339 media = self._download_json(
340 'http
://www
.rai
.tv
/dl
/RaiTV
/programmi
/media
/ContentItem
-%s.html?json
' % content_id,
341 content_id, 'Downloading video JSON
')
343 title = media['name
'].strip()
345 media_type = media['type']
346 if 'Audio
' in media_type:
349 'format_id
': media.get('formatoAudio
'),
350 'url
': media['audioUrl
'],
351 'ext
': media.get('formatoAudio
'),
354 elif 'Video
' in media_type:
355 relinker_info = self._extract_relinker_info(media['mediaUri
'], content_id)
357 raise ExtractorError('not a media
file')
359 self._sort_formats(relinker_info['formats
'])
362 for image_type in ('image
', 'image_medium
', 'image_300
'):
363 thumbnail_url = media.get(image_type)
366 'url
': compat_urlparse.urljoin(url, thumbnail_url),
369 subtitles = self._extract_subtitles(url, media.get('subtitlesUrl
'))
374 'description
': strip_or_none(media.get('desc
')),
375 'thumbnails
': thumbnails,
376 'uploader
': media.get('author
'),
377 'upload_date
': unified_strdate(media.get('date
')),
378 'duration
': parse_duration(media.get('length
')),
379 'subtitles
': subtitles,
382 info.update(relinker_info)
386 def _real_extract(self, url):
387 video_id = self._match_id(url)
389 webpage = self._download_webpage(url, video_id)
391 content_item_id = None
393 content_item_url = self._html_search_meta(
394 ('og
:url
', 'og
:video
', 'og
:video
:secure_url
', 'twitter
:url
',
395 'twitter
:player
', 'jsonlink
'), webpage, default=None)
397 content_item_id = self._search_regex(
398 r'ContentItem
-(%s)' % self._UUID_RE, content_item_url,
399 'content item
id', default=None)
401 if not content_item_id:
402 content_item_id = self._search_regex(
405 (?:initEdizione|drawMediaRaiTV)\(|
406 <(?:[^>]+\bdata-id|var\s+uniquename)=
409 (?:(?!\1).)*\bContentItem-(?P<id>%s)
411 webpage, 'content item
id', default=None, group='id')
413 content_item_ids = set()
415 content_item_ids.add(content_item_id)
416 if video_id not in content_item_ids:
417 content_item_ids.add(video_id)
419 for content_item_id in content_item_ids:
421 return self._extract_from_content_id(content_item_id, url)
422 except GeoRestrictedError:
424 except ExtractorError:
427 relinker_url = self._search_regex(
436 //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
437 (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
439 webpage, 'relinker URL
', group='url
')
441 relinker_info = self._extract_relinker_info(
442 urljoin(url, relinker_url), video_id)
443 self._sort_formats(relinker_info['formats
'])
445 title = self._search_regex(
446 r'var\s
+videoTitolo\s
*=\s
*([\'"])(?P<title>[^\'"]+)\
1',
447 webpage, 'title
', group='title
',
448 default=None) or self._og_search_title(webpage)
455 info.update(relinker_info)