1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
29 class RaiBaseIE(InfoExtractor
):
30 _UUID_RE
= r
'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}'
31 _GEO_COUNTRIES
= ['IT']
34 def _extract_relinker_info(self
, relinker_url
, video_id
):
40 for platform
in ('mon', 'flash', 'native'):
41 relinker
= self
._download
_xml
(
42 relinker_url
, video_id
,
43 note
='Downloading XML metadata for platform %s' % platform
,
44 transform_source
=fix_xml_ampersands
,
45 query
={'output': 45, 'pl': platform
},
46 headers
=self
.geo_verification_headers())
49 geoprotection
= xpath_text(
50 relinker
, './geoprotection', default
=None) == 'Y'
54 relinker
, './is_live', default
=None) == 'Y'
56 duration
= parse_duration(xpath_text(
57 relinker
, './duration', default
=None))
59 url_elem
= find_xpath_attr(relinker
, './url', 'type', 'content')
63 media_url
= url_elem
.text
65 # This does not imply geo restriction (e.g.
66 # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html)
67 if media_url
== 'http://download.rai.it/video_no_available.mp4':
70 ext
= determine_ext(media_url
)
71 if (ext
== 'm3u8' and platform
!= 'mon') or (ext
== 'f4m' and platform
!= 'flash'):
75 formats
.extend(self
._extract
_m
3u8_formats
(
76 media_url
, video_id
, 'mp4', 'm3u8_native',
77 m3u8_id
='hls', fatal
=False))
79 manifest_url
= update_url_query(
80 media_url
.replace('manifest#live_hds.f4m', 'manifest.f4m'),
81 {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'})
82 formats
.extend(self
._extract
_f
4m
_formats
(
83 manifest_url
, video_id
, f4m_id
='hds', fatal
=False))
85 bitrate
= int_or_none(xpath_text(relinker
, 'bitrate'))
88 'tbr': bitrate
if bitrate
> 0 else None,
89 'format_id': 'http-%d' % bitrate
if bitrate
> 0 else 'http',
92 if not formats
and geoprotection
is True:
93 self
.raise_geo_restricted(countries
=self
._GEO
_COUNTRIES
)
95 return dict((k
, v
) for k
, v
in {
99 }.items() if v
is not None)
102 def _extract_subtitles(url
, subtitle_url
):
104 if subtitle_url
and isinstance(subtitle_url
, compat_str
):
105 subtitle_url
= urljoin(url
, subtitle_url
)
112 if subtitle_url
.endswith(STL_EXT
):
113 srt_url
= subtitle_url
[:-len(STL_EXT
)] + SRT_EXT
114 subtitles
['it'].append({
121 class RaiPlayIE(RaiBaseIE
):
122 _VALID_URL
= r
'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE
._UUID
_RE
124 'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter',
125 'md5': '340aa3b7afb54bfd14a8c11786450d76',
127 'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66',
129 'title': 'La Casa Bianca',
130 'alt_title': 'S2016 - Puntata del 23/10/2016',
131 'description': 'md5:a09d45890850458077d1f68bb036e0a5',
132 'thumbnail': r
're:^https?://.*\.jpg$',
136 'timestamp': 1477764300,
137 'upload_date': '20161029',
138 'series': 'La Casa Bianca',
142 'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html',
143 'md5': '8970abf8caf8aef4696e7b1f2adfc696',
145 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391',
147 'title': 'Report del 07/04/2014',
148 'alt_title': 'S2013/14 - Puntata del 07/04/2014',
149 'description': 'md5:f27c544694cacb46a078db84ec35d2d9',
150 'thumbnail': r
're:^https?://.*\.jpg$',
159 'skip_download': True,
162 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?',
163 'only_matching': True,
166 def _real_extract(self
, url
):
167 mobj
= re
.match(self
._VALID
_URL
, url
)
168 url
, video_id
= mobj
.group('url', 'id')
170 media
= self
._download
_json
(
171 '%s?json' % url
, video_id
, 'Downloading video JSON')
173 title
= media
['name']
175 video
= media
['video']
177 relinker_info
= self
._extract
_relinker
_info
(video
['contentUrl'], video_id
)
178 self
._sort
_formats
(relinker_info
['formats'])
181 if 'images' in media
:
182 for _
, value
in media
.get('images').items():
185 'url': value
.replace('[RESOLUTION]', '600x400')
188 timestamp
= unified_timestamp(try_get(
189 media
, lambda x
: x
['availabilities'][0]['start'], compat_str
))
191 subtitles
= self
._extract
_subtitles
(url
, video
.get('subtitles'))
195 'title': self
._live
_title
(title
) if relinker_info
.get(
196 'is_live') else title
,
197 'alt_title': media
.get('subtitle'),
198 'description': media
.get('description'),
199 'uploader': strip_or_none(media
.get('channel')),
200 'creator': strip_or_none(media
.get('editor')),
201 'duration': parse_duration(video
.get('duration')),
202 'timestamp': timestamp
,
203 'thumbnails': thumbnails
,
205 media
, lambda x
: x
['isPartOf']['name'], compat_str
),
206 'season_number': int_or_none(try_get(
207 media
, lambda x
: x
['isPartOf']['numeroStagioni'])),
208 'season': media
.get('stagione') or None,
209 'subtitles': subtitles
,
212 info
.update(relinker_info
)
216 class RaiPlayLiveIE(RaiBaseIE
):
217 _VALID_URL
= r
'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)'
219 'url': 'http://www.raiplay.it/dirette/rainews24',
221 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c',
222 'display_id': 'rainews24',
224 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
225 'description': 'md5:6eca31500550f9376819f174e5644754',
226 'uploader': 'Rai News 24',
227 'creator': 'Rai News 24',
231 'skip_download': True,
235 def _real_extract(self
, url
):
236 display_id
= self
._match
_id
(url
)
238 webpage
= self
._download
_webpage
(url
, display_id
)
240 video_id
= self
._search
_regex
(
241 r
'data-uniquename=["\']ContentItem
-(%s)' % RaiBaseIE._UUID_RE,
242 webpage, 'content
id')
245 '_type
': 'url_transparent
',
246 'ie_key
': RaiPlayIE.ie_key(),
247 'url
': 'http
://www
.raiplay
.it
/dirette
/ContentItem
-%s.html
' % video_id,
249 'display_id
': display_id,
253 class RaiPlayPlaylistIE(InfoExtractor):
254 _VALID_URL = r'https?
://(?
:www\
.)?raiplay\
.it
/programmi
/(?P
<id>[^
/?
#&]+)'
256 'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/',
258 'id': 'nondirloalmiocapo',
259 'title': 'Non dirlo al mio capo',
260 'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86',
262 'playlist_mincount': 12,
265 def _real_extract(self
, url
):
266 playlist_id
= self
._match
_id
(url
)
268 webpage
= self
._download
_webpage
(url
, playlist_id
)
270 title
= self
._html
_search
_meta
(
271 ('programma', 'nomeProgramma'), webpage
, 'title')
272 description
= unescapeHTML(self
._html
_search
_meta
(
273 ('description', 'og:description'), webpage
, 'description'))
277 for mobj
in re
.finditer(
278 r
'<a\b[^>]+\bhref=(["\'])(?P
<path
>/raiplay
/video
/.+?
)\
1',
280 video_url = urljoin(url, mobj.group('path
'))
281 entries.append(self.url_result(
282 video_url, ie=RaiPlayIE.ie_key(),
283 video_id=RaiPlayIE._match_id(video_url)))
285 return self.playlist_result(entries, playlist_id, title, description)
288 class RaiIE(RaiBaseIE):
289 _VALID_URL = r'https?
://[^
/]+\
.(?
:rai\
.(?
:it|tv
)|rainews\
.it
)/dl
/.+?
-(?P
<id>%s)(?
:-.+?
)?\
.html
' % RaiBaseIE._UUID_RE
291 # var uniquename = "ContentItem-..."
292 # data-id="ContentItem-..."
293 'url
': 'http
://www
.raisport
.rai
.it
/dl
/raiSport
/media
/rassegna
-stampa
-04a9f4bd
-b563
-40cf
-82a6
-aad3529cb4a9
.html
',
295 'id': '04a9f4bd
-b563
-40cf
-82a6
-aad3529cb4a9
',
297 'title
': 'TG PRIMO TEMPO
',
298 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
300 'upload_date
': '20140612',
303 # with ContentItem in many metas
304 'url
': 'http
://www
.rainews
.it
/dl
/rainews
/media
/Weekend
-al
-cinema
-da
-Hollywood
-arriva
-il
-thriller
-di
-Tate
-Taylor
-La
-ragazza
-del-treno
-1632c009
-c843
-4836-bb65
-80c33084a64b
.html
',
306 'id': '1632c009
-c843
-4836-bb65
-80c33084a64b
',
308 'title
': 'Weekend al cinema
, da Hollywood arriva il thriller di Tate Taylor
"La ragazza del treno"',
309 'description
': 'I film
in uscita questa settimana
.',
310 'thumbnail
': r're
:^https?
://.*\
.png$
',
312 'upload_date
': '20161103',
315 # with ContentItem in og:url
316 'url
': 'http
://www
.rai
.it
/dl
/RaiTV
/programmi
/media
/ContentItem
-efb17665
-691c
-45d5
-a60c
-5301333cbb0c
.html
',
317 'md5
': '11959b4e44fa74de47011b5799490adf
',
319 'id': 'efb17665
-691c
-45d5
-a60c
-5301333cbb0c
',
321 'title
': 'TG1 ore
20:00 del 03/11/2016',
322 'description
': 'TG1 edizione integrale ore
20:00 del giorno
03/11/2016',
323 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
325 'upload_date
': '20161103',
328 # drawMediaRaiTV(...)
329 'url
': 'http
://www
.report
.rai
.it
/dl
/Report
/puntata
/ContentItem
-0c7a664b
-d0f4
-4b2c
-8835-3f82e46f433e
.html
',
330 'md5
': '2dd727e61114e1ee9c47f0da6914e178
',
332 'id': '59d69d28
-6bb6
-409d
-a4b5
-ed44096560af
',
335 'description
': 'md5
:4b1afae1364115ce5d78ed83cd2e5b3a
',
336 'thumbnail
': r're
:^https?
://.*\
.jpg$
',
337 'upload_date
': '20141221',
340 # initEdizione('ContentItem
-...'
341 'url
': 'http
://www
.tg1
.rai
.it
/dl
/tg1
/2010/edizioni
/ContentSet
-9b6e0cba
-4bef
-4aef
-8cf0
-9f7f665b7dfb
-tg1
.html?item
=undefined
',
343 'id': 'c2187016
-8484-4e3a
-8ac8
-35e475b07303
',
345 'title
': r're
:TG1 ore \d{2}
:\d{2}
del \d{2}
/\d{2}
/\d{4}
',
347 'upload_date
': '20170401',
349 'skip
': 'Changes daily
',
351 # HDS live stream with only relinker URL
352 'url
': 'http
://www
.rai
.tv
/dl
/RaiTV
/dirette
/PublishingBlock
-1912dbbf
-3f96
-44c3
-b4cf
-523681fbacbc
.html?channel
=EuroNews
',
354 'id': '1912dbbf
-3f96
-44c3
-b4cf
-523681fbacbc
',
359 'skip_download
': True,
362 # HLS live stream with ContentItem in og:url
363 'url
': 'http
://www
.rainews
.it
/dl
/rainews
/live
/ContentItem
-3156f2f2
-dc70
-4953-8e2f
-70d7489d4ce9
.html
',
365 'id': '3156f2f2
-dc70
-4953-8e2f
-70d7489d4ce9
',
367 'title
': 'La diretta di Rainews24
',
370 'skip_download
': True,
374 def _extract_from_content_id(self, content_id, url):
375 media = self._download_json(
376 'http
://www
.rai
.tv
/dl
/RaiTV
/programmi
/media
/ContentItem
-%s.html?json
' % content_id,
377 content_id, 'Downloading video JSON
')
379 title = media['name
'].strip()
381 media_type = media['type']
382 if 'Audio
' in media_type:
385 'format_id
': media.get('formatoAudio
'),
386 'url
': media['audioUrl
'],
387 'ext
': media.get('formatoAudio
'),
390 elif 'Video
' in media_type:
391 relinker_info = self._extract_relinker_info(media['mediaUri
'], content_id)
393 raise ExtractorError('not a media
file')
395 self._sort_formats(relinker_info['formats
'])
398 for image_type in ('image
', 'image_medium
', 'image_300
'):
399 thumbnail_url = media.get(image_type)
402 'url
': compat_urlparse.urljoin(url, thumbnail_url),
405 subtitles = self._extract_subtitles(url, media.get('subtitlesUrl
'))
410 'description
': strip_or_none(media.get('desc
')),
411 'thumbnails
': thumbnails,
412 'uploader
': media.get('author
'),
413 'upload_date
': unified_strdate(media.get('date
')),
414 'duration
': parse_duration(media.get('length
')),
415 'subtitles
': subtitles,
418 info.update(relinker_info)
422 def _real_extract(self, url):
423 video_id = self._match_id(url)
425 webpage = self._download_webpage(url, video_id)
427 content_item_id = None
429 content_item_url = self._html_search_meta(
430 ('og
:url
', 'og
:video
', 'og
:video
:secure_url
', 'twitter
:url
',
431 'twitter
:player
', 'jsonlink
'), webpage, default=None)
433 content_item_id = self._search_regex(
434 r'ContentItem
-(%s)' % self._UUID_RE, content_item_url,
435 'content item
id', default=None)
437 if not content_item_id:
438 content_item_id = self._search_regex(
441 (?:initEdizione|drawMediaRaiTV)\(|
442 <(?:[^>]+\bdata-id|var\s+uniquename)=
445 (?:(?!\1).)*\bContentItem-(?P<id>%s)
447 webpage, 'content item
id', default=None, group='id')
449 content_item_ids = set()
451 content_item_ids.add(content_item_id)
452 if video_id not in content_item_ids:
453 content_item_ids.add(video_id)
455 for content_item_id in content_item_ids:
457 return self._extract_from_content_id(content_item_id, url)
458 except GeoRestrictedError:
460 except ExtractorError:
463 relinker_url = self._search_regex(
472 //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\?
473 (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1
475 webpage, 'relinker URL
', group='url
')
477 relinker_info = self._extract_relinker_info(
478 urljoin(url, relinker_url), video_id)
479 self._sort_formats(relinker_info['formats
'])
481 title = self._search_regex(
482 r'var\s
+videoTitolo\s
*=\s
*([\'"])(?P<title>[^\'"]+)\
1',
483 webpage, 'title
', group='title
',
484 default=None) or self._og_search_title(webpage)
491 info.update(relinker_info)