1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  29 class RaiBaseIE(InfoExtractor
): 
  30     _UUID_RE 
= r
'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' 
  31     _GEO_COUNTRIES 
= ['IT'] 
  34     def _extract_relinker_info(self
, relinker_url
, video_id
): 
  35         if not re
.match(r
'https?://', relinker_url
): 
  36             return {'formats': [{'url': relinker_url
}]} 
  43         for platform 
in ('mon', 'flash', 'native'): 
  44             relinker 
= self
._download
_xml
( 
  45                 relinker_url
, video_id
, 
  46                 note
='Downloading XML metadata for platform %s' % platform
, 
  47                 transform_source
=fix_xml_ampersands
, 
  48                 query
={'output': 45, 'pl': platform
}, 
  49                 headers
=self
.geo_verification_headers()) 
  52                 geoprotection 
= xpath_text( 
  53                     relinker
, './geoprotection', default
=None) == 'Y' 
  57                     relinker
, './is_live', default
=None) == 'Y' 
  59                 duration 
= parse_duration(xpath_text( 
  60                     relinker
, './duration', default
=None)) 
  62             url_elem 
= find_xpath_attr(relinker
, './url', 'type', 'content') 
  66             media_url 
= url_elem
.text
 
  68             # This does not imply geo restriction (e.g. 
  69             # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) 
  70             if media_url 
== 'http://download.rai.it/video_no_available.mp4': 
  73             ext 
= determine_ext(media_url
) 
  74             if (ext 
== 'm3u8' and platform 
!= 'mon') or (ext 
== 'f4m' and platform 
!= 'flash'): 
  77             if ext 
== 'm3u8' or 'format=m3u8' in media_url 
or platform 
== 'mon': 
  78                 formats
.extend(self
._extract
_m
3u8_formats
( 
  79                     media_url
, video_id
, 'mp4', 'm3u8_native', 
  80                     m3u8_id
='hls', fatal
=False)) 
  81             elif ext 
== 'f4m' or platform 
== 'flash': 
  82                 manifest_url 
= update_url_query( 
  83                     media_url
.replace('manifest#live_hds.f4m', 'manifest.f4m'), 
  84                     {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) 
  85                 formats
.extend(self
._extract
_f
4m
_formats
( 
  86                     manifest_url
, video_id
, f4m_id
='hds', fatal
=False)) 
  88                 bitrate 
= int_or_none(xpath_text(relinker
, 'bitrate')) 
  91                     'tbr': bitrate 
if bitrate 
> 0 else None, 
  92                     'format_id': 'http-%d' % bitrate 
if bitrate 
> 0 else 'http', 
  95         if not formats 
and geoprotection 
is True: 
  96             self
.raise_geo_restricted(countries
=self
._GEO
_COUNTRIES
) 
  98         return dict((k
, v
) for k
, v 
in { 
 100             'duration': duration
, 
 102         }.items() if v 
is not None) 
 105     def _extract_subtitles(url
, subtitle_url
): 
 107         if subtitle_url 
and isinstance(subtitle_url
, compat_str
): 
 108             subtitle_url 
= urljoin(url
, subtitle_url
) 
 115             if subtitle_url
.endswith(STL_EXT
): 
 116                 srt_url 
= subtitle_url
[:-len(STL_EXT
)] + SRT_EXT
 
 117                 subtitles
['it'].append({ 
 124 class RaiPlayIE(RaiBaseIE
): 
 125     _VALID_URL 
= r
'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE
._UUID
_RE
 
 127         'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', 
 128         'md5': '340aa3b7afb54bfd14a8c11786450d76', 
 130             'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66', 
 132             'title': 'La Casa Bianca', 
 133             'alt_title': 'S2016 - Puntata del 23/10/2016', 
 134             'description': 'md5:a09d45890850458077d1f68bb036e0a5', 
 135             'thumbnail': r
're:^https?://.*\.jpg$', 
 139             'timestamp': 1477764300, 
 140             'upload_date': '20161029', 
 141             'series': 'La Casa Bianca', 
 145         'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 
 146         'md5': '8970abf8caf8aef4696e7b1f2adfc696', 
 148             'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 
 150             'title': 'Report del 07/04/2014', 
 151             'alt_title': 'S2013/14 - Puntata del 07/04/2014', 
 152             'description': 'md5:f27c544694cacb46a078db84ec35d2d9', 
 153             'thumbnail': r
're:^https?://.*\.jpg$', 
 162             'skip_download': True, 
 165         'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 
 166         'only_matching': True, 
 169     def _real_extract(self
, url
): 
 170         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 171         url
, video_id 
= mobj
.group('url', 'id') 
 173         media 
= self
._download
_json
( 
 174             '%s?json' % url
, video_id
, 'Downloading video JSON') 
 176         title 
= media
['name'] 
 178         video 
= media
['video'] 
 180         relinker_info 
= self
._extract
_relinker
_info
(video
['contentUrl'], video_id
) 
 181         self
._sort
_formats
(relinker_info
['formats']) 
 184         if 'images' in media
: 
 185             for _
, value 
in media
.get('images').items(): 
 188                         'url': value
.replace('[RESOLUTION]', '600x400') 
 191         timestamp 
= unified_timestamp(try_get( 
 192             media
, lambda x
: x
['availabilities'][0]['start'], compat_str
)) 
 194         subtitles 
= self
._extract
_subtitles
(url
, video
.get('subtitles')) 
 198             'title': self
._live
_title
(title
) if relinker_info
.get( 
 199                 'is_live') else title
, 
 200             'alt_title': media
.get('subtitle'), 
 201             'description': media
.get('description'), 
 202             'uploader': strip_or_none(media
.get('channel')), 
 203             'creator': strip_or_none(media
.get('editor')), 
 204             'duration': parse_duration(video
.get('duration')), 
 205             'timestamp': timestamp
, 
 206             'thumbnails': thumbnails
, 
 208                 media
, lambda x
: x
['isPartOf']['name'], compat_str
), 
 209             'season_number': int_or_none(try_get( 
 210                 media
, lambda x
: x
['isPartOf']['numeroStagioni'])), 
 211             'season': media
.get('stagione') or None, 
 212             'subtitles': subtitles
, 
 215         info
.update(relinker_info
) 
 219 class RaiPlayLiveIE(RaiBaseIE
): 
 220     _VALID_URL 
= r
'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)' 
 222         'url': 'http://www.raiplay.it/dirette/rainews24', 
 224             'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', 
 225             'display_id': 'rainews24', 
 227             'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 
 228             'description': 'md5:6eca31500550f9376819f174e5644754', 
 229             'uploader': 'Rai News 24', 
 230             'creator': 'Rai News 24', 
 234             'skip_download': True, 
 238     def _real_extract(self
, url
): 
 239         display_id 
= self
._match
_id
(url
) 
 241         webpage 
= self
._download
_webpage
(url
, display_id
) 
 243         video_id 
= self
._search
_regex
( 
 244             r
'data-uniquename=["\']ContentItem
-(%s)' % RaiBaseIE._UUID_RE, 
 245             webpage, 'content 
id') 
 248             '_type
': 'url_transparent
', 
 249             'ie_key
': RaiPlayIE.ie_key(), 
 250             'url
': 'http
://www
.raiplay
.it
/dirette
/ContentItem
-%s.html
' % video_id, 
 252             'display_id
': display_id, 
 256 class RaiPlayPlaylistIE(InfoExtractor): 
 257     _VALID_URL = r'https?
://(?
:www\
.)?raiplay\
.it
/programmi
/(?P
<id>[^
/?
#&]+)' 
 259         'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', 
 261             'id': 'nondirloalmiocapo', 
 262             'title': 'Non dirlo al mio capo', 
 263             'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', 
 265         'playlist_mincount': 12, 
 268     def _real_extract(self
, url
): 
 269         playlist_id 
= self
._match
_id
(url
) 
 271         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 273         title 
= self
._html
_search
_meta
( 
 274             ('programma', 'nomeProgramma'), webpage
, 'title') 
 275         description 
= unescapeHTML(self
._html
_search
_meta
( 
 276             ('description', 'og:description'), webpage
, 'description')) 
 279         for mobj 
in re
.finditer( 
 280                 r
'<a\b[^>]+\bhref=(["\'])(?P
<path
>/raiplay
/video
/.+?
)\
1', 
 282             video_url = urljoin(url, mobj.group('path
')) 
 283             entries.append(self.url_result( 
 284                 video_url, ie=RaiPlayIE.ie_key(), 
 285                 video_id=RaiPlayIE._match_id(video_url))) 
 287         return self.playlist_result(entries, playlist_id, title, description) 
 290 class RaiIE(RaiBaseIE): 
 291     _VALID_URL = r'https?
://[^
/]+\
.(?
:rai\
.(?
:it|tv
)|rainews\
.it
)/.+?
-(?P
<id>%s)(?
:-.+?
)?\
.html
' % RaiBaseIE._UUID_RE 
 293         # var uniquename = "ContentItem-..." 
 294         # data-id="ContentItem-..." 
 295         'url
': 'http
://www
.raisport
.rai
.it
/dl
/raiSport
/media
/rassegna
-stampa
-04a9f4bd
-b563
-40cf
-82a6
-aad3529cb4a9
.html
', 
 297             'id': '04a9f4bd
-b563
-40cf
-82a6
-aad3529cb4a9
', 
 299             'title
': 'TG PRIMO TEMPO
', 
 300             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 302             'upload_date
': '20140612', 
 305         # with ContentItem in many metas 
 306         'url
': 'http
://www
.rainews
.it
/dl
/rainews
/media
/Weekend
-al
-cinema
-da
-Hollywood
-arriva
-il
-thriller
-di
-Tate
-Taylor
-La
-ragazza
-del-treno
-1632c009
-c843
-4836-bb65
-80c33084a64b
.html
', 
 308             'id': '1632c009
-c843
-4836-bb65
-80c33084a64b
', 
 310             'title
': 'Weekend al cinema
, da Hollywood arriva il thriller di Tate Taylor 
"La ragazza del treno"', 
 311             'description
': 'I film 
in uscita questa settimana
.', 
 312             'thumbnail
': r're
:^https?
://.*\
.png$
', 
 314             'upload_date
': '20161103', 
 317         # with ContentItem in og:url 
 318         'url
': 'http
://www
.rai
.it
/dl
/RaiTV
/programmi
/media
/ContentItem
-efb17665
-691c
-45d5
-a60c
-5301333cbb0c
.html
', 
 319         'md5
': '11959b4e44fa74de47011b5799490adf
', 
 321             'id': 'efb17665
-691c
-45d5
-a60c
-5301333cbb0c
', 
 323             'title
': 'TG1 ore 
20:00 del 03/11/2016', 
 324             'description
': 'TG1 edizione integrale ore 
20:00 del giorno 
03/11/2016', 
 325             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 327             'upload_date
': '20161103', 
 330         # drawMediaRaiTV(...) 
 331         'url
': 'http
://www
.report
.rai
.it
/dl
/Report
/puntata
/ContentItem
-0c7a664b
-d0f4
-4b2c
-8835-3f82e46f433e
.html
', 
 332         'md5
': '2dd727e61114e1ee9c47f0da6914e178
', 
 334             'id': '59d69d28
-6bb6
-409d
-a4b5
-ed44096560af
', 
 337             'description
': 'md5
:4b1afae1364115ce5d78ed83cd2e5b3a
', 
 338             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 339             'upload_date
': '20141221', 
 342         # initEdizione('ContentItem
-...' 
 343         'url
': 'http
://www
.tg1
.rai
.it
/dl
/tg1
/2010/edizioni
/ContentSet
-9b6e0cba
-4bef
-4aef
-8cf0
-9f7f665b7dfb
-tg1
.html?item
=undefined
', 
 345             'id': 'c2187016
-8484-4e3a
-8ac8
-35e475b07303
', 
 347             'title
': r're
:TG1 ore \d{2}
:\d{2} 
del \d{2}
/\d{2}
/\d{4}
', 
 349             'upload_date
': '20170401', 
 351         'skip
': 'Changes daily
', 
 353         # HDS live stream with only relinker URL 
 354         'url
': 'http
://www
.rai
.tv
/dl
/RaiTV
/dirette
/PublishingBlock
-1912dbbf
-3f96
-44c3
-b4cf
-523681fbacbc
.html?channel
=EuroNews
', 
 356             'id': '1912dbbf
-3f96
-44c3
-b4cf
-523681fbacbc
', 
 361             'skip_download
': True, 
 364         # HLS live stream with ContentItem in og:url 
 365         'url
': 'http
://www
.rainews
.it
/dl
/rainews
/live
/ContentItem
-3156f2f2
-dc70
-4953-8e2f
-70d7489d4ce9
.html
', 
 367             'id': '3156f2f2
-dc70
-4953-8e2f
-70d7489d4ce9
', 
 369             'title
': 'La diretta di Rainews24
', 
 372             'skip_download
': True, 
 376         'url
': 'http
://www
.rai
.it
/dl
/RaiTV
/programmi
/media
/ContentItem
-b63a4089
-ac28
-48cf
-bca5
-9f5b5bc46df5
.html
', 
 377         'only_matching
': True, 
 379         'url
': 'https
://www
.rainews
.it
/tgr
/marche
/notiziari
/video
/2019/02/ContentItem
-6ba945a2
-889c
-4a80
-bdeb
-8489c70a8db9
.html
', 
 380         'only_matching
': True, 
 383     def _extract_from_content_id(self, content_id, url): 
 384         media = self._download_json( 
 385             'http
://www
.rai
.tv
/dl
/RaiTV
/programmi
/media
/ContentItem
-%s.html?json
' % content_id, 
 386             content_id, 'Downloading video JSON
') 
 388         title = media['name
'].strip() 
 390         media_type = media['type'] 
 391         if 'Audio
' in media_type: 
 394                     'format_id
': media.get('formatoAudio
'), 
 395                     'url
': media['audioUrl
'], 
 396                     'ext
': media.get('formatoAudio
'), 
 399         elif 'Video
' in media_type: 
 400             relinker_info = self._extract_relinker_info(media['mediaUri
'], content_id) 
 402             raise ExtractorError('not a media 
file') 
 404         self._sort_formats(relinker_info['formats
']) 
 407         for image_type in ('image
', 'image_medium
', 'image_300
'): 
 408             thumbnail_url = media.get(image_type) 
 411                     'url
': compat_urlparse.urljoin(url, thumbnail_url), 
 414         subtitles = self._extract_subtitles(url, media.get('subtitlesUrl
')) 
 419             'description
': strip_or_none(media.get('desc
')), 
 420             'thumbnails
': thumbnails, 
 421             'uploader
': media.get('author
'), 
 422             'upload_date
': unified_strdate(media.get('date
')), 
 423             'duration
': parse_duration(media.get('length
')), 
 424             'subtitles
': subtitles, 
 427         info.update(relinker_info) 
 431     def _real_extract(self, url): 
 432         video_id = self._match_id(url) 
 434         webpage = self._download_webpage(url, video_id) 
 436         content_item_id = None 
 438         content_item_url = self._html_search_meta( 
 439             ('og
:url
', 'og
:video
', 'og
:video
:secure_url
', 'twitter
:url
', 
 440              'twitter
:player
', 'jsonlink
'), webpage, default=None) 
 442             content_item_id = self._search_regex( 
 443                 r'ContentItem
-(%s)' % self._UUID_RE, content_item_url, 
 444                 'content item 
id', default=None) 
 446         if not content_item_id: 
 447             content_item_id = self._search_regex( 
 450                         (?:initEdizione|drawMediaRaiTV)\(| 
 451                         <(?:[^>]+\bdata-id|var\s+uniquename)= 
 454                     (?:(?!\1).)*\bContentItem-(?P<id>%s) 
 456                 webpage, 'content item 
id', default=None, group='id') 
 458         content_item_ids = set() 
 460             content_item_ids.add(content_item_id) 
 461         if video_id not in content_item_ids: 
 462             content_item_ids.add(video_id) 
 464         for content_item_id in content_item_ids: 
 466                 return self._extract_from_content_id(content_item_id, url) 
 467             except GeoRestrictedError: 
 469             except ExtractorError: 
 472         relinker_url = self._search_regex( 
 481                     //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? 
 482                     (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 
 484             webpage, 'relinker URL
', group='url
') 
 486         relinker_info = self._extract_relinker_info( 
 487             urljoin(url, relinker_url), video_id) 
 488         self._sort_formats(relinker_info['formats
']) 
 490         title = self._search_regex( 
 491             r'var\s
+videoTitolo\s
*=\s
*([\'"])(?P<title>[^\'"]+)\
1', 
 492             webpage, 'title
', group='title
', 
 493             default=None) or self._og_search_title(webpage) 
 500         info.update(relinker_info)