1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  29 class RaiBaseIE(InfoExtractor
): 
  30     _UUID_RE 
= r
'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' 
  31     _GEO_COUNTRIES 
= ['IT'] 
  34     def _extract_relinker_info(self
, relinker_url
, video_id
): 
  40         for platform 
in ('mon', 'flash', 'native'): 
  41             relinker 
= self
._download
_xml
( 
  42                 relinker_url
, video_id
, 
  43                 note
='Downloading XML metadata for platform %s' % platform
, 
  44                 transform_source
=fix_xml_ampersands
, 
  45                 query
={'output': 45, 'pl': platform
}, 
  46                 headers
=self
.geo_verification_headers()) 
  49                 geoprotection 
= xpath_text( 
  50                     relinker
, './geoprotection', default
=None) == 'Y' 
  54                     relinker
, './is_live', default
=None) == 'Y' 
  56                 duration 
= parse_duration(xpath_text( 
  57                     relinker
, './duration', default
=None)) 
  59             url_elem 
= find_xpath_attr(relinker
, './url', 'type', 'content') 
  63             media_url 
= url_elem
.text
 
  65             # This does not imply geo restriction (e.g. 
  66             # http://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html) 
  67             if media_url 
== 'http://download.rai.it/video_no_available.mp4': 
  70             ext 
= determine_ext(media_url
) 
  71             if (ext 
== 'm3u8' and platform 
!= 'mon') or (ext 
== 'f4m' and platform 
!= 'flash'): 
  75                 formats
.extend(self
._extract
_m
3u8_formats
( 
  76                     media_url
, video_id
, 'mp4', 'm3u8_native', 
  77                     m3u8_id
='hls', fatal
=False)) 
  79                 manifest_url 
= update_url_query( 
  80                     media_url
.replace('manifest#live_hds.f4m', 'manifest.f4m'), 
  81                     {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) 
  82                 formats
.extend(self
._extract
_f
4m
_formats
( 
  83                     manifest_url
, video_id
, f4m_id
='hds', fatal
=False)) 
  85                 bitrate 
= int_or_none(xpath_text(relinker
, 'bitrate')) 
  88                     'tbr': bitrate 
if bitrate 
> 0 else None, 
  89                     'format_id': 'http-%d' % bitrate 
if bitrate 
> 0 else 'http', 
  92         if not formats 
and geoprotection 
is True: 
  93             self
.raise_geo_restricted(countries
=self
._GEO
_COUNTRIES
) 
  95         return dict((k
, v
) for k
, v 
in { 
  99         }.items() if v 
is not None) 
 102     def _extract_subtitles(url
, subtitle_url
): 
 104         if subtitle_url 
and isinstance(subtitle_url
, compat_str
): 
 105             subtitle_url 
= urljoin(url
, subtitle_url
) 
 112             if subtitle_url
.endswith(STL_EXT
): 
 113                 srt_url 
= subtitle_url
[:-len(STL_EXT
)] + SRT_EXT
 
 114                 subtitles
['it'].append({ 
 121 class RaiPlayIE(RaiBaseIE
): 
 122     _VALID_URL 
= r
'(?P<url>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>%s)\.html)' % RaiBaseIE
._UUID
_RE
 
 124         'url': 'http://www.raiplay.it/video/2016/10/La-Casa-Bianca-e06118bb-59a9-4636-b914-498e4cfd2c66.html?source=twitter', 
 125         'md5': '340aa3b7afb54bfd14a8c11786450d76', 
 127             'id': 'e06118bb-59a9-4636-b914-498e4cfd2c66', 
 129             'title': 'La Casa Bianca', 
 130             'alt_title': 'S2016 - Puntata del 23/10/2016', 
 131             'description': 'md5:a09d45890850458077d1f68bb036e0a5', 
 132             'thumbnail': r
're:^https?://.*\.jpg$', 
 136             'timestamp': 1477764300, 
 137             'upload_date': '20161029', 
 138             'series': 'La Casa Bianca', 
 142         'url': 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 
 143         'md5': '8970abf8caf8aef4696e7b1f2adfc696', 
 145             'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 
 147             'title': 'Report del 07/04/2014', 
 148             'alt_title': 'S2013/14 - Puntata del 07/04/2014', 
 149             'description': 'md5:f27c544694cacb46a078db84ec35d2d9', 
 150             'thumbnail': r
're:^https?://.*\.jpg$', 
 159             'skip_download': True, 
 162         'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 
 163         'only_matching': True, 
 166     def _real_extract(self
, url
): 
 167         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 168         url
, video_id 
= mobj
.group('url', 'id') 
 170         media 
= self
._download
_json
( 
 171             '%s?json' % url
, video_id
, 'Downloading video JSON') 
 173         title 
= media
['name'] 
 175         video 
= media
['video'] 
 177         relinker_info 
= self
._extract
_relinker
_info
(video
['contentUrl'], video_id
) 
 178         self
._sort
_formats
(relinker_info
['formats']) 
 181         if 'images' in media
: 
 182             for _
, value 
in media
.get('images').items(): 
 185                         'url': value
.replace('[RESOLUTION]', '600x400') 
 188         timestamp 
= unified_timestamp(try_get( 
 189             media
, lambda x
: x
['availabilities'][0]['start'], compat_str
)) 
 191         subtitles 
= self
._extract
_subtitles
(url
, video
.get('subtitles')) 
 195             'title': self
._live
_title
(title
) if relinker_info
.get( 
 196                 'is_live') else title
, 
 197             'alt_title': media
.get('subtitle'), 
 198             'description': media
.get('description'), 
 199             'uploader': strip_or_none(media
.get('channel')), 
 200             'creator': strip_or_none(media
.get('editor')), 
 201             'duration': parse_duration(video
.get('duration')), 
 202             'timestamp': timestamp
, 
 203             'thumbnails': thumbnails
, 
 205                 media
, lambda x
: x
['isPartOf']['name'], compat_str
), 
 206             'season_number': int_or_none(try_get( 
 207                 media
, lambda x
: x
['isPartOf']['numeroStagioni'])), 
 208             'season': media
.get('stagione') or None, 
 209             'subtitles': subtitles
, 
 212         info
.update(relinker_info
) 
 216 class RaiPlayLiveIE(RaiBaseIE
): 
 217     _VALID_URL 
= r
'https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+)' 
 219         'url': 'http://www.raiplay.it/dirette/rainews24', 
 221             'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', 
 222             'display_id': 'rainews24', 
 224             'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 
 225             'description': 'md5:6eca31500550f9376819f174e5644754', 
 226             'uploader': 'Rai News 24', 
 227             'creator': 'Rai News 24', 
 231             'skip_download': True, 
 235     def _real_extract(self
, url
): 
 236         display_id 
= self
._match
_id
(url
) 
 238         webpage 
= self
._download
_webpage
(url
, display_id
) 
 240         video_id 
= self
._search
_regex
( 
 241             r
'data-uniquename=["\']ContentItem
-(%s)' % RaiBaseIE._UUID_RE, 
 242             webpage, 'content 
id') 
 245             '_type
': 'url_transparent
', 
 246             'ie_key
': RaiPlayIE.ie_key(), 
 247             'url
': 'http
://www
.raiplay
.it
/dirette
/ContentItem
-%s.html
' % video_id, 
 249             'display_id
': display_id, 
 253 class RaiPlayPlaylistIE(InfoExtractor): 
 254     _VALID_URL = r'https?
://(?
:www\
.)?raiplay\
.it
/programmi
/(?P
<id>[^
/?
#&]+)' 
 256         'url': 'http://www.raiplay.it/programmi/nondirloalmiocapo/', 
 258             'id': 'nondirloalmiocapo', 
 259             'title': 'Non dirlo al mio capo', 
 260             'description': 'md5:9f3d603b2947c1c7abb098f3b14fac86', 
 262         'playlist_mincount': 12, 
 265     def _real_extract(self
, url
): 
 266         playlist_id 
= self
._match
_id
(url
) 
 268         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 270         title 
= self
._html
_search
_meta
( 
 271             ('programma', 'nomeProgramma'), webpage
, 'title') 
 272         description 
= unescapeHTML(self
._html
_search
_meta
( 
 273             ('description', 'og:description'), webpage
, 'description')) 
 277         for mobj 
in re
.finditer( 
 278                 r
'<a\b[^>]+\bhref=(["\'])(?P
<path
>/raiplay
/video
/.+?
)\
1', 
 280             video_url = urljoin(url, mobj.group('path
')) 
 281             entries.append(self.url_result( 
 282                 video_url, ie=RaiPlayIE.ie_key(), 
 283                 video_id=RaiPlayIE._match_id(video_url))) 
 285         return self.playlist_result(entries, playlist_id, title, description) 
 288 class RaiIE(RaiBaseIE): 
 289     _VALID_URL = r'https?
://[^
/]+\
.(?
:rai\
.(?
:it|tv
)|rainews\
.it
)/dl
/.+?
-(?P
<id>%s)(?
:-.+?
)?\
.html
' % RaiBaseIE._UUID_RE 
 291         # var uniquename = "ContentItem-..." 
 292         # data-id="ContentItem-..." 
 293         'url
': 'http
://www
.raisport
.rai
.it
/dl
/raiSport
/media
/rassegna
-stampa
-04a9f4bd
-b563
-40cf
-82a6
-aad3529cb4a9
.html
', 
 295             'id': '04a9f4bd
-b563
-40cf
-82a6
-aad3529cb4a9
', 
 297             'title
': 'TG PRIMO TEMPO
', 
 298             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 300             'upload_date
': '20140612', 
 303         # with ContentItem in many metas 
 304         'url
': 'http
://www
.rainews
.it
/dl
/rainews
/media
/Weekend
-al
-cinema
-da
-Hollywood
-arriva
-il
-thriller
-di
-Tate
-Taylor
-La
-ragazza
-del-treno
-1632c009
-c843
-4836-bb65
-80c33084a64b
.html
', 
 306             'id': '1632c009
-c843
-4836-bb65
-80c33084a64b
', 
 308             'title
': 'Weekend al cinema
, da Hollywood arriva il thriller di Tate Taylor 
"La ragazza del treno"', 
 309             'description
': 'I film 
in uscita questa settimana
.', 
 310             'thumbnail
': r're
:^https?
://.*\
.png$
', 
 312             'upload_date
': '20161103', 
 315         # with ContentItem in og:url 
 316         'url
': 'http
://www
.rai
.it
/dl
/RaiTV
/programmi
/media
/ContentItem
-efb17665
-691c
-45d5
-a60c
-5301333cbb0c
.html
', 
 317         'md5
': '11959b4e44fa74de47011b5799490adf
', 
 319             'id': 'efb17665
-691c
-45d5
-a60c
-5301333cbb0c
', 
 321             'title
': 'TG1 ore 
20:00 del 03/11/2016', 
 322             'description
': 'TG1 edizione integrale ore 
20:00 del giorno 
03/11/2016', 
 323             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 325             'upload_date
': '20161103', 
 328         # drawMediaRaiTV(...) 
 329         'url
': 'http
://www
.report
.rai
.it
/dl
/Report
/puntata
/ContentItem
-0c7a664b
-d0f4
-4b2c
-8835-3f82e46f433e
.html
', 
 330         'md5
': '2dd727e61114e1ee9c47f0da6914e178
', 
 332             'id': '59d69d28
-6bb6
-409d
-a4b5
-ed44096560af
', 
 335             'description
': 'md5
:4b1afae1364115ce5d78ed83cd2e5b3a
', 
 336             'thumbnail
': r're
:^https?
://.*\
.jpg$
', 
 337             'upload_date
': '20141221', 
 340         # initEdizione('ContentItem
-...' 
 341         'url
': 'http
://www
.tg1
.rai
.it
/dl
/tg1
/2010/edizioni
/ContentSet
-9b6e0cba
-4bef
-4aef
-8cf0
-9f7f665b7dfb
-tg1
.html?item
=undefined
', 
 343             'id': 'c2187016
-8484-4e3a
-8ac8
-35e475b07303
', 
 345             'title
': r're
:TG1 ore \d{2}
:\d{2} 
del \d{2}
/\d{2}
/\d{4}
', 
 347             'upload_date
': '20170401', 
 349         'skip
': 'Changes daily
', 
 351         # HDS live stream with only relinker URL 
 352         'url
': 'http
://www
.rai
.tv
/dl
/RaiTV
/dirette
/PublishingBlock
-1912dbbf
-3f96
-44c3
-b4cf
-523681fbacbc
.html?channel
=EuroNews
', 
 354             'id': '1912dbbf
-3f96
-44c3
-b4cf
-523681fbacbc
', 
 359             'skip_download
': True, 
 362         # HLS live stream with ContentItem in og:url 
 363         'url
': 'http
://www
.rainews
.it
/dl
/rainews
/live
/ContentItem
-3156f2f2
-dc70
-4953-8e2f
-70d7489d4ce9
.html
', 
 365             'id': '3156f2f2
-dc70
-4953-8e2f
-70d7489d4ce9
', 
 367             'title
': 'La diretta di Rainews24
', 
 370             'skip_download
': True, 
 374     def _extract_from_content_id(self, content_id, url): 
 375         media = self._download_json( 
 376             'http
://www
.rai
.tv
/dl
/RaiTV
/programmi
/media
/ContentItem
-%s.html?json
' % content_id, 
 377             content_id, 'Downloading video JSON
') 
 379         title = media['name
'].strip() 
 381         media_type = media['type'] 
 382         if 'Audio
' in media_type: 
 385                     'format_id
': media.get('formatoAudio
'), 
 386                     'url
': media['audioUrl
'], 
 387                     'ext
': media.get('formatoAudio
'), 
 390         elif 'Video
' in media_type: 
 391             relinker_info = self._extract_relinker_info(media['mediaUri
'], content_id) 
 393             raise ExtractorError('not a media 
file') 
 395         self._sort_formats(relinker_info['formats
']) 
 398         for image_type in ('image
', 'image_medium
', 'image_300
'): 
 399             thumbnail_url = media.get(image_type) 
 402                     'url
': compat_urlparse.urljoin(url, thumbnail_url), 
 405         subtitles = self._extract_subtitles(url, media.get('subtitlesUrl
')) 
 410             'description
': strip_or_none(media.get('desc
')), 
 411             'thumbnails
': thumbnails, 
 412             'uploader
': media.get('author
'), 
 413             'upload_date
': unified_strdate(media.get('date
')), 
 414             'duration
': parse_duration(media.get('length
')), 
 415             'subtitles
': subtitles, 
 418         info.update(relinker_info) 
 422     def _real_extract(self, url): 
 423         video_id = self._match_id(url) 
 425         webpage = self._download_webpage(url, video_id) 
 427         content_item_id = None 
 429         content_item_url = self._html_search_meta( 
 430             ('og
:url
', 'og
:video
', 'og
:video
:secure_url
', 'twitter
:url
', 
 431              'twitter
:player
', 'jsonlink
'), webpage, default=None) 
 433             content_item_id = self._search_regex( 
 434                 r'ContentItem
-(%s)' % self._UUID_RE, content_item_url, 
 435                 'content item 
id', default=None) 
 437         if not content_item_id: 
 438             content_item_id = self._search_regex( 
 441                         (?:initEdizione|drawMediaRaiTV)\(| 
 442                         <(?:[^>]+\bdata-id|var\s+uniquename)= 
 445                     (?:(?!\1).)*\bContentItem-(?P<id>%s) 
 447                 webpage, 'content item 
id', default=None, group='id') 
 449         content_item_ids = set() 
 451             content_item_ids.add(content_item_id) 
 452         if video_id not in content_item_ids: 
 453             content_item_ids.add(video_id) 
 455         for content_item_id in content_item_ids: 
 457                 return self._extract_from_content_id(content_item_id, url) 
 458             except GeoRestrictedError: 
 460             except ExtractorError: 
 463         relinker_url = self._search_regex( 
 472                     //mediapolis(?:vod)?\.rai\.it/relinker/relinkerServlet\.htm\? 
 473                     (?:(?!\1).)*\bcont=(?:(?!\1).)+)\1 
 475             webpage, 'relinker URL
', group='url
') 
 477         relinker_info = self._extract_relinker_info( 
 478             urljoin(url, relinker_url), video_id) 
 479         self._sort_formats(relinker_info['formats
']) 
 481         title = self._search_regex( 
 482             r'var\s
+videoTitolo\s
*=\s
*([\'"])(?P<title>[^\'"]+)\
1', 
 483             webpage, 'title
', group='title
', 
 484             default=None) or self._og_search_title(webpage) 
 491         info.update(relinker_info)