2 from __future__ 
import unicode_literals
 
   5 import xml
.etree
.ElementTree 
as etree
 
   9 from .common 
import InfoExtractor
 
  10 from .brightcove 
import BrightcoveNewIE
 
  11 from ..compat 
import ( 
  13     compat_etree_register_namespace
, 
  30 class ITVIE(InfoExtractor
): 
  31     _VALID_URL 
= r
'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' 
  32     _GEO_COUNTRIES 
= ['GB'] 
  34         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 
  38             'title': 'Home Movie', 
  42             'skip_download': True, 
  45         # unavailable via data-playlist-url 
  46         'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033', 
  47         'only_matching': True, 
  50         'url': 'https://www.itv.com/hub/james-martins-saturday-morning/2a5159a0034', 
  51         'only_matching': True, 
  54         'url': 'https://www.itv.com/hub/whos-doing-the-dishes/2a2898a0024', 
  55         'only_matching': True, 
  58     def _real_extract(self
, url
): 
  59         video_id 
= self
._match
_id
(url
) 
  60         webpage 
= self
._download
_webpage
(url
, video_id
) 
  61         params 
= extract_attributes(self
._search
_regex
( 
  62             r
'(?s)(<[^>]+id="video"[^>]*>)', webpage
, 'params')) 
  65             'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/', 
  66             'tem': 'http://tempuri.org/', 
  67             'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types', 
  68             'com': 'http://schemas.itv.com/2009/05/Common', 
  70         for ns
, full_ns 
in ns_map
.items(): 
  71             compat_etree_register_namespace(ns
, full_ns
) 
  74             return xpath_with_ns(name
, ns_map
) 
  76         def _add_sub_element(element
, name
): 
  77             return etree
.SubElement(element
, _add_ns(name
)) 
  80             params
.get('data-video-autoplay-id') or 
  82                 params
.get('data-video-episode-id') or 
  83                 video_id
.replace('a', '/'))) 
  85         req_env 
= etree
.Element(_add_ns('soapenv:Envelope')) 
  86         _add_sub_element(req_env
, 'soapenv:Header') 
  87         body 
= _add_sub_element(req_env
, 'soapenv:Body') 
  88         get_playlist 
= _add_sub_element(body
, ('tem:GetPlaylist')) 
  89         request 
= _add_sub_element(get_playlist
, 'tem:request') 
  90         _add_sub_element(request
, 'itv:ProductionId').text 
= production_id
 
  91         _add_sub_element(request
, 'itv:RequestGuid').text 
= compat_str(uuid
.uuid4()).upper() 
  92         vodcrid 
= _add_sub_element(request
, 'itv:Vodcrid') 
  93         _add_sub_element(vodcrid
, 'com:Id') 
  94         _add_sub_element(request
, 'itv:Partition') 
  95         user_info 
= _add_sub_element(get_playlist
, 'tem:userInfo') 
  96         _add_sub_element(user_info
, 'itv:Broadcaster').text 
= 'Itv' 
  97         _add_sub_element(user_info
, 'itv:DM') 
  98         _add_sub_element(user_info
, 'itv:RevenueScienceValue') 
  99         _add_sub_element(user_info
, 'itv:SessionId') 
 100         _add_sub_element(user_info
, 'itv:SsoToken') 
 101         _add_sub_element(user_info
, 'itv:UserToken') 
 102         site_info 
= _add_sub_element(get_playlist
, 'tem:siteInfo') 
 103         _add_sub_element(site_info
, 'itv:AdvertisingRestriction').text 
= 'None' 
 104         _add_sub_element(site_info
, 'itv:AdvertisingSite').text 
= 'ITV' 
 105         _add_sub_element(site_info
, 'itv:AdvertisingType').text 
= 'Any' 
 106         _add_sub_element(site_info
, 'itv:Area').text 
= 'ITVPLAYER.VIDEO' 
 107         _add_sub_element(site_info
, 'itv:Category') 
 108         _add_sub_element(site_info
, 'itv:Platform').text 
= 'DotCom' 
 109         _add_sub_element(site_info
, 'itv:Site').text 
= 'ItvCom' 
 110         device_info 
= _add_sub_element(get_playlist
, 'tem:deviceInfo') 
 111         _add_sub_element(device_info
, 'itv:ScreenSize').text 
= 'Big' 
 112         player_info 
= _add_sub_element(get_playlist
, 'tem:playerInfo') 
 113         _add_sub_element(player_info
, 'itv:Version').text 
= '2' 
 115         headers 
= self
.geo_verification_headers() 
 117             'Content-Type': 'text/xml; charset=utf-8', 
 118             'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist', 
 121         info 
= self
._search
_json
_ld
(webpage
, video_id
, default
={}) 
 125         def extract_subtitle(sub_url
): 
 126             ext 
= determine_ext(sub_url
, 'ttml') 
 127             subtitles
.setdefault('en', []).append({ 
 129                 'ext': 'ttml' if ext 
== 'xml' else ext
, 
 132         resp_env 
= self
._download
_xml
( 
 133             params
['data-playlist-url'], video_id
, 
 134             headers
=headers
, data
=etree
.tostring(req_env
), fatal
=False) 
 136             playlist 
= xpath_element(resp_env
, './/Playlist') 
 138                 fault_code 
= xpath_text(resp_env
, './/faultcode') 
 139                 fault_string 
= xpath_text(resp_env
, './/faultstring') 
 140                 if fault_code 
== 'InvalidGeoRegion': 
 141                     self
.raise_geo_restricted( 
 142                         msg
=fault_string
, countries
=self
._GEO
_COUNTRIES
) 
 143                 elif fault_code 
not in ( 
 144                         'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'): 
 145                     raise ExtractorError( 
 146                         '%s said: %s' % (self
.IE_NAME
, fault_string
), expected
=True) 
 148                     'title': self
._og
_search
_title
(webpage
), 
 149                     'episode_title': params
.get('data-video-episode'), 
 150                     'series': params
.get('data-video-title'), 
 153                 title 
= xpath_text(playlist
, 'EpisodeTitle', default
=None) 
 156                     'episode_title': title
, 
 157                     'episode_number': int_or_none(xpath_text(playlist
, 'EpisodeNumber')), 
 158                     'series': xpath_text(playlist
, 'ProgrammeTitle'), 
 159                     'duration': parse_duration(xpath_text(playlist
, 'Duration')), 
 161                 video_element 
= xpath_element(playlist
, 'VideoEntries/Video', fatal
=True) 
 162                 media_files 
= xpath_element(video_element
, 'MediaFiles', fatal
=True) 
 163                 rtmp_url 
= media_files
.attrib
['base'] 
 165                 for media_file 
in media_files
.findall('MediaFile'): 
 166                     play_path 
= xpath_text(media_file
, 'URL') 
 169                     tbr 
= int_or_none(media_file
.get('bitrate'), 1000) 
 171                         'format_id': 'rtmp' + ('-%d' % tbr 
if tbr 
else ''), 
 172                         'play_path': play_path
, 
 173                         # Providing this swfVfy allows to avoid truncated downloads 
 174                         'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', 
 179                     app 
= self
._search
_regex
( 
 180                         'rtmpe?://[^/]+/(.+)$', rtmp_url
, 'app', default
=None) 
 183                             'url': rtmp_url
.split('?', 1)[0], 
 190                 for caption_url 
in video_element
.findall('ClosedCaptioningURIs/URL'): 
 192                         extract_subtitle(caption_url
.text
) 
 194         ios_playlist_url 
= params
.get('data-video-playlist') or params
.get('data-video-id') 
 195         hmac 
= params
.get('data-video-hmac') 
 196         if ios_playlist_url 
and hmac 
and re
.match(r
'https?://', ios_playlist_url
): 
 197             headers 
= self
.geo_verification_headers() 
 199                 'Accept': 'application/vnd.itv.vod.playlist.v2+json', 
 200                 'Content-Type': 'application/json', 
 201                 'hmac': hmac
.upper(), 
 203             ios_playlist 
= self
._download
_json
( 
 204                 ios_playlist_url
, video_id
, data
=json
.dumps({ 
 211                         'manufacturer': 'Safari', 
 214                             'name': 'Windows NT', 
 223                     'variantAvailability': { 
 225                             'min': ['hls', 'aes', 'outband-webvtt'], 
 226                             'max': ['hls', 'aes', 'outband-webvtt'] 
 228                         'platformTag': 'dotcom' 
 230                 }).encode(), headers
=headers
, fatal
=False) 
 232                 video_data 
= ios_playlist
.get('Playlist', {}).get('Video', {}) 
 233                 ios_base_url 
= video_data
.get('Base') 
 234                 for media_file 
in video_data
.get('MediaFiles', []): 
 235                     href 
= media_file
.get('Href') 
 239                         href 
= ios_base_url 
+ href
 
 240                     ext 
= determine_ext(href
) 
 242                         formats
.extend(self
._extract
_m
3u8_formats
( 
 243                             href
, video_id
, 'mp4', entry_protocol
='m3u8_native', 
 244                             m3u8_id
='hls', fatal
=False)) 
 249                 subs 
= video_data
.get('Subtitles') 
 250                 if isinstance(subs
, list): 
 252                         if not isinstance(sub
, dict): 
 254                         href 
= url_or_none(sub
.get('Href')) 
 256                             extract_subtitle(href
) 
 257                 if not info
.get('duration'): 
 258                     info
['duration'] = parse_duration(video_data
.get('Duration')) 
 260         self
._sort
_formats
(formats
) 
 265             'subtitles': subtitles
, 
 268         webpage_info 
= self
._search
_json
_ld
(webpage
, video_id
, default
={}) 
 269         if not webpage_info
.get('title'): 
 270             webpage_info
['title'] = self
._html
_search
_regex
( 
 271                 r
'(?s)<h\d+[^>]+\bclass=["\'][^
>]*episode
-title
["\'][^>]*>([^<]+)<', 
 272                 webpage, 'title', default=None) or self._og_search_title( 
 273                 webpage, default=None) or self._html_search_meta( 
 274                 'twitter:title', webpage, 'title', 
 275                 default=None) or webpage_info['episode'] 
 277         return merge_dicts(info, webpage_info) 
 280 class ITVBTCCIE(InfoExtractor): 
 281     _VALID_URL = r'https?://(?:www\.)?itv\.com/btcc/(?:[^/]+/)*(?P<id>[^/?#&]+)' 
 283         'url': 'http://www.itv.com/btcc/races/btcc-2018-all-the-action-from-brands-hatch', 
 285             'id': 'btcc-2018-all-the-action-from-brands-hatch', 
 286             'title': 'BTCC 2018: All the action from Brands Hatch', 
 288         'playlist_mincount': 9, 
 290     BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1582188683001/HkiHLnNRx_default/index.html?videoId=%s' 
 292     def _real_extract(self, url): 
 293         playlist_id = self._match_id(url) 
 295         webpage = self._download_webpage(url, playlist_id) 
 299                 smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % video_id, { 
 300                     # ITV does not like some GB IP ranges, so here are some 
 301                     # IP blocks it accepts 
 303                         '193.113.0.0/16', '54.36.162.0/23', '159.65.16.0/21' 
 307                 ie=BrightcoveNewIE.ie_key(), video_id=video_id) 
 308             for video_id in re.findall(r'data-video-id=["\'](\d
+)', webpage)] 
 310         title = self._og_search_title(webpage, fatal=False) 
 312         return self.playlist_result(entries, playlist_id, title)