2 from __future__ 
import unicode_literals
 
   5 import xml
.etree
.ElementTree 
as etree
 
   9 from .common 
import InfoExtractor
 
  10 from ..compat 
import ( 
  12     compat_etree_register_namespace
, 
  26 class ITVIE(InfoExtractor
): 
  27     _VALID_URL 
= r
'https?://(?:www\.)?itv\.com/hub/[^/]+/(?P<id>[0-9a-zA-Z]+)' 
  28     _GEO_COUNTRIES 
= ['GB'] 
  30         'url': 'http://www.itv.com/hub/mr-bean-animated-series/2a2936a0053', 
  34             'title': 'Home Movie', 
  38             'skip_download': True, 
  41         # unavailable via data-playlist-url 
  42         'url': 'https://www.itv.com/hub/through-the-keyhole/2a2271a0033', 
  43         'only_matching': True, 
  46     def _real_extract(self
, url
): 
  47         video_id 
= self
._match
_id
(url
) 
  48         webpage 
= self
._download
_webpage
(url
, video_id
) 
  49         params 
= extract_attributes(self
._search
_regex
( 
  50             r
'(?s)(<[^>]+id="video"[^>]*>)', webpage
, 'params')) 
  53             'soapenv': 'http://schemas.xmlsoap.org/soap/envelope/', 
  54             'tem': 'http://tempuri.org/', 
  55             'itv': 'http://schemas.datacontract.org/2004/07/Itv.BB.Mercury.Common.Types', 
  56             'com': 'http://schemas.itv.com/2009/05/Common', 
  58         for ns
, full_ns 
in ns_map
.items(): 
  59             compat_etree_register_namespace(ns
, full_ns
) 
  62             return xpath_with_ns(name
, ns_map
) 
  64         def _add_sub_element(element
, name
): 
  65             return etree
.SubElement(element
, _add_ns(name
)) 
  68             params
.get('data-video-autoplay-id') or 
  70                 params
.get('data-video-episode-id') or 
  71                 video_id
.replace('a', '/'))) 
  73         req_env 
= etree
.Element(_add_ns('soapenv:Envelope')) 
  74         _add_sub_element(req_env
, 'soapenv:Header') 
  75         body 
= _add_sub_element(req_env
, 'soapenv:Body') 
  76         get_playlist 
= _add_sub_element(body
, ('tem:GetPlaylist')) 
  77         request 
= _add_sub_element(get_playlist
, 'tem:request') 
  78         _add_sub_element(request
, 'itv:ProductionId').text 
= production_id
 
  79         _add_sub_element(request
, 'itv:RequestGuid').text 
= compat_str(uuid
.uuid4()).upper() 
  80         vodcrid 
= _add_sub_element(request
, 'itv:Vodcrid') 
  81         _add_sub_element(vodcrid
, 'com:Id') 
  82         _add_sub_element(request
, 'itv:Partition') 
  83         user_info 
= _add_sub_element(get_playlist
, 'tem:userInfo') 
  84         _add_sub_element(user_info
, 'itv:Broadcaster').text 
= 'Itv' 
  85         _add_sub_element(user_info
, 'itv:DM') 
  86         _add_sub_element(user_info
, 'itv:RevenueScienceValue') 
  87         _add_sub_element(user_info
, 'itv:SessionId') 
  88         _add_sub_element(user_info
, 'itv:SsoToken') 
  89         _add_sub_element(user_info
, 'itv:UserToken') 
  90         site_info 
= _add_sub_element(get_playlist
, 'tem:siteInfo') 
  91         _add_sub_element(site_info
, 'itv:AdvertisingRestriction').text 
= 'None' 
  92         _add_sub_element(site_info
, 'itv:AdvertisingSite').text 
= 'ITV' 
  93         _add_sub_element(site_info
, 'itv:AdvertisingType').text 
= 'Any' 
  94         _add_sub_element(site_info
, 'itv:Area').text 
= 'ITVPLAYER.VIDEO' 
  95         _add_sub_element(site_info
, 'itv:Category') 
  96         _add_sub_element(site_info
, 'itv:Platform').text 
= 'DotCom' 
  97         _add_sub_element(site_info
, 'itv:Site').text 
= 'ItvCom' 
  98         device_info 
= _add_sub_element(get_playlist
, 'tem:deviceInfo') 
  99         _add_sub_element(device_info
, 'itv:ScreenSize').text 
= 'Big' 
 100         player_info 
= _add_sub_element(get_playlist
, 'tem:playerInfo') 
 101         _add_sub_element(player_info
, 'itv:Version').text 
= '2' 
 103         headers 
= self
.geo_verification_headers() 
 105             'Content-Type': 'text/xml; charset=utf-8', 
 106             'SOAPAction': 'http://tempuri.org/PlaylistService/GetPlaylist', 
 109         info 
= self
._search
_json
_ld
(webpage
, video_id
, default
={}) 
 113         def extract_subtitle(sub_url
): 
 114             ext 
= determine_ext(sub_url
, 'ttml') 
 115             subtitles
.setdefault('en', []).append({ 
 117                 'ext': 'ttml' if ext 
== 'xml' else ext
, 
 120         resp_env 
= self
._download
_xml
( 
 121             params
['data-playlist-url'], video_id
, 
 122             headers
=headers
, data
=etree
.tostring(req_env
)) 
 123         playlist 
= xpath_element(resp_env
, './/Playlist') 
 125             fault_code 
= xpath_text(resp_env
, './/faultcode') 
 126             fault_string 
= xpath_text(resp_env
, './/faultstring') 
 127             if fault_code 
== 'InvalidGeoRegion': 
 128                 self
.raise_geo_restricted( 
 129                     msg
=fault_string
, countries
=self
._GEO
_COUNTRIES
) 
 130             elif fault_code 
!= 'InvalidEntity': 
 131                 raise ExtractorError( 
 132                     '%s said: %s' % (self
.IE_NAME
, fault_string
), expected
=True) 
 134                 'title': self
._og
_search
_title
(webpage
), 
 135                 'episode_title': params
.get('data-video-episode'), 
 136                 'series': params
.get('data-video-title'), 
 139             title 
= xpath_text(playlist
, 'EpisodeTitle', default
=None) 
 142                 'episode_title': title
, 
 143                 'episode_number': int_or_none(xpath_text(playlist
, 'EpisodeNumber')), 
 144                 'series': xpath_text(playlist
, 'ProgrammeTitle'), 
 145                 'duration': parse_duration(xpath_text(playlist
, 'Duration')), 
 147             video_element 
= xpath_element(playlist
, 'VideoEntries/Video', fatal
=True) 
 148             media_files 
= xpath_element(video_element
, 'MediaFiles', fatal
=True) 
 149             rtmp_url 
= media_files
.attrib
['base'] 
 151             for media_file 
in media_files
.findall('MediaFile'): 
 152                 play_path 
= xpath_text(media_file
, 'URL') 
 155                 tbr 
= int_or_none(media_file
.get('bitrate'), 1000) 
 157                     'format_id': 'rtmp' + ('-%d' % tbr 
if tbr 
else ''), 
 158                     'play_path': play_path
, 
 159                     # Providing this swfVfy allows to avoid truncated downloads 
 160                     'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf', 
 165                 app 
= self
._search
_regex
( 
 166                     'rtmpe?://[^/]+/(.+)$', rtmp_url
, 'app', default
=None) 
 169                         'url': rtmp_url
.split('?', 1)[0], 
 176             for caption_url 
in video_element
.findall('ClosedCaptioningURIs/URL'): 
 178                     extract_subtitle(caption_url
.text
) 
 180         ios_playlist_url 
= params
.get('data-video-playlist') or params
.get('data-video-id') 
 181         hmac 
= params
.get('data-video-hmac') 
 182         if ios_playlist_url 
and hmac 
and re
.match(r
'https?://', ios_playlist_url
): 
 183             headers 
= self
.geo_verification_headers() 
 185                 'Accept': 'application/vnd.itv.vod.playlist.v2+json', 
 186                 'Content-Type': 'application/json', 
 187                 'hmac': hmac
.upper(), 
 189             ios_playlist 
= self
._download
_json
( 
 190                 ios_playlist_url
, video_id
, data
=json
.dumps({ 
 197                         'manufacturer': 'Safari', 
 200                             'name': 'Windows NT', 
 209                     'variantAvailability': { 
 211                             'min': ['hls', 'aes', 'outband-webvtt'], 
 212                             'max': ['hls', 'aes', 'outband-webvtt'] 
 214                         'platformTag': 'dotcom' 
 216                 }).encode(), headers
=headers
, fatal
=False) 
 218                 video_data 
= ios_playlist
.get('Playlist', {}).get('Video', {}) 
 219                 ios_base_url 
= video_data
.get('Base') 
 220                 for media_file 
in video_data
.get('MediaFiles', []): 
 221                     href 
= media_file
.get('Href') 
 225                         href 
= ios_base_url 
+ href
 
 226                     ext 
= determine_ext(href
) 
 228                         formats
.extend(self
._extract
_m
3u8_formats
( 
 229                             href
, video_id
, 'mp4', entry_protocol
='m3u8_native', 
 230                             m3u8_id
='hls', fatal
=False)) 
 235                 subs 
= video_data
.get('Subtitles') 
 236                 if isinstance(subs
, list): 
 238                         if not isinstance(sub
, dict): 
 240                         href 
= sub
.get('Href') 
 241                         if isinstance(href
, compat_str
): 
 242                             extract_subtitle(href
) 
 243                 if not info
.get('duration'): 
 244                     info
['duration'] = parse_duration(video_data
.get('Duration')) 
 246         self
._sort
_formats
(formats
) 
 251             'subtitles': subtitles
,