2 from __future__ 
import unicode_literals
 
   7 from xml
.sax
.saxutils 
import escape
 
   9 from .common 
import InfoExtractor
 
  10 from ..compat 
import ( 
  32 class CBCIE(InfoExtractor
): 
  34     _VALID_URL 
= r
'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)' 
  37         'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', 
  38         'md5': '97e24d09672fc4cf56256d6faa6c25bc', 
  42             'title': 'Don Cherry – All-Stars', 
  43             'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.', 
  44             'timestamp': 1454463000, 
  45             'upload_date': '20160203', 
  46             'uploader': 'CBCC-NEW', 
  48         'skip': 'Geo-restricted to Canada', 
  50         # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com 
  51         'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4', 
  52         'md5': '162adfa070274b144f4fdc3c3b8207db', 
  56             'title': '22 Minutes Update: What Not To Wear Quebec', 
  57             'description': "This week's latest Canadian top political story is What Not To Wear Quebec.", 
  58             'upload_date': '20131025', 
  59             'uploader': 'CBCC-NEW', 
  60             'timestamp': 1382717907, 
  63         # with clipId, feed only available via tpfeed.cbc.ca 
  64         'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', 
  65         'md5': '0274a90b51a9b4971fe005c63f592f12', 
  69             'title': 'Robin Williams freestyles on 90 Minutes Live', 
  70             'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.', 
  71             'upload_date': '19780210', 
  72             'uploader': 'CBCC-NEW', 
  73             'timestamp': 255977160, 
  77         'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot', 
  79             'md5': '377572d0b49c4ce0c9ad77470e0b96b4', 
  83                 'title': 'An Eagle\'s-Eye View Off Burrard Bridge', 
  84                 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.', 
  85                 'upload_date': '20160201', 
  86                 'timestamp': 1454342820, 
  87                 'uploader': 'CBCC-NEW', 
  90             'md5': '415a0e3f586113894174dfb31aa5bb1a', 
  94                 'title': 'Fly like an eagle!', 
  95                 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower', 
  96                 'upload_date': '20150315', 
  97                 'timestamp': 1426443984, 
  98                 'uploader': 'CBCC-NEW', 
 101         'skip': 'Geo-restricted to Canada', 
 103         # multiple CBC.APP.Caffeine.initInstance(...) 
 104         'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', 
 106             'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', 
 107             'id': 'dog-indoor-exercise-winter-1.3928238', 
 108             'description': 'md5:c18552e41726ee95bd75210d1ca9194c', 
 110         'playlist_mincount': 6, 
 114     def suitable(cls
, url
): 
 115         return False if CBCPlayerIE
.suitable(url
) else super(CBCIE
, cls
).suitable(url
) 
 117     def _extract_player_init(self
, player_init
, display_id
): 
 118         player_info 
= self
._parse
_json
(player_init
, display_id
, js_to_json
) 
 119         media_id 
= player_info
.get('mediaId') 
 121             clip_id 
= player_info
['clipId'] 
 122             feed 
= self
._download
_json
( 
 123                 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id
, 
 124                 clip_id
, fatal
=False) 
 126                 media_id 
= try_get(feed
, lambda x
: x
['entries'][0]['guid'], compat_str
) 
 128                 media_id 
= self
._download
_json
( 
 129                     'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id
, 
 130                     clip_id
)['entries'][0]['id'].split('/')[-1] 
 131         return self
.url_result('cbcplayer:%s' % media_id
, 'CBCPlayer', media_id
) 
 133     def _real_extract(self
, url
): 
 134         display_id 
= self
._match
_id
(url
) 
 135         webpage 
= self
._download
_webpage
(url
, display_id
) 
 136         title 
= self
._og
_search
_title
(webpage
, default
=None) or self
._html
_search
_meta
( 
 137             'twitter:title', webpage
, 'title', default
=None) or self
._html
_search
_regex
( 
 138                 r
'<title>([^<]+)</title>', webpage
, 'title', fatal
=False) 
 140             self
._extract
_player
_init
(player_init
, display_id
) 
 141             for player_init 
in re
.findall(r
'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage
)] 
 144                 r
'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', 
 145                 r
'<div[^>]+\bid=["\']player
-(\d
+)', 
 146                 r'guid
["\']\s*:\s*["\'](\d
+)'): 
 147             media_ids.extend(re.findall(media_id_re, webpage)) 
 149             self.url_result('cbcplayer
:%s' % media_id, 'CBCPlayer
', media_id) 
 150             for media_id in orderedSet(media_ids)]) 
 151         return self.playlist_result( 
 152             entries, display_id, strip_or_none(title), 
 153             self._og_search_description(webpage)) 
 156 class CBCPlayerIE(InfoExtractor): 
 157     IE_NAME = 'cbc
.ca
:player
' 
 158     _VALID_URL = r'(?
:cbcplayer
:|https?
://(?
:www\
.)?cbc\
.ca
/(?
:player
/play
/|i
/caffeine
/syndicate
/\?mediaId
=))(?P
<id>\d
+)' 
 160         'url
': 'http
://www
.cbc
.ca
/player
/play
/2683190193', 
 161         'md5
': '64d25f841ddf4ddb28a235338af32e2c
', 
 165             'title
': 'Gerry Runs a Sweat Shop
', 
 166             'description
': 'md5
:b457e1c01e8ff408d9d801c1c2cd29b0
', 
 167             'timestamp
': 1455071400, 
 168             'upload_date
': '20160210', 
 169             'uploader
': 'CBCC
-NEW
', 
 171         'skip
': 'Geo
-restricted to Canada
', 
 173         # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ 
 174         'url
': 'http
://www
.cbc
.ca
/player
/play
/2657631896', 
 175         'md5
': 'e5e708c34ae6fca156aafe17c43e8b75
', 
 179             'title
': 'CBC Montreal 
is organizing its first ever community hackathon
!', 
 180             'description
': 'The modern technology we tend to depend on so heavily
, is never without it
\'s share of hiccups 
and headaches
. Next weekend 
- CBC Montreal will be getting members of the public 
for its first Hackathon
.', 
 181             'timestamp
': 1425704400, 
 182             'upload_date
': '20150307', 
 183             'uploader
': 'CBCC
-NEW
', 
 186         'url
': 'http
://www
.cbc
.ca
/player
/play
/2164402062', 
 187         'md5
': '33fcd8f6719b9dd60a5e73adcb83b9f6
', 
 191             'title
': 'Cancer survivor four times over
', 
 192             'description
': 'Tim Mayer has beaten three different forms of cancer four times 
in five years
.', 
 193             'timestamp
': 1320410746, 
 194             'upload_date
': '20111104', 
 195             'uploader
': 'CBCC
-NEW
', 
 199     def _real_extract(self, url): 
 200         video_id = self._match_id(url) 
 202             '_type
': 'url_transparent
', 
 203             'ie_key
': 'ThePlatform
', 
 205                 'http
://link
.theplatform
.com
/s
/ExhSPC
/media
/guid
/2655402169/%s?mbr
=true
&formats
=MPEG4
,FLV
,MP3
' % video_id, { 
 206                     'force_smil_url
': True 
 212 class CBCWatchBaseIE(InfoExtractor): 
 215     _API_BASE_URL = 'https
://api
-cbc
.cloud
.clearleap
.com
/cloffice
/client
/' 
 217         'media
': 'http
://search
.yahoo
.com
/mrss
/', 
 218         'clearleap
': 'http
://www
.clearleap
.com
/namespace
/clearleap
/1.0/', 
 220     _GEO_COUNTRIES = ['CA
'] 
 221     _LOGIN_URL = 'https
://api
.loginradius
.com
/identity
/v2
/auth
/login
' 
 222     _TOKEN_URL = 'https
://cloud
-api
.loginradius
.com
/sso
/jwt
/api
/token
' 
 223     _API_KEY = '3f4beddd
-2061-49b0
-ae80
-6f1f2ed65b37
' 
 224     _NETRC_MACHINE = 'cbcwatch
' 
 226     def _signature(self, email, password): 
 229             'password
': password, 
 231         headers = {'content
-type': 'application
/json
'} 
 232         query = {'apikey
': self._API_KEY} 
 233         resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query) 
 234         access_token = resp['access_token
'] 
 238             'access_token
': access_token, 
 239             'apikey
': self._API_KEY, 
 242         resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query) 
 243         return resp['signature
'] 
 245     def _call_api(self, path, video_id): 
 246         url = path if path.startswith('http
') else self._API_BASE_URL + path 
 249                 result = self._download_xml(url, video_id, headers={ 
 250                     'X
-Clearleap
-DeviceId
': self._device_id, 
 251                     'X
-Clearleap
-DeviceToken
': self._device_token, 
 253             except ExtractorError as e: 
 254                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: 
 255                     # Device token has expired, re-acquiring device token 
 256                     self._register_device() 
 259         error_message = xpath_text(result, 'userMessage
') or xpath_text(result, 'systemMessage
') 
 261             raise ExtractorError('%s said
: %s' % (self.IE_NAME, error_message)) 
 264     def _real_initialize(self): 
 265         if self._valid_device_token(): 
 267         device = self._downloader.cache.load( 
 268             'cbcwatch
', self._cache_device_key()) or {} 
 269         self._device_id, self._device_token = device.get('id'), device.get('token
') 
 270         if self._valid_device_token(): 
 272         self._register_device() 
 274     def _valid_device_token(self): 
 275         return self._device_id and self._device_token 
 277     def _cache_device_key(self): 
 278         email, _ = self._get_login_info() 
 279         return '%s_device
' % hashlib.sha256(email.encode()).hexdigest() if email else 'device
' 
 281     def _register_device(self): 
 282         result = self._download_xml( 
 283             self._API_BASE_URL + 'device
/register
', 
 284             None, 'Acquiring device token
', 
 285             data=b'<device
><type>web
</type></device
>') 
 286         self._device_id = xpath_text(result, 'deviceId
', fatal=True) 
 287         email, password = self._get_login_info() 
 288         if email and password: 
 289             signature = self._signature(email, password) 
 290             data = '<login
><token
>{0}
</token
><device
><deviceId
>{1}
</deviceId
><type>web
</type></device
></login
>'.format( 
 291                 escape(signature), escape(self._device_id)).encode() 
 292             url = self._API_BASE_URL + 'device
/login
' 
 293             result = self._download_xml( 
 294                 url, None, data=data, 
 295                 headers={'content
-type': 'application
/xml
'}) 
 296             self._device_token = xpath_text(result, 'token
', fatal=True) 
 298             self._device_token = xpath_text(result, 'deviceToken
', fatal=True) 
 299         self._downloader.cache.store( 
 300             'cbcwatch
', self._cache_device_key(), { 
 301                 'id': self._device_id, 
 302                 'token
': self._device_token, 
 305     def _parse_rss_feed(self, rss): 
 306         channel = xpath_element(rss, 'channel
', fatal=True) 
 309             return xpath_with_ns(path, self._NS_MAP) 
 312         for item in channel.findall('item
'): 
 313             guid = xpath_text(item, 'guid
', fatal=True) 
 314             title = xpath_text(item, 'title
', fatal=True) 
 316             media_group = xpath_element(item, _add_ns('media
:group
'), fatal=True) 
 317             content = xpath_element(media_group, _add_ns('media
:content
'), fatal=True) 
 318             content_url = content.attrib['url
'] 
 321             for thumbnail in media_group.findall(_add_ns('media
:thumbnail
')): 
 322                 thumbnail_url = thumbnail.get('url
') 
 323                 if not thumbnail_url: 
 326                     'id': thumbnail.get('profile
'), 
 327                     'url
': thumbnail_url, 
 328                     'width
': int_or_none(thumbnail.get('width
')), 
 329                     'height
': int_or_none(thumbnail.get('height
')), 
 333             release_date = find_xpath_attr( 
 334                 item, _add_ns('media
:credit
'), 'role
', 'releaseDate
') 
 335             if release_date is not None: 
 336                 timestamp = parse_iso8601(release_date.text) 
 339                 '_type
': 'url_transparent
', 
 343                 'description
': xpath_text(item, 'description
'), 
 344                 'timestamp
': timestamp, 
 345                 'duration
': int_or_none(content.get('duration
')), 
 346                 'age_limit
': parse_age_limit(xpath_text(item, _add_ns('media
:rating
'))), 
 347                 'episode
': xpath_text(item, _add_ns('clearleap
:episode
')), 
 348                 'episode_number
': int_or_none(xpath_text(item, _add_ns('clearleap
:episodeInSeason
'))), 
 349                 'series
': xpath_text(item, _add_ns('clearleap
:series
')), 
 350                 'season_number
': int_or_none(xpath_text(item, _add_ns('clearleap
:season
'))), 
 351                 'thumbnails
': thumbnails, 
 352                 'ie_key
': 'CBCWatchVideo
', 
 355         return self.playlist_result( 
 356             entries, xpath_text(channel, 'guid
'), 
 357             xpath_text(channel, 'title
'), 
 358             xpath_text(channel, 'description
')) 
 361 class CBCWatchVideoIE(CBCWatchBaseIE): 
 362     IE_NAME = 'cbc
.ca
:watch
:video
' 
 363     _VALID_URL = r'https?
://api
-cbc\
.cloud\
.clearleap\
.com
/cloffice
/client
/web
/play
/?
\?.*?
\bcontentId
=(?P
<id>[\da
-f
]{8}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{12}
)' 
 365         # geo-restricted to Canada, bypassable 
 366         'url
': 'https
://api
-cbc
.cloud
.clearleap
.com
/cloffice
/client
/web
/play
/?contentId
=3c84472a
-1eea
-4dee
-9267-2655d5055dcf
&categoryId
=ebc258f5
-ee40
-4cca
-b66b
-ba6bd55b7235
', 
 367         'only_matching
': True, 
 370     def _real_extract(self, url): 
 371         video_id = self._match_id(url) 
 372         result = self._call_api(url, video_id) 
 374         m3u8_url = xpath_text(result, 'url
', fatal=True) 
 375         formats = self._extract_m3u8_formats(re.sub(r'/([^
/]+)/[^
/?
]+\
.m3u8
', r'/\
1/\
1.m3u8
', m3u8_url), video_id, 'mp4
', fatal=False) 
 377             formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4
') 
 379             format_id = f.get('format_id
') 
 380             if format_id.startswith('AAC
'): 
 382             elif format_id.startswith('AC3
'): 
 384         self._sort_formats(formats) 
 392         rss = xpath_element(result, 'rss
') 
 394             info.update(self._parse_rss_feed(rss)['entries
'][0]) 
 401 class CBCWatchIE(CBCWatchBaseIE): 
 402     IE_NAME = 'cbc
.ca
:watch
' 
 403     _VALID_URL = r'https?
://(?
:gem|watch
)\
.cbc\
.ca
/(?
:[^
/]+/)+(?P
<id>[0-9a
-f
-]+)' 
 405         # geo-restricted to Canada, bypassable 
 406         'url
': 'http
://watch
.cbc
.ca
/doc
-zone
/season
-6/customer
-disservice
/38e815a
-009e3ab12e4
', 
 408             'id': '9673749a
-5e77
-484c
-8b62
-a1092a6b5168
', 
 410             'title
': 'Customer (Dis
)Service
', 
 411             'description
': 'md5
:8bdd6913a0fe03d4b2a17ebe169c7c87
', 
 412             'upload_date
': '20160219', 
 413             'timestamp
': 1455840000, 
 417             'skip_download
': True, 
 418             'format
': 'bestvideo
', 
 421         # geo-restricted to Canada, bypassable 
 422         'url
': 'http
://watch
.cbc
.ca
/arthur
/all
/1ed4b385
-cd84
-49cf
-95f0
-80f004680057
', 
 424             'id': '1ed4b385
-cd84
-49cf
-95f0
-80f004680057
', 
 426             'description
': 'Arthur
, the sweetest 
8-year
-old aardvark
, and his pals solve all kinds of problems 
with humour
, kindness 
and teamwork
.', 
 428         'playlist_mincount
': 30, 
 430         'url
': 'https
://gem
.cbc
.ca
/media
/this
-hour
-has
-22-minutes
/season
-26/episode
-20/38e815a
-0108c6c6a42
', 
 431         'only_matching
': True, 
 434     def _real_extract(self, url): 
 435         video_id = self._match_id(url) 
 436         rss = self._call_api('web
/browse
/' + video_id, video_id) 
 437         return self._parse_rss_feed(rss) 
 440 class CBCOlympicsIE(InfoExtractor): 
 441     IE_NAME = 'cbc
.ca
:olympics
' 
 442     _VALID_URL = r'https?
://olympics\
.cbc\
.ca
/video
/[^
/]+/(?P
<id>[^
/?
#]+)' 
 444         'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/', 
 445         'only_matching': True, 
 448     def _real_extract(self
, url
): 
 449         display_id 
= self
._match
_id
(url
) 
 450         webpage 
= self
._download
_webpage
(url
, display_id
) 
 451         video_id 
= self
._hidden
_inputs
(webpage
)['videoId'] 
 452         video_doc 
= self
._download
_xml
( 
 453             'https://olympics.cbc.ca/videodata/%s.xml' % video_id
, video_id
) 
 454         title 
= xpath_text(video_doc
, 'title', fatal
=True) 
 455         is_live 
= xpath_text(video_doc
, 'kind') == 'Live' 
 457             title 
= self
._live
_title
(title
) 
 460         for video_source 
in video_doc
.findall('videoSources/videoSource'): 
 461             uri 
= xpath_text(video_source
, 'uri') 
 464             tokenize 
= self
._download
_json
( 
 465                 'https://olympics.cbc.ca/api/api-akamai/tokenize', 
 466                 video_id
, data
=json
.dumps({ 
 468                 }).encode(), headers
={ 
 469                     'Content-Type': 'application/json', 
 471                     # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js 
 472                     'Cookie': '_dvp=TK:C0ObxjerU',  # AKAMAI CDN cookie 
 476             content_url 
= tokenize
['ContentUrl'] 
 477             video_source_format 
= video_source
.get('format') 
 478             if video_source_format 
== 'IIS': 
 479                 formats
.extend(self
._extract
_ism
_formats
( 
 480                     content_url
, video_id
, ism_id
=video_source_format
, fatal
=False)) 
 482                 formats
.extend(self
._extract
_m
3u8_formats
( 
 483                     content_url
, video_id
, 'mp4', 
 484                     'm3u8' if is_live 
else 'm3u8_native', 
 485                     m3u8_id
=video_source_format
, fatal
=False)) 
 486         self
._sort
_formats
(formats
) 
 490             'display_id': display_id
, 
 492             'description': xpath_text(video_doc
, 'description'), 
 493             'thumbnail': xpath_text(video_doc
, 'thumbnailUrl'), 
 494             'duration': parse_duration(xpath_text(video_doc
, 'duration')),