2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  30 class CBCIE(InfoExtractor
): 
  32     _VALID_URL 
= r
'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)' 
  35         'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs', 
  36         'md5': '97e24d09672fc4cf56256d6faa6c25bc', 
  40             'title': 'Don Cherry – All-Stars', 
  41             'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.', 
  42             'timestamp': 1454463000, 
  43             'upload_date': '20160203', 
  44             'uploader': 'CBCC-NEW', 
  46         'skip': 'Geo-restricted to Canada', 
  48         # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com 
  49         'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4', 
  50         'md5': '162adfa070274b144f4fdc3c3b8207db', 
  54             'title': '22 Minutes Update: What Not To Wear Quebec', 
  55             'description': "This week's latest Canadian top political story is What Not To Wear Quebec.", 
  56             'upload_date': '20131025', 
  57             'uploader': 'CBCC-NEW', 
  58             'timestamp': 1382717907, 
  61         # with clipId, feed only available via tpfeed.cbc.ca 
  62         'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live', 
  63         'md5': '0274a90b51a9b4971fe005c63f592f12', 
  67             'title': 'Robin Williams freestyles on 90 Minutes Live', 
  68             'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.', 
  69             'upload_date': '19780210', 
  70             'uploader': 'CBCC-NEW', 
  71             'timestamp': 255977160, 
  75         'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot', 
  77             'md5': '377572d0b49c4ce0c9ad77470e0b96b4', 
  81                 'title': 'An Eagle\'s-Eye View Off Burrard Bridge', 
  82                 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.', 
  83                 'upload_date': '20160201', 
  84                 'timestamp': 1454342820, 
  85                 'uploader': 'CBCC-NEW', 
  88             'md5': '415a0e3f586113894174dfb31aa5bb1a', 
  92                 'title': 'Fly like an eagle!', 
  93                 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower', 
  94                 'upload_date': '20150315', 
  95                 'timestamp': 1426443984, 
  96                 'uploader': 'CBCC-NEW', 
  99         'skip': 'Geo-restricted to Canada', 
 101         # multiple CBC.APP.Caffeine.initInstance(...) 
 102         'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238', 
 104             'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks', 
 105             'id': 'dog-indoor-exercise-winter-1.3928238', 
 106             'description': 'md5:c18552e41726ee95bd75210d1ca9194c', 
 108         'playlist_mincount': 6, 
 112     def suitable(cls
, url
): 
 113         return False if CBCPlayerIE
.suitable(url
) else super(CBCIE
, cls
).suitable(url
) 
 115     def _extract_player_init(self
, player_init
, display_id
): 
 116         player_info 
= self
._parse
_json
(player_init
, display_id
, js_to_json
) 
 117         media_id 
= player_info
.get('mediaId') 
 119             clip_id 
= player_info
['clipId'] 
 120             feed 
= self
._download
_json
( 
 121                 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id
, 
 122                 clip_id
, fatal
=False) 
 124                 media_id 
= try_get(feed
, lambda x
: x
['entries'][0]['guid'], compat_str
) 
 126                 media_id 
= self
._download
_json
( 
 127                     'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id
, 
 128                     clip_id
)['entries'][0]['id'].split('/')[-1] 
 129         return self
.url_result('cbcplayer:%s' % media_id
, 'CBCPlayer', media_id
) 
 131     def _real_extract(self
, url
): 
 132         display_id 
= self
._match
_id
(url
) 
 133         webpage 
= self
._download
_webpage
(url
, display_id
) 
 134         title 
= self
._og
_search
_title
(webpage
, default
=None) or self
._html
_search
_meta
( 
 135             'twitter:title', webpage
, 'title', default
=None) or self
._html
_search
_regex
( 
 136                 r
'<title>([^<]+)</title>', webpage
, 'title', fatal
=False) 
 138             self
._extract
_player
_init
(player_init
, display_id
) 
 139             for player_init 
in re
.findall(r
'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage
)] 
 142                 r
'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', 
 143                 r
'<div[^>]+\bid=["\']player
-(\d
+)', 
 144                 r'guid
["\']\s*:\s*["\'](\d
+)'): 
 145             media_ids.extend(re.findall(media_id_re, webpage)) 
 147             self.url_result('cbcplayer
:%s' % media_id, 'CBCPlayer
', media_id) 
 148             for media_id in orderedSet(media_ids)]) 
 149         return self.playlist_result( 
 150             entries, display_id, strip_or_none(title), 
 151             self._og_search_description(webpage)) 
 154 class CBCPlayerIE(InfoExtractor): 
 155     IE_NAME = 'cbc
.ca
:player
' 
 156     _VALID_URL = r'(?
:cbcplayer
:|https?
://(?
:www\
.)?cbc\
.ca
/(?
:player
/play
/|i
/caffeine
/syndicate
/\?mediaId
=))(?P
<id>\d
+)' 
 158         'url
': 'http
://www
.cbc
.ca
/player
/play
/2683190193', 
 159         'md5
': '64d25f841ddf4ddb28a235338af32e2c
', 
 163             'title
': 'Gerry Runs a Sweat Shop
', 
 164             'description
': 'md5
:b457e1c01e8ff408d9d801c1c2cd29b0
', 
 165             'timestamp
': 1455071400, 
 166             'upload_date
': '20160210', 
 167             'uploader
': 'CBCC
-NEW
', 
 169         'skip
': 'Geo
-restricted to Canada
', 
 171         # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/ 
 172         'url
': 'http
://www
.cbc
.ca
/player
/play
/2657631896', 
 173         'md5
': 'e5e708c34ae6fca156aafe17c43e8b75
', 
 177             'title
': 'CBC Montreal 
is organizing its first ever community hackathon
!', 
 178             'description
': 'The modern technology we tend to depend on so heavily
, is never without it
\'s share of hiccups 
and headaches
. Next weekend 
- CBC Montreal will be getting members of the public 
for its first Hackathon
.', 
 179             'timestamp
': 1425704400, 
 180             'upload_date
': '20150307', 
 181             'uploader
': 'CBCC
-NEW
', 
 184         'url
': 'http
://www
.cbc
.ca
/player
/play
/2164402062', 
 185         'md5
': '33fcd8f6719b9dd60a5e73adcb83b9f6
', 
 189             'title
': 'Cancer survivor four times over
', 
 190             'description
': 'Tim Mayer has beaten three different forms of cancer four times 
in five years
.', 
 191             'timestamp
': 1320410746, 
 192             'upload_date
': '20111104', 
 193             'uploader
': 'CBCC
-NEW
', 
 197     def _real_extract(self, url): 
 198         video_id = self._match_id(url) 
 200             '_type
': 'url_transparent
', 
 201             'ie_key
': 'ThePlatform
', 
 203                 'http
://link
.theplatform
.com
/s
/ExhSPC
/media
/guid
/2655402169/%s?mbr
=true
&formats
=MPEG4
,FLV
,MP3
' % video_id, { 
 204                     'force_smil_url
': True 
 210 class CBCWatchBaseIE(InfoExtractor): 
 213     _API_BASE_URL = 'https
://api
-cbc
.cloud
.clearleap
.com
/cloffice
/client
/' 
 215         'media
': 'http
://search
.yahoo
.com
/mrss
/', 
 216         'clearleap
': 'http
://www
.clearleap
.com
/namespace
/clearleap
/1.0/', 
 218     _GEO_COUNTRIES = ['CA
'] 
 220     def _call_api(self, path, video_id): 
 221         url = path if path.startswith('http
') else self._API_BASE_URL + path 
 224                 result = self._download_xml(url, video_id, headers={ 
 225                     'X
-Clearleap
-DeviceId
': self._device_id, 
 226                     'X
-Clearleap
-DeviceToken
': self._device_token, 
 228             except ExtractorError as e: 
 229                 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401: 
 230                     # Device token has expired, re-acquiring device token 
 231                     self._register_device() 
 234         error_message = xpath_text(result, 'userMessage
') or xpath_text(result, 'systemMessage
') 
 236             raise ExtractorError('%s said
: %s' % (self.IE_NAME, error_message)) 
 239     def _real_initialize(self): 
 240         if self._valid_device_token(): 
 242         device = self._downloader.cache.load('cbcwatch
', 'device
') or {} 
 243         self._device_id, self._device_token = device.get('id'), device.get('token
') 
 244         if self._valid_device_token(): 
 246         self._register_device() 
 248     def _valid_device_token(self): 
 249         return self._device_id and self._device_token 
 251     def _register_device(self): 
 252         self._device_id = self._device_token = None 
 253         result = self._download_xml( 
 254             self._API_BASE_URL + 'device
/register
', 
 255             None, 'Acquiring device token
', 
 256             data=b'<device
><type>web
</type></device
>') 
 257         self._device_id = xpath_text(result, 'deviceId
', fatal=True) 
 258         self._device_token = xpath_text(result, 'deviceToken
', fatal=True) 
 259         self._downloader.cache.store( 
 260             'cbcwatch
', 'device
', { 
 261                 'id': self._device_id, 
 262                 'token
': self._device_token, 
 265     def _parse_rss_feed(self, rss): 
 266         channel = xpath_element(rss, 'channel
', fatal=True) 
 269             return xpath_with_ns(path, self._NS_MAP) 
 272         for item in channel.findall('item
'): 
 273             guid = xpath_text(item, 'guid
', fatal=True) 
 274             title = xpath_text(item, 'title
', fatal=True) 
 276             media_group = xpath_element(item, _add_ns('media
:group
'), fatal=True) 
 277             content = xpath_element(media_group, _add_ns('media
:content
'), fatal=True) 
 278             content_url = content.attrib['url
'] 
 281             for thumbnail in media_group.findall(_add_ns('media
:thumbnail
')): 
 282                 thumbnail_url = thumbnail.get('url
') 
 283                 if not thumbnail_url: 
 286                     'id': thumbnail.get('profile
'), 
 287                     'url
': thumbnail_url, 
 288                     'width
': int_or_none(thumbnail.get('width
')), 
 289                     'height
': int_or_none(thumbnail.get('height
')), 
 293             release_date = find_xpath_attr( 
 294                 item, _add_ns('media
:credit
'), 'role
', 'releaseDate
') 
 295             if release_date is not None: 
 296                 timestamp = parse_iso8601(release_date.text) 
 299                 '_type
': 'url_transparent
', 
 303                 'description
': xpath_text(item, 'description
'), 
 304                 'timestamp
': timestamp, 
 305                 'duration
': int_or_none(content.get('duration
')), 
 306                 'age_limit
': parse_age_limit(xpath_text(item, _add_ns('media
:rating
'))), 
 307                 'episode
': xpath_text(item, _add_ns('clearleap
:episode
')), 
 308                 'episode_number
': int_or_none(xpath_text(item, _add_ns('clearleap
:episodeInSeason
'))), 
 309                 'series
': xpath_text(item, _add_ns('clearleap
:series
')), 
 310                 'season_number
': int_or_none(xpath_text(item, _add_ns('clearleap
:season
'))), 
 311                 'thumbnails
': thumbnails, 
 312                 'ie_key
': 'CBCWatchVideo
', 
 315         return self.playlist_result( 
 316             entries, xpath_text(channel, 'guid
'), 
 317             xpath_text(channel, 'title
'), 
 318             xpath_text(channel, 'description
')) 
 321 class CBCWatchVideoIE(CBCWatchBaseIE): 
 322     IE_NAME = 'cbc
.ca
:watch
:video
' 
 323     _VALID_URL = r'https?
://api
-cbc\
.cloud\
.clearleap\
.com
/cloffice
/client
/web
/play
/?
\?.*?
\bcontentId
=(?P
<id>[\da
-f
]{8}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{12}
)' 
 325         # geo-restricted to Canada, bypassable 
 326         'url
': 'https
://api
-cbc
.cloud
.clearleap
.com
/cloffice
/client
/web
/play
/?contentId
=3c84472a
-1eea
-4dee
-9267-2655d5055dcf
&categoryId
=ebc258f5
-ee40
-4cca
-b66b
-ba6bd55b7235
', 
 327         'only_matching
': True, 
 330     def _real_extract(self, url): 
 331         video_id = self._match_id(url) 
 332         result = self._call_api(url, video_id) 
 334         m3u8_url = xpath_text(result, 'url
', fatal=True) 
 335         formats = self._extract_m3u8_formats(re.sub(r'/([^
/]+)/[^
/?
]+\
.m3u8
', r'/\
1/\
1.m3u8
', m3u8_url), video_id, 'mp4
', fatal=False) 
 337             formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4
') 
 339             format_id = f.get('format_id
') 
 340             if format_id.startswith('AAC
'): 
 342             elif format_id.startswith('AC3
'): 
 344         self._sort_formats(formats) 
 352         rss = xpath_element(result, 'rss
') 
 354             info.update(self._parse_rss_feed(rss)['entries
'][0]) 
 361 class CBCWatchIE(CBCWatchBaseIE): 
 362     IE_NAME = 'cbc
.ca
:watch
' 
 363     _VALID_URL = r'https?
://watch\
.cbc\
.ca
/(?
:[^
/]+/)+(?P
<id>[0-9a
-f
-]+)' 
 365         # geo-restricted to Canada, bypassable 
 366         'url
': 'http
://watch
.cbc
.ca
/doc
-zone
/season
-6/customer
-disservice
/38e815a
-009e3ab12e4
', 
 368             'id': '9673749a
-5e77
-484c
-8b62
-a1092a6b5168
', 
 370             'title
': 'Customer (Dis
)Service
', 
 371             'description
': 'md5
:8bdd6913a0fe03d4b2a17ebe169c7c87
', 
 372             'upload_date
': '20160219', 
 373             'timestamp
': 1455840000, 
 377             'skip_download
': True, 
 378             'format
': 'bestvideo
', 
 381         # geo-restricted to Canada, bypassable 
 382         'url
': 'http
://watch
.cbc
.ca
/arthur
/all
/1ed4b385
-cd84
-49cf
-95f0
-80f004680057
', 
 384             'id': '1ed4b385
-cd84
-49cf
-95f0
-80f004680057
', 
 386             'description
': 'Arthur
, the sweetest 
8-year
-old aardvark
, and his pals solve all kinds of problems 
with humour
, kindness 
and teamwork
.', 
 388         'playlist_mincount
': 30, 
 391     def _real_extract(self, url): 
 392         video_id = self._match_id(url) 
 393         rss = self._call_api('web
/browse
/' + video_id, video_id) 
 394         return self._parse_rss_feed(rss) 
 397 class CBCOlympicsIE(InfoExtractor): 
 398     IE_NAME = 'cbc
.ca
:olympics
' 
 399     _VALID_URL = r'https?
://olympics\
.cbc\
.ca
/video
/[^
/]+/(?P
<id>[^
/?
#]+)' 
 401         'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/', 
 402         'only_matching': True, 
 405     def _real_extract(self
, url
): 
 406         display_id 
= self
._match
_id
(url
) 
 407         webpage 
= self
._download
_webpage
(url
, display_id
) 
 408         video_id 
= self
._hidden
_inputs
(webpage
)['videoId'] 
 409         video_doc 
= self
._download
_xml
( 
 410             'https://olympics.cbc.ca/videodata/%s.xml' % video_id
, video_id
) 
 411         title 
= xpath_text(video_doc
, 'title', fatal
=True) 
 412         is_live 
= xpath_text(video_doc
, 'kind') == 'Live' 
 414             title 
= self
._live
_title
(title
) 
 417         for video_source 
in video_doc
.findall('videoSources/videoSource'): 
 418             uri 
= xpath_text(video_source
, 'uri') 
 421             tokenize 
= self
._download
_json
( 
 422                 'https://olympics.cbc.ca/api/api-akamai/tokenize', 
 423                 video_id
, data
=json
.dumps({ 
 425                 }).encode(), headers
={ 
 426                     'Content-Type': 'application/json', 
 428                     # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js 
 429                     'Cookie': '_dvp=TK:C0ObxjerU',  # AKAMAI CDN cookie 
 433             content_url 
= tokenize
['ContentUrl'] 
 434             video_source_format 
= video_source
.get('format') 
 435             if video_source_format 
== 'IIS': 
 436                 formats
.extend(self
._extract
_ism
_formats
( 
 437                     content_url
, video_id
, ism_id
=video_source_format
, fatal
=False)) 
 439                 formats
.extend(self
._extract
_m
3u8_formats
( 
 440                     content_url
, video_id
, 'mp4', 
 441                     'm3u8' if is_live 
else 'm3u8_native', 
 442                     m3u8_id
=video_source_format
, fatal
=False)) 
 443         self
._sort
_formats
(formats
) 
 447             'display_id': display_id
, 
 449             'description': xpath_text(video_doc
, 'description'), 
 450             'thumbnail': xpath_text(video_doc
, 'thumbnailUrl'), 
 451             'duration': parse_duration(xpath_text(video_doc
, 'duration')),