2 from __future__
import unicode_literals
7 from xml
.sax
.saxutils
import escape
9 from .common
import InfoExtractor
10 from ..compat
import (
32 class CBCIE(InfoExtractor
):
34 _VALID_URL
= r
'https?://(?:www\.)?cbc\.ca/(?!player/)(?:[^/]+/)+(?P<id>[^/?#]+)'
37 'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
38 'md5': '97e24d09672fc4cf56256d6faa6c25bc',
42 'title': 'Don Cherry – All-Stars',
43 'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
44 'timestamp': 1454463000,
45 'upload_date': '20160203',
46 'uploader': 'CBCC-NEW',
48 'skip': 'Geo-restricted to Canada',
50 # with clipId, feed available via tpfeed.cbc.ca and feed.theplatform.com
51 'url': 'http://www.cbc.ca/22minutes/videos/22-minutes-update/22-minutes-update-episode-4',
52 'md5': '162adfa070274b144f4fdc3c3b8207db',
56 'title': '22 Minutes Update: What Not To Wear Quebec',
57 'description': "This week's latest Canadian top political story is What Not To Wear Quebec.",
58 'upload_date': '20131025',
59 'uploader': 'CBCC-NEW',
60 'timestamp': 1382717907,
63 # with clipId, feed only available via tpfeed.cbc.ca
64 'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
65 'md5': '0274a90b51a9b4971fe005c63f592f12',
69 'title': 'Robin Williams freestyles on 90 Minutes Live',
70 'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
71 'upload_date': '19780210',
72 'uploader': 'CBCC-NEW',
73 'timestamp': 255977160,
77 'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
79 'md5': '377572d0b49c4ce0c9ad77470e0b96b4',
83 'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
84 'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
85 'upload_date': '20160201',
86 'timestamp': 1454342820,
87 'uploader': 'CBCC-NEW',
90 'md5': '415a0e3f586113894174dfb31aa5bb1a',
94 'title': 'Fly like an eagle!',
95 'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
96 'upload_date': '20150315',
97 'timestamp': 1426443984,
98 'uploader': 'CBCC-NEW',
101 'skip': 'Geo-restricted to Canada',
103 # multiple CBC.APP.Caffeine.initInstance(...)
104 'url': 'http://www.cbc.ca/news/canada/calgary/dog-indoor-exercise-winter-1.3928238',
106 'title': 'Keep Rover active during the deep freeze with doggie pushups and other fun indoor tasks',
107 'id': 'dog-indoor-exercise-winter-1.3928238',
108 'description': 'md5:c18552e41726ee95bd75210d1ca9194c',
110 'playlist_mincount': 6,
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    URLs accepted by ``CBCPlayerIE`` are always rejected here; every other
    URL is judged by the inherited ``suitable`` implementation.
    """
    if CBCPlayerIE.suitable(url):
        return False
    return super(CBCIE, cls).suitable(url)
117 def _extract_player_init(self
, player_init
, display_id
):
118 player_info
= self
._parse
_json
(player_init
, display_id
, js_to_json
)
119 media_id
= player_info
.get('mediaId')
121 clip_id
= player_info
['clipId']
122 feed
= self
._download
_json
(
123 'http://tpfeed.cbc.ca/f/ExhSPC/vms_5akSXx4Ng_Zn?byCustomValue={:mpsReleases}{%s}' % clip_id
,
124 clip_id
, fatal
=False)
126 media_id
= try_get(feed
, lambda x
: x
['entries'][0]['guid'], compat_str
)
128 media_id
= self
._download
_json
(
129 'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id
,
130 clip_id
)['entries'][0]['id'].split('/')[-1]
131 return self
.url_result('cbcplayer:%s' % media_id
, 'CBCPlayer', media_id
)
133 def _real_extract(self
, url
):
134 display_id
= self
._match
_id
(url
)
135 webpage
= self
._download
_webpage
(url
, display_id
)
136 title
= self
._og
_search
_title
(webpage
, default
=None) or self
._html
_search
_meta
(
137 'twitter:title', webpage
, 'title', default
=None) or self
._html
_search
_regex
(
138 r
'<title>([^<]+)</title>', webpage
, 'title', fatal
=False)
140 self
._extract
_player
_init
(player_init
, display_id
)
141 for player_init
in re
.findall(r
'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage
)]
144 r
'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
145 r
'<div[^>]+\bid=["\']player
-(\d
+)',
146 r'guid
["\']\s*:\s*["\'](\d
+)'):
147 media_ids.extend(re.findall(media_id_re, webpage))
149 self.url_result('cbcplayer
:%s' % media_id, 'CBCPlayer
', media_id)
150 for media_id in orderedSet(media_ids)])
151 return self.playlist_result(
152 entries, display_id, strip_or_none(title),
153 self._og_search_description(webpage))
156 class CBCPlayerIE(InfoExtractor):
157 IE_NAME = 'cbc
.ca
:player
'
158 _VALID_URL = r'(?
:cbcplayer
:|https?
://(?
:www\
.)?cbc\
.ca
/(?
:player
/play
/|i
/caffeine
/syndicate
/\?mediaId
=))(?P
<id>\d
+)'
160 'url
': 'http
://www
.cbc
.ca
/player
/play
/2683190193',
161 'md5
': '64d25f841ddf4ddb28a235338af32e2c
',
165 'title
': 'Gerry Runs a Sweat Shop
',
166 'description
': 'md5
:b457e1c01e8ff408d9d801c1c2cd29b0
',
167 'timestamp
': 1455071400,
168 'upload_date
': '20160210',
169 'uploader
': 'CBCC
-NEW
',
171 'skip
': 'Geo
-restricted to Canada
',
173 # Redirected from http://www.cbc.ca/player/AudioMobile/All%20in%20a%20Weekend%20Montreal/ID/2657632011/
174 'url
': 'http
://www
.cbc
.ca
/player
/play
/2657631896',
175 'md5
': 'e5e708c34ae6fca156aafe17c43e8b75
',
179 'title
': 'CBC Montreal
is organizing its first ever community hackathon
!',
180 'description
': 'The modern technology we tend to depend on so heavily
, is never without it
\'s share of hiccups
and headaches
. Next weekend
- CBC Montreal will be getting members of the public
for its first Hackathon
.',
181 'timestamp
': 1425704400,
182 'upload_date
': '20150307',
183 'uploader
': 'CBCC
-NEW
',
186 'url
': 'http
://www
.cbc
.ca
/player
/play
/2164402062',
187 'md5
': '33fcd8f6719b9dd60a5e73adcb83b9f6
',
191 'title
': 'Cancer survivor four times over
',
192 'description
': 'Tim Mayer has beaten three different forms of cancer four times
in five years
.',
193 'timestamp
': 1320410746,
194 'upload_date
': '20111104',
195 'uploader
': 'CBCC
-NEW
',
199 def _real_extract(self, url):
200 video_id = self._match_id(url)
202 '_type
': 'url_transparent
',
203 'ie_key
': 'ThePlatform
',
205 'http
://link
.theplatform
.com
/s
/ExhSPC
/media
/guid
/2655402169/%s?mbr
=true
&formats
=MPEG4
,FLV
,MP3
' % video_id, {
206 'force_smil_url
': True
212 class CBCWatchBaseIE(InfoExtractor):
215 _API_BASE_URL = 'https
://api
-cbc
.cloud
.clearleap
.com
/cloffice
/client
/'
217 'media
': 'http
://search
.yahoo
.com
/mrss
/',
218 'clearleap
': 'http
://www
.clearleap
.com
/namespace
/clearleap
/1.0/',
220 _GEO_COUNTRIES = ['CA
']
221 _LOGIN_URL = 'https
://api
.loginradius
.com
/identity
/v2
/auth
/login
'
222 _TOKEN_URL = 'https
://cloud
-api
.loginradius
.com
/sso
/jwt
/api
/token
'
223 _API_KEY = '3f4beddd
-2061-49b0
-ae80
-6f1f2ed65b37
'
224 _NETRC_MACHINE = 'cbcwatch
'
226 def _signature(self, email, password):
229 'password
': password,
231 headers = {'content
-type': 'application
/json
'}
232 query = {'apikey
': self._API_KEY}
233 resp = self._download_json(self._LOGIN_URL, None, data=data, headers=headers, query=query)
234 access_token = resp['access_token
']
238 'access_token
': access_token,
239 'apikey
': self._API_KEY,
242 resp = self._download_json(self._TOKEN_URL, None, headers=headers, query=query)
243 return resp['signature
']
245 def _call_api(self, path, video_id):
246 url = path if path.startswith('http
') else self._API_BASE_URL + path
249 result = self._download_xml(url, video_id, headers={
250 'X
-Clearleap
-DeviceId
': self._device_id,
251 'X
-Clearleap
-DeviceToken
': self._device_token,
253 except ExtractorError as e:
254 if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
255 # Device token has expired, re-acquiring device token
256 self._register_device()
259 error_message = xpath_text(result, 'userMessage
') or xpath_text(result, 'systemMessage
')
261 raise ExtractorError('%s said
: %s' % (self.IE_NAME, error_message))
264 def _real_initialize(self):
265 if self._valid_device_token():
267 device = self._downloader.cache.load(
268 'cbcwatch
', self._cache_device_key()) or {}
269 self._device_id, self._device_token = device.get('id'), device.get('token
')
270 if self._valid_device_token():
272 self._register_device()
274 def _valid_device_token(self):
275 return self._device_id and self._device_token
277 def _cache_device_key(self):
278 email, _ = self._get_login_info()
279 return '%s_device
' % hashlib.sha256(email.encode()).hexdigest() if email else 'device
'
281 def _register_device(self):
282 result = self._download_xml(
283 self._API_BASE_URL + 'device
/register
',
284 None, 'Acquiring device token
',
285 data=b'<device
><type>web
</type></device
>')
286 self._device_id = xpath_text(result, 'deviceId
', fatal=True)
287 email, password = self._get_login_info()
288 if email and password:
289 signature = self._signature(email, password)
290 data = '<login
><token
>{0}
</token
><device
><deviceId
>{1}
</deviceId
><type>web
</type></device
></login
>'.format(
291 escape(signature), escape(self._device_id)).encode()
292 url = self._API_BASE_URL + 'device
/login
'
293 result = self._download_xml(
294 url, None, data=data,
295 headers={'content
-type': 'application
/xml
'})
296 self._device_token = xpath_text(result, 'token
', fatal=True)
298 self._device_token = xpath_text(result, 'deviceToken
', fatal=True)
299 self._downloader.cache.store(
300 'cbcwatch
', self._cache_device_key(), {
301 'id': self._device_id,
302 'token
': self._device_token,
305 def _parse_rss_feed(self, rss):
306 channel = xpath_element(rss, 'channel
', fatal=True)
309 return xpath_with_ns(path, self._NS_MAP)
312 for item in channel.findall('item
'):
313 guid = xpath_text(item, 'guid
', fatal=True)
314 title = xpath_text(item, 'title
', fatal=True)
316 media_group = xpath_element(item, _add_ns('media
:group
'), fatal=True)
317 content = xpath_element(media_group, _add_ns('media
:content
'), fatal=True)
318 content_url = content.attrib['url
']
321 for thumbnail in media_group.findall(_add_ns('media
:thumbnail
')):
322 thumbnail_url = thumbnail.get('url
')
323 if not thumbnail_url:
326 'id': thumbnail.get('profile
'),
327 'url
': thumbnail_url,
328 'width
': int_or_none(thumbnail.get('width
')),
329 'height
': int_or_none(thumbnail.get('height
')),
333 release_date = find_xpath_attr(
334 item, _add_ns('media
:credit
'), 'role
', 'releaseDate
')
335 if release_date is not None:
336 timestamp = parse_iso8601(release_date.text)
339 '_type
': 'url_transparent
',
343 'description
': xpath_text(item, 'description
'),
344 'timestamp
': timestamp,
345 'duration
': int_or_none(content.get('duration
')),
346 'age_limit
': parse_age_limit(xpath_text(item, _add_ns('media
:rating
'))),
347 'episode
': xpath_text(item, _add_ns('clearleap
:episode
')),
348 'episode_number
': int_or_none(xpath_text(item, _add_ns('clearleap
:episodeInSeason
'))),
349 'series
': xpath_text(item, _add_ns('clearleap
:series
')),
350 'season_number
': int_or_none(xpath_text(item, _add_ns('clearleap
:season
'))),
351 'thumbnails
': thumbnails,
352 'ie_key
': 'CBCWatchVideo
',
355 return self.playlist_result(
356 entries, xpath_text(channel, 'guid
'),
357 xpath_text(channel, 'title
'),
358 xpath_text(channel, 'description
'))
361 class CBCWatchVideoIE(CBCWatchBaseIE):
362 IE_NAME = 'cbc
.ca
:watch
:video
'
363 _VALID_URL = r'https?
://api
-cbc\
.cloud\
.clearleap\
.com
/cloffice
/client
/web
/play
/?
\?.*?
\bcontentId
=(?P
<id>[\da
-f
]{8}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{4}
-[\da
-f
]{12}
)'
365 # geo-restricted to Canada, bypassable
366 'url
': 'https
://api
-cbc
.cloud
.clearleap
.com
/cloffice
/client
/web
/play
/?contentId
=3c84472a
-1eea
-4dee
-9267-2655d5055dcf
&categoryId
=ebc258f5
-ee40
-4cca
-b66b
-ba6bd55b7235
',
367 'only_matching
': True,
370 def _real_extract(self, url):
371 video_id = self._match_id(url)
372 result = self._call_api(url, video_id)
374 m3u8_url = xpath_text(result, 'url
', fatal=True)
375 formats = self._extract_m3u8_formats(re.sub(r'/([^
/]+)/[^
/?
]+\
.m3u8
', r'/\
1/\
1.m3u8
', m3u8_url), video_id, 'mp4
', fatal=False)
377 formats = self._extract_m3u8_formats(m3u8_url, video_id, 'mp4
')
379 format_id = f.get('format_id
')
380 if format_id.startswith('AAC
'):
382 elif format_id.startswith('AC3
'):
384 self._sort_formats(formats)
392 rss = xpath_element(result, 'rss
')
394 info.update(self._parse_rss_feed(rss)['entries
'][0])
401 class CBCWatchIE(CBCWatchBaseIE):
402 IE_NAME = 'cbc
.ca
:watch
'
403 _VALID_URL = r'https?
://(?
:gem|watch
)\
.cbc\
.ca
/(?
:[^
/]+/)+(?P
<id>[0-9a
-f
-]+)'
405 # geo-restricted to Canada, bypassable
406 'url
': 'http
://watch
.cbc
.ca
/doc
-zone
/season
-6/customer
-disservice
/38e815a
-009e3ab12e4
',
408 'id': '9673749a
-5e77
-484c
-8b62
-a1092a6b5168
',
410 'title
': 'Customer (Dis
)Service
',
411 'description
': 'md5
:8bdd6913a0fe03d4b2a17ebe169c7c87
',
412 'upload_date
': '20160219',
413 'timestamp
': 1455840000,
417 'skip_download
': True,
418 'format
': 'bestvideo
',
421 # geo-restricted to Canada, bypassable
422 'url
': 'http
://watch
.cbc
.ca
/arthur
/all
/1ed4b385
-cd84
-49cf
-95f0
-80f004680057
',
424 'id': '1ed4b385
-cd84
-49cf
-95f0
-80f004680057
',
426 'description
': 'Arthur
, the sweetest
8-year
-old aardvark
, and his pals solve all kinds of problems
with humour
, kindness
and teamwork
.',
428 'playlist_mincount
': 30,
430 'url
': 'https
://gem
.cbc
.ca
/media
/this
-hour
-has
-22-minutes
/season
-26/episode
-20/38e815a
-0108c6c6a42
',
431 'only_matching
': True,
434 def _real_extract(self, url):
435 video_id = self._match_id(url)
436 rss = self._call_api('web
/browse
/' + video_id, video_id)
437 return self._parse_rss_feed(rss)
440 class CBCOlympicsIE(InfoExtractor):
441 IE_NAME = 'cbc
.ca
:olympics
'
442 _VALID_URL = r'https?
://olympics\
.cbc\
.ca
/video
/[^
/]+/(?P
<id>[^
/?
#]+)'
444 'url': 'https://olympics.cbc.ca/video/whats-on-tv/olympic-morning-featuring-the-opening-ceremony/',
445 'only_matching': True,
448 def _real_extract(self
, url
):
449 display_id
= self
._match
_id
(url
)
450 webpage
= self
._download
_webpage
(url
, display_id
)
451 video_id
= self
._hidden
_inputs
(webpage
)['videoId']
452 video_doc
= self
._download
_xml
(
453 'https://olympics.cbc.ca/videodata/%s.xml' % video_id
, video_id
)
454 title
= xpath_text(video_doc
, 'title', fatal
=True)
455 is_live
= xpath_text(video_doc
, 'kind') == 'Live'
457 title
= self
._live
_title
(title
)
460 for video_source
in video_doc
.findall('videoSources/videoSource'):
461 uri
= xpath_text(video_source
, 'uri')
464 tokenize
= self
._download
_json
(
465 'https://olympics.cbc.ca/api/api-akamai/tokenize',
466 video_id
, data
=json
.dumps({
468 }).encode(), headers
={
469 'Content-Type': 'application/json',
471 # d3.VideoPlayer._init in https://olympics.cbc.ca/components/script/base.js
472 'Cookie': '_dvp=TK:C0ObxjerU', # AKAMAI CDN cookie
476 content_url
= tokenize
['ContentUrl']
477 video_source_format
= video_source
.get('format')
478 if video_source_format
== 'IIS':
479 formats
.extend(self
._extract
_ism
_formats
(
480 content_url
, video_id
, ism_id
=video_source_format
, fatal
=False))
482 formats
.extend(self
._extract
_m
3u8_formats
(
483 content_url
, video_id
, 'mp4',
484 'm3u8' if is_live
else 'm3u8_native',
485 m3u8_id
=video_source_format
, fatal
=False))
486 self
._sort
_formats
(formats
)
490 'display_id': display_id
,
492 'description': xpath_text(video_doc
, 'description'),
493 'thumbnail': xpath_text(video_doc
, 'thumbnailUrl'),
494 'duration': parse_duration(xpath_text(video_doc
, 'duration')),