# coding: utf-8
# Origin: youtube_dl/extractor/cbc.py, as published in the "youtubedl"
# repository on Raphaël G.'s Git Repositories web viewer.
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import js_to_json
class CBCIE(InfoExtractor):
    """Extractor for cbc.ca content pages that embed one or more CBC players.

    The page itself carries no media; extraction resolves the embedded media
    id(s) and hands each one over to ``CBCPlayerIE`` through a
    ``cbcplayer:<media id>`` URL.
    """

    _VALID_URL = r'https?://(?:www\.)?cbc\.ca/(?:[^/]+/)+(?P<id>[^/?#]+)'

    # NOTE(review): this table was rebuilt from a lossy extraction of the
    # file. Only the fields that survived are listed; the expected 'id' and
    # 'ext' values (and the exact nesting) could not be recovered and should
    # be restored from upstream before these tests are relied upon.
    _TESTS = [{
        'url': 'http://www.cbc.ca/22minutes/videos/clips-season-23/don-cherry-play-offs',
        'info_dict': {
            'title': 'Don Cherry – All-Stars',
            'description': 'Don Cherry has a bee in his bonnet about AHL player John Scott because that guy’s got heart.',
            'timestamp': 1454475540,
            'upload_date': '20160203',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.cbc.ca/archives/entry/1978-robin-williams-freestyles-on-90-minutes-live',
        'info_dict': {
            'title': 'Robin Williams freestyles on 90 Minutes Live',
            'description': 'Wacky American comedian Robin Williams shows off his infamous "freestyle" comedic talents while being interviewed on CBC\'s 90 Minutes Live.',
            'upload_date': '19700101',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # Page with several embedded players: extracted as a playlist.
        'url': 'http://www.cbc.ca/natureofthings/blog/birds-eye-view-from-vancouvers-burrard-street-bridge-how-we-got-the-shot',
        'playlist': [{
            'info_dict': {
                'title': 'An Eagle\'s-Eye View Off Burrard Bridge',
                'description': 'Hercules the eagle flies from Vancouver\'s Burrard Bridge down to a nearby park with a mini-camera strapped to his back.',
                'upload_date': '19700101',
            },
        }, {
            'info_dict': {
                'title': 'Fly like an eagle!',
                'description': 'Eagle equipped with a mini camera flies from the world\'s tallest tower',
                'upload_date': '19700101',
            },
        }],
        'params': {
            'skip_download': True,
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle *url*.

        URLs that ``CBCPlayerIE`` accepts are always left to it, because
        this class's much broader ``_VALID_URL`` would match them as well.
        """
        if CBCPlayerIE.suitable(url):
            return False
        return super(CBCIE, cls).suitable(url)

    def _real_extract(self, url):
        """Resolve the media embedded in the page at *url*.

        Returns a single ``url_result`` pointing at ``CBCPlayerIE`` when the
        page initialises a Caffeine player, otherwise a playlist with one
        such result per ``<iframe>`` whose ``src`` carries a ``mediaId``.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # default=None keeps a missing player init from being fatal, so the
        # iframe fallback below stays reachable.
        player_init = self._search_regex(
            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage, 'player init',
            default=None)

        if player_init:
            player_info = self._parse_json(player_init, display_id, js_to_json)
            media_id = player_info.get('mediaId')
            if not media_id:
                # No direct media id: look it up from the clip id through
                # the platform feed. The id is the last path component of
                # the first entry's 'id' field.
                clip_id = player_info['clipId']
                media_id = self._download_json(
                    'http://feed.theplatform.com/f/h9dtGB/punlNGjMlc1F?fields=id&byContent=byReleases%3DbyId%253D' + clip_id,
                    clip_id)['entries'][0]['id'].split('/')[-1]
            # No whitespace around %s: CBCPlayerIE._VALID_URL expects the
            # digits to follow 'cbcplayer:' immediately.
            return self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)

        entries = [
            self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
            for media_id in re.findall(
                r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"', webpage)]
        return self.playlist_result(entries)
class CBCPlayerIE(InfoExtractor):
    """Extractor for a single CBC player media item.

    Accepts the internal ``cbcplayer:<id>`` scheme as well as cbc.ca
    ``/player/play/<id>`` and ``/i/caffeine/syndicate/?mediaId=<id>`` URLs,
    and delegates the actual extraction to the ``ThePlatformFeed`` extractor.
    """

    _VALID_URL = r'(?:cbcplayer:|https?://(?:www\.)?cbc\.ca/(?:player/play/|i/caffeine/syndicate/\?mediaId=))(?P<id>\d+)'

    # NOTE(review): rebuilt from a lossy extraction of the file; the expected
    # 'id' and 'ext' values could not be recovered and should be restored
    # from upstream.
    _TEST = {
        'url': 'http://www.cbc.ca/player/play/2683190193',
        'info_dict': {
            'title': 'Gerry Runs a Sweat Shop',
            'description': 'md5:b457e1c01e8ff408d9d801c1c2cd29b0',
            'timestamp': 1455067800,
            'upload_date': '20160210',
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        """Hand the numeric media id in *url* to the platform feed extractor.

        The id is looked up by GUID in the feed; the returned ``url_result``
        names ``ThePlatformFeed`` as the extractor to use.
        """
        video_id = self._match_id(url)
        # No whitespace around %s: the id must be spliced straight into the
        # byGuid query parameter to form a valid feed URL.
        feed_url = 'http://feed.theplatform.com/f/ExhSPC/vms_5akSXx4Ng_Zn?byGuid=%s' % video_id
        return self.url_result(feed_url, 'ThePlatformFeed', video_id)