]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/brightcove.py
3 import xml
.etree
.ElementTree
5 from .common
import InfoExtractor
12 class BrightcoveIE(InfoExtractor
):
# Matches any brightcove.com "services" or "viewer" URL and captures the raw
# query string in the named group ``query``.
_VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
# %-template for the federated HTML viewer; filled with a url-encoded query string.
_FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
# %-template for the playlist JSON endpoint; filled with a player key.
_PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
# There is a test for Brightcove in GenericIE; that way we test both the download
# and the detection of videos, and we don't have to find a URL that is always valid
@classmethod
def _build_brighcove_url(cls, object_str):
    """
    Build a Brightcove url from a xml string containing
    <object class="BrightcoveExperience">{params}</object>

    The object's ``id`` attribute is sent as ``flashID`` and the value of
    its ``playerID`` <param> as ``playerID``.  The ``playerKey`` and
    ``@videoPlayer`` <param> values are added only when the object
    declares them.  The parameters are url-encoded and substituted into
    ``_FEDERATED_URL_TEMPLATE``, and the resulting URL is returned.

    Raises AssertionError when the object's ``class`` attribute does not
    contain ``BrightcoveExperience``.
    """
    object_doc = xml.etree.ElementTree.fromstring(object_str)
    assert u'BrightcoveExperience' in object_doc.attrib['class']
    params = {
        'flashID': object_doc.attrib['id'],
        'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
    }
    playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
    # Not all pages define this value
    if playerKey is not None:
        params['playerKey'] = playerKey.attrib['value']
    videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
    if videoPlayer is not None:
        params['@videoPlayer'] = videoPlayer.attrib['value']
    data = compat_urllib_parse.urlencode(params)
    return cls._FEDERATED_URL_TEMPLATE % data
def _real_extract(self, url):
    """Extract a single video or a playlist from a Brightcove URL.

    The query string captured by ``_VALID_URL`` is parsed.  When it
    carries an ``@videoPlayer`` value, that value is used as the video id
    and the single video is extracted; otherwise the ``playerKey`` value
    is used to extract the player's playlist.

    Raises KeyError when the query has neither ``@videoPlayer`` nor
    ``playerKey``.
    """
    mobj = re.match(self._VALID_URL, url)
    query_str = mobj.group('query')
    query = compat_urlparse.parse_qs(query_str)

    # parse_qs maps each name to a list of values; a missing name gives None
    # here, so the playlist branch handles URLs without '@videoPlayer'.
    videoPlayer = query.get('@videoPlayer')
    if videoPlayer:
        return self._get_video_info(videoPlayer[0], query_str)
    else:
        player_key = query['playerKey']
        return self._get_playlist_info(player_key[0])
def _get_video_info(self, video_id, query):
    """Download the federated viewer page and extract its video.

    ``query`` is the raw query string substituted into
    ``_FEDERATED_URL_TEMPLATE``; ``video_id`` is passed to the download
    and progress-reporting helpers.  The page's ``experienceJSON``
    JavaScript object is parsed and its ``mediaDTO`` entry is handed to
    ``_extract_video_info``, whose result is returned.
    """
    request_url = self._FEDERATED_URL_TEMPLATE % query
    webpage = self._download_webpage(request_url, video_id)

    self.report_extraction(video_id)
    # The video metadata is embedded in the page as a JavaScript assignment.
    info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
    info = json.loads(info)['data']
    video_info = info['programmedContent']['videoPlayer']['mediaDTO']

    return self._extract_video_info(video_info)
def _get_playlist_info(self, player_key):
    """Download the playlist JSON for ``player_key`` and build a playlist result.

    The response's ``videoList`` object supplies the playlist id, and its
    ``mediaCollectionDTO`` supplies the display name and the ``videoDTOs``
    entries, each of which is converted with ``_extract_video_info``.
    """
    playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
                                           player_key, u'Downloading playlist information')

    playlist_info = json.loads(playlist_info)['videoList']
    videos = [self._extract_video_info(video_info)
              for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]

    return self.playlist_result(videos, playlist_id=playlist_info['id'],
                                playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
74 def _extract_video_info(self
, video_info
):
75 renditions
= video_info
['renditions']
76 renditions
= sorted(renditions
, key
=lambda r
: r
['size'])
77 best_format
= renditions
[-1]
79 return {'id': video_info
['id'],
80 'title': video_info
['displayName'],
81 'url': best_format
['defaultURL'],
83 'description': video_info
.get('shortDescription'),
84 'thumbnail': video_info
.get('videoStillURL') or video_info
.get('thumbnailURL'),
85 'uploader': video_info
.get('publisherName'),