# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/brightcove.py
# 558b3d0093975eff273d38a7a2bad9f1d4e36355
import json
import re
import xml.etree.ElementTree

from .common import InfoExtractor
# NOTE(review): this module also uses compat_urllib_parse, compat_urlparse,
# find_xpath_attr and ExtractorError; their import lines are not visible in
# this copy of the file and must be restored from the project's utilities.
class BrightcoveIE(InfoExtractor):
    """Information extractor for Brightcove "services"/"viewer" URLs.

    The query string of the matched URL decides what is extracted: when it
    carries an ``@videoPlayer`` parameter a single video is returned,
    otherwise the ``playerKey`` parameter is used to fetch a playlist.
    """

    # Everything after the first '?' is captured as the ``query`` group and
    # is later both parsed and re-sent verbatim to the federated endpoint.
    _VALID_URL = r'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
    _FEDERATED_URL_TEMPLATE = 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
    _PLAYLIST_URL_TEMPLATE = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'

    # NOTE(review): the attribute name ``_TESTS`` and the nested
    # ``info_dict`` key were not present in the mangled source; they are
    # assumed from the shape of the surviving entries -- confirm upstream.
    _TESTS = [
        {
            # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
            u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
            u'file': u'2371591881001.mp4',
            u'md5': u'9e80619e0a94663f0bdc849b4566af19',
            u'note': u'Test Brightcove downloads and detection in GenericIE',
            u'info_dict': {
                u'title': u'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
                # NOTE(review): one entry between 'title' and 'description'
                # appears to have been lost from this copy of the file.
                u'description': u'md5:a950cc4285c43e44d763d036710cd9cd',
            },
        },
        {
            # From http://medianetwork.oracle.com/video/player/1785452137001
            u'url': u'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
            u'file': u'1785452137001.flv',
            u'info_dict': {
                u'title': u'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
                u'description': u'John Rose speaks at the JVM Language Summit, August 1, 2012.',
                u'uploader': u'Oracle',
            },
        },
    ]

    # NOTE(review): the decorator was not visible in the mangled source; it
    # is inferred from the ``cls`` parameter and the use of
    # ``cls._FEDERATED_URL_TEMPLATE`` below.
    @classmethod
    def _build_brighcove_url(cls, object_str):
        """
        Build a Brightcove url from a xml string containing
        <object class="BrightcoveExperience">{params}</object>
        """
        object_doc = xml.etree.ElementTree.fromstring(object_str)
        assert u'BrightcoveExperience' in object_doc.attrib['class']
        params = {
            'flashID': object_doc.attrib['id'],
            'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
        }
        playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
        # Not all pages define this value
        if playerKey is not None:
            params['playerKey'] = playerKey.attrib['value']
        videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
        if videoPlayer is not None:
            params['@videoPlayer'] = videoPlayer.attrib['value']
        data = compat_urllib_parse.urlencode(params)
        return cls._FEDERATED_URL_TEMPLATE % data

    def _real_extract(self, url):
        """Dispatch to single-video or playlist extraction for ``url``.

        The URL's query string is parsed; ``@videoPlayer`` selects a single
        video, otherwise ``playerKey`` (a KeyError if absent) selects a
        playlist.
        """
        mobj = re.match(self._VALID_URL, url)
        query_str = mobj.group('query')
        query = compat_urlparse.parse_qs(query_str)

        videoPlayer = query.get('@videoPlayer')
        # parse_qs maps each key to a list of values, hence the [0] below.
        if videoPlayer:
            return self._get_video_info(videoPlayer[0], query_str)
        else:
            player_key = query['playerKey']
            return self._get_playlist_info(player_key[0])

    def _get_video_info(self, video_id, query):
        """Download the federated player page and extract one video.

        ``query`` is the raw query string; it is substituted unchanged into
        the federated URL template.
        """
        request_url = self._FEDERATED_URL_TEMPLATE % query
        webpage = self._download_webpage(request_url, video_id)

        self.report_extraction(video_id)
        # The page embeds its configuration as a JavaScript object literal.
        info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
        info = json.loads(info)['data']
        video_info = info['programmedContent']['videoPlayer']['mediaDTO']

        return self._extract_video_info(video_info)

    def _get_playlist_info(self, player_key):
        """Download the playlist JSON for ``player_key`` and build a playlist result."""
        playlist_info = self._download_webpage(self._PLAYLIST_URL_TEMPLATE % player_key,
                                               player_key, u'Downloading playlist information')

        playlist_info = json.loads(playlist_info)['videoList']
        videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]

        return self.playlist_result(videos, playlist_id=playlist_info['id'],
                                    playlist_title=playlist_info['mediaCollectionDTO']['displayName'])

    def _extract_video_info(self, video_info):
        """Turn one Brightcove video description dict into an info dict.

        The largest rendition (by ``size``) is preferred; failing that the
        ``FLVFullLengthURL`` entry is used. Raises ExtractorError when
        neither yields a URL.
        """
        info = {
            'id': video_info['id'],
            'title': video_info['displayName'],
            'description': video_info.get('shortDescription'),
            'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'),
            'uploader': video_info.get('publisherName'),
        }

        renditions = video_info.get('renditions')
        if renditions:
            renditions = sorted(renditions, key=lambda r: r['size'])
            best_format = renditions[-1]
            # NOTE(review): lines following the 'url' entry were lost from
            # this copy of the file and may have set further keys -- confirm
            # against upstream before relying on this dict being complete.
            info.update({
                'url': best_format['defaultURL'],
            })
        elif video_info.get('FLVFullLengthURL') is not None:
            # NOTE(review): as above, entries after 'url' may be missing.
            info.update({
                'url': video_info['FLVFullLengthURL'],
            })
        else:
            raise ExtractorError(u'Unable to extract video url for %s' % info['id'])
        # Both callers consume the return value, so the populated dict is
        # handed back here.
        return info