]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/brightcove.py
1392f382a24c273604f0c67db7afafefbcec85b8
5 import xml
.etree
.ElementTree
7 from .common
import InfoExtractor
16 class BrightcoveIE(InfoExtractor
):
17 _VALID_URL
= r
'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)'
18 _FEDERATED_URL_TEMPLATE
= 'http://c.brightcove.com/services/viewer/htmlFederated?%s'
19 _PLAYLIST_URL_TEMPLATE
= 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s'
23 # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/
24 u
'url': u
'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001',
25 u
'file': u
'2371591881001.mp4',
26 u
'md5': u
'9e80619e0a94663f0bdc849b4566af19',
27 u
'note': u
'Test Brightcove downloads and detection in GenericIE',
29 u
'title': u
'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”',
31 u
'description': u
'md5:a950cc4285c43e44d763d036710cd9cd',
35 # From http://medianetwork.oracle.com/video/player/1785452137001
36 u
'url': u
'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001',
37 u
'file': u
'1785452137001.flv',
39 u
'title': u
'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges',
40 u
'description': u
'John Rose speaks at the JVM Language Summit, August 1, 2012.',
41 u
'uploader': u
'Oracle',
@classmethod
def _build_brighcove_url(cls, object_str):
    """
    Build a Brightcove url from a xml string containing
    <object class="BrightcoveExperience">{params}</object>

    The 'id' attribute of the <object> element becomes the 'flashID' query
    parameter and the 'playerID' <param> becomes 'playerID'.  The
    'playerKey' and '@videoPlayer' <param> elements are copied only when
    they are present.  The url-encoded parameters are substituted into
    _FEDERATED_URL_TEMPLATE and the resulting url is returned.

    Raises AssertionError when the element's class attribute does not
    contain 'BrightcoveExperience'.
    """
    # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553
    # (unclosed <param ...> tags are turned into self-closing ones so that
    # the XML parser accepts them)
    object_str = re.sub(r'(<param name="[^"]+" value="[^"]+")>',
                        lambda m: m.group(1) + '/>', object_str)
    # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608
    object_str = object_str.replace(u'<--', u'<!--')

    object_doc = xml.etree.ElementTree.fromstring(object_str)
    # NOTE(review): this check disappears when Python runs with -O.
    assert u'BrightcoveExperience' in object_doc.attrib['class']
    params = {
        'flashID': object_doc.attrib['id'],
        'playerID': find_xpath_attr(object_doc, './param', 'name', 'playerID').attrib['value'],
    }
    playerKey = find_xpath_attr(object_doc, './param', 'name', 'playerKey')
    # Not all pages define this value
    if playerKey is not None:
        params['playerKey'] = playerKey.attrib['value']
    videoPlayer = find_xpath_attr(object_doc, './param', 'name', '@videoPlayer')
    if videoPlayer is not None:
        params['@videoPlayer'] = videoPlayer.attrib['value']
    data = compat_urllib_parse.urlencode(params)
    return cls._FEDERATED_URL_TEMPLATE % data
def _real_extract(self, url):
    """
    Extract a single video or a playlist from a Brightcove url.

    The query string captured by _VALID_URL is parsed.  When it carries an
    '@videoPlayer' value, that value is used as the video id and the raw
    query string is forwarded to _get_video_info.  Otherwise the first
    'playerKey' value is passed to _get_playlist_info.
    """
    mobj = re.match(self._VALID_URL, url)
    query_str = mobj.group('query')
    query = compat_urlparse.parse_qs(query_str)

    # parse_qs maps every key to a list of values; .get() yields None when
    # the url does not name a video, so the playlist path must be taken.
    videoPlayer = query.get('@videoPlayer')
    if videoPlayer:
        return self._get_video_info(videoPlayer[0], query_str)
    else:
        player_key = query['playerKey']
        return self._get_playlist_info(player_key[0])
def _get_video_info(self, video_id, query):
    """
    Download the federated player page for one video and extract its info.

    query is substituted verbatim into _FEDERATED_URL_TEMPLATE, so it must
    already be a url-encoded query string.  video_id is passed to the
    download and report helpers.  The result is whatever
    _extract_video_info returns for the page's 'mediaDTO' entry.
    """
    request_url = self._FEDERATED_URL_TEMPLATE % query
    webpage = self._download_webpage(request_url, video_id)

    self.report_extraction(video_id)
    # The page embeds its metadata as a JavaScript object literal assigned
    # to 'experienceJSON'; only the 'data' member is used.
    info = self._search_regex(r'var experienceJSON = ({.*?});', webpage, 'json')
    info = json.loads(info)['data']
    video_info = info['programmedContent']['videoPlayer']['mediaDTO']

    return self._extract_video_info(video_info)
def _get_playlist_info(self, player_key):
    """
    Download the playlist behind player_key and extract every video in it.

    The JSON document fetched from _PLAYLIST_URL_TEMPLATE must contain a
    'videoList' member; each entry of its
    ['mediaCollectionDTO']['videoDTOs'] list is converted with
    _extract_video_info.  Returns self.playlist_result(...) built from
    those videos, the list's 'id' and the collection's 'displayName'.

    Raises ExtractorError when the response has no 'videoList' member.
    """
    playlist_info = self._download_webpage(
        self._PLAYLIST_URL_TEMPLATE % player_key,
        player_key, u'Downloading playlist information')

    json_data = json.loads(playlist_info)
    if 'videoList' not in json_data:
        raise ExtractorError(u'Empty playlist')
    # From here on playlist_info refers to the decoded 'videoList' object,
    # no longer to the raw response text.
    playlist_info = json_data['videoList']
    videos = [self._extract_video_info(video_info)
              for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']]

    return self.playlist_result(
        videos, playlist_id=playlist_info['id'],
        playlist_title=playlist_info['mediaCollectionDTO']['displayName'])
110 def _extract_video_info(self
, video_info
):
112 'id': video_info
['id'],
113 'title': video_info
['displayName'],
114 'description': video_info
.get('shortDescription'),
115 'thumbnail': video_info
.get('videoStillURL') or video_info
.get('thumbnailURL'),
116 'uploader': video_info
.get('publisherName'),
119 renditions
= video_info
.get('renditions')
121 renditions
= sorted(renditions
, key
=lambda r
: r
['size'])
122 best_format
= renditions
[-1]
124 'url': best_format
['defaultURL'],
127 elif video_info
.get('FLVFullLengthURL') is not None:
129 'url': video_info
['FLVFullLengthURL'],
133 raise ExtractorError(u
'Unable to extract video url for %s' % info
['id'])