]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/brightcove.py
1392f382a24c273604f0c67db7afafefbcec85b8
   5 import xml
.etree
.ElementTree
 
   7 from .common 
import InfoExtractor
 
  16 class BrightcoveIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'https?://.*brightcove\.com/(services|viewer).*\?(?P<query>.*)' 
  18     _FEDERATED_URL_TEMPLATE 
= 'http://c.brightcove.com/services/viewer/htmlFederated?%s' 
  19     _PLAYLIST_URL_TEMPLATE 
= 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' 
  23             # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ 
  24             u
'url': u
'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', 
  25             u
'file': u
'2371591881001.mp4', 
  26             u
'md5': u
'9e80619e0a94663f0bdc849b4566af19', 
  27             u
'note': u
'Test Brightcove downloads and detection in GenericIE', 
  29                 u
'title': u
'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', 
  31                 u
'description': u
'md5:a950cc4285c43e44d763d036710cd9cd', 
  35             # From http://medianetwork.oracle.com/video/player/1785452137001 
  36             u
'url': u
'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', 
  37             u
'file': u
'1785452137001.flv', 
  39                 u
'title': u
'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', 
  40                 u
'description': u
'John Rose speaks at the JVM Language Summit, August 1, 2012.', 
  41                 u
'uploader': u
'Oracle', 
  47     def _build_brighcove_url(cls
, object_str
): 
  49         Build a Brightcove url from a xml string containing 
  50         <object class="BrightcoveExperience">{params}</object> 
  53         # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 
  54         object_str 
= re
.sub(r
'(<param name="[^"]+" value="[^"]+")>', 
  55                             lambda m
: m
.group(1) + '/>', object_str
) 
  56         # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 
  57         object_str 
= object_str
.replace(u
'<--', u
'<!--') 
  59         object_doc 
= xml
.etree
.ElementTree
.fromstring(object_str
) 
  60         assert u
'BrightcoveExperience' in object_doc
.attrib
['class'] 
  61         params 
= {'flashID': object_doc
.attrib
['id'], 
  62                   'playerID': find_xpath_attr(object_doc
, './param', 'name', 'playerID').attrib
['value'], 
  64         playerKey 
= find_xpath_attr(object_doc
, './param', 'name', 'playerKey') 
  65         # Not all pages define this value 
  66         if playerKey 
is not None: 
  67             params
['playerKey'] = playerKey
.attrib
['value'] 
  68         videoPlayer 
= find_xpath_attr(object_doc
, './param', 'name', '@videoPlayer') 
  69         if videoPlayer 
is not None: 
  70             params
['@videoPlayer'] = videoPlayer
.attrib
['value'] 
  71         data 
= compat_urllib_parse
.urlencode(params
) 
  72         return cls
._FEDERATED
_URL
_TEMPLATE 
% data
 
  74     def _real_extract(self
, url
): 
  75         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  76         query_str 
= mobj
.group('query') 
  77         query 
= compat_urlparse
.parse_qs(query_str
) 
  79         videoPlayer 
= query
.get('@videoPlayer') 
  81             return self
._get
_video
_info
(videoPlayer
[0], query_str
) 
  83             player_key 
= query
['playerKey'] 
  84             return self
._get
_playlist
_info
(player_key
[0]) 
  86     def _get_video_info(self
, video_id
, query
): 
  87         request_url 
= self
._FEDERATED
_URL
_TEMPLATE 
% query
 
  88         webpage 
= self
._download
_webpage
(request_url
, video_id
) 
  90         self
.report_extraction(video_id
) 
  91         info 
= self
._search
_regex
(r
'var experienceJSON = ({.*?});', webpage
, 'json') 
  92         info 
= json
.loads(info
)['data'] 
  93         video_info 
= info
['programmedContent']['videoPlayer']['mediaDTO'] 
  95         return self
._extract
_video
_info
(video_info
) 
  97     def _get_playlist_info(self
, player_key
): 
  98         playlist_info 
= self
._download
_webpage
(self
._PLAYLIST
_URL
_TEMPLATE 
% player_key
, 
  99                                                player_key
, u
'Downloading playlist information') 
 101         json_data 
= json
.loads(playlist_info
) 
 102         if 'videoList' not in json_data
: 
 103             raise ExtractorError(u
'Empty playlist') 
 104         playlist_info 
= json_data
['videoList'] 
 105         videos 
= [self
._extract
_video
_info
(video_info
) for video_info 
in playlist_info
['mediaCollectionDTO']['videoDTOs']] 
 107         return self
.playlist_result(videos
, playlist_id
=playlist_info
['id'], 
 108                                     playlist_title
=playlist_info
['mediaCollectionDTO']['displayName']) 
 110     def _extract_video_info(self
, video_info
): 
 112             'id': video_info
['id'], 
 113             'title': video_info
['displayName'], 
 114             'description': video_info
.get('shortDescription'), 
 115             'thumbnail': video_info
.get('videoStillURL') or video_info
.get('thumbnailURL'), 
 116             'uploader': video_info
.get('publisherName'), 
 119         renditions 
= video_info
.get('renditions') 
 121             renditions 
= sorted(renditions
, key
=lambda r
: r
['size']) 
 122             best_format 
= renditions
[-1] 
 124                 'url': best_format
['defaultURL'], 
 127         elif video_info
.get('FLVFullLengthURL') is not None: 
 129                 'url': video_info
['FLVFullLengthURL'], 
 133             raise ExtractorError(u
'Unable to extract video url for %s' % info
['id'])