2 from __future__ 
import unicode_literals
 
   6 import xml
.etree
.ElementTree
 
   8 from .common 
import InfoExtractor
 
  13     compat_urllib_parse_urlparse
, 
  14     compat_urllib_request
, 
  16     compat_xml_parse_error
, 
  28 class BrightcoveIE(InfoExtractor
): 
  29     _VALID_URL 
= r
'(?:https?://.*brightcove\.com/(services|viewer).*?\?|brightcove:)(?P<query>.*)' 
  30     _FEDERATED_URL_TEMPLATE 
= 'http://c.brightcove.com/services/viewer/htmlFederated?%s' 
  34             # From http://www.8tv.cat/8aldia/videos/xavier-sala-i-martin-aquesta-tarda-a-8-al-dia/ 
  35             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1654948606001&flashID=myExperience&%40videoPlayer=2371591881001', 
  36             'md5': '5423e113865d26e40624dce2e4b45d95', 
  37             'note': 'Test Brightcove downloads and detection in GenericIE', 
  39                 'id': '2371591881001', 
  41                 'title': 'Xavier Sala i Martín: “Un banc que no presta és un banc zombi que no serveix per a res”', 
  43                 'description': 'md5:a950cc4285c43e44d763d036710cd9cd', 
  47             # From http://medianetwork.oracle.com/video/player/1785452137001 
  48             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=1217746023001&flashID=myPlayer&%40videoPlayer=1785452137001', 
  50                 'id': '1785452137001', 
  52                 'title': 'JVMLS 2012: Arrays 2.0 - Opportunities and Challenges', 
  53                 'description': 'John Rose speaks at the JVM Language Summit, August 1, 2012.', 
  58             # From http://mashable.com/2013/10/26/thermoelectric-bracelet-lets-you-control-your-body-temperature/ 
  59             'url': 'http://c.brightcove.com/services/viewer/federated_f9?&playerID=1265504713001&publisherID=AQ%7E%7E%2CAAABBzUwv1E%7E%2CxP-xFHVUstiMFlNYfvF4G9yFnNaqCw_9&videoID=2750934548001', 
  61                 'id': '2750934548001', 
  63                 'title': 'This Bracelet Acts as a Personal Thermostat', 
  64                 'description': 'md5:547b78c64f4112766ccf4e151c20b6a0', 
  65                 'uploader': 'Mashable', 
  69             # test that the default referer works 
  70             # from http://national.ballet.ca/interact/video/Lost_in_Motion_II/ 
  71             'url': 'http://link.brightcove.com/services/player/bcpid756015033001?bckey=AQ~~,AAAApYJi_Ck~,GxhXCegT1Dp39ilhXuxMJxasUhVNZiil&bctid=2878862109001', 
  73                 'id': '2878862109001', 
  75                 'title': 'Lost in Motion II', 
  76                 'description': 'md5:363109c02998fee92ec02211bd8000df', 
  77                 'uploader': 'National Ballet of Canada', 
  81             # test flv videos served by akamaihd.net 
  82             # From http://www.redbull.com/en/bike/stories/1331655643987/replay-uci-dh-world-cup-2014-from-fort-william 
  83             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?%40videoPlayer=ref%3ABC2996102916001&linkBaseURL=http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fvideos%2F1331655630249%2Freplay-uci-fort-william-2014-dh&playerKey=AQ%7E%7E%2CAAAApYJ7UqE%7E%2Cxqr_zXk0I-zzNndy8NlHogrCb5QdyZRf&playerID=1398061561001#__youtubedl_smuggle=%7B%22Referer%22%3A+%22http%3A%2F%2Fwww.redbull.com%2Fen%2Fbike%2Fstories%2F1331655643987%2Freplay-uci-dh-world-cup-2014-from-fort-william%22%7D', 
  84             # The md5 checksum changes on each download 
  86                 'id': '2996102916001', 
  88                 'title': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', 
  89                 'uploader': 'Red Bull TV', 
  90                 'description': 'UCI MTB World Cup 2014: Fort William, UK - Downhill Finals', 
  95             # from http://support.brightcove.com/en/video-cloud/docs/playlist-support-single-video-players 
  96             'url': 'http://c.brightcove.com/services/viewer/htmlFederated?playerID=3550052898001&playerKey=AQ%7E%7E%2CAAABmA9XpXk%7E%2C-Kp7jNgisre1fG5OdqpAFUTcs0lP_ZoL', 
  99                 'id': '3550319591001', 
 101             'playlist_mincount': 7, 
 106     def _build_brighcove_url(cls
, object_str
): 
 108         Build a Brightcove url from a xml string containing 
 109         <object class="BrightcoveExperience">{params}</object> 
 112         # Fix up some stupid HTML, see https://github.com/rg3/youtube-dl/issues/1553 
 113         object_str 
= re
.sub(r
'(<param(?:\s+[a-zA-Z0-9_]+="[^"]*")*)>', 
 114                             lambda m
: m
.group(1) + '/>', object_str
) 
 115         # Fix up some stupid XML, see https://github.com/rg3/youtube-dl/issues/1608 
 116         object_str 
= object_str
.replace('<--', '<!--') 
 117         # remove namespace to simplify extraction 
 118         object_str 
= re
.sub(r
'(<object[^>]*)(xmlns=".*?")', r
'\1', object_str
) 
 119         object_str 
= fix_xml_ampersands(object_str
) 
 122             object_doc 
= xml
.etree
.ElementTree
.fromstring(object_str
.encode('utf-8')) 
 123         except compat_xml_parse_error
: 
 126         fv_el 
= find_xpath_attr(object_doc
, './param', 'name', 'flashVars') 
 127         if fv_el 
is not None: 
 130                 for k
, v 
in compat_parse_qs(fv_el
.attrib
['value']).items()) 
 134         def find_param(name
): 
 135             if name 
in flashvars
: 
 136                 return flashvars
[name
] 
 137             node 
= find_xpath_attr(object_doc
, './param', 'name', name
) 
 139                 return node
.attrib
['value'] 
 144         playerID 
= find_param('playerID') 
 146             raise ExtractorError('Cannot find player ID') 
 147         params
['playerID'] = playerID
 
 149         playerKey 
= find_param('playerKey') 
 150         # Not all pages define this value 
 151         if playerKey 
is not None: 
 152             params
['playerKey'] = playerKey
 
 153         # The three fields hold the id of the video 
 154         videoPlayer 
= find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') 
 155         if videoPlayer 
is not None: 
 156             params
['@videoPlayer'] = videoPlayer
 
 157         linkBase 
= find_param('linkBaseURL') 
 158         if linkBase 
is not None: 
 159             params
['linkBaseURL'] = linkBase
 
 160         return cls
._make
_brightcove
_url
(params
) 
 163     def _build_brighcove_url_from_js(cls
, object_js
): 
 164         # The layout of JS is as follows: 
 165         # customBC.createVideo = function (width, height, playerID, playerKey, videoPlayer, VideoRandomID) { 
 166         #   // build Brightcove <object /> XML 
 169             r
'''(?x)customBC.\createVideo\( 
 170                 .*?                                                  # skipping width and height 
 171                 ["\'](?P
<playerID
>\d
+)["\']\s*,\s*                   # playerID 
 172                 ["\'](?P
<playerKey
>AQ
[^
"\']{48})[^"\']*["\']\s*,\s*  # playerKey begins with AQ and is 50 characters 
 173                                                                      # in length, however it's appended to itself 
 174                                                                      # in places, so truncate 
 175                 ["\'](?P
<videoID
>\d
+)["\']                           # @videoPlayer 
 178             return cls._make_brightcove_url(m.groupdict()) 
 181     def _make_brightcove_url(cls, params): 
 182         data = compat_urllib_parse.urlencode(params) 
 183         return cls._FEDERATED_URL_TEMPLATE % data 
 186     def _extract_brightcove_url(cls, webpage): 
 187         """Try to extract the brightcove url from the webpage, returns None 
 190         urls = cls._extract_brightcove_urls(webpage) 
 191         return urls[0] if urls else None 
 194     def _extract_brightcove_urls(cls, webpage): 
 195         """Return a list of all Brightcove URLs from the webpage """ 
 198             r'<meta\s+property=[\'"]og
:video
[\'"]\s+content=[\'"](https?
://(?
:secure|c
)\
.brightcove
.com
/[^
\'"]+)[\'"]', 
 201             url = unescapeHTML(url_m.group(1)) 
 202             # Some sites don't add it
, we can
't download with this url, for example: 
 203             # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/ 
 204             if 'playerKey
' in url or 'videoId
' in url: 
 207         matches = re.findall( 
 210                 [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] | 
 211                 [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ 
 212             ).+?>\s*</object>''', 
 215             return list(filter(None, [cls._build_brighcove_url(m) for m in matches])) 
 217         return list(filter(None, [ 
 218             cls._build_brighcove_url_from_js(custom_bc) 
 219             for custom_bc in re.findall(r'(customBC\
.createVideo\
(.+?\
);)', webpage)])) 
 221     def _real_extract(self, url): 
 222         url, smuggled_data = unsmuggle_url(url, {}) 
 224         # Change the 'videoId
' and others field to '@videoPlayer' 
 225         url = re.sub(r'(?
<=[?
&])(videoI(d|D
)|bctid
)', '%40videoPlayer
', url) 
 226         # Change bckey (used by bcove.me urls) to playerKey 
 227         url = re.sub(r'(?
<=[?
&])bckey
', 'playerKey
', url) 
 228         mobj = re.match(self._VALID_URL, url) 
 229         query_str = mobj.group('query
') 
 230         query = compat_urlparse.parse_qs(query_str) 
 232         videoPlayer = query.get('@videoPlayer') 
 234             # We set the original url as the default 'Referer
' header 
 235             referer = smuggled_data.get('Referer
', url) 
 236             return self._get_video_info( 
 237                 videoPlayer[0], query_str, query, referer=referer) 
 238         elif 'playerKey
' in query: 
 239             player_key = query['playerKey
'] 
 240             return self._get_playlist_info(player_key[0]) 
 242             raise ExtractorError( 
 243                 'Cannot find playerKey
= variable
. Did you forget quotes 
in a shell invocation?
', 
 246     def _get_video_info(self, video_id, query_str, query, referer=None): 
 247         request_url = self._FEDERATED_URL_TEMPLATE % query_str 
 248         req = compat_urllib_request.Request(request_url) 
 249         linkBase = query.get('linkBaseURL
') 
 250         if linkBase is not None: 
 251             referer = linkBase[0] 
 252         if referer is not None: 
 253             req.add_header('Referer
', referer) 
 254         webpage = self._download_webpage(req, video_id) 
 256         error_msg = self._html_search_regex( 
 257             r"<h1>We're sorry
.</h1
>([\s
\n]*<p
>.*?
</p
>)+", webpage, 
 258             'error message', default=None) 
 259         if error_msg is not None: 
 260             raise ExtractorError( 
 261                 'brightcove said: %s' % error_msg, expected=True) 
 263         self.report_extraction(video_id) 
 264         info = self._search_regex(r'var experienceJSON = ({.*});', webpage, 'json') 
 265         info = json.loads(info)['data'] 
 266         video_info = info['programmedContent']['videoPlayer']['mediaDTO'] 
 267         video_info['_youtubedl_adServerURL'] = info.get('adServerURL') 
 269         return self._extract_video_info(video_info) 
 271     def _get_playlist_info(self, player_key): 
 272         info_url = 'http://c.brightcove.com/services/json/experience/runtime/?command=get_programming_for_experience&playerKey=%s' % player_key 
 273         playlist_info = self._download_webpage( 
 274             info_url, player_key, 'Downloading playlist information') 
 276         json_data = json.loads(playlist_info) 
 277         if 'videoList' not in json_data: 
 278             raise ExtractorError('Empty playlist') 
 279         playlist_info = json_data['videoList'] 
 280         videos = [self._extract_video_info(video_info) for video_info in playlist_info['mediaCollectionDTO']['videoDTOs']] 
 282         return self.playlist_result(videos, playlist_id='%s' % playlist_info['id'], 
 283                                     playlist_title=playlist_info['mediaCollectionDTO']['displayName']) 
 285     def _extract_video_info(self, video_info): 
 287             'id': compat_str(video_info['id']), 
 288             'title': video_info['displayName'].strip(), 
 289             'description': video_info.get('shortDescription'), 
 290             'thumbnail': video_info.get('videoStillURL') or video_info.get('thumbnailURL'), 
 291             'uploader': video_info.get('publisherName'), 
 294         renditions = video_info.get('renditions') 
 297             for rend in renditions: 
 298                 url = rend['defaultURL'] 
 303                     url_comp = compat_urllib_parse_urlparse(url) 
 304                     if url_comp.path.endswith('.m3u8'): 
 306                             self._extract_m3u8_formats(url, info['id'], 'mp4')) 
 308                     elif 'akamaihd.net' in url_comp.netloc: 
 309                         # This type of renditions are served through 
 310                         # akamaihd.net, but they don't use f4m manifests 
 311                         url = url.replace('control/', '') + '?&v=3.3.0&fp=13&r=FEEFJ&g=RTSJIMBMPFPB' 
 314                     ext = determine_ext(url) 
 315                 size = rend.get('size') 
 319                     'height': rend.get('frameHeight'), 
 320                     'width': rend.get('frameWidth'), 
 321                     'filesize': size if size != 0 else None, 
 323             self._sort_formats(formats) 
 324             info['formats'] = formats 
 325         elif video_info.get('FLVFullLengthURL') is not None: 
 327                 'url': video_info['FLVFullLengthURL'], 
 330         if self._downloader.params.get('include_ads', False): 
 331             adServerURL = video_info.get('_youtubedl_adServerURL') 
 340                         'title': info['title'], 
 341                         'entries': [ad_info, info], 
 346         if 'url' not in info and not info.get('formats'): 
 347             raise ExtractorError('Unable to extract video url for %s' % info['id'])