X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/99b46abf79266c2bf5eabc291dc4f366e61b5dde..97a8fc3ae80fb363c69c2e6b8c29b5373ac72aea:/youtube_dl/extractor/brightcove.py diff --git a/youtube_dl/extractor/brightcove.py b/youtube_dl/extractor/brightcove.py index aeb22be..46ef8e6 100644 --- a/youtube_dl/extractor/brightcove.py +++ b/youtube_dl/extractor/brightcove.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -179,7 +179,7 @@ class BrightcoveLegacyIE(InfoExtractor): params = {} - playerID = find_param('playerID') + playerID = find_param('playerID') or find_param('playerId') if playerID is None: raise ExtractorError('Cannot find player ID') params['playerID'] = playerID @@ -191,6 +191,16 @@ class BrightcoveLegacyIE(InfoExtractor): # These fields hold the id of the video videoPlayer = find_param('@videoPlayer') or find_param('videoId') or find_param('videoID') or find_param('@videoList') if videoPlayer is not None: + if isinstance(videoPlayer, list): + videoPlayer = videoPlayer[0] + videoPlayer = videoPlayer.strip() + # UUID is also possible for videoPlayer (e.g. + # http://www.popcornflix.com/hoodies-vs-hooligans/7f2d2b87-bbf2-4623-acfb-ea942b4f01dd + # or http://www8.hp.com/cn/zh/home.html) + if not (re.match( + r'^(?:\d+|[\da-fA-F]{8}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{4}-?[\da-fA-F]{12})$', + videoPlayer) or videoPlayer.startswith('ref:')): + return None params['@videoPlayer'] = videoPlayer linkBase = find_param('linkBaseURL') if linkBase is not None: @@ -204,7 +214,7 @@ class BrightcoveLegacyIE(InfoExtractor): # // build Brightcove XML # } m = re.search( - r'''(?x)customBC.\createVideo\( + r'''(?x)customBC\.createVideo\( .*? # skipping width and height ["\'](?P\d+)["\']\s*,\s* # playerID ["\'](?PAQ[^"\']{48})[^"\']*["\']\s*,\s* # playerKey begins with AQ and is 50 characters @@ -232,13 +242,16 @@ class BrightcoveLegacyIE(InfoExtractor): """Return a list of all Brightcove URLs from the webpage """ url_m = re.search( - r']+ + content=([\'"])(?Phttps?://(?:secure|c)\.brightcove.com/(?:(?!\2).)+)\2 + ''', webpage) if url_m: - url = unescapeHTML(url_m.group(1)) + url = unescapeHTML(url_m.group('url')) # Some sites don't add it, we can't download with this url, for example: # http://www.ktvu.com/videos/news/raw-video-caltrain-releases-video-of-man-almost/vCTZdY/ - if 'playerKey' in url or 'videoId' in url: + if 'playerKey' in url or 'videoId' in url or 'idVideo' in url: return [url] matches = re.findall( @@ -259,7 +272,7 @@ class BrightcoveLegacyIE(InfoExtractor): url, smuggled_data = unsmuggle_url(url, {}) # Change the 'videoId' and others field to '@videoPlayer' - url = re.sub(r'(?<=[?&])(videoI(d|D)|bctid)', '%40videoPlayer', url) + url = re.sub(r'(?<=[?&])(videoI(d|D)|idVideo|bctid)', '%40videoPlayer', url) # Change bckey (used by bcove.me urls) to playerKey url = re.sub(r'(?<=[?&])bckey', 'playerKey', url) mobj = re.match(self._VALID_URL, url) @@ -508,6 +521,9 @@ class BrightcoveNewIE(InfoExtractor): return entries def _real_extract(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + self._initialize_geo_bypass(smuggled_data.get('geo_countries')) + account_id, player_id, embed, video_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage( @@ -537,8 +553,10 @@ class BrightcoveNewIE(InfoExtractor): except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: json_data = self._parse_json(e.cause.read().decode(), video_id)[0] - raise ExtractorError( - json_data.get('message') or json_data['error_code'], expected=True) + message = json_data.get('message') or json_data['error_code'] + if json_data.get('error_subcode') == 'CLIENT_GEO': + self.raise_geo_restricted(msg=message) + raise ExtractorError(message, expected=True) raise title = json_data['name'].strip() @@ -548,7 +566,7 @@ class BrightcoveNewIE(InfoExtractor): container = source.get('container') ext = mimetype2ext(source.get('type')) src = source.get('src') - if ext == 'ism': + if ext == 'ism' or container == 'WVM': continue elif ext == 'm3u8' or container == 'M2TS': if not src: @@ -621,15 +639,21 @@ class BrightcoveNewIE(InfoExtractor): 'url': text_track['src'], }) + is_live = False + duration = float_or_none(json_data.get('duration'), 1000) + if duration and duration < 0: + is_live = True + return { 'id': video_id, - 'title': title, + 'title': self._live_title(title) if is_live else title, 'description': clean_html(json_data.get('description')), 'thumbnail': json_data.get('thumbnail') or json_data.get('poster'), - 'duration': float_or_none(json_data.get('duration'), 1000), + 'duration': duration, 'timestamp': parse_iso8601(json_data.get('published_at')), 'uploader_id': account_id, 'formats': formats, 'subtitles': subtitles, 'tags': json_data.get('tags', []), + 'is_live': is_live, }