- ]
-
- def __init__(self, *args, **kwargs):
- super(YoutubeIE, self).__init__(*args, **kwargs)
- self._player_cache = {}
-
- def report_video_info_webpage_download(self, video_id):
- """Report attempt to download video info webpage."""
- self.to_screen('%s: Downloading video info webpage' % video_id)
-
- def report_information_extraction(self, video_id):
- """Report attempt to extract video information."""
- self.to_screen('%s: Extracting video information' % video_id)
-
- def report_unavailable_format(self, video_id, format):
- """Report extracted video URL."""
- self.to_screen('%s: Format %s not available' % (video_id, format))
-
- def report_rtmp_download(self):
- """Indicate the download will use the RTMP protocol."""
- self.to_screen('RTMP download detected')
-
- def _signature_cache_id(self, example_sig):
- """ Return a string representation of a signature """
- return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
-
- def _extract_signature_function(self, video_id, player_url, example_sig):
- id_m = re.match(
- r'.*-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player)?\.(?P<ext>[a-z]+)$',
- player_url)
- if not id_m:
- raise ExtractorError('Cannot identify player %r' % player_url)
- player_type = id_m.group('ext')
- player_id = id_m.group('id')
-
- # Read from filesystem cache
- func_id = '%s_%s_%s' % (
- player_type, player_id, self._signature_cache_id(example_sig))
- assert os.path.basename(func_id) == func_id
-
- cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
- if cache_spec is not None:
- return lambda s: ''.join(s[i] for i in cache_spec)
-
- if player_type == 'js':
- code = self._download_webpage(
- player_url, video_id,
- note='Downloading %s player %s' % (player_type, player_id),
- errnote='Download of %s failed' % player_url)
- res = self._parse_sig_js(code)
- elif player_type == 'swf':
- urlh = self._request_webpage(
- player_url, video_id,
- note='Downloading %s player %s' % (player_type, player_id),
- errnote='Download of %s failed' % player_url)
- code = urlh.read()
- res = self._parse_sig_swf(code)
- else:
- assert False, 'Invalid player type %r' % player_type
-
- if cache_spec is None:
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = res(test_string)
- cache_spec = [ord(c) for c in cache_res]
-
- self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
- return res
-
- def _print_sig_code(self, func, example_sig):
- def gen_sig_code(idxs):
- def _genslice(start, end, step):
- starts = '' if start == 0 else str(start)
- ends = (':%d' % (end+step)) if end + step >= 0 else ':'
- steps = '' if step == 1 else (':%d' % step)
- return 's[%s%s%s]' % (starts, ends, steps)
-
- step = None
- start = '(Never used)' # Quelch pyflakes warnings - start will be
- # set as soon as step is set
- for i, prev in zip(idxs[1:], idxs[:-1]):
- if step is not None:
- if i - prev == step:
- continue
- yield _genslice(start, prev, step)
- step = None
- continue
- if i - prev in [-1, 1]:
- step = i - prev
- start = prev
- continue
- else:
- yield 's[%d]' % prev
- if step is None:
- yield 's[%d]' % i
- else:
- yield _genslice(start, i, step)
-
- test_string = ''.join(map(compat_chr, range(len(example_sig))))
- cache_res = func(test_string)
- cache_spec = [ord(c) for c in cache_res]
- expr_code = ' + '.join(gen_sig_code(cache_spec))
- signature_id_tuple = '(%s)' % (
- ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
- code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
- ' return %s\n') % (signature_id_tuple, expr_code)
- self.to_screen('Extracted signature function:\n' + code)
-
- def _parse_sig_js(self, jscode):
- funcname = self._search_regex(
- r'signature=([$a-zA-Z]+)', jscode,
- 'Initial JS player signature function name')
-
- jsi = JSInterpreter(jscode)
- initial_function = jsi.extract_function(funcname)
- return lambda s: initial_function([s])
-
- def _parse_sig_swf(self, file_contents):
- swfi = SWFInterpreter(file_contents)
- TARGET_CLASSNAME = 'SignatureDecipher'
- searched_class = swfi.extract_class(TARGET_CLASSNAME)
- initial_function = swfi.extract_function(searched_class, 'decipher')
- return lambda s: initial_function([s])
-
- def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
- """Turn the encrypted s field into a working signature"""
-
- if player_url is None:
- raise ExtractorError('Cannot decrypt signature without player_url')
-
- if player_url.startswith('//'):
- player_url = 'https:' + player_url
- try:
- player_id = (player_url, self._signature_cache_id(s))
- if player_id not in self._player_cache:
- func = self._extract_signature_function(
- video_id, player_url, s
- )
- self._player_cache[player_id] = func
- func = self._player_cache[player_id]
- if self._downloader.params.get('youtube_print_sig_code'):
- self._print_sig_code(func, s)
- return func(s)
- except Exception as e:
- tb = traceback.format_exc()
- raise ExtractorError(
- 'Signature extraction failed: ' + tb, cause=e)
-
- def _get_available_subtitles(self, video_id, webpage):
- try:
- sub_list = self._download_webpage(
- 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
- video_id, note=False)
- except ExtractorError as err:
- self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
- return {}
- lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', sub_list)
-
- sub_lang_list = {}
- for l in lang_list:
- lang = l[1]
- if lang in sub_lang_list:
- continue
- params = compat_urllib_parse.urlencode({
- 'lang': lang,
- 'v': video_id,
- 'fmt': self._downloader.params.get('subtitlesformat', 'srt'),
- 'name': unescapeHTML(l[0]).encode('utf-8'),
- })
- url = 'https://www.youtube.com/api/timedtext?' + params
- sub_lang_list[lang] = url
- if not sub_lang_list:
- self._downloader.report_warning('video doesn\'t have subtitles')
- return {}
- return sub_lang_list
-
- def _get_available_automatic_caption(self, video_id, webpage):
- """We need the webpage for getting the captions url, pass it as an
- argument to speed up the process."""
- sub_format = self._downloader.params.get('subtitlesformat', 'srt')
- self.to_screen('%s: Looking for automatic captions' % video_id)
- mobj = re.search(r';ytplayer.config = ({.*?});', webpage)
- err_msg = 'Couldn\'t find automatic captions for %s' % video_id
- if mobj is None:
- self._downloader.report_warning(err_msg)
- return {}
- player_config = json.loads(mobj.group(1))
- try:
- args = player_config[u'args']
- caption_url = args[u'ttsurl']
- timestamp = args[u'timestamp']
- # We get the available subtitles
- list_params = compat_urllib_parse.urlencode({
- 'type': 'list',
- 'tlangs': 1,
- 'asrs': 1,
- })
- list_url = caption_url + '&' + list_params
- caption_list = self._download_xml(list_url, video_id)
- original_lang_node = caption_list.find('track')
- if original_lang_node is None or original_lang_node.attrib.get('kind') != 'asr' :
- self._downloader.report_warning('Video doesn\'t have automatic captions')
- return {}
- original_lang = original_lang_node.attrib['lang_code']
-
- sub_lang_list = {}
- for lang_node in caption_list.findall('target'):
- sub_lang = lang_node.attrib['lang_code']
- params = compat_urllib_parse.urlencode({
- 'lang': original_lang,
- 'tlang': sub_lang,
- 'fmt': sub_format,
- 'ts': timestamp,
- 'kind': 'asr',
- })
- sub_lang_list[sub_lang] = caption_url + '&' + params
- return sub_lang_list
- # An extractor error can be raise by the download process if there are
- # no automatic captions but there are subtitles
- except (KeyError, ExtractorError):
- self._downloader.report_warning(err_msg)
- return {}
-
+ # JS player signature function name containing $
+ {
+ 'url': 'https://www.youtube.com/watch?v=nfWlot6h_JM',
+ 'info_dict': {
+ 'id': 'nfWlot6h_JM',
+ 'ext': 'm4a',
+ 'title': 'Taylor Swift - Shake It Off',
+ 'description': 'md5:bec2185232c05479482cb5a9b82719bf',
+ 'duration': 242,
+ 'uploader': 'TaylorSwiftVEVO',
+ 'uploader_id': 'TaylorSwiftVEVO',
+ 'upload_date': '20140818',
+ 'creator': 'Taylor Swift',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '141/bestaudio[ext=m4a]',
+ },
+ },
+ # Controversy video
+ {
+ 'url': 'https://www.youtube.com/watch?v=T4XJQO3qol8',
+ 'info_dict': {
+ 'id': 'T4XJQO3qol8',
+ 'ext': 'mp4',
+ 'duration': 219,
+ 'upload_date': '20100909',
+ 'uploader': 'Amazing Atheist',
+ 'uploader_id': 'TheAmazingAtheist',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/TheAmazingAtheist',
+ 'title': 'Burning Everyone\'s Koran',
+ 'description': 'SUBSCRIBE: http://www.youtube.com/saturninefilms\n\nEven Obama has taken a stand against freedom on this issue: http://www.huffingtonpost.com/2010/09/09/obama-gma-interview-quran_n_710282.html',
+ }
+ },
+ # Normal age-gate video (No vevo, embed allowed)
+ {
+ 'url': 'https://youtube.com/watch?v=HtVdAasjOgU',
+ 'info_dict': {
+ 'id': 'HtVdAasjOgU',
+ 'ext': 'mp4',
+ 'title': 'The Witcher 3: Wild Hunt - The Sword Of Destiny Trailer',
+ 'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',
+ 'duration': 142,
+ 'uploader': 'The Witcher',
+ 'uploader_id': 'WitcherGame',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',
+ 'upload_date': '20140605',
+ 'age_limit': 18,
+ },
+ },
+ # Age-gate video with encrypted signature
+ {
+ 'url': 'https://www.youtube.com/watch?v=6kLq3WMV1nU',
+ 'info_dict': {
+ 'id': '6kLq3WMV1nU',
+ 'ext': 'mp4',
+ 'title': 'Dedication To My Ex (Miss That) (Lyric Video)',
+ 'description': 'md5:33765bb339e1b47e7e72b5490139bb41',
+ 'duration': 246,
+ 'uploader': 'LloydVEVO',
+ 'uploader_id': 'LloydVEVO',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/LloydVEVO',
+ 'upload_date': '20110629',
+ 'age_limit': 18,
+ },
+ },
+ # video_info is None (https://github.com/ytdl-org/youtube-dl/issues/4421)
+ # YouTube Red ad is not captured for creator
+ {
+ 'url': '__2ABJjxzNo',
+ 'info_dict': {
+ 'id': '__2ABJjxzNo',
+ 'ext': 'mp4',
+ 'duration': 266,
+ 'upload_date': '20100430',
+ 'uploader_id': 'deadmau5',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
+ 'creator': 'deadmau5',
+ 'description': 'md5:12c56784b8032162bb936a5f76d55360',
+ 'uploader': 'deadmau5',
+ 'title': 'Deadmau5 - Some Chords (HD)',
+ 'alt_title': 'Some Chords',
+ },
+ 'expected_warnings': [
+ 'DASH manifest missing',
+ ]
+ },
+ # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431)
+ {
+ 'url': 'lqQg6PlCWgI',
+ 'info_dict': {
+ 'id': 'lqQg6PlCWgI',
+ 'ext': 'mp4',
+ 'duration': 6085,
+ 'upload_date': '20150827',
+ 'uploader_id': 'olympic',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/olympic',
+ 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games',
+ 'uploader': 'Olympic',
+ 'title': 'Hockey - Women - GER-AUS - London 2012 Olympic Games',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ }
+ },
+ # Non-square pixels
+ {
+ 'url': 'https://www.youtube.com/watch?v=_b-2C3KPAM0',
+ 'info_dict': {
+ 'id': '_b-2C3KPAM0',
+ 'ext': 'mp4',
+ 'stretched_ratio': 16 / 9.,
+ 'duration': 85,
+ 'upload_date': '20110310',
+ 'uploader_id': 'AllenMeow',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/AllenMeow',
+ 'description': 'made by Wacom from Korea | 字幕&加油添醋 by TY\'s Allen | 感謝heylisa00cavey1001同學熱情提供梗及翻譯',
+ 'uploader': '孫ᄋᄅ',
+ 'title': '[A-made] 變態妍字幕版 太妍 我就是這樣的人',
+ },
+ },
+ # url_encoded_fmt_stream_map is empty string
+ {
+ 'url': 'qEJwOuvDf7I',
+ 'info_dict': {
+ 'id': 'qEJwOuvDf7I',
+ 'ext': 'webm',
+ 'title': 'Обсуждение судебной практики по выборам 14 сентября 2014 года в Санкт-Петербурге',
+ 'description': '',
+ 'upload_date': '20150404',
+ 'uploader_id': 'spbelect',
+ 'uploader': 'Наблюдатели Петербурга',
+ },
+ 'params': {
+ 'skip_download': 'requires avconv',
+ },
+ 'skip': 'This live event has ended.',
+ },
+ # Extraction from multiple DASH manifests (https://github.com/ytdl-org/youtube-dl/pull/6097)
+ {
+ 'url': 'https://www.youtube.com/watch?v=FIl7x6_3R5Y',
+ 'info_dict': {
+ 'id': 'FIl7x6_3R5Y',
+ 'ext': 'webm',
+ 'title': 'md5:7b81415841e02ecd4313668cde88737a',
+ 'description': 'md5:116377fd2963b81ec4ce64b542173306',
+ 'duration': 220,
+ 'upload_date': '20150625',
+ 'uploader_id': 'dorappi2000',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/dorappi2000',
+ 'uploader': 'dorappi2000',
+ 'formats': 'mincount:31',
+ },
+ 'skip': 'not actual anymore',
+ },
+ # DASH manifest with segment_list
+ {
+ 'url': 'https://www.youtube.com/embed/CsmdDsKjzN8',
+ 'md5': '8ce563a1d667b599d21064e982ab9e31',
+ 'info_dict': {
+ 'id': 'CsmdDsKjzN8',
+ 'ext': 'mp4',
+ 'upload_date': '20150501', # According to '<meta itemprop="datePublished"', but in other places it's 20150510
+ 'uploader': 'Airtek',
+ 'description': 'Retransmisión en directo de la XVIII media maratón de Zaragoza.',
+ 'uploader_id': 'UCzTzUmjXxxacNnL8I3m4LnQ',
+ 'title': 'Retransmisión XVIII Media maratón Zaragoza 2015',
+ },
+ 'params': {
+ 'youtube_include_dash_manifest': True,
+ 'format': '135', # bestvideo
+ },
+ 'skip': 'This live event has ended.',
+ },
+ {
+ # Multifeed videos (multiple cameras), URL is for Main Camera
+ 'url': 'https://www.youtube.com/watch?v=jqWvoWXjCVs',
+ 'info_dict': {
+ 'id': 'jqWvoWXjCVs',
+ 'title': 'teamPGP: Rocket League Noob Stream',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ },
+ 'playlist': [{
+ 'info_dict': {
+ 'id': 'jqWvoWXjCVs',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (Main Camera)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7335,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }, {
+ 'info_dict': {
+ 'id': '6h8e8xoXJzg',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (kreestuh)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7337,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'PUOgX5z9xZw',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (grizzle)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7337,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }, {
+ 'info_dict': {
+ 'id': 'teuwxikvS5k',
+ 'ext': 'mp4',
+ 'title': 'teamPGP: Rocket League Noob Stream (zim)',
+ 'description': 'md5:dc7872fb300e143831327f1bae3af010',
+ 'duration': 7334,
+ 'upload_date': '20150721',
+ 'uploader': 'Beer Games Beer',
+ 'uploader_id': 'beergamesbeer',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/beergamesbeer',
+ 'license': 'Standard YouTube License',
+ },
+ }],
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ },
+ {
+ # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536)
+ 'url': 'https://www.youtube.com/watch?v=gVfLd0zydlo',
+ 'info_dict': {
+ 'id': 'gVfLd0zydlo',
+ 'title': 'DevConf.cz 2016 Day 2 Workshops 1 14:00 - 15:30',
+ },
+ 'playlist_count': 2,
+ 'skip': 'Not multifeed anymore',
+ },
+ {
+ 'url': 'https://vid.plus/FlRa-iH7PGw',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://zwearz.com/watch/9lWxNJF-ufM/electra-woman-dyna-girl-official-trailer-grace-helbig.html',
+ 'only_matching': True,
+ },
+ {
+ # Title with JS-like syntax "};" (see https://github.com/ytdl-org/youtube-dl/issues/7468)
+ # Also tests cut-off URL expansion in video description (see
+ # https://github.com/ytdl-org/youtube-dl/issues/1892,
+ # https://github.com/ytdl-org/youtube-dl/issues/8164)
+ 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
+ 'info_dict': {
+ 'id': 'lsguqyKfVQg',
+ 'ext': 'mp4',
+ 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21',
+ 'alt_title': 'Dark Walk - Position Music',
+ 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a',
+ 'duration': 133,
+ 'upload_date': '20151119',
+ 'uploader_id': 'IronSoulElf',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/IronSoulElf',
+ 'uploader': 'IronSoulElf',
+ 'creator': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'track': 'Dark Walk - Position Music',
+ 'artist': 'Todd Haberman, Daniel Law Heath and Aaron Kaplan',
+ 'album': 'Position Music - Production Music Vol. 143 - Dark Walk',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Tags with '};' (see https://github.com/ytdl-org/youtube-dl/issues/7468)
+ 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8',
+ 'only_matching': True,
+ },
+ {
+ # Video with yt:stretch=17:0
+ 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM',
+ 'info_dict': {
+ 'id': 'Q39EVAstoRM',
+ 'ext': 'mp4',
+ 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4',
+ 'description': 'md5:ee18a25c350637c8faff806845bddee9',
+ 'upload_date': '20151107',
+ 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA',
+ 'uploader': 'CH GAMER DROID',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video does not exist.',
+ },
+ {
+ # Video licensed under Creative Commons
+ 'url': 'https://www.youtube.com/watch?v=M4gD1WSo5mA',
+ 'info_dict': {
+ 'id': 'M4gD1WSo5mA',
+ 'ext': 'mp4',
+ 'title': 'md5:e41008789470fc2533a3252216f1c1d1',
+ 'description': 'md5:a677553cf0840649b731a3024aeff4cc',
+ 'duration': 721,
+ 'upload_date': '20150127',
+ 'uploader_id': 'BerkmanCenter',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter',
+ 'uploader': 'The Berkman Klein Center for Internet & Society',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Channel-like uploader_url
+ 'url': 'https://www.youtube.com/watch?v=eQcmzGIKrzg',
+ 'info_dict': {
+ 'id': 'eQcmzGIKrzg',
+ 'ext': 'mp4',
+ 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders',
+ 'description': 'md5:dda0d780d5a6e120758d1711d062a867',
+ 'duration': 4060,
+ 'upload_date': '20151119',
+ 'uploader': 'Bernie Sanders',
+ 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg',
+ 'license': 'Creative Commons Attribution license (reuse allowed)',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY',
+ 'only_matching': True,
+ },
+ {
+ # YouTube Red paid video (https://github.com/ytdl-org/youtube-dl/issues/10059)
+ 'url': 'https://www.youtube.com/watch?v=i1Ko8UG-Tdo',
+ 'only_matching': True,
+ },
+ {
+ # Rental video preview
+ 'url': 'https://www.youtube.com/watch?v=yYr8q0y5Jfg',
+ 'info_dict': {
+ 'id': 'uGpuVWrhIzE',
+ 'ext': 'mp4',
+ 'title': 'Piku - Trailer',
+ 'description': 'md5:c36bd60c3fd6f1954086c083c72092eb',
+ 'upload_date': '20150811',
+ 'uploader': 'FlixMatrix',
+ 'uploader_id': 'FlixMatrixKaravan',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/FlixMatrixKaravan',
+ 'license': 'Standard YouTube License',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'skip': 'This video is not available.',
+ },
+ {
+ # YouTube Red video with episode data
+ 'url': 'https://www.youtube.com/watch?v=iqKdEhx-dD4',
+ 'info_dict': {
+ 'id': 'iqKdEhx-dD4',
+ 'ext': 'mp4',
+ 'title': 'Isolation - Mind Field (Ep 1)',
+ 'description': 'md5:46a29be4ceffa65b92d277b93f463c0f',
+ 'duration': 2085,
+ 'upload_date': '20170118',
+ 'uploader': 'Vsauce',
+ 'uploader_id': 'Vsauce',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/Vsauce',
+ 'series': 'Mind Field',
+ 'season_number': 1,
+ 'episode_number': 1,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'expected_warnings': [
+ 'Skipping DASH manifest',
+ ],
+ },
+ {
+ # The following content has been identified by the YouTube community
+ # as inappropriate or offensive to some audiences.
+ 'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+ 'info_dict': {
+ 'id': '6SJNVb0GnPI',
+ 'ext': 'mp4',
+ 'title': 'Race Differences in Intelligence',
+ 'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+ 'duration': 965,
+ 'upload_date': '20140124',
+ 'uploader': 'New Century Foundation',
+ 'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # itag 212
+ 'url': '1t24XAntNCY',
+ 'only_matching': True,
+ },
+ {
+ # geo restricted to JP
+ 'url': 'sJL6WA-aGkQ',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://invidio.us/watch?v=BaW_jenozKc',
+ 'only_matching': True,
+ },
+ {
+ # DRM protected
+ 'url': 'https://www.youtube.com/watch?v=s7_qI6_mIXc',
+ 'only_matching': True,
+ },
+ {
+ # Video with unsupported adaptive stream type formats
+ 'url': 'https://www.youtube.com/watch?v=Z4Vy8R84T1U',
+ 'info_dict': {
+ 'id': 'Z4Vy8R84T1U',
+ 'ext': 'mp4',
+ 'title': 'saman SMAN 53 Jakarta(Sancety) opening COFFEE4th at SMAN 53 Jakarta',
+ 'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+ 'duration': 433,
+ 'upload_date': '20130923',
+ 'uploader': 'Amelia Putri Harwita',
+ 'uploader_id': 'UCpOxM49HJxmC1qCalXyB3_Q',
+ 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCpOxM49HJxmC1qCalXyB3_Q',
+ 'formats': 'maxcount:10',
+ },
+ 'params': {
+ 'skip_download': True,
+ 'youtube_include_dash_manifest': False,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ 'url': 'https://music.youtube.com/watch?v=MgNrAu2pzNs',
+ 'info_dict': {
+ 'id': 'MgNrAu2pzNs',
+ 'ext': 'mp4',
+ 'title': 'Voyeur Girl',
+ 'description': 'md5:7ae382a65843d6df2685993e90a8628f',
+ 'upload_date': '20190312',
+ 'uploader': 'Various Artists - Topic',
+ 'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
+ 'artist': 'Stephen',
+ 'track': 'Voyeur Girl',
+ 'album': 'it\'s too much love to know my dear',
+ 'release_date': '20190313',
+ 'release_year': 2019,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # Retrieve 'artist' field from 'Artist:' in video description
+ # when it is present on youtube music video
+ 'url': 'https://www.youtube.com/watch?v=k0jLE7tTwjY',
+ 'info_dict': {
+ 'id': 'k0jLE7tTwjY',
+ 'ext': 'mp4',
+ 'title': 'Latch Feat. Sam Smith',
+ 'description': 'md5:3cb1e8101a7c85fcba9b4fb41b951335',
+ 'upload_date': '20150110',
+ 'uploader': 'Various Artists - Topic',
+ 'uploader_id': 'UCNkEcmYdjrH4RqtNgh7BZ9w',
+ 'artist': 'Disclosure',
+ 'track': 'Latch Feat. Sam Smith',
+ 'album': 'Latch Featuring Sam Smith',
+ 'release_date': '20121008',
+ 'release_year': 2012,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # handle multiple artists on youtube music video
+ 'url': 'https://www.youtube.com/watch?v=74qn0eJSjpA',
+ 'info_dict': {
+ 'id': '74qn0eJSjpA',
+ 'ext': 'mp4',
+ 'title': 'Eastside',
+ 'description': 'md5:290516bb73dcbfab0dcc4efe6c3de5f2',
+ 'upload_date': '20180710',
+ 'uploader': 'Benny Blanco - Topic',
+ 'uploader_id': 'UCzqz_ksRu_WkIzmivMdIS7A',
+ 'artist': 'benny blanco, Halsey, Khalid',
+ 'track': 'Eastside',
+ 'album': 'Eastside',
+ 'release_date': '20180713',
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ {
+ # Youtube Music Auto-generated description
+ # handle youtube music video with release_year and no release_date
+ 'url': 'https://www.youtube.com/watch?v=-hcAI0g-f5M',
+ 'info_dict': {
+ 'id': '-hcAI0g-f5M',
+ 'ext': 'mp4',
+ 'title': 'Put It On Me',
+ 'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
+ 'upload_date': '20180426',
+ 'uploader': 'Matt Maeson - Topic',
+ 'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
+ 'artist': 'Matt Maeson',
+ 'track': 'Put It On Me',
+ 'album': 'The Hearse',
+ 'release_date': None,
+ 'release_year': 2018,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ },
+ ]
+
+ def __init__(self, *args, **kwargs):
+ super(YoutubeIE, self).__init__(*args, **kwargs)
+ self._player_cache = {}
+
+ def report_video_info_webpage_download(self, video_id):
+ """Report attempt to download video info webpage."""
+ self.to_screen('%s: Downloading video info webpage' % video_id)
+
+ def report_information_extraction(self, video_id):
+ """Report attempt to extract video information."""
+ self.to_screen('%s: Extracting video information' % video_id)
+
+ def report_unavailable_format(self, video_id, format):
+ """Report extracted video URL."""
+ self.to_screen('%s: Format %s not available' % (video_id, format))
+
+ def report_rtmp_download(self):
+ """Indicate the download will use the RTMP protocol."""
+ self.to_screen('RTMP download detected')
+
+ def _signature_cache_id(self, example_sig):
+ """ Return a string representation of a signature """
+ return '.'.join(compat_str(len(part)) for part in example_sig.split('.'))
+
+ def _extract_signature_function(self, video_id, player_url, example_sig):
+ id_m = re.match(
+ r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
+ player_url)
+ if not id_m:
+ raise ExtractorError('Cannot identify player %r' % player_url)
+ player_type = id_m.group('ext')
+ player_id = id_m.group('id')
+
+ # Read from filesystem cache
+ func_id = '%s_%s_%s' % (
+ player_type, player_id, self._signature_cache_id(example_sig))
+ assert os.path.basename(func_id) == func_id
+
+ cache_spec = self._downloader.cache.load('youtube-sigfuncs', func_id)
+ if cache_spec is not None:
+ return lambda s: ''.join(s[i] for i in cache_spec)
+
+ download_note = (
+ 'Downloading player %s' % player_url
+ if self._downloader.params.get('verbose') else
+ 'Downloading %s player %s' % (player_type, player_id)
+ )
+ if player_type == 'js':
+ code = self._download_webpage(
+ player_url, video_id,
+ note=download_note,
+ errnote='Download of %s failed' % player_url)
+ res = self._parse_sig_js(code)
+ elif player_type == 'swf':
+ urlh = self._request_webpage(
+ player_url, video_id,
+ note=download_note,
+ errnote='Download of %s failed' % player_url)
+ code = urlh.read()
+ res = self._parse_sig_swf(code)
+ else:
+ assert False, 'Invalid player type %r' % player_type
+
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = res(test_string)
+ cache_spec = [ord(c) for c in cache_res]
+
+ self._downloader.cache.store('youtube-sigfuncs', func_id, cache_spec)
+ return res
+
+ def _print_sig_code(self, func, example_sig):
+ def gen_sig_code(idxs):
+ def _genslice(start, end, step):
+ starts = '' if start == 0 else str(start)
+ ends = (':%d' % (end + step)) if end + step >= 0 else ':'
+ steps = '' if step == 1 else (':%d' % step)
+ return 's[%s%s%s]' % (starts, ends, steps)
+
+ step = None
+ # Quelch pyflakes warnings - start will be set when step is set
+ start = '(Never used)'
+ for i, prev in zip(idxs[1:], idxs[:-1]):
+ if step is not None:
+ if i - prev == step:
+ continue
+ yield _genslice(start, prev, step)
+ step = None
+ continue
+ if i - prev in [-1, 1]:
+ step = i - prev
+ start = prev
+ continue
+ else:
+ yield 's[%d]' % prev
+ if step is None:
+ yield 's[%d]' % i
+ else:
+ yield _genslice(start, i, step)
+
+ test_string = ''.join(map(compat_chr, range(len(example_sig))))
+ cache_res = func(test_string)
+ cache_spec = [ord(c) for c in cache_res]
+ expr_code = ' + '.join(gen_sig_code(cache_spec))
+ signature_id_tuple = '(%s)' % (
+ ', '.join(compat_str(len(p)) for p in example_sig.split('.')))
+ code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n'
+ ' return %s\n') % (signature_id_tuple, expr_code)
+ self.to_screen('Extracted signature function:\n' + code)
+
+ def _parse_sig_js(self, jscode):
+ funcname = self._search_regex(
+ (r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',
+ # Obsolete patterns
+ r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+ r'\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\('),
+ jscode, 'Initial JS player signature function name', group='sig')
+
+ jsi = JSInterpreter(jscode)
+ initial_function = jsi.extract_function(funcname)
+ return lambda s: initial_function([s])
+
+ def _parse_sig_swf(self, file_contents):
+ swfi = SWFInterpreter(file_contents)
+ TARGET_CLASSNAME = 'SignatureDecipher'
+ searched_class = swfi.extract_class(TARGET_CLASSNAME)
+ initial_function = swfi.extract_function(searched_class, 'decipher')
+ return lambda s: initial_function([s])
+
+ def _decrypt_signature(self, s, video_id, player_url, age_gate=False):
+ """Turn the encrypted s field into a working signature"""
+
+ if player_url is None:
+ raise ExtractorError('Cannot decrypt signature without player_url')
+
+ if player_url.startswith('//'):
+ player_url = 'https:' + player_url
+ elif not re.match(r'https?://', player_url):
+ player_url = compat_urlparse.urljoin(
+ 'https://www.youtube.com', player_url)
+ try:
+ player_id = (player_url, self._signature_cache_id(s))
+ if player_id not in self._player_cache:
+ func = self._extract_signature_function(
+ video_id, player_url, s
+ )
+ self._player_cache[player_id] = func
+ func = self._player_cache[player_id]
+ if self._downloader.params.get('youtube_print_sig_code'):
+ self._print_sig_code(func, s)
+ return func(s)
+ except Exception as e:
+ tb = traceback.format_exc()
+ raise ExtractorError(
+ 'Signature extraction failed: ' + tb, cause=e)
+
+ def _get_subtitles(self, video_id, webpage):
+ try:
+ subs_doc = self._download_xml(
+ 'https://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id,
+ video_id, note=False)
+ except ExtractorError as err:
+ self._downloader.report_warning('unable to download video subtitles: %s' % error_to_compat_str(err))
+ return {}
+
+ sub_lang_list = {}
+ for track in subs_doc.findall('track'):
+ lang = track.attrib['lang_code']
+ if lang in sub_lang_list:
+ continue
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ params = compat_urllib_parse_urlencode({
+ 'lang': lang,
+ 'v': video_id,
+ 'fmt': ext,
+ 'name': track.attrib['name'].encode('utf-8'),
+ })
+ sub_formats.append({
+ 'url': 'https://www.youtube.com/api/timedtext?' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[lang] = sub_formats
+ if not sub_lang_list:
+ self._downloader.report_warning('video doesn\'t have subtitles')
+ return {}
+ return sub_lang_list
+
+ def _get_ytplayer_config(self, video_id, webpage):
+ patterns = (
+ # User data may contain arbitrary character sequences that may affect
+ # JSON extraction with regex, e.g. when '};' is contained the second
+ # regex won't capture the whole JSON. Yet working around by trying more
+ # concrete regex first keeping in mind proper quoted string handling
+ # to be implemented in future that will replace this workaround (see
+ # https://github.com/ytdl-org/youtube-dl/issues/7468,
+ # https://github.com/ytdl-org/youtube-dl/pull/7599)
+ r';ytplayer\.config\s*=\s*({.+?});ytplayer',
+ r';ytplayer\.config\s*=\s*({.+?});',
+ )
+ config = self._search_regex(
+ patterns, webpage, 'ytplayer.config', default=None)
+ if config:
+ return self._parse_json(
+ uppercase_escape(config), video_id, fatal=False)
+
+ def _get_automatic_captions(self, video_id, webpage):
+ """We need the webpage for getting the captions url, pass it as an
+ argument to speed up the process."""
+ self.to_screen('%s: Looking for automatic captions' % video_id)
+ player_config = self._get_ytplayer_config(video_id, webpage)
+ err_msg = 'Couldn\'t find automatic captions for %s' % video_id
+ if not player_config:
+ self._downloader.report_warning(err_msg)
+ return {}
+ try:
+ args = player_config['args']
+ caption_url = args.get('ttsurl')
+ if caption_url:
+ timestamp = args['timestamp']
+ # We get the available subtitles
+ list_params = compat_urllib_parse_urlencode({
+ 'type': 'list',
+ 'tlangs': 1,
+ 'asrs': 1,
+ })
+ list_url = caption_url + '&' + list_params
+ caption_list = self._download_xml(list_url, video_id)
+ original_lang_node = caption_list.find('track')
+ if original_lang_node is None:
+ self._downloader.report_warning('Video doesn\'t have automatic captions')
+ return {}
+ original_lang = original_lang_node.attrib['lang_code']
+ caption_kind = original_lang_node.attrib.get('kind', '')
+
+ sub_lang_list = {}
+ for lang_node in caption_list.findall('target'):
+ sub_lang = lang_node.attrib['lang_code']
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ params = compat_urllib_parse_urlencode({
+ 'lang': original_lang,
+ 'tlang': sub_lang,
+ 'fmt': ext,
+ 'ts': timestamp,
+ 'kind': caption_kind,
+ })
+ sub_formats.append({
+ 'url': caption_url + '&' + params,
+ 'ext': ext,
+ })
+ sub_lang_list[sub_lang] = sub_formats
+ return sub_lang_list
+
+ def make_captions(sub_url, sub_langs):
+ parsed_sub_url = compat_urllib_parse_urlparse(sub_url)
+ caption_qs = compat_parse_qs(parsed_sub_url.query)
+ captions = {}
+ for sub_lang in sub_langs:
+ sub_formats = []
+ for ext in self._SUBTITLE_FORMATS:
+ caption_qs.update({
+ 'tlang': [sub_lang],
+ 'fmt': [ext],
+ })
+ sub_url = compat_urlparse.urlunparse(parsed_sub_url._replace(
+ query=compat_urllib_parse_urlencode(caption_qs, True)))
+ sub_formats.append({
+ 'url': sub_url,
+ 'ext': ext,
+ })
+ captions[sub_lang] = sub_formats
+ return captions
+
+ # New captions format as of 22.06.2017
+ player_response = args.get('player_response')
+ if player_response and isinstance(player_response, compat_str):
+ player_response = self._parse_json(
+ player_response, video_id, fatal=False)
+ if player_response:
+ renderer = player_response['captions']['playerCaptionsTracklistRenderer']
+ base_url = renderer['captionTracks'][0]['baseUrl']
+ sub_lang_list = []
+ for lang in renderer['translationLanguages']:
+ lang_code = lang.get('languageCode')
+ if lang_code:
+ sub_lang_list.append(lang_code)
+ return make_captions(base_url, sub_lang_list)
+
+ # Some videos don't provide ttsurl but rather caption_tracks and
+ # caption_translation_languages (e.g. 20LmZk1hakA)
+ # Does not used anymore as of 22.06.2017
+ caption_tracks = args['caption_tracks']
+ caption_translation_languages = args['caption_translation_languages']
+ caption_url = compat_parse_qs(caption_tracks.split(',')[0])['u'][0]
+ sub_lang_list = []
+ for lang in caption_translation_languages.split(','):
+ lang_qs = compat_parse_qs(compat_urllib_parse_unquote_plus(lang))
+ sub_lang = lang_qs.get('lc', [None])[0]
+ if sub_lang:
+ sub_lang_list.append(sub_lang)
+ return make_captions(caption_url, sub_lang_list)
+ # An extractor error can be raise by the download process if there are
+ # no automatic captions but there are subtitles
+ except (KeyError, IndexError, ExtractorError):
+ self._downloader.report_warning(err_msg)
+ return {}
+
+ def _mark_watched(self, video_id, video_info, player_response):
+ playback_url = url_or_none(try_get(
+ player_response,
+ lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']) or try_get(
+ video_info, lambda x: x['videostats_playback_base_url'][0]))
+ if not playback_url:
+ return
+ parsed_playback_url = compat_urlparse.urlparse(playback_url)
+ qs = compat_urlparse.parse_qs(parsed_playback_url.query)
+
+ # cpn generation algorithm is reverse engineered from base.js.
+ # In fact it works even with dummy cpn.
+ CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
+ cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)))
+
+ qs.update({
+ 'ver': ['2'],
+ 'cpn': [cpn],
+ })
+ playback_url = compat_urlparse.urlunparse(
+ parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
+
+ self._download_webpage(
+ playback_url, video_id, 'Marking watched',
+ 'Unable to mark watched', fatal=False)
+
+ @staticmethod
+ def _extract_urls(webpage):
+ # Embedded YouTube player
+ entries = [
+ unescapeHTML(mobj.group('url'))
+ for mobj in re.finditer(r'''(?x)
+ (?:
+ <iframe[^>]+?src=|
+ data-video-url=|
+ <embed[^>]+?src=|
+ embedSWF\(?:\s*|
+ <object[^>]+data=|
+ new\s+SWFObject\(
+ )
+ (["\'])
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+ (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
+ \1''', webpage)]
+
+ # lazyYT YouTube embed
+ entries.extend(list(map(
+ unescapeHTML,
+ re.findall(r'class="lazyYT" data-youtube-id="([^"]+)"', webpage))))
+
+ # Wordpress "YouTube Video Importer" plugin
+ matches = re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
+ entries.extend(m[-1] for m in matches)
+
+ return entries
+
+ @staticmethod
+ def _extract_url(webpage):
+ urls = YoutubeIE._extract_urls(webpage)
+ return urls[0] if urls else None
+