- video_annotations = self._extract_annotations(video_id)
-
- chapters = self._extract_chapters(description_original, video_duration)
-
- if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
- self.report_rtmp_download()
- formats = [{
- 'format_id': '_rtmp',
- 'protocol': 'rtmp',
- 'url': video_info['conn'][0],
- 'player_url': player_url,
- }]
- elif len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1:
- encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
- if 'rtmpe%3Dyes' in encoded_url_map:
- raise ExtractorError('rtmpe downloads are not supported, see https://github.com/rg3/youtube-dl/issues/343 for more information.', expected=True)
- formats_spec = {}
- fmt_list = video_info.get('fmt_list', [''])[0]
- if fmt_list:
- for fmt in fmt_list.split(','):
- spec = fmt.split('/')
- if len(spec) > 1:
- width_height = spec[1].split('x')
- if len(width_height) == 2:
- formats_spec[spec[0]] = {
- 'resolution': spec[1],
- 'width': int_or_none(width_height[0]),
- 'height': int_or_none(width_height[1]),
- }
- formats = []
- for url_data_str in encoded_url_map.split(','):
- url_data = compat_parse_qs(url_data_str)
- if 'itag' not in url_data or 'url' not in url_data:
- continue
- format_id = url_data['itag'][0]
- url = url_data['url'][0]
-
- if 'sig' in url_data:
- url += '&signature=' + url_data['sig'][0]
- elif 's' in url_data:
- encrypted_sig = url_data['s'][0]
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
-
- jsplayer_url_json = self._search_regex(
- ASSETS_RE,
- embed_webpage if age_gate else video_webpage,
- 'JS player URL (1)', default=None)
- if not jsplayer_url_json and not age_gate:
- # We need the embed website after all
- if embed_webpage is None:
- embed_url = proto + '://www.youtube.com/embed/%s' % video_id
- embed_webpage = self._download_webpage(
- embed_url, video_id, 'Downloading embed webpage')
- jsplayer_url_json = self._search_regex(
- ASSETS_RE, embed_webpage, 'JS player URL')
-
- player_url = json.loads(jsplayer_url_json)
- if player_url is None:
- player_url_json = self._search_regex(
- r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
- video_webpage, 'age gate player URL')
- player_url = json.loads(player_url_json)
-
- if self._downloader.params.get('verbose'):
- if player_url is None:
- player_version = 'unknown'
- player_desc = 'unknown'
- else:
- if player_url.endswith('swf'):
- player_version = self._search_regex(
- r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
- 'flash player', fatal=False)
- player_desc = 'flash player %s' % player_version
- else:
- player_version = self._search_regex(
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
- r'(?:www|player)-([^/]+)(?:/[a-z]{2}_[A-Z]{2})?/base\.js'],
- player_url,
- 'html5 player', fatal=False)
- player_desc = 'html5 player %s' % player_version
-
- parts_sizes = self._signature_cache_id(encrypted_sig)
- self.to_screen('{%s} signature length %s, %s' %
- (format_id, parts_sizes, player_desc))
-
- signature = self._decrypt_signature(
- encrypted_sig, video_id, player_url, age_gate)
- url += '&signature=' + signature
- if 'ratebypass' not in url:
- url += '&ratebypass=yes'
-
- dct = {
- 'format_id': format_id,
- 'url': url,
- 'player_url': player_url,
- }
- if format_id in self._formats:
- dct.update(self._formats[format_id])
- if format_id in formats_spec:
- dct.update(formats_spec[format_id])
-
- # Some itags are not included in DASH manifest thus corresponding formats will
- # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
- # Trying to extract metadata from url_encoded_fmt_stream_map entry.
- mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
- width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
-
- more_fields = {
- 'filesize': int_or_none(url_data.get('clen', [None])[0]),
- 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
- 'width': width,
- 'height': height,
- 'fps': int_or_none(url_data.get('fps', [None])[0]),
- 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
- }
- for key, value in more_fields.items():
- if value:
- dct[key] = value
- type_ = url_data.get('type', [None])[0]
- if type_:
- type_split = type_.split(';')
- kind_ext = type_split[0].split('/')
- if len(kind_ext) == 2:
- kind, _ = kind_ext
- dct['ext'] = mimetype2ext(type_split[0])
- if kind in ('audio', 'video'):
- codecs = None
- for mobj in re.finditer(
- r'(?P<key>[a-zA-Z_-]+)=(?P<quote>["\']?)(?P<val>.+?)(?P=quote)(?:;|$)', type_):
- if mobj.group('key') == 'codecs':
- codecs = mobj.group('val')
- break
- if codecs:
- dct.update(parse_codecs(codecs))
- formats.append(dct)
- elif video_info.get('hlsvp'):
- manifest_url = video_info['hlsvp'][0]
- formats = []
- m3u8_formats = self._extract_m3u8_formats(
- manifest_url, video_id, 'mp4', fatal=False)
- for a_format in m3u8_formats:
- itag = self._search_regex(
- r'/itag/(\d+)/', a_format['url'], 'itag', default=None)
- if itag:
- a_format['format_id'] = itag
- if itag in self._formats:
- dct = self._formats[itag].copy()
- dct.update(a_format)
- a_format = dct
- a_format['player_url'] = player_url
- # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming
- a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
- formats.append(a_format)
- else:
- unavailable_message = self._html_search_regex(
- r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
- video_webpage, 'unavailable message', default=None)
- if unavailable_message:
- raise ExtractorError(unavailable_message, expected=True)
- raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')