+ def _apply_format_filter(self, format_spec, available_formats):
+ " Returns a tuple of the remaining format_spec and filtered formats "
+
+ OPERATORS = {
+ '<': operator.lt,
+ '<=': operator.le,
+ '>': operator.gt,
+ '>=': operator.ge,
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ operator_rex = re.compile(r'''(?x)\s*\[
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
+ \]$
+ ''' % '|'.join(map(re.escape, OPERATORS.keys())))
+ m = operator_rex.search(format_spec)
+ if m:
+ try:
+ comparison_value = int(m.group('value'))
+ except ValueError:
+ comparison_value = parse_filesize(m.group('value'))
+ if comparison_value is None:
+ comparison_value = parse_filesize(m.group('value') + 'B')
+ if comparison_value is None:
+ raise ValueError(
+ 'Invalid value %r in format specification %r' % (
+ m.group('value'), format_spec))
+ op = OPERATORS[m.group('op')]
+
+ if not m:
+ STR_OPERATORS = {
+ '=': operator.eq,
+ '!=': operator.ne,
+ }
+ str_operator_rex = re.compile(r'''(?x)\s*\[
+ \s*(?P<key>ext|acodec|vcodec|container|protocol)
+ \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
+ \s*(?P<value>[a-zA-Z0-9_-]+)
+ \s*\]$
+ ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
+ m = str_operator_rex.search(format_spec)
+ if m:
+ comparison_value = m.group('value')
+ op = STR_OPERATORS[m.group('op')]
+
+ if not m:
+ raise ValueError('Invalid format specification %r' % format_spec)
+
+ def _filter(f):
+ actual_value = f.get(m.group('key'))
+ if actual_value is None:
+ return m.group('none_inclusive')
+ return op(actual_value, comparison_value)
+ new_formats = [f for f in available_formats if _filter(f)]
+
+ new_format_spec = format_spec[:-len(m.group(0))]
+ if not new_format_spec:
+ new_format_spec = 'best'
+
+ return (new_format_spec, new_formats)
+
+ def select_format(self, format_spec, available_formats):
+ while format_spec.endswith(']'):
+ format_spec, available_formats = self._apply_format_filter(
+ format_spec, available_formats)
+ if not available_formats:
+ return None
+
+ if format_spec in ['best', 'worst', None]:
+ format_idx = 0 if format_spec == 'worst' else -1
+ audiovideo_formats = [
+ f for f in available_formats
+ if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
+ if audiovideo_formats:
+ return audiovideo_formats[format_idx]
+ # for audio only urls, select the best/worst audio format
+ elif all(f.get('acodec') != 'none' for f in available_formats):
+ return available_formats[format_idx]
+ elif format_spec == 'bestaudio':
+ audio_formats = [
+ f for f in available_formats
+ if f.get('vcodec') == 'none']
+ if audio_formats:
+ return audio_formats[-1]
+ elif format_spec == 'worstaudio':
+ audio_formats = [
+ f for f in available_formats
+ if f.get('vcodec') == 'none']
+ if audio_formats:
+ return audio_formats[0]
+ elif format_spec == 'bestvideo':
+ video_formats = [
+ f for f in available_formats
+ if f.get('acodec') == 'none']
+ if video_formats:
+ return video_formats[-1]
+ elif format_spec == 'worstvideo':
+ video_formats = [
+ f for f in available_formats
+ if f.get('acodec') == 'none']
+ if video_formats:
+ return video_formats[0]
+ else:
+ extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
+ if format_spec in extensions:
+ filter_f = lambda f: f['ext'] == format_spec
+ else:
+ filter_f = lambda f: f['format_id'] == format_spec
+ matches = list(filter(filter_f, available_formats))
+ if matches:
+ return matches[-1]
+ return None
+
+ def _calc_headers(self, info_dict):
+ res = std_headers.copy()
+
+ add_headers = info_dict.get('http_headers')
+ if add_headers:
+ res.update(add_headers)
+
+ cookies = self._calc_cookies(info_dict)
+ if cookies:
+ res['Cookie'] = cookies
+
+ return res
+
+ def _calc_cookies(self, info_dict):
+ pr = compat_urllib_request.Request(info_dict['url'])
+ self.cookiejar.add_cookie_header(pr)
+ return pr.get_header('Cookie')
+
+ def process_video_result(self, info_dict, download=True):
+ assert info_dict.get('_type', 'video') == 'video'
+
+ if 'id' not in info_dict:
+ raise ExtractorError('Missing "id" field in extractor result')
+ if 'title' not in info_dict:
+ raise ExtractorError('Missing "title" field in extractor result')
+
+ if 'playlist' not in info_dict:
+ # It isn't part of a playlist
+ info_dict['playlist'] = None
+ info_dict['playlist_index'] = None
+
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
+ if thumbnails:
+ thumbnails.sort(key=lambda t: (
+ t.get('preference'), t.get('width'), t.get('height'),
+ t.get('id'), t.get('url')))
+ for i, t in enumerate(thumbnails):
+ if 'width' in t and 'height' in t:
+ t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ if t.get('id') is None:
+ t['id'] = '%d' % i
+
+ if thumbnails and 'thumbnail' not in info_dict:
+ info_dict['thumbnail'] = thumbnails[-1]['url']
+
+ if 'display_id' not in info_dict and 'id' in info_dict:
+ info_dict['display_id'] = info_dict['id']
+
+ if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
+ # Working around negative timestamps in Windows
+ # (see http://bugs.python.org/issue1646728)
+ if info_dict['timestamp'] < 0 and os.name == 'nt':
+ info_dict['timestamp'] = 0
+ upload_date = datetime.datetime.utcfromtimestamp(
+ info_dict['timestamp'])
+ info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
+
+ if self.params.get('listsubtitles', False):
+ if 'automatic_captions' in info_dict:
+ self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
+ self.list_subtitles(info_dict['id'], info_dict.get('subtitles'), 'subtitles')
+ return
+ info_dict['requested_subtitles'] = self.process_subtitles(
+ info_dict['id'], info_dict.get('subtitles'),
+ info_dict.get('automatic_captions'))
+
+ # This extractors handle format selection themselves
+ if info_dict['extractor'] in ['Youku']:
+ if download:
+ self.process_info(info_dict)
+ return info_dict
+
+ # We now pick which formats have to be downloaded
+ if info_dict.get('formats') is None:
+ # There's only one format available
+ formats = [info_dict]
+ else:
+ formats = info_dict['formats']
+
+ if not formats:
+ raise ExtractorError('No video formats found!')
+
+ # We check that all the formats have the format and format_id fields
+ for i, format in enumerate(formats):
+ if 'url' not in format:
+ raise ExtractorError('Missing "url" key in result (index %d)' % i)
+
+ if format.get('format_id') is None:
+ format['format_id'] = compat_str(i)
+ if format.get('format') is None:
+ format['format'] = '{id} - {res}{note}'.format(
+ id=format['format_id'],
+ res=self.format_resolution(format),
+ note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
+ )
+ # Automatically determine file extension if missing
+ if 'ext' not in format:
+ format['ext'] = determine_ext(format['url']).lower()
+ # Add HTTP headers, so that external programs can use them from the
+ # json output
+ full_format_info = info_dict.copy()
+ full_format_info.update(format)
+ format['http_headers'] = self._calc_headers(full_format_info)
+
+ # TODO Central sorting goes here
+
+ if formats[0] is not info_dict:
+ # only set the 'formats' fields if the original info_dict list them
+ # otherwise we end up with a circular reference, the first (and unique)
+ # element in the 'formats' field in info_dict is info_dict itself,
+ # wich can't be exported to json
+ info_dict['formats'] = formats
+ if self.params.get('listformats'):
+ self.list_formats(info_dict)
+ return
+ if self.params.get('list_thumbnails'):
+ self.list_thumbnails(info_dict)
+ return
+
+ req_format = self.params.get('format')
+ if req_format is None:
+ req_format_list = []
+ if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
+ info_dict['extractor'] in ['youtube', 'ted']):
+ merger = FFmpegMergerPP(self)
+ if merger.available and merger.can_merge():
+ req_format_list.append('bestvideo+bestaudio')
+ req_format_list.append('best')
+ req_format = '/'.join(req_format_list)
+ formats_to_download = []
+ if req_format == 'all':
+ formats_to_download = formats
+ else:
+ for rfstr in req_format.split(','):
+ # We can accept formats requested in the format: 34/5/best, we pick
+ # the first that is available, starting from left
+ req_formats = rfstr.split('/')
+ for rf in req_formats:
+ if re.match(r'.+?\+.+?', rf) is not None:
+ # Two formats have been requested like '137+139'
+ format_1, format_2 = rf.split('+')
+ formats_info = (self.select_format(format_1, formats),
+ self.select_format(format_2, formats))
+ if all(formats_info):
+ # The first format must contain the video and the
+ # second the audio
+ if formats_info[0].get('vcodec') == 'none':
+ self.report_error('The first format must '
+ 'contain the video, try using '
+ '"-f %s+%s"' % (format_2, format_1))
+ return
+ output_ext = (
+ formats_info[0]['ext']
+ if self.params.get('merge_output_format') is None
+ else self.params['merge_output_format'])
+ selected_format = {
+ 'requested_formats': formats_info,
+ 'format': '%s+%s' % (formats_info[0].get('format'),
+ formats_info[1].get('format')),
+ 'format_id': '%s+%s' % (formats_info[0].get('format_id'),
+ formats_info[1].get('format_id')),
+ 'width': formats_info[0].get('width'),
+ 'height': formats_info[0].get('height'),
+ 'resolution': formats_info[0].get('resolution'),
+ 'fps': formats_info[0].get('fps'),
+ 'vcodec': formats_info[0].get('vcodec'),
+ 'vbr': formats_info[0].get('vbr'),
+ 'stretched_ratio': formats_info[0].get('stretched_ratio'),
+ 'acodec': formats_info[1].get('acodec'),
+ 'abr': formats_info[1].get('abr'),
+ 'ext': output_ext,
+ }
+ else:
+ selected_format = None
+ else:
+ selected_format = self.select_format(rf, formats)
+ if selected_format is not None:
+ formats_to_download.append(selected_format)
+ break
+ if not formats_to_download:
+ raise ExtractorError('requested format not available',
+ expected=True)
+
+ if download:
+ if len(formats_to_download) > 1:
+ self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
+ for format in formats_to_download:
+ new_info = dict(info_dict)
+ new_info.update(format)
+ self.process_info(new_info)
+ # We update the info dict with the best quality format (backwards compatibility)
+ info_dict.update(formats_to_download[-1])
+ return info_dict
+
+ def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
+ """Select the requested subtitles and their format"""
+ available_subs = {}
+ if normal_subtitles and self.params.get('writesubtitles'):
+ available_subs.update(normal_subtitles)
+ if automatic_captions and self.params.get('writeautomaticsub'):
+ for lang, cap_info in automatic_captions.items():
+ if lang not in available_subs:
+ available_subs[lang] = cap_info
+
+ if (not self.params.get('writesubtitles') and not
+ self.params.get('writeautomaticsub') or not
+ available_subs):
+ return None
+
+ if self.params.get('allsubtitles', False):
+ requested_langs = available_subs.keys()
+ else:
+ if self.params.get('subtitleslangs', False):
+ requested_langs = self.params.get('subtitleslangs')
+ elif 'en' in available_subs:
+ requested_langs = ['en']
+ else:
+ requested_langs = [list(available_subs.keys())[0]]
+
+ formats_query = self.params.get('subtitlesformat', 'best')
+ formats_preference = formats_query.split('/') if formats_query else []
+ subs = {}
+ for lang in requested_langs:
+ formats = available_subs.get(lang)
+ if formats is None:
+ self.report_warning('%s subtitles not available for %s' % (lang, video_id))
+ continue
+ for ext in formats_preference:
+ if ext == 'best':
+ f = formats[-1]
+ break
+ matches = list(filter(lambda f: f['ext'] == ext, formats))
+ if matches:
+ f = matches[-1]
+ break
+ else:
+ f = formats[-1]
+ self.report_warning(
+ 'No subtitle format found matching "%s" for language %s, '
+ 'using %s' % (formats_query, lang, f['ext']))
+ subs[lang] = f
+ return subs
+