- fmt = dict(format_dict)
- fmt['format_id'] = format_id
- all_formats.append(fmt)
- # Some formats use the m3u8 protocol
- all_formats = list(filter(lambda f: f.get('videoFormat') != 'M3U8', all_formats))
- def _match_lang(f):
- if f.get('versionCode') is None:
- return True
- # Return true if that format is in the language of the url
- if lang == 'fr':
- l = 'F'
- elif lang == 'de':
- l = 'A'
- else:
- l = lang
- regexes = [r'VO?%s' % l, r'VO?.-ST%s' % l]
- return any(re.match(r, f['versionCode']) for r in regexes)
- # Some formats may not be in the same language as the url
- # TODO: Might want not to drop videos that does not match requested language
- # but to process those formats with lower precedence
- formats = filter(_match_lang, all_formats)
- formats = list(formats) # in python3 filter returns an iterator
- if not formats:
- # Some videos are only available in the 'Originalversion'
- # they aren't tagged as being in French or German
- # Sometimes there are neither videos of requested lang code
- # nor original version videos available
- # For such cases we just take all_formats as is
- formats = all_formats
- if not formats:
- raise ExtractorError('The formats list is empty')
-
- if re.match(r'[A-Z]Q', formats[0]['quality']) is not None:
- def sort_key(f):
- return ['HQ', 'MQ', 'EQ', 'SQ'].index(f['quality'])
- else:
- def sort_key(f):
- versionCode = f.get('versionCode')
- if versionCode is None:
- versionCode = ''
- return (
- # Sort first by quality
- int(f.get('height', -1)),
- int(f.get('bitrate', -1)),
- # The original version with subtitles has lower relevance
- re.match(r'VO-ST(F|A)', versionCode) is None,
- # The version with sourds/mal subtitles has also lower relevance
- re.match(r'VO?(F|A)-STM\1', versionCode) is None,
- # Prefer http downloads over m3u8
- 0 if f['url'].endswith('m3u8') else 1,
- )
- formats = sorted(formats, key=sort_key)
- def _format(format_info):
- info = {
- 'format_id': format_info['format_id'],
- 'format_note': '%s, %s' % (format_info.get('versionCode'), format_info.get('versionLibelle')),
- 'width': int_or_none(format_info.get('width')),
- 'height': int_or_none(format_info.get('height')),
- 'tbr': int_or_none(format_info.get('bitrate')),
+ f = dict(format_dict)
+ versionCode = f.get('versionCode')
+ langcode = LANGS.get(lang, lang)
+ lang_rexs = [r'VO?%s-' % re.escape(langcode), r'VO?.-ST%s$' % re.escape(langcode)]
+ lang_pref = None
+ if versionCode:
+ matched_lang_rexs = [r for r in lang_rexs if re.match(r, versionCode)]
+ lang_pref = -10 if not matched_lang_rexs else 10 * len(matched_lang_rexs)
+ source_pref = 0
+ if versionCode is not None:
+ # The original version with subtitles has lower relevance
+ if re.match(r'VO-ST(F|A|E)', versionCode):
+ source_pref -= 10
+ # The version with subtitles for the deaf and hard of hearing ("sourds/malentendants", code STM) also has lower relevance
+ elif re.match(r'VO?(F|A|E)-STM\1', versionCode):
+ source_pref -= 9
+ format = {
+ 'format_id': format_id,
+ 'preference': -10 if f.get('videoFormat') == 'M3U8' else None,
+ 'language_preference': lang_pref,
+ 'format_note': '%s, %s' % (f.get('versionCode'), f.get('versionLibelle')),
+ 'width': int_or_none(f.get('width')),
+ 'height': int_or_none(f.get('height')),
+ 'tbr': int_or_none(f.get('bitrate')),
+ 'quality': qfunc(f.get('quality')),
+ 'source_preference': source_pref,