X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/f46044c66663049e286c20ee015db99d47d9dd8a..57d0100a1c926cf2801d714091c1001a4136cba8:/youtube_dl/postprocessor/ffmpeg.py diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 30094c2..1881f48 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals import io import os import subprocess -import sys import time @@ -21,17 +20,31 @@ from ..utils import ( prepend_extension, shell_quote, subtitles_filename, + dfxp2srt, + ISO639Utils, ) +EXT_TO_OUT_FORMATS = { + "aac": "adts", + "m4a": "ipod", + "mka": "matroska", + "mkv": "matroska", + "mpg": "mpeg", + "ogv": "ogg", + "ts": "mpegts", + "wma": "asf", + "wmv": "asf", +} + + class FFmpegPostProcessorError(PostProcessingError): pass class FFmpegPostProcessor(PostProcessor): - def __init__(self, downloader=None, deletetempfiles=False): + def __init__(self, downloader=None): PostProcessor.__init__(self, downloader) - self._deletetempfiles = deletetempfiles self._determine_executables() def check_version(self): @@ -52,7 +65,7 @@ class FFmpegPostProcessor(PostProcessor): def _determine_executables(self): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] - prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False) + prefer_ffmpeg = False self.basename = None self.probe_basename = None @@ -60,6 +73,7 @@ class FFmpegPostProcessor(PostProcessor): self._paths = None self._versions = None if self._downloader: + prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False) location = self._downloader.params.get('ffmpeg_location') if location is not None: if not os.path.exists(location): @@ -117,19 +131,52 @@ class FFmpegPostProcessor(PostProcessor): def executable(self): return self._paths[self.basename] + @property + def probe_available(self): + return self.probe_basename is not None + @property def probe_executable(self): return self._paths[self.probe_basename] + def get_audio_codec(self, path): + if not self.probe_available: + raise PostProcessingError('ffprobe or avprobe not found. Please install one.') + try: + cmd = [ + encodeFilename(self.probe_executable, True), + encodeArgument('-show_streams'), + encodeFilename(self._ffmpeg_filename_argument(path), True)] + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) + output = handle.communicate()[0] + if handle.wait() != 0: + return None + except (IOError, OSError): + return None + audio_codec = None + for line in output.decode('ascii', 'ignore').split('\n'): + if line.startswith('codec_name='): + audio_codec = line.split('=')[1].strip() + elif line.strip() == 'codec_type=audio' and audio_codec is not None: + return audio_codec + return None + def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): self.check_version() oldest_mtime = min( os.stat(encodeFilename(path)).st_mtime for path in input_paths) + opts += self._configuration_args() + files_cmd = [] for path in input_paths: - files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)]) + files_cmd.extend([ + encodeArgument('-i'), + encodeFilename(self._ffmpeg_filename_argument(path), True) + ]) cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + files_cmd + [encodeArgument(o) for o in opts] + @@ -143,19 +190,17 @@ class FFmpegPostProcessor(PostProcessor): stderr = stderr.decode('utf-8', 'replace') msg = stderr.strip().split('\n')[-1] raise FFmpegPostProcessorError(msg) - os.utime(encodeFilename(out_path), (oldest_mtime, oldest_mtime)) - if self._deletetempfiles: - for ipath in input_paths: - os.remove(ipath) + self.try_utime(out_path, oldest_mtime, oldest_mtime) def run_ffmpeg(self, path, out_path, opts): self.run_ffmpeg_multiple_files([path], out_path, opts) def _ffmpeg_filename_argument(self, fn): - # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details - if fn.startswith('-'): - return './' + fn - return fn + # Always use 'file:' because the filename may contain ':' (ffmpeg + # interprets that as a protocol) or can start with '-' (-- is broken in + # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details) + # Also leave '-' intact in order not to break streaming to stdout. + return 'file:' + fn if fn != '-' else fn class FFmpegExtractAudioPP(FFmpegPostProcessor): @@ -167,31 +212,6 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): self._preferredquality = preferredquality self._nopostoverwrites = nopostoverwrites - def get_audio_codec(self, path): - - if not self.probe_executable: - raise PostProcessingError('ffprobe or avprobe not found. Please install one.') - try: - cmd = [ - encodeFilename(self.probe_executable, True), - encodeArgument('-show_streams'), - encodeFilename(self._ffmpeg_filename_argument(path), True)] - if self._downloader.params.get('verbose', False): - self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) - handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) - output = handle.communicate()[0] - if handle.wait() != 0: - return None - except (IOError, OSError): - return None - audio_codec = None - for line in output.decode('ascii', 'ignore').split('\n'): - if line.startswith('codec_name='): - audio_codec = line.split('=')[1].strip() - elif line.strip() == 'codec_type=audio' and audio_codec is not None: - return audio_codec - return None - def run_ffmpeg(self, path, out_path, codec, more_opts): if codec is None: acodec_opts = [] @@ -259,33 +279,31 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups new_path = prefix + sep + extension + information['filepath'] = new_path + information['ext'] = extension + # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly. - if new_path == path: - self._nopostoverwrites = True + if (new_path == path or + (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): + self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path) + return [], information try: - if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)): - self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path) - else: - self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path) - self.run_ffmpeg(path, new_path, acodec, more_opts) - except: - etype, e, tb = sys.exc_info() - if isinstance(e, AudioConversionError): - msg = 'audio conversion failed: ' + e.msg - else: - msg = 'error running ' + self.basename - raise PostProcessingError(msg) + self._downloader.to_screen('[ffmpeg] Destination: ' + new_path) + self.run_ffmpeg(path, new_path, acodec, more_opts) + except AudioConversionError as e: + raise PostProcessingError( + 'audio conversion failed: ' + e.msg) + except Exception: + raise PostProcessingError('error running ' + self.basename) # Try to update the date time for extracted audio file. if information.get('filetime') is not None: - try: - os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) - except: - self._downloader.report_warning('Cannot update utime of audio file') + self.try_utime( + new_path, time.time(), information['filetime'], + errnote='Cannot update utime of audio file') - information['filepath'] = new_path - return self._nopostoverwrites, information + return [path], information class FFmpegVideoConvertorPP(FFmpegPostProcessor): @@ -295,225 +313,53 @@ class FFmpegVideoConvertorPP(FFmpegPostProcessor): def run(self, information): path = information['filepath'] - prefix, sep, ext = path.rpartition('.') - outpath = prefix + sep + self._preferedformat if information['ext'] == self._preferedformat: self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat)) - return True, information + return [], information + options = [] + if self._preferedformat == 'avi': + options.extend(['-c:v', 'libxvid', '-vtag', 'XVID']) + prefix, sep, ext = path.rpartition('.') + outpath = prefix + sep + self._preferedformat self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath) - self.run_ffmpeg(path, outpath, []) + self.run_ffmpeg(path, outpath, options) information['filepath'] = outpath information['format'] = self._preferedformat information['ext'] = self._preferedformat - return False, information + return [path], information class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): - # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt - _lang_map = { - 'aa': 'aar', - 'ab': 'abk', - 'ae': 'ave', - 'af': 'afr', - 'ak': 'aka', - 'am': 'amh', - 'an': 'arg', - 'ar': 'ara', - 'as': 'asm', - 'av': 'ava', - 'ay': 'aym', - 'az': 'aze', - 'ba': 'bak', - 'be': 'bel', - 'bg': 'bul', - 'bh': 'bih', - 'bi': 'bis', - 'bm': 'bam', - 'bn': 'ben', - 'bo': 'bod', - 'br': 'bre', - 'bs': 'bos', - 'ca': 'cat', - 'ce': 'che', - 'ch': 'cha', - 'co': 'cos', - 'cr': 'cre', - 'cs': 'ces', - 'cu': 'chu', - 'cv': 'chv', - 'cy': 'cym', - 'da': 'dan', - 'de': 'deu', - 'dv': 'div', - 'dz': 'dzo', - 'ee': 'ewe', - 'el': 'ell', - 'en': 'eng', - 'eo': 'epo', - 'es': 'spa', - 'et': 'est', - 'eu': 'eus', - 'fa': 'fas', - 'ff': 'ful', - 'fi': 'fin', - 'fj': 'fij', - 'fo': 'fao', - 'fr': 'fra', - 'fy': 'fry', - 'ga': 'gle', - 'gd': 'gla', - 'gl': 'glg', - 'gn': 'grn', - 'gu': 'guj', - 'gv': 'glv', - 'ha': 'hau', - 'he': 'heb', - 'hi': 'hin', - 'ho': 'hmo', - 'hr': 'hrv', - 'ht': 'hat', - 'hu': 'hun', - 'hy': 'hye', - 'hz': 'her', - 'ia': 'ina', - 'id': 'ind', - 'ie': 'ile', - 'ig': 'ibo', - 'ii': 'iii', - 'ik': 'ipk', - 'io': 'ido', - 'is': 'isl', - 'it': 'ita', - 'iu': 'iku', - 'ja': 'jpn', - 'jv': 'jav', - 'ka': 'kat', - 'kg': 'kon', - 'ki': 'kik', - 'kj': 'kua', - 'kk': 'kaz', - 'kl': 'kal', - 'km': 'khm', - 'kn': 'kan', - 'ko': 'kor', - 'kr': 'kau', - 'ks': 'kas', - 'ku': 'kur', - 'kv': 'kom', - 'kw': 'cor', - 'ky': 'kir', - 'la': 'lat', - 'lb': 'ltz', - 'lg': 'lug', - 'li': 'lim', - 'ln': 'lin', - 'lo': 'lao', - 'lt': 'lit', - 'lu': 'lub', - 'lv': 'lav', - 'mg': 'mlg', - 'mh': 'mah', - 'mi': 'mri', - 'mk': 'mkd', - 'ml': 'mal', - 'mn': 'mon', - 'mr': 'mar', - 'ms': 'msa', - 'mt': 'mlt', - 'my': 'mya', - 'na': 'nau', - 'nb': 'nob', - 'nd': 'nde', - 'ne': 'nep', - 'ng': 'ndo', - 'nl': 'nld', - 'nn': 'nno', - 'no': 'nor', - 'nr': 'nbl', - 'nv': 'nav', - 'ny': 'nya', - 'oc': 'oci', - 'oj': 'oji', - 'om': 'orm', - 'or': 'ori', - 'os': 'oss', - 'pa': 'pan', - 'pi': 'pli', - 'pl': 'pol', - 'ps': 'pus', - 'pt': 'por', - 'qu': 'que', - 'rm': 'roh', - 'rn': 'run', - 'ro': 'ron', - 'ru': 'rus', - 'rw': 'kin', - 'sa': 'san', - 'sc': 'srd', - 'sd': 'snd', - 'se': 'sme', - 'sg': 'sag', - 'si': 'sin', - 'sk': 'slk', - 'sl': 'slv', - 'sm': 'smo', - 'sn': 'sna', - 'so': 'som', - 'sq': 'sqi', - 'sr': 'srp', - 'ss': 'ssw', - 'st': 'sot', - 'su': 'sun', - 'sv': 'swe', - 'sw': 'swa', - 'ta': 'tam', - 'te': 'tel', - 'tg': 'tgk', - 'th': 'tha', - 'ti': 'tir', - 'tk': 'tuk', - 'tl': 'tgl', - 'tn': 'tsn', - 'to': 'ton', - 'tr': 'tur', - 'ts': 'tso', - 'tt': 'tat', - 'tw': 'twi', - 'ty': 'tah', - 'ug': 'uig', - 'uk': 'ukr', - 'ur': 'urd', - 'uz': 'uzb', - 've': 'ven', - 'vi': 'vie', - 'vo': 'vol', - 'wa': 'wln', - 'wo': 'wol', - 'xh': 'xho', - 'yi': 'yid', - 'yo': 'yor', - 'za': 'zha', - 'zh': 'zho', - 'zu': 'zul', - } - - @classmethod - def _conver_lang_code(cls, code): - """Convert language code from ISO 639-1 to ISO 639-2/T""" - return cls._lang_map.get(code[:2]) - def run(self, information): - if information['ext'] != 'mp4': - self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 files') - return True, information + if information['ext'] not in ('mp4', 'webm', 'mkv'): + self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files') + return [], information subtitles = information.get('requested_subtitles') if not subtitles: self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed') - return True, information + return [], information - sub_langs = list(subtitles.keys()) filename = information['filepath'] - input_files = [filename] + [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()] + + ext = information['ext'] + sub_langs = [] + sub_filenames = [] + webm_vtt_warn = False + + for lang, sub_info in subtitles.items(): + sub_ext = sub_info['ext'] + if ext != 'webm' or ext == 'webm' and sub_ext == 'vtt': + sub_langs.append(lang) + sub_filenames.append(subtitles_filename(filename, lang, sub_ext)) + else: + if not webm_vtt_warn and ext == 'webm' and sub_ext != 'vtt': + webm_vtt_warn = True + self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files') + + if not sub_langs: + return [], information + + input_files = [filename] + sub_filenames opts = [ '-map', '0', @@ -521,11 +367,12 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): # Don't copy the existing subtitles, we may be running the # postprocessor a second time '-map', '-0:s', - '-c:s', 'mov_text', ] + if information['ext'] == 'mp4': + opts += ['-c:s', 'mov_text'] for (i, lang) in enumerate(sub_langs): opts.extend(['-map', '%d:0' % (i + 1)]) - lang_code = self._conver_lang_code(lang) + lang_code = ISO639Utils.short2long(lang) if lang_code is not None: opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) @@ -535,29 +382,40 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, information + return sub_filenames, information class FFmpegMetadataPP(FFmpegPostProcessor): def run(self, info): metadata = {} - if info.get('title') is not None: - metadata['title'] = info['title'] - if info.get('upload_date') is not None: - metadata['date'] = info['upload_date'] - if info.get('uploader') is not None: - metadata['artist'] = info['uploader'] - elif info.get('uploader_id') is not None: - metadata['artist'] = info['uploader_id'] - if info.get('description') is not None: - metadata['description'] = info['description'] - metadata['comment'] = info['description'] - if info.get('webpage_url') is not None: - metadata['purl'] = info['webpage_url'] + + def add(meta_list, info_list=None): + if not info_list: + info_list = meta_list + if not isinstance(meta_list, (list, tuple)): + meta_list = (meta_list,) + if not isinstance(info_list, (list, tuple)): + info_list = (info_list,) + for info_f in info_list: + if info.get(info_f) is not None: + for meta_f in meta_list: + metadata[meta_f] = info[info_f] + break + + add('title', ('track', 'title')) + add('date', 'upload_date') + add(('description', 'comment'), 'description') + add('purl', 'webpage_url') + add('track', 'track_number') + add('artist', ('artist', 'creator', 'uploader', 'uploader_id')) + add('genre') + add('album') + add('album_artist') + add('disc', 'disc_number') if not metadata: self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add') - return True, info + return [], info filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') @@ -574,38 +432,42 @@ class FFmpegMetadataPP(FFmpegPostProcessor): self.run_ffmpeg(filename, temp_filename, options) os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info class FFmpegMergerPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) - self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) - return True, info - - -class FFmpegAudioFixPP(FFmpegPostProcessor): - def run(self, info): - filename = info['filepath'] - temp_filename = prepend_extension(filename, 'temp') - - options = ['-vn', '-acodec', 'copy'] - self._downloader.to_screen('[ffmpeg] Fixing audio file "%s"' % filename) - self.run_ffmpeg(filename, temp_filename, options) - - os.remove(encodeFilename(filename)) + self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + return info['__files_to_merge'], info - return True, info + def can_merge(self): + # TODO: figure out merge-capable ffmpeg version + if self.basename != 'avconv': + return True + + required_version = '10-0' + if is_outdated_version( + self._versions[self.basename], required_version): + warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, ' + 'youtube-dl will download single file media. ' + 'Update %s to version %s or newer to fix this.') % ( + self.basename, self.basename, required_version) + if self._downloader: + self._downloader.report_warning(warning) + return False + return True class FFmpegFixupStretchedPP(FFmpegPostProcessor): def run(self, info): stretched_ratio = info.get('stretched_ratio') if stretched_ratio is None or stretched_ratio == 1: - return True, info + return [], info filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') @@ -617,13 +479,13 @@ class FFmpegFixupStretchedPP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info class FFmpegFixupM4aPP(FFmpegPostProcessor): def run(self, info): if info.get('container') != 'm4a_dash': - return True, info + return [], info filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') @@ -635,7 +497,22 @@ class FFmpegFixupM4aPP(FFmpegPostProcessor): os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info + + +class FFmpegFixupM3u8PP(FFmpegPostProcessor): + def run(self, info): + filename = info['filepath'] + if self.get_audio_codec(filename) == 'aac': + temp_filename = prepend_extension(filename, 'temp') + + options = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc'] + self._downloader.to_screen('[ffmpeg] Fixing malformated aac bitstream in "%s"' % filename) + self.run_ffmpeg(filename, temp_filename, options) + + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + return [], info class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): @@ -652,8 +529,9 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): new_format = 'webvtt' if subs is None: self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert') - return True, info + return [], info self._downloader.to_screen('[ffmpeg] Converting subtitles') + sub_filenames = [] for lang, sub in subs.items(): ext = sub['ext'] if ext == new_ext: @@ -661,15 +539,41 @@ class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): '[ffmpeg] Subtitle file for %s is already in the requested' 'format' % new_ext) continue + old_file = subtitles_filename(filename, lang, ext) + sub_filenames.append(old_file) new_file = subtitles_filename(filename, lang, new_ext) - self.run_ffmpeg( - subtitles_filename(filename, lang, ext), - new_file, ['-f', new_format]) + + if ext == 'dfxp' or ext == 'ttml' or ext == 'tt': + self._downloader.report_warning( + 'You have requested to convert dfxp (TTML) subtitles into another format, ' + 'which results in style information loss') + + dfxp_file = old_file + srt_file = subtitles_filename(filename, lang, 'srt') + + with io.open(dfxp_file, 'rt', encoding='utf-8') as f: + srt_data = dfxp2srt(f.read()) + + with io.open(srt_file, 'wt', encoding='utf-8') as f: + f.write(srt_data) + old_file = srt_file + + subs[lang] = { + 'ext': 'srt', + 'data': srt_data + } + + if new_ext == 'srt': + continue + else: + sub_filenames.append(srt_file) + + self.run_ffmpeg(old_file, new_file, ['-f', new_format]) with io.open(new_file, 'rt', encoding='utf-8') as f: subs[lang] = { - 'ext': ext, + 'ext': new_ext, 'data': f.read(), } - return True, info + return sub_filenames, info