X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/46113edab215c2211a604c06245c16d5d4e57dcf..139d14b198add5b26a11ed5dca022feaa6581d98:/youtube_dl/postprocessor/ffmpeg.py diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 083c795..cc65b34 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -1,122 +1,160 @@ +from __future__ import unicode_literals + +import io import os -import re import subprocess -import sys import time from .common import AudioConversionError, PostProcessor -from ..utils import ( +from ..compat import ( compat_subprocess_get_DEVNULL, +) +from ..utils import ( encodeArgument, encodeFilename, + get_exe_version, is_outdated_version, PostProcessingError, prepend_extension, shell_quote, subtitles_filename, + dfxp2srt, ) -def get_version(executable): - """ Returns the version of the specified executable, - or False if the executable is not present """ - try: - out, err = subprocess.Popen( - [executable, '-version'], - stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate() - except OSError: - return False - firstline = out.partition(b'\n')[0].decode('ascii', 'ignore') - m = re.search(r'version\s+([0-9._-a-zA-Z]+)', firstline) - if not m: - return u'present' - else: - return m.group(1) - - class FFmpegPostProcessorError(PostProcessingError): pass class FFmpegPostProcessor(PostProcessor): - def __init__(self, downloader=None, deletetempfiles=False): + def __init__(self, downloader=None): PostProcessor.__init__(self, downloader) - self._versions = self.get_versions() - self._deletetempfiles = deletetempfiles + self._determine_executables() def check_version(self): - if not self._executable: - raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.') + if not self.available: + raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.') - REQUIRED_VERSION = '1.0' + required_version = '10-0' if self.basename == 'avconv' else '1.0' if is_outdated_version( - self._versions[self._executable], REQUIRED_VERSION): - warning = u'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( - self._executable, self._executable, REQUIRED_VERSION) + self._versions[self.basename], required_version): + warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % ( + self.basename, self.basename, required_version) if self._downloader: self._downloader.report_warning(warning) @staticmethod - def get_versions(): - programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] - return dict((program, get_version(program)) for program in programs) + def get_versions(downloader=None): + return FFmpegPostProcessor(downloader)._versions - @property - def _executable(self): - if self._downloader.params.get('prefer_ffmpeg', False): + def _determine_executables(self): + programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] + prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False) + + self.basename = None + self.probe_basename = None + + self._paths = None + self._versions = None + if self._downloader: + location = self._downloader.params.get('ffmpeg_location') + if location is not None: + if not os.path.exists(location): + self._downloader.report_warning( + 'ffmpeg-location %s does not exist! ' + 'Continuing without avconv/ffmpeg.' % (location)) + self._versions = {} + return + elif not os.path.isdir(location): + basename = os.path.splitext(os.path.basename(location))[0] + if basename not in programs: + self._downloader.report_warning( + 'Cannot identify executable %s, its basename should be one of %s. ' + 'Continuing without avconv/ffmpeg.' % + (location, ', '.join(programs))) + self._versions = {} + return None + location = os.path.dirname(os.path.abspath(location)) + if basename in ('ffmpeg', 'ffprobe'): + prefer_ffmpeg = True + + self._paths = dict( + (p, os.path.join(location, p)) for p in programs) + self._versions = dict( + (p, get_exe_version(self._paths[p], args=['-version'])) + for p in programs) + if self._versions is None: + self._versions = dict( + (p, get_exe_version(p, args=['-version'])) for p in programs) + self._paths = dict((p, p) for p in programs) + + if prefer_ffmpeg: prefs = ('ffmpeg', 'avconv') else: prefs = ('avconv', 'ffmpeg') for p in prefs: if self._versions[p]: - return p - return None + self.basename = p + break - @property - def _probe_executable(self): - if self._downloader.params.get('prefer_ffmpeg', False): + if prefer_ffmpeg: prefs = ('ffprobe', 'avprobe') else: prefs = ('avprobe', 'ffprobe') for p in prefs: if self._versions[p]: - return p - return None + self.probe_basename = p + break + + @property + def available(self): + return self.basename is not None + + @property + def executable(self): + return self._paths[self.basename] - def _uses_avconv(self): - return self._executable == 'avconv' + @property + def probe_available(self): + return self.probe_basename is not None + + @property + def probe_executable(self): + return self._paths[self.probe_basename] def run_ffmpeg_multiple_files(self, input_paths, out_path, opts): self.check_version() + oldest_mtime = min( + os.stat(encodeFilename(path)).st_mtime for path in input_paths) + files_cmd = [] for path in input_paths: - files_cmd.extend(['-i', encodeFilename(path, True)]) - cmd = ([self._executable, '-y'] + files_cmd - + [encodeArgument(o) for o in opts] + + files_cmd.extend([encodeArgument('-i'), encodeFilename(path, True)]) + cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] + + files_cmd + + [encodeArgument(o) for o in opts] + [encodeFilename(self._ffmpeg_filename_argument(out_path), True)]) if self._downloader.params.get('verbose', False): - self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd)) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd)) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) stdout, stderr = p.communicate() if p.returncode != 0: stderr = stderr.decode('utf-8', 'replace') msg = stderr.strip().split('\n')[-1] raise FFmpegPostProcessorError(msg) - if self._deletetempfiles: - for ipath in input_paths: - os.remove(ipath) + self.try_utime(out_path, oldest_mtime, oldest_mtime) def run_ffmpeg(self, path, out_path, opts): self.run_ffmpeg_multiple_files([path], out_path, opts) def _ffmpeg_filename_argument(self, fn): # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details - if fn.startswith(u'-'): - return u'./' + fn + if fn.startswith('-'): + return './' + fn return fn @@ -131,14 +169,16 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): def get_audio_codec(self, path): - if not self._probe_executable: - raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.') + if not self.probe_available: + raise PostProcessingError('ffprobe or avprobe not found. Please install one.') try: cmd = [ - self._probe_executable, - '-show_streams', + encodeFilename(self.probe_executable, True), + encodeArgument('-show_streams'), encodeFilename(self._ffmpeg_filename_argument(path), True)] - handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) + if self._downloader.params.get('verbose', False): + self._downloader.to_screen('[debug] %s command line: %s' % (self.basename, shell_quote(cmd))) + handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE) output = handle.communicate()[0] if handle.wait() != 0: return None @@ -168,16 +208,15 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): filecodec = self.get_audio_codec(path) if filecodec is None: - raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe') + raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe') - uses_avconv = self._uses_avconv() more_opts = [] if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'): if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']: # Lossless, but in another container acodec = 'copy' extension = 'm4a' - more_opts = ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc'] + more_opts = ['-bsf:a', 'aac_adtstoasc'] elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']: # Lossless if possible acodec = 'copy' @@ -193,9 +232,9 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): more_opts = [] if self._preferredquality is not None: if int(self._preferredquality) < 10: - more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality] + more_opts += ['-q:a', self._preferredquality] else: - more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k'] + more_opts += ['-b:a', self._preferredquality + 'k'] else: # We convert the audio (lossy) acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] @@ -204,69 +243,67 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): if self._preferredquality is not None: # The opus codec doesn't support the -aq option if int(self._preferredquality) < 10 and extension != 'opus': - more_opts += ['-q:a' if uses_avconv else '-aq', self._preferredquality] + more_opts += ['-q:a', self._preferredquality] else: - more_opts += ['-b:a' if uses_avconv else '-ab', self._preferredquality + 'k'] + more_opts += ['-b:a', self._preferredquality + 'k'] if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] if self._preferredcodec == 'm4a': - more_opts += ['-bsf:a' if uses_avconv else '-absf', 'aac_adtstoasc'] + more_opts += ['-bsf:a', 'aac_adtstoasc'] if self._preferredcodec == 'vorbis': extension = 'ogg' if self._preferredcodec == 'wav': extension = 'wav' more_opts += ['-f', 'wav'] - prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups + prefix, sep, ext = path.rpartition('.') # not os.path.splitext, since the latter does not work on unicode in all setups new_path = prefix + sep + extension # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly. - if new_path == path: - self._nopostoverwrites = True + if (new_path == path or + (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): + self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path) + return [], information try: - if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)): - self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path) - else: - self._downloader.to_screen(u'[' + self._executable + '] Destination: ' + new_path) - self.run_ffmpeg(path, new_path, acodec, more_opts) - except: - etype,e,tb = sys.exc_info() - if isinstance(e, AudioConversionError): - msg = u'audio conversion failed: ' + e.msg - else: - msg = u'error running ' + self._executable - raise PostProcessingError(msg) + self._downloader.to_screen('[' + self.basename + '] Destination: ' + new_path) + self.run_ffmpeg(path, new_path, acodec, more_opts) + except AudioConversionError as e: + raise PostProcessingError( + 'audio conversion failed: ' + e.msg) + except Exception: + raise PostProcessingError('error running ' + self.basename) # Try to update the date time for extracted audio file. if information.get('filetime') is not None: - try: - os.utime(encodeFilename(new_path), (time.time(), information['filetime'])) - except: - self._downloader.report_warning(u'Cannot update utime of audio file') + self.try_utime( + new_path, time.time(), information['filetime'], + errnote='Cannot update utime of audio file') information['filepath'] = new_path - return self._nopostoverwrites,information + information['ext'] = extension + + return [path], information -class FFmpegVideoConvertor(FFmpegPostProcessor): - def __init__(self, downloader=None,preferedformat=None): - super(FFmpegVideoConvertor, self).__init__(downloader) - self._preferedformat=preferedformat +class FFmpegVideoConvertorPP(FFmpegPostProcessor): + def __init__(self, downloader=None, preferedformat=None): + super(FFmpegVideoConvertorPP, self).__init__(downloader) + self._preferedformat = preferedformat def run(self, information): path = information['filepath'] - prefix, sep, ext = path.rpartition(u'.') + prefix, sep, ext = path.rpartition('.') outpath = prefix + sep + self._preferedformat if information['ext'] == self._preferedformat: - self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat)) - return True,information - self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath) + self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat)) + return [], information + self._downloader.to_screen('[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) + outpath) self.run_ffmpeg(path, outpath, []) information['filepath'] = outpath information['format'] = self._preferedformat information['ext'] = self._preferedformat - return False,information + return [path], information class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): @@ -458,42 +495,47 @@ class FFmpegEmbedSubtitlePP(FFmpegPostProcessor): 'zu': 'zul', } - def __init__(self, downloader=None, subtitlesformat='srt'): - super(FFmpegEmbedSubtitlePP, self).__init__(downloader) - self._subformat = subtitlesformat - @classmethod def _conver_lang_code(cls, code): """Convert language code from ISO 639-1 to ISO 639-2/T""" return cls._lang_map.get(code[:2]) def run(self, information): - if information['ext'] != u'mp4': - self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files') - return True, information - if not information.get('subtitles'): - self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed') - return True, information - - sub_langs = [key for key in information['subtitles']] + if information['ext'] not in ['mp4', 'mkv']: + self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4 or mkv files') + return [], information + subtitles = information.get('requested_subtitles') + if not subtitles: + self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed') + return [], information + + sub_langs = list(subtitles.keys()) filename = information['filepath'] - input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs] - - opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy'] + sub_filenames = [subtitles_filename(filename, lang, sub_info['ext']) for lang, sub_info in subtitles.items()] + input_files = [filename] + sub_filenames + + opts = [ + '-map', '0', + '-c', 'copy', + # Don't copy the existing subtitles, we may be running the + # postprocessor a second time + '-map', '-0:s', + ] + if information['ext'] == 'mp4': + opts += ['-c:s', 'mov_text'] for (i, lang) in enumerate(sub_langs): - opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text']) + opts.extend(['-map', '%d:0' % (i + 1)]) lang_code = self._conver_lang_code(lang) if lang_code is not None: opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code]) - opts.extend(['-f', 'mp4']) - temp_filename = filename + u'.temp' - self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename) + temp_filename = prepend_extension(filename, 'temp') + self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename) self.run_ffmpeg_multiple_files(input_files, temp_filename, opts) os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, information + return sub_filenames, information class FFmpegMetadataPP(FFmpegPostProcessor): @@ -503,19 +545,28 @@ class FFmpegMetadataPP(FFmpegPostProcessor): metadata['title'] = info['title'] if info.get('upload_date') is not None: metadata['date'] = info['upload_date'] - if info.get('uploader') is not None: + if info.get('artist') is not None: + metadata['artist'] = info['artist'] + elif info.get('uploader') is not None: metadata['artist'] = info['uploader'] elif info.get('uploader_id') is not None: metadata['artist'] = info['uploader_id'] + if info.get('description') is not None: + metadata['description'] = info['description'] + metadata['comment'] = info['description'] + if info.get('webpage_url') is not None: + metadata['purl'] = info['webpage_url'] + if info.get('album') is not None: + metadata['album'] = info['album'] if not metadata: - self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add') - return True, info + self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add') + return [], info filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') - if info['ext'] == u'm4a': + if info['ext'] == 'm4a': options = ['-vn', '-acodec', 'copy'] else: options = ['-c', 'copy'] @@ -523,32 +574,134 @@ class FFmpegMetadataPP(FFmpegPostProcessor): for (name, value) in metadata.items(): options.extend(['-metadata', '%s=%s' % (name, value)]) - self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename) + self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename) self.run_ffmpeg(filename, temp_filename, options) os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info class FFmpegMergerPP(FFmpegPostProcessor): def run(self, info): filename = info['filepath'] - args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0', '-shortest'] - self._downloader.to_screen(u'[ffmpeg] Merging formats into "%s"' % filename) - self.run_ffmpeg_multiple_files(info['__files_to_merge'], filename, args) - return True, info + temp_filename = prepend_extension(filename, 'temp') + args = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0'] + self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % filename) + self.run_ffmpeg_multiple_files(info['__files_to_merge'], temp_filename, args) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + return info['__files_to_merge'], info + + def can_merge(self): + # TODO: figure out merge-capable ffmpeg version + if self.basename != 'avconv': + return True + + required_version = '10-0' + if is_outdated_version( + self._versions[self.basename], required_version): + warning = ('Your copy of %s is outdated and unable to properly mux separate video and audio files, ' + 'youtube-dl will download single file media. ' + 'Update %s to version %s or newer to fix this.') % ( + self.basename, self.basename, required_version) + if self._downloader: + self._downloader.report_warning(warning) + return False + return True -class FFmpegAudioFixPP(FFmpegPostProcessor): +class FFmpegFixupStretchedPP(FFmpegPostProcessor): def run(self, info): + stretched_ratio = info.get('stretched_ratio') + if stretched_ratio is None or stretched_ratio == 1: + return [], info + filename = info['filepath'] temp_filename = prepend_extension(filename, 'temp') - options = ['-vn', '-acodec', 'copy'] - self._downloader.to_screen(u'[ffmpeg] Fixing audio file "%s"' % filename) + options = ['-c', 'copy', '-aspect', '%f' % stretched_ratio] + self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % filename) self.run_ffmpeg(filename, temp_filename, options) os.remove(encodeFilename(filename)) os.rename(encodeFilename(temp_filename), encodeFilename(filename)) - return True, info + return [], info + + +class FFmpegFixupM4aPP(FFmpegPostProcessor): + def run(self, info): + if info.get('container') != 'm4a_dash': + return [], info + + filename = info['filepath'] + temp_filename = prepend_extension(filename, 'temp') + + options = ['-c', 'copy', '-f', 'mp4'] + self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % filename) + self.run_ffmpeg(filename, temp_filename, options) + + os.remove(encodeFilename(filename)) + os.rename(encodeFilename(temp_filename), encodeFilename(filename)) + + return [], info + + +class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor): + def __init__(self, downloader=None, format=None): + super(FFmpegSubtitlesConvertorPP, self).__init__(downloader) + self.format = format + + def run(self, info): + subs = info.get('requested_subtitles') + filename = info['filepath'] + new_ext = self.format + new_format = new_ext + if new_format == 'vtt': + new_format = 'webvtt' + if subs is None: + self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert') + return [], info + self._downloader.to_screen('[ffmpeg] Converting subtitles') + for lang, sub in subs.items(): + ext = sub['ext'] + if ext == new_ext: + self._downloader.to_screen( + '[ffmpeg] Subtitle file for %s is already in the requested' + 'format' % new_ext) + continue + new_file = subtitles_filename(filename, lang, new_ext) + + if ext == 'dfxp' or ext == 'ttml': + self._downloader.report_warning( + 'You have requested to convert dfxp (TTML) subtitles into another format, ' + 'which results in style information loss') + + dfxp_file = subtitles_filename(filename, lang, ext) + srt_file = subtitles_filename(filename, lang, 'srt') + + with io.open(dfxp_file, 'rt', encoding='utf-8') as f: + srt_data = dfxp2srt(f.read()) + + with io.open(srt_file, 'wt', encoding='utf-8') as f: + f.write(srt_data) + + ext = 'srt' + subs[lang] = { + 'ext': 'srt', + 'data': srt_data + } + + if new_ext == 'srt': + continue + + self.run_ffmpeg( + subtitles_filename(filename, lang, ext), + new_file, ['-f', new_format]) + + with io.open(new_file, 'rt', encoding='utf-8') as f: + subs[lang] = { + 'ext': ext, + 'data': f.read(), + } + + return [], info