]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/postprocessor/ffmpeg.py
Annotate with more bugs closed.
[youtubedl] / youtube_dl / postprocessor / ffmpeg.py
1 from __future__ import unicode_literals
2
3 import io
4 import os
5 import subprocess
6 import time
7 import re
8
9
10 from .common import AudioConversionError, PostProcessor
11
12 from ..compat import (
13 compat_subprocess_get_DEVNULL,
14 )
15 from ..utils import (
16 encodeArgument,
17 encodeFilename,
18 get_exe_version,
19 is_outdated_version,
20 PostProcessingError,
21 prepend_extension,
22 shell_quote,
23 subtitles_filename,
24 dfxp2srt,
25 ISO639Utils,
26 replace_extension,
27 )
28
29
# File extension -> output format name.
# NOTE(review): not referenced by the code visible in this file; presumably
# consumed by other modules that need an explicit ffmpeg ``-f`` value.
EXT_TO_OUT_FORMATS = dict((
    ('aac', 'adts'),
    ('flac', 'flac'),
    ('m4a', 'ipod'),
    ('mka', 'matroska'),
    ('mkv', 'matroska'),
    ('mpg', 'mpeg'),
    ('ogv', 'ogg'),
    ('ts', 'mpegts'),
    ('wma', 'asf'),
    ('wmv', 'asf'),
))

# Requested audio codec -> encoder name handed to ``-acodec`` by
# FFmpegExtractAudioPP.  ``None`` means no ``-acodec`` option is emitted.
ACODECS = dict((
    ('mp3', 'libmp3lame'),
    ('aac', 'aac'),
    ('flac', 'flac'),
    ('m4a', 'aac'),
    ('opus', 'libopus'),
    ('vorbis', 'libvorbis'),
    ('wav', None),
))
51
52
class FFmpegPostProcessorError(PostProcessingError):
    """Raised when ffmpeg/avconv is unavailable or exits with an error."""
55
56
class FFmpegPostProcessor(PostProcessor):
    """Base class for post-processors that shell out to ffmpeg/avconv.

    On construction the converter (``ffmpeg``/``avconv``) and probe
    (``ffprobe``/``avprobe``) executables are detected; the chosen ones are
    stored in ``basename`` and ``probe_basename`` (``None`` when not found).
    """

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        self._determine_executables()

    def check_version(self):
        """Ensure a converter is available and warn if it looks outdated.

        Raises FFmpegPostProcessorError when neither ffmpeg nor avconv was
        found.  An outdated version only produces a warning (and only when a
        downloader is attached).
        """
        if not self.available:
            raise FFmpegPostProcessorError('ffmpeg or avconv not found. Please install one.')

        required_version = '10-0' if self.basename == 'avconv' else '1.0'
        if is_outdated_version(
                self._versions[self.basename], required_version):
            warning = 'Your copy of %s is outdated, update %s to version %s or newer if you encounter any errors.' % (
                self.basename, self.basename, required_version)
            if self._downloader:
                self._downloader.report_warning(warning)

    @staticmethod
    def get_versions(downloader=None):
        """Return the mapping of program name -> detected version."""
        return FFmpegPostProcessor(downloader)._versions

    def _determine_executables(self):
        """Detect the executables and fill ``_paths``, ``_versions``,
        ``basename`` and ``probe_basename``.

        Honours the downloader parameters ``prefer_ffmpeg`` and
        ``ffmpeg_location`` (a directory or the path of one of the programs).
        An unusable ``ffmpeg_location`` leaves ``_versions`` empty and both
        basenames ``None``.
        """
        programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
        prefer_ffmpeg = False

        self.basename = None
        self.probe_basename = None

        self._paths = None
        self._versions = None
        if self._downloader:
            prefer_ffmpeg = self._downloader.params.get('prefer_ffmpeg', False)
            location = self._downloader.params.get('ffmpeg_location')
            if location is not None:
                if not os.path.exists(location):
                    self._downloader.report_warning(
                        'ffmpeg-location %s does not exist! '
                        'Continuing without avconv/ffmpeg.' % (location))
                    self._versions = {}
                    return
                elif not os.path.isdir(location):
                    # A file was given: it must be one of the known programs;
                    # the sibling programs are then looked up next to it.
                    basename = os.path.splitext(os.path.basename(location))[0]
                    if basename not in programs:
                        self._downloader.report_warning(
                            'Cannot identify executable %s, its basename should be one of %s. '
                            'Continuing without avconv/ffmpeg.' %
                            (location, ', '.join(programs)))
                        self._versions = {}
                        return
                    location = os.path.dirname(os.path.abspath(location))
                    if basename in ('ffmpeg', 'ffprobe'):
                        prefer_ffmpeg = True

                self._paths = dict(
                    (p, os.path.join(location, p)) for p in programs)
                self._versions = dict(
                    (p, get_exe_version(self._paths[p], args=['-version']))
                    for p in programs)
        if self._versions is None:
            # No explicit location: rely on the bare program names.
            self._versions = dict(
                (p, get_exe_version(p, args=['-version'])) for p in programs)
            self._paths = dict((p, p) for p in programs)

        if prefer_ffmpeg:
            prefs = ('ffmpeg', 'avconv')
        else:
            prefs = ('avconv', 'ffmpeg')
        for p in prefs:
            if self._versions[p]:
                self.basename = p
                break

        if prefer_ffmpeg:
            prefs = ('ffprobe', 'avprobe')
        else:
            prefs = ('avprobe', 'ffprobe')
        for p in prefs:
            if self._versions[p]:
                self.probe_basename = p
                break

    @property
    def available(self):
        return self.basename is not None

    @property
    def executable(self):
        return self._paths[self.basename]

    @property
    def probe_available(self):
        return self.probe_basename is not None

    @property
    def probe_executable(self):
        return self._paths[self.probe_basename]

    def get_audio_codec(self, path):
        """Return the audio codec name the probe tool reports for ``path``.

        Returns None when the probe cannot be started, exits with a non-zero
        status, or its output contains no audio stream.
        Raises PostProcessingError when no probe executable is available.
        """
        if not self.probe_available:
            raise PostProcessingError('ffprobe or avprobe not found. Please install one.')
        try:
            cmd = [
                encodeFilename(self.probe_executable, True),
                encodeArgument('-show_streams'),
                encodeFilename(self._ffmpeg_filename_argument(path), True)]
            if self._downloader and self._downloader.params.get('verbose', False):
                # Label the line with the probe tool, which is what is run here
                self._downloader.to_screen('[debug] %s command line: %s' % (self.probe_basename, shell_quote(cmd)))
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE, stdin=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        # Remember the latest codec_name and return it as soon as the stream
        # it belongs to turns out to be an audio stream.
        audio_codec = None
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        """Run the converter on ``input_paths`` writing to ``out_path``.

        ``opts`` are the options placed between the inputs and the output;
        the configured extra arguments are appended to a copy of them, so the
        caller's sequence is left untouched.  On success the output's
        timestamps are set to the oldest input mtime via ``try_utime``.

        Raises FFmpegPostProcessorError (message: last stderr line) when the
        process exits with a non-zero status.
        """
        self.check_version()

        oldest_mtime = min(
            os.stat(encodeFilename(path)).st_mtime for path in input_paths)

        # Work on a copy: ``opts += ...`` would modify the caller's list
        all_opts = list(opts)
        all_opts.extend(self._configuration_args())

        files_cmd = []
        for path in input_paths:
            files_cmd.extend([
                encodeArgument('-i'),
                encodeFilename(self._ffmpeg_filename_argument(path), True)
            ])
        cmd = ([encodeFilename(self.executable, True), encodeArgument('-y')] +
               files_cmd +
               [encodeArgument(o) for o in all_opts] +
               [encodeFilename(self._ffmpeg_filename_argument(out_path), True)])

        if self._downloader and self._downloader.params.get('verbose', False):
            self._downloader.to_screen('[debug] ffmpeg command line: %s' % shell_quote(cmd))
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
        stdout, stderr = p.communicate()
        if p.returncode != 0:
            stderr = stderr.decode('utf-8', 'replace')
            msg = stderr.strip().split('\n')[-1]
            raise FFmpegPostProcessorError(msg)
        self.try_utime(out_path, oldest_mtime, oldest_mtime)

    def run_ffmpeg(self, path, out_path, opts):
        """Single-input convenience wrapper around run_ffmpeg_multiple_files."""
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        # Always use 'file:' because the filename may contain ':' (ffmpeg
        # interprets that as a protocol) or can start with '-' (-- is broken in
        # ffmpeg, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details)
        # Also leave '-' intact in order not to break streaming to stdout.
        return 'file:' + fn if fn != '-' else fn
216
217
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Extract the audio track of a file, copying the stream when the source
    codec already satisfies the request and re-encoding otherwise."""

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Run an audio-only (``-vn``) conversion.

        ``codec`` is passed as ``-acodec`` unless it is None.  Converter
        failures are re-raised as AudioConversionError.
        """
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    def _quality_opts(self, extension):
        """Return the quality options for the configured preferred quality.

        Values below 10 select ``-q:a``; anything else is treated as a
        bitrate in kbit/s (``-b:a``).  Returns an empty list when no quality
        was requested.
        """
        raw_quality = self._preferredquality
        if raw_quality is None:
            return []
        # Coerce to text so that a quality given as an int works as well
        quality = '%s' % (raw_quality,)
        # The opus codec doesn't support the -aq option
        if int(raw_quality) < 10 and extension != 'opus':
            return ['-q:a', quality]
        return ['-b:a', quality + 'k']

    def run(self, information):
        """Convert ``information['filepath']`` to the preferred audio format.

        Updates ``filepath`` and ``ext`` in ``information`` and returns
        ``(files_to_delete, information)``.  Raises PostProcessingError when
        the source codec cannot be determined or the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError('WARNING: unable to obtain file audio codec with ffprobe')

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = ['-bsf:a', 'aac_adtstoasc']
            elif filecodec in ['aac', 'flac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts(extension)
        else:
            # We convert the audio (lossy if codec is lossy)
            acodec = ACODECS[self._preferredcodec]
            extension = self._preferredcodec
            more_opts = self._quality_opts(extension)
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += ['-bsf:a', 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        # not os.path.splitext, since the latter does not work on unicode in all setups
        prefix, sep, ext = path.rpartition('.')
        if not sep or any(s in ext for s in (os.sep, os.altsep) if s):
            # The last path component has no extension (the only dot, if any,
            # belongs to a directory name): append instead of replacing, so
            # the destination keeps its directory and base name.
            new_path = path + '.' + extension
        else:
            new_path = prefix + sep + extension

        information['filepath'] = new_path
        information['ext'] = extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        if (new_path == path or
                (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))):
            self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path)
            return [], information

        try:
            self._downloader.to_screen('[ffmpeg] Destination: ' + new_path)
            self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as e:
            raise PostProcessingError(
                'audio conversion failed: ' + e.msg)
        except Exception:
            raise PostProcessingError('error running ' + self.basename)

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            self.try_utime(
                new_path, time.time(), information['filetime'],
                errnote='Cannot update utime of audio file')

        return [path], information
319
320
class FFmpegVideoConvertorPP(FFmpegPostProcessor):
    """Convert a video file into the preferred container format."""

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertorPP, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert ``information['filepath']`` unless it already has the
        preferred extension.

        Returns ``(files_to_delete, information)``; after a conversion
        ``filepath``, ``format`` and ``ext`` are updated and the source file
        is listed for deletion.
        """
        path = information['filepath']
        if information['ext'] == self._preferedformat:
            self._downloader.to_screen('[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
            return [], information
        options = []
        if self._preferedformat == 'avi':
            options.extend(['-c:v', 'libxvid', '-vtag', 'XVID'])
        prefix, sep, ext = path.rpartition('.')
        if not sep or any(s in ext for s in (os.sep, os.altsep) if s):
            # The last path component has no extension (the only dot, if any,
            # belongs to a directory name): append instead of replacing, so
            # the destination keeps its directory and base name.
            outpath = path + '.' + self._preferedformat
        else:
            outpath = prefix + sep + self._preferedformat
        self._downloader.to_screen(
            '[ffmpeg] Converting video from %s to %s, Destination: %s' % (
                information['ext'], self._preferedformat, outpath))
        self.run_ffmpeg(path, outpath, options)
        information['filepath'] = outpath
        information['format'] = self._preferedformat
        information['ext'] = self._preferedformat
        return [path], information
342
343
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Mux the requested subtitle files into an mp4, webm or mkv file."""

    def run(self, information):
        """Embed subtitles in place and return the subtitle files to delete.

        Nothing is done (and nothing is deleted) for other containers, when
        no subtitles were requested, or when none of them can be embedded.
        """
        container = information['ext']
        if container not in ('mp4', 'webm', 'mkv'):
            self._downloader.to_screen('[ffmpeg] Subtitles can only be embedded in mp4, webm or mkv files')
            return [], information
        subtitles = information.get('requested_subtitles')
        if not subtitles:
            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to embed')
            return [], information

        filename = information['filepath']

        is_webm = container == 'webm'
        sub_langs = []
        sub_filenames = []
        already_warned = False

        for lang, sub_info in subtitles.items():
            sub_ext = sub_info['ext']
            if is_webm and sub_ext != 'vtt':
                # Skip this track; mention the restriction only once
                if not already_warned:
                    already_warned = True
                    self._downloader.to_screen('[ffmpeg] Only WebVTT subtitles can be embedded in webm files')
                continue
            sub_langs.append(lang)
            sub_filenames.append(subtitles_filename(filename, lang, sub_ext))

        if not sub_langs:
            return [], information

        opts = ['-map', '0', '-c', 'copy']
        # Don't copy the existing subtitles, we may be running the
        # postprocessor a second time
        opts += ['-map', '-0:s']
        if container == 'mp4':
            opts += ['-c:s', 'mov_text']
        for index, lang in enumerate(sub_langs):
            # Input 0 is the media file, so subtitle inputs start at 1
            opts += ['-map', '%d:0' % (index + 1)]
            long_code = ISO639Utils.short2long(lang)
            if long_code is not None:
                opts += ['-metadata:s:s:%d' % index, 'language=%s' % long_code]

        temp_filename = prepend_extension(filename, 'temp')
        self._downloader.to_screen('[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files([filename] + sub_filenames, temp_filename, opts)
        # Replace the original with the muxed result
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return sub_filenames, information
398
399
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Write tags (and chapters, when present) into the downloaded file."""

    @staticmethod
    def _chapters_metadata(chapters):
        """Render ``chapters`` as the text of an FFMETADATA1 file.

        Each chapter needs ``start_time`` and ``end_time`` (multiplied by
        1000 to match TIMEBASE=1/1000); ``title`` is optional.
        """
        def ffmpeg_escape(text):
            return re.sub(r'(=|;|#|\\|\n)', r'\\\1', text)

        parts = [';FFMETADATA1\n']
        for chapter in chapters:
            parts.append('[CHAPTER]\nTIMEBASE=1/1000\n')
            parts.append('START=%d\n' % (chapter['start_time'] * 1000))
            parts.append('END=%d\n' % (chapter['end_time'] * 1000))
            chapter_title = chapter.get('title')
            if chapter_title:
                parts.append('title=%s\n' % ffmpeg_escape(chapter_title))
        return ''.join(parts)

    def run(self, info):
        """Add the metadata found in ``info`` to ``info['filepath']`` in place.

        Always returns ``([], info)``.  When chapters are present a temporary
        ``.meta`` file is used as a second input; it is removed again whether
        or not the conversion succeeds.
        """
        metadata = {}

        def add(meta_list, info_list=None):
            # Store the first non-None info field under every given tag name
            if not info_list:
                info_list = meta_list
            if not isinstance(meta_list, (list, tuple)):
                meta_list = (meta_list,)
            if not isinstance(info_list, (list, tuple)):
                info_list = (info_list,)
            for info_f in info_list:
                if info.get(info_f) is not None:
                    for meta_f in meta_list:
                        metadata[meta_f] = info[info_f]
                    break

        add('title', ('track', 'title'))
        add('date', 'upload_date')
        add(('description', 'comment'), 'description')
        add('purl', 'webpage_url')
        add('track', 'track_number')
        add('artist', ('artist', 'creator', 'uploader', 'uploader_id'))
        add('genre')
        add('album')
        add('album_artist')
        add('disc', 'disc_number')

        if not metadata:
            self._downloader.to_screen('[ffmpeg] There isn\'t any metadata to add')
            return [], info

        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')
        in_filenames = [filename]
        options = []

        if info['ext'] == 'm4a':
            options.extend(['-vn', '-acodec', 'copy'])
        else:
            options.extend(['-c', 'copy'])

        for (name, value) in metadata.items():
            options.extend(['-metadata', '%s=%s' % (name, value)])

        chapters = info.get('chapters', [])
        metadata_filename = None
        if chapters:
            # Build the content before creating the file so that a malformed
            # chapter entry does not leave a partial file behind.
            metadata_file_content = self._chapters_metadata(chapters)
            metadata_filename = replace_extension(filename, 'meta')
            with io.open(metadata_filename, 'wt', encoding='utf-8') as f:
                f.write(metadata_file_content)
            in_filenames.append(metadata_filename)
            options.extend(['-map_metadata', '1'])

        self._downloader.to_screen('[ffmpeg] Adding metadata to \'%s\'' % filename)
        try:
            self.run_ffmpeg_multiple_files(in_filenames, temp_filename, options)
        finally:
            # The chapter file is only a temporary input: drop it even when
            # the conversion fails.
            if metadata_filename is not None:
                os.remove(metadata_filename)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return [], info
471
472
class FFmpegMergerPP(FFmpegPostProcessor):
    """Mux the files listed in ``info['__files_to_merge']`` into one file."""

    def run(self, info):
        """Merge into a temporary file, then move it to ``info['filepath']``.

        Returns the merged source files as the files to delete.
        """
        target = info['filepath']
        scratch = prepend_extension(target, 'temp')
        # Stream copy: first video stream of input 0, first audio of input 1
        merge_opts = ['-c', 'copy', '-map', '0:v:0', '-map', '1:a:0']
        self._downloader.to_screen('[ffmpeg] Merging formats into "%s"' % target)
        self.run_ffmpeg_multiple_files(info['__files_to_merge'], scratch, merge_opts)
        os.rename(encodeFilename(scratch), encodeFilename(target))
        return info['__files_to_merge'], info

    def can_merge(self):
        """Return False (after warning) only for an outdated avconv."""
        # TODO: figure out merge-capable ffmpeg version
        if self.basename != 'avconv':
            return True

        minimum_version = '10-0'
        if not is_outdated_version(
                self._versions[self.basename], minimum_version):
            return True

        if self._downloader:
            self._downloader.report_warning(
                ('Your copy of %s is outdated and unable to properly mux separate video and audio files, '
                 'youtube-dl will download single file media. '
                 'Update %s to version %s or newer to fix this.') % (
                    self.basename, self.basename, minimum_version))
        return False
499
500
class FFmpegFixupStretchedPP(FFmpegPostProcessor):
    """Rewrite the aspect ratio of a file flagged with ``stretched_ratio``."""

    def run(self, info):
        """Apply ``-aspect`` in place; no-op when the ratio is missing or 1."""
        ratio = info.get('stretched_ratio')
        if not (ratio is None or ratio == 1):
            source = info['filepath']
            scratch = prepend_extension(source, 'temp')

            self._downloader.to_screen('[ffmpeg] Fixing aspect ratio in "%s"' % source)
            self.run_ffmpeg(source, scratch, ['-c', 'copy', '-aspect', '%f' % ratio])

            # Replace the original with the corrected copy
            os.remove(encodeFilename(source))
            os.rename(encodeFilename(scratch), encodeFilename(source))

        return [], info
518
519
class FFmpegFixupM4aPP(FFmpegPostProcessor):
    """Remux files whose ``container`` is ``m4a_dash`` into an mp4 container."""

    def run(self, info):
        """Remux in place; no-op for any other container value."""
        if info.get('container') == 'm4a_dash':
            source = info['filepath']
            scratch = prepend_extension(source, 'temp')

            self._downloader.to_screen('[ffmpeg] Correcting container in "%s"' % source)
            self.run_ffmpeg(source, scratch, ['-c', 'copy', '-f', 'mp4'])

            # Replace the original with the remuxed copy
            os.remove(encodeFilename(source))
            os.rename(encodeFilename(scratch), encodeFilename(source))

        return [], info
536
537
class FFmpegFixupM3u8PP(FFmpegPostProcessor):
    """Remux files with an AAC audio stream through ``aac_adtstoasc``."""

    def run(self, info):
        """Remux in place when the probed audio codec is ``aac``."""
        source = info['filepath']
        if self.get_audio_codec(source) != 'aac':
            return [], info

        scratch = prepend_extension(source, 'temp')
        fix_opts = ['-c', 'copy', '-f', 'mp4', '-bsf:a', 'aac_adtstoasc']
        self._downloader.to_screen('[ffmpeg] Fixing malformed AAC bitstream in "%s"' % source)
        self.run_ffmpeg(source, scratch, fix_opts)

        # Replace the original with the fixed copy
        os.remove(encodeFilename(source))
        os.rename(encodeFilename(scratch), encodeFilename(source))
        return [], info
551
552
class FFmpegSubtitlesConvertorPP(FFmpegPostProcessor):
    """Convert the requested subtitle files to another subtitle format."""

    def __init__(self, downloader=None, format=None):
        super(FFmpegSubtitlesConvertorPP, self).__init__(downloader)
        self.format = format

    def run(self, info):
        """Convert every requested subtitle whose extension differs from the
        target one.

        ``info['requested_subtitles']`` entries are replaced with the
        converted ``ext``/``data``.  Returns the superseded subtitle files as
        the files to delete.
        """
        subs = info.get('requested_subtitles')
        filename = info['filepath']
        target_ext = self.format
        # ffmpeg's format name for .vtt files is 'webvtt'
        target_format = 'webvtt' if target_ext == 'vtt' else target_ext
        if subs is None:
            self._downloader.to_screen('[ffmpeg] There aren\'t any subtitles to convert')
            return [], info
        self._downloader.to_screen('[ffmpeg] Converting subtitles')
        superseded = []
        for lang, sub in subs.items():
            source_ext = sub['ext']
            if source_ext == target_ext:
                self._downloader.to_screen(
                    '[ffmpeg] Subtitle file for %s is already in the requested format' % target_ext)
                continue
            source_file = subtitles_filename(filename, lang, source_ext)
            superseded.append(source_file)
            target_file = subtitles_filename(filename, lang, target_ext)

            if source_ext in ('dfxp', 'ttml', 'tt'):
                self._downloader.report_warning(
                    'You have requested to convert dfxp (TTML) subtitles into another format, '
                    'which results in style information loss')

                # These formats are first turned into srt with dfxp2srt
                srt_file = subtitles_filename(filename, lang, 'srt')

                with open(source_file, 'rb') as dfxp_handle:
                    srt_data = dfxp2srt(dfxp_handle.read())

                with io.open(srt_file, 'wt', encoding='utf-8') as srt_handle:
                    srt_handle.write(srt_data)

                subs[lang] = {
                    'ext': 'srt',
                    'data': srt_data
                }

                if target_ext == 'srt':
                    continue
                # The srt file is only an intermediate step: convert from it
                # and schedule it for deletion as well.
                superseded.append(srt_file)
                source_file = srt_file

            self.run_ffmpeg(source_file, target_file, ['-f', target_format])

            with io.open(target_file, 'rt', encoding='utf-8') as converted:
                subs[lang] = {
                    'ext': target_ext,
                    'data': converted.read(),
                }

        return superseded, info