Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/PostProcessor.py
debian/control: Add dependency on python-pkg-resources.
[youtubedl] / youtube_dl / PostProcessor.py
1 import os
2 import subprocess
3 import sys
4 import time
5
6
7 from .utils import (
8 compat_subprocess_get_DEVNULL,
9 encodeFilename,
10 PostProcessingError,
11 shell_quote,
12 subtitles_filename,
13 )
14
15
class PostProcessor(object):
    """Base class for steps that run after a successful download.

    A downloader collects PostProcessor objects through its
    add_post_processor() method. Once a download has succeeded, the
    downloader walks its chain of PostProcessors and calls run() on each
    of them: the first one receives an initial argument, every later one
    receives the value returned by its predecessor.

    The walk ends when a PostProcessor returns None or when the chain is
    exhausted.

    Like InfoExtractor objects, PostProcessor objects take part in a
    "mutual registration" process with their downloader.
    """

    # Downloader this post processor is attached to (None when detached).
    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach this PP to the given downloader."""
        self._downloader = downloader

    def run(self, information):
        """Execute the post-processing step.

        "information" is a dictionary of the kind built by
        InfoExtractors, extended with a "filepath" entry that points to
        the downloaded file.

        Returns a 2-tuple. Its first element tells whether the original
        file should be kept (i.e. not deleted; None means no preference),
        its second element is the updated information dictionary.

        Implementations may raise PostProcessingError when the
        processing fails.
        """
        # Default behaviour: express no preference and change nothing.
        keep_original = None
        return keep_original, information
58
class FFmpegPostProcessorError(PostProcessingError):
    """Raised when ffmpeg/avconv is missing or exits with an error."""
61
class AudioConversionError(PostProcessingError):
    """Raised when extracting or converting the audio track fails."""
64
class FFmpegPostProcessor(PostProcessor):
    """Shared base for post processors that invoke ffmpeg or avconv."""

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        # Maps program name -> program name (launchable) or False (not found).
        self._exes = self.detect_executables()

    @staticmethod
    def detect_executables():
        """Probe which of the supported helper programs can be launched.

        Each candidate is started with ``-version``. The returned dict maps
        the program name to the name itself when the launch worked, or to
        False when starting it raised OSError.
        """
        def probe(name):
            try:
                proc = subprocess.Popen(
                    [name, '-version'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                proc.communicate()
            except OSError:
                return False
            return name

        found = {}
        for name in ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']:
            found[name] = probe(name)
        return found

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        """Run avconv/ffmpeg over several inputs, writing to out_path.

        avconv is preferred over ffmpeg when both are available. ``opts``
        is the list of extra command-line options placed between the
        inputs and the output file.

        Raises FFmpegPostProcessorError when neither program is available
        or when the program exits with a non-zero status; in the latter
        case the message is the last line of its stderr output.
        """
        converter = self._exes['avconv'] or self._exes['ffmpeg']
        if not converter:
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

        input_args = []
        for path in input_paths:
            input_args += ['-i', encodeFilename(path)]
        target = encodeFilename(self._ffmpeg_filename_argument(out_path))
        # '-y' makes the converter overwrite an existing output file.
        cmd = [converter, '-y'] + input_args + opts + [target]

        if self._downloader.params.get('verbose', False):
            self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, err_output = proc.communicate()
        if proc.returncode == 0:
            return
        # Only the final stderr line is reported as the error message.
        last_line = err_output.decode('utf-8', 'replace').strip().split('\n')[-1]
        raise FFmpegPostProcessorError(last_line)

    def run_ffmpeg(self, path, out_path, opts):
        """Single-input convenience wrapper around run_ffmpeg_multiple_files()."""
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        """Return fn in a form that cannot be mistaken for an option."""
        # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
        return (u'./' + fn) if fn.startswith(u'-') else fn
109
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Post processor that extracts the audio track of a downloaded file.

    The audio stream is copied unchanged whenever the requested codec
    allows it and re-encoded otherwise.
    """

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def get_audio_codec(self, path):
        """Return the codec name of the audio stream in path.

        The file is inspected with avprobe (preferred) or ffprobe using
        ``-show_streams``. Returns None when the probe cannot be started,
        exits with a non-zero status, or reports no audio stream.

        Raises PostProcessingError when neither probe program is available.
        """
        if not self._exes['ffprobe'] and not self._exes['avprobe']:
            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
        try:
            cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        # Remember the most recent codec_name and report it once the
        # codec_type line identifies the stream as audio.
        audio_codec = None
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Convert path to out_path, dropping the video stream.

        ``codec`` is passed as ``-acodec`` unless it is None; ``more_opts``
        is appended to the option list.

        Raises AudioConversionError when no converter is available or the
        conversion fails.
        """
        if not self._exes['ffmpeg'] and not self._exes['avconv']:
            raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    def _quality_opts(self, allow_quality_scale=True):
        """Return the quality/bitrate options for a lossy encode.

        Values below 10 select the quality-scale option (unless
        allow_quality_scale is false); anything else is treated as a
        bitrate in kbit/s. Returns an empty list when no quality was
        requested.
        """
        if self._preferredquality is None:
            return []
        use_avconv = self._exes['avconv']
        if int(self._preferredquality) < 10 and allow_quality_scale:
            return ['-q:a' if use_avconv else '-aq', self._preferredquality]
        return ['-b:a' if use_avconv else '-ab', self._preferredquality + 'k']

    def run(self, information):
        """Extract the audio of information['filepath'].

        Returns a tuple ``(keep_original, information)`` in which
        information['filepath'] points to the audio file. keep_original is
        True when the audio file is the downloaded file itself, otherwise
        it is the ``nopostoverwrites`` value given to the constructor.

        Raises PostProcessingError when the codec of the input cannot be
        determined or the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')

        use_avconv = self._exes['avconv']
        bitstream_filter_opt = '-bsf:a' if use_avconv else '-absf'

        more_opts = []
        if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = [bitstream_filter_opt, 'aac_adtstoasc']
            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts()
        else:
            # We convert the audio (lossy)
            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
            extension = self._preferredcodec
            # The opus codec doesn't support the -aq option
            more_opts = self._quality_opts(allow_quality_scale=(extension != 'opus'))
            if self._preferredcodec == 'aac':
                more_opts += ['-f', 'adts']
            if self._preferredcodec == 'm4a':
                more_opts += [bitstream_filter_opt, 'aac_adtstoasc']
            if self._preferredcodec == 'vorbis':
                extension = 'ogg'
            if self._preferredcodec == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition(u'.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        # This is decided per call: storing it on the instance would make
        # every later file processed by this object keep its original too.
        keep_original = True if new_path == path else self._nopostoverwrites

        exe_name = 'avconv' if use_avconv else 'ffmpeg'
        try:
            if keep_original and os.path.exists(encodeFilename(new_path)):
                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
            else:
                self._downloader.to_screen(u'[' + exe_name + '] Destination: ' + new_path)
                self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as err:
            raise PostProcessingError(u'audio conversion failed: ' + err.msg)
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must
            # propagate instead of being reported as a conversion error.
            raise PostProcessingError(u'error running ' + exe_name)

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            try:
                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
            except Exception:
                # Best effort only; a failure here must not abort the run.
                self._downloader.report_warning(u'Cannot update utime of audio file')

        information['filepath'] = new_path
        return keep_original, information
234
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Post processor that converts a downloaded video to another format."""

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        Returns ``(True, information)`` untouched when the file already
        has the target extension. Otherwise the file is converted, the
        'filepath', 'format' and 'ext' entries are updated, and
        ``(False, information)`` is returned.
        """
        source_path = information['filepath']
        target_format = self._preferedformat
        stem, dot, _ = source_path.rpartition(u'.')
        outpath = stem + dot + target_format

        current_ext = information['ext']
        if current_ext == target_format:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (source_path, target_format))
            return True, information

        description = '] Converting video from %s to %s, Destination: ' % (current_ext, target_format)
        self._downloader.to_screen(u'[' + 'ffmpeg' + description + outpath)
        self.run_ffmpeg(source_path, outpath, [])

        information['filepath'] = outpath
        information['format'] = target_format
        information['ext'] = target_format
        return False, information
253
254
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Post processor that embeds downloaded subtitle files into mp4 videos."""

    # Two-letter to three-letter language codes.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    def __init__(self, downloader=None, subtitlesformat='srt'):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        self._subformat = subtitlesformat

    @classmethod
    def _conver_lang_code(cls, code):
        """Map an ISO 639-1 language code to its ISO 639-2/T equivalent.

        Only the first two characters of code are looked up; None is
        returned for unknown codes.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    def run(self, information):
        """Embed every subtitle listed in information into the mp4 file.

        Nothing is done for files whose 'ext' is not mp4 or when there
        are no subtitles. Otherwise the result is written to a temporary
        file which then replaces the original. Always returns
        ``(True, information)``.
        """
        if information['ext'] != u'mp4':
            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
            return True, information
        if not information.get('subtitles'):
            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
            return True, information

        sub_langs = list(information['subtitles'])
        filename = information['filepath']
        # Input 0 is the video; inputs 1..n are the subtitle files.
        input_files = [filename]
        for lang in sub_langs:
            input_files.append(subtitles_filename(filename, lang, self._subformat))

        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
        for index, lang in enumerate(sub_langs):
            opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
            iso_code = self._conver_lang_code(lang)
            if iso_code is not None:
                opts += ['-metadata:s:s:%d' % index, 'language=%s' % iso_code]
        opts += ['-f', 'mp4']

        temp_filename = filename + u'.temp'
        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
        # Swap the muxed result in place of the original file.
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, information
480
481
class FFmpegMetadataPP(FFmpegPostProcessor):
    """Post processor that writes title/date/artist tags into the file."""

    def run(self, info):
        """Add the available metadata to info['filepath'].

        'title' is stored as title, 'upload_date' as date, and 'uploader'
        (falling back to 'uploader_id') as artist. The tagged copy is
        written to a temporary file which then replaces the original.
        Always returns ``(True, info)``.
        """
        metadata = {}
        if info.get('title') is not None:
            metadata['title'] = info['title']
        if info.get('upload_date') is not None:
            metadata['date'] = info['upload_date']
        if info.get('uploader') is not None:
            metadata['artist'] = info['uploader']
        elif info.get('uploader_id') is not None:
            metadata['artist'] = info['uploader_id']

        if not metadata:
            self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
            return True, info

        filename = info['filepath']
        ext = os.path.splitext(filename)[1][1:]
        temp_filename = filename + u'.temp'

        options = ['-c', 'copy']
        for (name, value) in metadata.items():
            # Each option is handed to the converter as its own argv entry
            # (no shell involved), so the value must not be wrapped in
            # quotes: they would end up inside the stored tag.
            options.extend(['-metadata', '%s=%s' % (name, value)])
        # The temporary name hides the real extension, so the output
        # format is given explicitly.
        options.extend(['-f', ext])

        self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
        self.run_ffmpeg(filename, temp_filename, options)
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))
        return True, info
511 return True, info