Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/PostProcessor.py
Add bug number that we are closing.
[youtubedl] / youtube_dl / PostProcessor.py
1 import os
2 import subprocess
3 import sys
4 import time
5
6 from .utils import *
7
8
class PostProcessor(object):
    """Base class for steps that run after a successful download.

    Instances are attached to a downloader through the downloader's
    add_post_processor() method.  Once a download has finished
    successfully, the downloader walks its internal chain of
    PostProcessors and calls run() on each of them in turn: the first
    one receives an initial argument, every later one receives the value
    returned by its predecessor.

    The chain stops when one of them returns None or when the end of the
    chain is reached.

    PostProcessor objects take part in a "mutual registration" process
    similar to the one used by InfoExtractor objects.
    """

    # Downloader this post processor is registered with (None until set).
    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach the given downloader to this PP."""
        self._downloader = downloader

    def run(self, information):
        """Execute this post-processing step.

        "information" is a dictionary like the ones composed by
        InfoExtractors, with one extra field, "filepath", that points to
        the downloaded file.

        Returns a 2-tuple: the first element says whether the original
        file should be kept (i.e. not deleted; None means no
        preference), the second element is the updated information.

        May raise a PostProcessingError if post processing fails.
        """
        # The base implementation does nothing and expresses no preference
        # about keeping the original file.
        keep_original = None
        return keep_original, information
51
class FFmpegPostProcessorError(PostProcessingError):
    """PostProcessingError raised by the ffmpeg/avconv based post processors."""
54
class AudioConversionError(PostProcessingError):
    """PostProcessingError raised when converting or extracting audio fails."""
57
class FFmpegPostProcessor(PostProcessor):
    """Shared base for post processors that invoke ffmpeg or avconv."""

    def __init__(self, downloader=None):
        PostProcessor.__init__(self, downloader)
        # Maps program name -> program name (usable) or False (not launchable).
        self._exes = self.detect_executables()

    @staticmethod
    def detect_executables():
        """Probe which of the supported external programs can be launched.

        Returns a dict keyed by program name.  The value is the program
        name itself when `<program> -version` could be started, or False
        when starting it raised OSError.
        """
        def probe(name):
            try:
                proc = subprocess.Popen(
                    [name, '-version'],
                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                proc.communicate()
            except OSError:
                return False
            return name

        detected = {}
        for name in ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']:
            detected[name] = probe(name)
        return detected

    def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
        """Run avconv (preferred) or ffmpeg on several inputs.

        Each entry of input_paths is passed with -i, followed by opts and
        the output path; -y is always passed.  Raises
        FFmpegPostProcessorError when neither program is available or when
        the process exits with a non-zero status (the message is then the
        last line of its stderr).
        """
        converter = self._exes['avconv'] or self._exes['ffmpeg']
        if not converter:
            raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')

        input_args = []
        for in_path in input_paths:
            input_args = input_args + ['-i', encodeFilename(in_path)]
        output_arg = encodeFilename(self._ffmpeg_filename_argument(out_path))
        cmd = [converter, '-y'] + input_args + opts + [output_arg]

        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, raw_stderr = proc.communicate()
        if proc.returncode == 0:
            return
        # Only the final stderr line is reported as the error message.
        last_line = raw_stderr.decode('utf-8', 'replace').strip().split('\n')[-1]
        raise FFmpegPostProcessorError(last_line)

    def run_ffmpeg(self, path, out_path, opts):
        """Single-input convenience wrapper around run_ffmpeg_multiple_files()."""
        self.run_ffmpeg_multiple_files([path], out_path, opts)

    def _ffmpeg_filename_argument(self, fn):
        """Return fn in a form that is not mistaken for an option.

        ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for
        details, so names starting with a dash get a './' prefix instead.
        """
        return u'./' + fn if fn.startswith(u'-') else fn
100
class FFmpegExtractAudioPP(FFmpegPostProcessor):
    """Post processor that extracts the audio track of a downloaded file.

    preferredcodec is the wanted audio codec ('best' when None),
    preferredquality an optional quality/bitrate string, and
    nopostoverwrites tells run() not to overwrite an already existing
    output file.
    """

    def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
        FFmpegPostProcessor.__init__(self, downloader)
        if preferredcodec is None:
            preferredcodec = 'best'
        self._preferredcodec = preferredcodec
        self._preferredquality = preferredquality
        self._nopostoverwrites = nopostoverwrites

    def get_audio_codec(self, path):
        """Return the audio codec name of the file at path, or None.

        Runs avprobe (preferred) or ffprobe with -show_streams and returns
        the most recent codec_name= value seen before a codec_type=audio
        line.  Returns None when the probe cannot be run, exits with a
        non-zero status, or no such line is found.  Raises
        PostProcessingError when neither probe program is available.
        """
        if not self._exes['ffprobe'] and not self._exes['avprobe']:
            raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
        try:
            cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
            handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
            output = handle.communicate()[0]
            if handle.wait() != 0:
                return None
        except (IOError, OSError):
            return None
        audio_codec = None
        for line in output.decode('ascii', 'ignore').split('\n'):
            if line.startswith('codec_name='):
                audio_codec = line.split('=')[1].strip()
            elif line.strip() == 'codec_type=audio' and audio_codec is not None:
                return audio_codec
        return None

    def run_ffmpeg(self, path, out_path, codec, more_opts):
        """Extract audio from path into out_path.

        Video is dropped with -vn; codec (when not None) is passed with
        -acodec, followed by more_opts.  Raises AudioConversionError when
        no converter is available or the conversion fails.
        """
        if not self._exes['ffmpeg'] and not self._exes['avconv']:
            raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
        if codec is None:
            acodec_opts = []
        else:
            acodec_opts = ['-acodec', codec]
        opts = ['-vn'] + acodec_opts + more_opts
        try:
            FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
        except FFmpegPostProcessorError as err:
            raise AudioConversionError(err.msg)

    def _quality_opts(self):
        """Return the quality/bitrate options for the preferred quality."""
        quality = self._preferredquality
        if quality is None:
            return []
        use_avconv = self._exes['avconv']
        # Values below 10 are passed as a quality level, anything else as a
        # bitrate in kbit/s; the option spelling depends on the program used.
        if int(quality) < 10:
            return ['-q:a' if use_avconv else '-aq', quality]
        return ['-b:a' if use_avconv else '-ab', quality + 'k']

    def run(self, information):
        """Extract the audio of information['filepath'].

        Returns a tuple (keep_original, information) where
        information['filepath'] points to the audio file.  keep_original is
        True when the audio file is the downloaded file itself, otherwise
        the nopostoverwrites setting given to the constructor.

        Raises PostProcessingError when the source codec cannot be
        determined or the conversion fails.
        """
        path = information['filepath']

        filecodec = self.get_audio_codec(path)
        if filecodec is None:
            raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')

        preferred = self._preferredcodec
        tool = 'avconv' if self._exes['avconv'] else 'ffmpeg'
        bsf_flag = '-bsf:a' if self._exes['avconv'] else '-absf'

        more_opts = []
        if preferred == 'best' or preferred == filecodec or (preferred == 'm4a' and filecodec == 'aac'):
            if filecodec == 'aac' and preferred in ['m4a', 'best']:
                # Lossless, but in another container
                acodec = 'copy'
                extension = 'm4a'
                more_opts = [bsf_flag, 'aac_adtstoasc']
            elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
                # Lossless if possible
                acodec = 'copy'
                extension = filecodec
                if filecodec == 'aac':
                    more_opts = ['-f', 'adts']
                if filecodec == 'vorbis':
                    extension = 'ogg'
            else:
                # MP3 otherwise.
                acodec = 'libmp3lame'
                extension = 'mp3'
                more_opts = self._quality_opts()
        else:
            # We convert the audio (lossy)
            acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[preferred]
            extension = preferred
            more_opts = self._quality_opts()
            if preferred == 'aac':
                more_opts += ['-f', 'adts']
            if preferred == 'm4a':
                more_opts += [bsf_flag, 'aac_adtstoasc']
            if preferred == 'vorbis':
                extension = 'ogg'
            if preferred == 'wav':
                extension = 'wav'
                more_opts += ['-f', 'wav']

        prefix, sep, ext = path.rpartition(u'.')  # not os.path.splitext, since the latter does not work on unicode in all setups
        new_path = prefix + sep + extension

        # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
        # This is decided per call: storing it on the instance would leak
        # the decision into every later file handled by this object.
        keep_original = True if new_path == path else self._nopostoverwrites

        try:
            if keep_original and os.path.exists(encodeFilename(new_path)):
                self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
            else:
                self._downloader.to_screen(u'[' + tool + '] Destination: ' + new_path)
                self.run_ffmpeg(path, new_path, acodec, more_opts)
        except AudioConversionError as err:
            raise PostProcessingError(u'audio conversion failed: ' + err.msg)
        except Exception:
            # KeyboardInterrupt/SystemExit are deliberately not caught here.
            raise PostProcessingError(u'error running ' + tool)

        # Try to update the date time for extracted audio file.
        if information.get('filetime') is not None:
            try:
                os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
            except Exception:
                # Best effort only: a failure here must not fail the download.
                self._downloader.report_warning(u'Cannot update utime of audio file')

        information['filepath'] = new_path
        return keep_original, information
224
class FFmpegVideoConvertor(FFmpegPostProcessor):
    """Post processor that converts the downloaded video to another format.

    preferedformat is the target extension; it replaces whatever follows
    the last dot of the downloaded file's path.
    """

    def __init__(self, downloader=None, preferedformat=None):
        super(FFmpegVideoConvertor, self).__init__(downloader)
        self._preferedformat = preferedformat

    def run(self, information):
        """Convert information['filepath'] to the preferred format.

        Returns (True, information) untouched when the file already has the
        target extension.  Otherwise runs the conversion without extra
        options, updates 'filepath', 'format' and 'ext' in information and
        returns (False, information).
        """
        source_path = information['filepath']
        target_format = self._preferedformat
        stem, dot, _ = source_path.rpartition(u'.')
        outpath = stem + dot + target_format

        current_ext = information['ext']
        if current_ext == target_format:
            self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (source_path, target_format))
            return True, information

        banner = u'[' + 'ffmpeg' + '] Converting video from %s to %s, Destination: ' % (current_ext, target_format)
        self._downloader.to_screen(banner + outpath)
        self.run_ffmpeg(source_path, outpath, [])

        information['filepath'] = outpath
        information['format'] = target_format
        information['ext'] = target_format
        return False, information
243
244
class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
    """Post processor that embeds downloaded subtitle files into mp4 videos."""

    # ISO 639-1 (two letter) -> ISO 639-2/T (three letter) language codes.
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar', 'ab': 'abk', 'ae': 'ave', 'af': 'afr', 'ak': 'aka', 'am': 'amh',
        'an': 'arg', 'ar': 'ara', 'as': 'asm', 'av': 'ava', 'ay': 'aym', 'az': 'aze',
        'ba': 'bak', 'be': 'bel', 'bg': 'bul', 'bh': 'bih', 'bi': 'bis', 'bm': 'bam',
        'bn': 'ben', 'bo': 'bod', 'br': 'bre', 'bs': 'bos',
        'ca': 'cat', 'ce': 'che', 'ch': 'cha', 'co': 'cos', 'cr': 'cre', 'cs': 'ces',
        'cu': 'chu', 'cv': 'chv', 'cy': 'cym',
        'da': 'dan', 'de': 'deu', 'dv': 'div', 'dz': 'dzo',
        'ee': 'ewe', 'el': 'ell', 'en': 'eng', 'eo': 'epo', 'es': 'spa', 'et': 'est',
        'eu': 'eus',
        'fa': 'fas', 'ff': 'ful', 'fi': 'fin', 'fj': 'fij', 'fo': 'fao', 'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle', 'gd': 'gla', 'gl': 'glg', 'gn': 'grn', 'gu': 'guj', 'gv': 'glv',
        'ha': 'hau', 'he': 'heb', 'hi': 'hin', 'ho': 'hmo', 'hr': 'hrv', 'ht': 'hat',
        'hu': 'hun', 'hy': 'hye', 'hz': 'her',
        'ia': 'ina', 'id': 'ind', 'ie': 'ile', 'ig': 'ibo', 'ii': 'iii', 'ik': 'ipk',
        'io': 'ido', 'is': 'isl', 'it': 'ita', 'iu': 'iku',
        'ja': 'jpn', 'jv': 'jav',
        'ka': 'kat', 'kg': 'kon', 'ki': 'kik', 'kj': 'kua', 'kk': 'kaz', 'kl': 'kal',
        'km': 'khm', 'kn': 'kan', 'ko': 'kor', 'kr': 'kau', 'ks': 'kas', 'ku': 'kur',
        'kv': 'kom', 'kw': 'cor', 'ky': 'kir',
        'la': 'lat', 'lb': 'ltz', 'lg': 'lug', 'li': 'lim', 'ln': 'lin', 'lo': 'lao',
        'lt': 'lit', 'lu': 'lub', 'lv': 'lav',
        'mg': 'mlg', 'mh': 'mah', 'mi': 'mri', 'mk': 'mkd', 'ml': 'mal', 'mn': 'mon',
        'mr': 'mar', 'ms': 'msa', 'mt': 'mlt', 'my': 'mya',
        'na': 'nau', 'nb': 'nob', 'nd': 'nde', 'ne': 'nep', 'ng': 'ndo', 'nl': 'nld',
        'nn': 'nno', 'no': 'nor', 'nr': 'nbl', 'nv': 'nav', 'ny': 'nya',
        'oc': 'oci', 'oj': 'oji', 'om': 'orm', 'or': 'ori', 'os': 'oss',
        'pa': 'pan', 'pi': 'pli', 'pl': 'pol', 'ps': 'pus', 'pt': 'por',
        'qu': 'que',
        'rm': 'roh', 'rn': 'run', 'ro': 'ron', 'ru': 'rus', 'rw': 'kin',
        'sa': 'san', 'sc': 'srd', 'sd': 'snd', 'se': 'sme', 'sg': 'sag', 'si': 'sin',
        'sk': 'slk', 'sl': 'slv', 'sm': 'smo', 'sn': 'sna', 'so': 'som', 'sq': 'sqi',
        'sr': 'srp', 'ss': 'ssw', 'st': 'sot', 'su': 'sun', 'sv': 'swe', 'sw': 'swa',
        'ta': 'tam', 'te': 'tel', 'tg': 'tgk', 'th': 'tha', 'ti': 'tir', 'tk': 'tuk',
        'tl': 'tgl', 'tn': 'tsn', 'to': 'ton', 'tr': 'tur', 'ts': 'tso', 'tt': 'tat',
        'tw': 'twi', 'ty': 'tah',
        'ug': 'uig', 'uk': 'ukr', 'ur': 'urd', 'uz': 'uzb',
        've': 'ven', 'vi': 'vie', 'vo': 'vol',
        'wa': 'wln', 'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid', 'yo': 'yor',
        'za': 'zha', 'zh': 'zho', 'zu': 'zul',
    }

    def __init__(self, downloader=None, subtitlesformat='srt'):
        super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
        self._subformat = subtitlesformat

    @classmethod
    def _conver_lang_code(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of code are looked up; None is
        returned for codes missing from the table.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    def run(self, information):
        """Embed every subtitle listed in information['subtitles'].

        Nothing is done (beyond printing a message) unless the file is an
        mp4 and there is at least one subtitle.  Otherwise the result is
        written to a temporary file which then replaces the original.
        Always returns (True, information).
        """
        if information['ext'] != u'mp4':
            self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
            return True, information
        if not information.get('subtitles'):
            self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
            return True, information

        sub_langs = list(information['subtitles'])
        filename = information['filepath']

        # Input 0 is the video itself, inputs 1..n are the subtitle files.
        input_files = [filename]
        for lang in sub_langs:
            input_files.append(subtitles_filename(filename, lang, self._subformat))

        # Streams 0:0 and 0:1 of the video are copied as they are.
        opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
        for index, lang in enumerate(sub_langs):
            opts += ['-map', '%d:0' % (index + 1), '-c:s:%d' % index, 'mov_text']
            iso_code = self._conver_lang_code(lang)
            if iso_code is not None:
                opts += ['-metadata:s:s:%d' % index, 'language=%s' % iso_code]
        opts += ['-f', 'mp4']

        temp_filename = filename + u'.temp'
        self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
        self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)

        # Swap the muxed result in place of the original file.
        os.remove(encodeFilename(filename))
        os.rename(encodeFilename(temp_filename), encodeFilename(filename))

        return True, information