Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/PostProcessor.py

   1 import os
   2 import subprocess
   3 import sys
   4 import time
   5
   6
   7 from .utils import (
   8     compat_subprocess_get_DEVNULL,
   9     encodeFilename,
  10     PostProcessingError,
  11     shell_quote,
  12     subtitles_filename,
  13 )
  14
  15
  16 class PostProcessor(object):
  17     """Post Processor class.
  18
  19     PostProcessor objects can be added to downloaders with their
  20     add_post_processor() method. When the downloader has finished a
  21     successful download, it will take its internal chain of PostProcessors
  22     and start calling the run() method on each one of them, first with
  23     an initial argument and then with the returned value of the previous
  24     PostProcessor.
  25
  26     The chain will be stopped if one of them ever returns None or the end
  27     of the chain is reached.
  28
  29     PostProcessor objects follow a "mutual registration" process similar
  30     to InfoExtractor objects.
  31     """
  32
  33     _downloader = None
  34
  35     def __init__(self, downloader=None):
  36         self._downloader = downloader
  37
  38     def set_downloader(self, downloader):
  39         """Sets the downloader for this PP."""
  40         self._downloader = downloader
  41
  42     def run(self, information):
  43         """Run the PostProcessor.
  44
  45         The "information" argument is a dictionary like the ones
  46         composed by InfoExtractors. The only difference is that this
  47         one has an extra field called "filepath" that points to the
  48         downloaded file.
  49
  50         This method returns a tuple, the first element of which describes
  51         whether the original file should be kept (i.e. not deleted - None for
  52         no preference), and the second of which is the updated information.
  53
  54         In addition, this method may raise a PostProcessingError
  55         exception if post processing fails.
  56         """
  57         return None, information # by default, keep file and do nothing
  58
  59 class FFmpegPostProcessorError(PostProcessingError):
  60     pass
  61
  62 class AudioConversionError(PostProcessingError):
  63     pass
  64
  65 class FFmpegPostProcessor(PostProcessor):
  66     def __init__(self,downloader=None):
  67         PostProcessor.__init__(self, downloader)
  68         self._exes = self.detect_executables()
  69
  70     @staticmethod
  71     def detect_executables():
  72         def executable(exe):
  73             try:
  74                 subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
  75             except OSError:
  76                 return False
  77             return exe
  78         programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe']
  79         return dict((program, executable(program)) for program in programs)
  80
  81     def run_ffmpeg_multiple_files(self, input_paths, out_path, opts):
  82         if not self._exes['ffmpeg'] and not self._exes['avconv']:
  83             raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.')
  84
  85         files_cmd = []
  86         for path in input_paths:
  87             files_cmd.extend(['-i', encodeFilename(path)])
  88         cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y'] + files_cmd
  89                + opts +
  90                [encodeFilename(self._ffmpeg_filename_argument(out_path))])
  91
  92         if self._downloader.params.get('verbose', False):
  93             self._downloader.to_screen(u'[debug] ffmpeg command line: %s' % shell_quote(cmd))
  94         p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  95         stdout,stderr = p.communicate()
  96         if p.returncode != 0:
  97             stderr = stderr.decode('utf-8', 'replace')
  98             msg = stderr.strip().split('\n')[-1]
  99             raise FFmpegPostProcessorError(msg)
 100
 101     def run_ffmpeg(self, path, out_path, opts):
 102         self.run_ffmpeg_multiple_files([path], out_path, opts)
 103
 104     def _ffmpeg_filename_argument(self, fn):
 105         # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details
 106         if fn.startswith(u'-'):
 107             return u'./' + fn
 108         return fn
 109
 110 class FFmpegExtractAudioPP(FFmpegPostProcessor):
 111     def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False):
 112         FFmpegPostProcessor.__init__(self, downloader)
 113         if preferredcodec is None:
 114             preferredcodec = 'best'
 115         self._preferredcodec = preferredcodec
 116         self._preferredquality = preferredquality
 117         self._nopostoverwrites = nopostoverwrites
 118
 119     def get_audio_codec(self, path):
 120         if not self._exes['ffprobe'] and not self._exes['avprobe']:
 121             raise PostProcessingError(u'ffprobe or avprobe not found. Please install one.')
 122         try:
 123             cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))]
 124             handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE)
 125             output = handle.communicate()[0]
 126             if handle.wait() != 0:
 127                 return None
 128         except (IOError, OSError):
 129             return None
 130         audio_codec = None
 131         for line in output.decode('ascii', 'ignore').split('\n'):
 132             if line.startswith('codec_name='):
 133                 audio_codec = line.split('=')[1].strip()
 134             elif line.strip() == 'codec_type=audio' and audio_codec is not None:
 135                 return audio_codec
 136         return None
 137
 138     def run_ffmpeg(self, path, out_path, codec, more_opts):
 139         if not self._exes['ffmpeg'] and not self._exes['avconv']:
 140             raise AudioConversionError('ffmpeg or avconv not found. Please install one.')
 141         if codec is None:
 142             acodec_opts = []
 143         else:
 144             acodec_opts = ['-acodec', codec]
 145         opts = ['-vn'] + acodec_opts + more_opts
 146         try:
 147             FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts)
 148         except FFmpegPostProcessorError as err:
 149             raise AudioConversionError(err.msg)
 150
 151     def run(self, information):
 152         path = information['filepath']
 153
 154         filecodec = self.get_audio_codec(path)
 155         if filecodec is None:
 156             raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe')
 157
 158         more_opts = []
 159         if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
 160             if filecodec == 'aac' and self._preferredcodec in ['m4a', 'best']:
 161                 # Lossless, but in another container
 162                 acodec = 'copy'
 163                 extension = 'm4a'
 164                 more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
 165             elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']:
 166                 # Lossless if possible
 167                 acodec = 'copy'
 168                 extension = filecodec
 169                 if filecodec == 'aac':
 170                     more_opts = ['-f', 'adts']
 171                 if filecodec == 'vorbis':
 172                     extension = 'ogg'
 173             else:
 174                 # MP3 otherwise.
 175                 acodec = 'libmp3lame'
 176                 extension = 'mp3'
 177                 more_opts = []
 178                 if self._preferredquality is not None:
 179                     if int(self._preferredquality) < 10:
 180                         more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
 181                     else:
 182                         more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
 183         else:
 184             # We convert the audio (lossy)
 185             acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
 186             extension = self._preferredcodec
 187             more_opts = []
 188             if self._preferredquality is not None:
 189                 # The opus codec doesn't support the -aq option
 190                 if int(self._preferredquality) < 10 and extension != 'opus':
 191                     more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality]
 192                 else:
 193                     more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k']
 194             if self._preferredcodec == 'aac':
 195                 more_opts += ['-f', 'adts']
 196             if self._preferredcodec == 'm4a':
 197                 more_opts += [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc']
 198             if self._preferredcodec == 'vorbis':
 199                 extension = 'ogg'
 200             if self._preferredcodec == 'wav':
 201                 extension = 'wav'
 202                 more_opts += ['-f', 'wav']
 203
 204         prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
 205         new_path = prefix + sep + extension
 206
 207         # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly.
 208         if new_path == path:
 209             self._nopostoverwrites = True
 210
 211         try:
 212             if self._nopostoverwrites and os.path.exists(encodeFilename(new_path)):
 213                 self._downloader.to_screen(u'[youtube] Post-process file %s exists, skipping' % new_path)
 214             else:
 215                 self._downloader.to_screen(u'[' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + '] Destination: ' + new_path)
 216                 self.run_ffmpeg(path, new_path, acodec, more_opts)
 217         except:
 218             etype,e,tb = sys.exc_info()
 219             if isinstance(e, AudioConversionError):
 220                 msg = u'audio conversion failed: ' + e.msg
 221             else:
 222                 msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')
 223             raise PostProcessingError(msg)
 224
 225         # Try to update the date time for extracted audio file.
 226         if information.get('filetime') is not None:
 227             try:
 228                 os.utime(encodeFilename(new_path), (time.time(), information['filetime']))
 229             except:
 230                 self._downloader.report_warning(u'Cannot update utime of audio file')
 231
 232         information['filepath'] = new_path
 233         return self._nopostoverwrites,information
 234
 235 class FFmpegVideoConvertor(FFmpegPostProcessor):
 236     def __init__(self, downloader=None,preferedformat=None):
 237         super(FFmpegVideoConvertor, self).__init__(downloader)
 238         self._preferedformat=preferedformat
 239
 240     def run(self, information):
 241         path = information['filepath']
 242         prefix, sep, ext = path.rpartition(u'.')
 243         outpath = prefix + sep + self._preferedformat
 244         if information['ext'] == self._preferedformat:
 245             self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat))
 246             return True,information
 247         self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath)
 248         self.run_ffmpeg(path, outpath, [])
 249         information['filepath'] = outpath
 250         information['format'] = self._preferedformat
 251         information['ext'] = self._preferedformat
 252         return False,information
 253
 254
 255 class FFmpegEmbedSubtitlePP(FFmpegPostProcessor):
 256     # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
 257     _lang_map = {
 258         'aa': 'aar',
 259         'ab': 'abk',
 260         'ae': 'ave',
 261         'af': 'afr',
 262         'ak': 'aka',
 263         'am': 'amh',
 264         'an': 'arg',
 265         'ar': 'ara',
 266         'as': 'asm',
 267         'av': 'ava',
 268         'ay': 'aym',
 269         'az': 'aze',
 270         'ba': 'bak',
 271         'be': 'bel',
 272         'bg': 'bul',
 273         'bh': 'bih',
 274         'bi': 'bis',
 275         'bm': 'bam',
 276         'bn': 'ben',
 277         'bo': 'bod',
 278         'br': 'bre',
 279         'bs': 'bos',
 280         'ca': 'cat',
 281         'ce': 'che',
 282         'ch': 'cha',
 283         'co': 'cos',
 284         'cr': 'cre',
 285         'cs': 'ces',
 286         'cu': 'chu',
 287         'cv': 'chv',
 288         'cy': 'cym',
 289         'da': 'dan',
 290         'de': 'deu',
 291         'dv': 'div',
 292         'dz': 'dzo',
 293         'ee': 'ewe',
 294         'el': 'ell',
 295         'en': 'eng',
 296         'eo': 'epo',
 297         'es': 'spa',
 298         'et': 'est',
 299         'eu': 'eus',
 300         'fa': 'fas',
 301         'ff': 'ful',
 302         'fi': 'fin',
 303         'fj': 'fij',
 304         'fo': 'fao',
 305         'fr': 'fra',
 306         'fy': 'fry',
 307         'ga': 'gle',
 308         'gd': 'gla',
 309         'gl': 'glg',
 310         'gn': 'grn',
 311         'gu': 'guj',
 312         'gv': 'glv',
 313         'ha': 'hau',
 314         'he': 'heb',
 315         'hi': 'hin',
 316         'ho': 'hmo',
 317         'hr': 'hrv',
 318         'ht': 'hat',
 319         'hu': 'hun',
 320         'hy': 'hye',
 321         'hz': 'her',
 322         'ia': 'ina',
 323         'id': 'ind',
 324         'ie': 'ile',
 325         'ig': 'ibo',
 326         'ii': 'iii',
 327         'ik': 'ipk',
 328         'io': 'ido',
 329         'is': 'isl',
 330         'it': 'ita',
 331         'iu': 'iku',
 332         'ja': 'jpn',
 333         'jv': 'jav',
 334         'ka': 'kat',
 335         'kg': 'kon',
 336         'ki': 'kik',
 337         'kj': 'kua',
 338         'kk': 'kaz',
 339         'kl': 'kal',
 340         'km': 'khm',
 341         'kn': 'kan',
 342         'ko': 'kor',
 343         'kr': 'kau',
 344         'ks': 'kas',
 345         'ku': 'kur',
 346         'kv': 'kom',
 347         'kw': 'cor',
 348         'ky': 'kir',
 349         'la': 'lat',
 350         'lb': 'ltz',
 351         'lg': 'lug',
 352         'li': 'lim',
 353         'ln': 'lin',
 354         'lo': 'lao',
 355         'lt': 'lit',
 356         'lu': 'lub',
 357         'lv': 'lav',
 358         'mg': 'mlg',
 359         'mh': 'mah',
 360         'mi': 'mri',
 361         'mk': 'mkd',
 362         'ml': 'mal',
 363         'mn': 'mon',
 364         'mr': 'mar',
 365         'ms': 'msa',
 366         'mt': 'mlt',
 367         'my': 'mya',
 368         'na': 'nau',
 369         'nb': 'nob',
 370         'nd': 'nde',
 371         'ne': 'nep',
 372         'ng': 'ndo',
 373         'nl': 'nld',
 374         'nn': 'nno',
 375         'no': 'nor',
 376         'nr': 'nbl',
 377         'nv': 'nav',
 378         'ny': 'nya',
 379         'oc': 'oci',
 380         'oj': 'oji',
 381         'om': 'orm',
 382         'or': 'ori',
 383         'os': 'oss',
 384         'pa': 'pan',
 385         'pi': 'pli',
 386         'pl': 'pol',
 387         'ps': 'pus',
 388         'pt': 'por',
 389         'qu': 'que',
 390         'rm': 'roh',
 391         'rn': 'run',
 392         'ro': 'ron',
 393         'ru': 'rus',
 394         'rw': 'kin',
 395         'sa': 'san',
 396         'sc': 'srd',
 397         'sd': 'snd',
 398         'se': 'sme',
 399         'sg': 'sag',
 400         'si': 'sin',
 401         'sk': 'slk',
 402         'sl': 'slv',
 403         'sm': 'smo',
 404         'sn': 'sna',
 405         'so': 'som',
 406         'sq': 'sqi',
 407         'sr': 'srp',
 408         'ss': 'ssw',
 409         'st': 'sot',
 410         'su': 'sun',
 411         'sv': 'swe',
 412         'sw': 'swa',
 413         'ta': 'tam',
 414         'te': 'tel',
 415         'tg': 'tgk',
 416         'th': 'tha',
 417         'ti': 'tir',
 418         'tk': 'tuk',
 419         'tl': 'tgl',
 420         'tn': 'tsn',
 421         'to': 'ton',
 422         'tr': 'tur',
 423         'ts': 'tso',
 424         'tt': 'tat',
 425         'tw': 'twi',
 426         'ty': 'tah',
 427         'ug': 'uig',
 428         'uk': 'ukr',
 429         'ur': 'urd',
 430         'uz': 'uzb',
 431         've': 'ven',
 432         'vi': 'vie',
 433         'vo': 'vol',
 434         'wa': 'wln',
 435         'wo': 'wol',
 436         'xh': 'xho',
 437         'yi': 'yid',
 438         'yo': 'yor',
 439         'za': 'zha',
 440         'zh': 'zho',
 441         'zu': 'zul',
 442     }
 443
 444     def __init__(self, downloader=None, subtitlesformat='srt'):
 445         super(FFmpegEmbedSubtitlePP, self).__init__(downloader)
 446         self._subformat = subtitlesformat
 447
 448     @classmethod
 449     def _conver_lang_code(cls, code):
 450         """Convert language code from ISO 639-1 to ISO 639-2/T"""
 451         return cls._lang_map.get(code[:2])
 452
 453     def run(self, information):
 454         if information['ext'] != u'mp4':
 455             self._downloader.to_screen(u'[ffmpeg] Subtitles can only be embedded in mp4 files')
 456             return True, information
 457         if not information.get('subtitles'):
 458             self._downloader.to_screen(u'[ffmpeg] There aren\'t any subtitles to embed')
 459             return True, information
 460
 461         sub_langs = [key for key in information['subtitles']]
 462         filename = information['filepath']
 463         input_files = [filename] + [subtitles_filename(filename, lang, self._subformat) for lang in sub_langs]
 464
 465         opts = ['-map', '0:0', '-map', '0:1', '-c:v', 'copy', '-c:a', 'copy']
 466         for (i, lang) in enumerate(sub_langs):
 467             opts.extend(['-map', '%d:0' % (i+1), '-c:s:%d' % i, 'mov_text'])
 468             lang_code = self._conver_lang_code(lang)
 469             if lang_code is not None:
 470                 opts.extend(['-metadata:s:s:%d' % i, 'language=%s' % lang_code])
 471         opts.extend(['-f', 'mp4'])
 472
 473         temp_filename = filename + u'.temp'
 474         self._downloader.to_screen(u'[ffmpeg] Embedding subtitles in \'%s\'' % filename)
 475         self.run_ffmpeg_multiple_files(input_files, temp_filename, opts)
 476         os.remove(encodeFilename(filename))
 477         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 478
 479         return True, information
 480
 481
 482 class FFmpegMetadataPP(FFmpegPostProcessor):
 483     def run(self, info):
 484         metadata = {}
 485         if info.get('title') is not None:
 486             metadata['title'] = info['title']
 487         if info.get('upload_date') is not None:
 488             metadata['date'] = info['upload_date']
 489         if info.get('uploader') is not None:
 490             metadata['artist'] = info['uploader']
 491         elif info.get('uploader_id') is not None:
 492             metadata['artist'] = info['uploader_id']
 493
 494         if not metadata:
 495             self._downloader.to_screen(u'[ffmpeg] There isn\'t any metadata to add')
 496             return True, info
 497
 498         filename = info['filepath']
 499         ext = os.path.splitext(filename)[1][1:]
 500         temp_filename = filename + u'.temp'
 501
 502         options = ['-c', 'copy']
 503         for (name, value) in metadata.items():
 504             options.extend(['-metadata', '%s="%s"' % (name, value)])
 505         options.extend(['-f', ext])
 506
 507         self._downloader.to_screen(u'[ffmpeg] Adding metadata to \'%s\'' % filename)
 508         self.run_ffmpeg(filename, temp_filename, options)
 509         os.remove(encodeFilename(filename))
 510         os.rename(encodeFilename(temp_filename), encodeFilename(filename))
 511         return True, info