debian/control: Remove obsolete DMUA flag.

[youtubedl] / youtube_dl / FileDownloader.py
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py

index b43acd19b42a1b7ad8b4c8329657864b35b365b3..f4ce48046f08b21d66fcdecd4e1979cedebfd5dc 100644 (file)
--- a/youtube_dl/FileDownloader.py
+++ b/youtube_dl/FileDownloader.py
@@ -4,8 +4,10 @@
  from __future__ import absolute_import
  
  import math
  from __future__ import absolute_import
  
  import math
+import io
  import os
  import re
  import os
  import re
+import shutil
  import socket
  import subprocess
  import sys
  import socket
  import subprocess
  import sys
@@ -16,6 +18,7 @@ if os.name == 'nt':
      import ctypes
  
  from .utils import *
      import ctypes
  
  from .utils import *
+from .InfoExtractors import get_info_extractor
  
  
  class FileDownloader(object):
  
  
  class FileDownloader(object):
@@ -51,6 +54,7 @@ class FileDownloader(object):
      quiet:             Do not print messages to stdout.
      forceurl:          Force printing final URL.
      forcetitle:        Force printing title.
      quiet:             Do not print messages to stdout.
      forceurl:          Force printing final URL.
      forcetitle:        Force printing title.
+    forceid:           Force printing ID.
      forcethumbnail:    Force printing thumbnail URL.
      forcedescription:  Force printing description.
      forcefilename:     Force printing final filename.
      forcethumbnail:    Force printing thumbnail URL.
      forcedescription:  Force printing description.
      forcefilename:     Force printing final filename.
@@ -77,9 +81,18 @@ class FileDownloader(object):
      updatetime:        Use the Last-modified header to set output file timestamps.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video description to a .info.json file
      updatetime:        Use the Last-modified header to set output file timestamps.
      writedescription:  Write the video description to a .description file
      writeinfojson:     Write the video description to a .info.json file
-    writesubtitles:    Write the video subtitles to a .srt file
+    writethumbnail:    Write the thumbnail image to a file
+    writesubtitles:    Write the video subtitles to a file
+    allsubtitles:      Downloads all the subtitles of the video
+    listsubtitles:     Lists all available subtitles for the video
+    subtitlesformat:   Subtitle format [sbv/srt] (default=srt)
      subtitleslang:     Language of the subtitles to download
      test:              Download only first bytes to test the downloader.
      subtitleslang:     Language of the subtitles to download
      test:              Download only first bytes to test the downloader.
+    keepvideo:         Keep the video file after post-processing
+    min_filesize:      Skip files smaller than this size
+    max_filesize:      Skip files larger than this size
+    daterange:         A DateRange object, download only if the upload_date is in the range.
+    skip_download:     Skip the actual download of the video file
      """
  
      params = None
      """
  
      params = None
@@ -93,13 +106,14 @@ class FileDownloader(object):
          """Create a FileDownloader object with the given options."""
          self._ies = []
          self._pps = []
          """Create a FileDownloader object with the given options."""
          self._ies = []
          self._pps = []
+        self._progress_hooks = []
          self._download_retcode = 0
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
          self.params = params
  
          if '%(stitle)s' in self.params['outtmpl']:
          self._download_retcode = 0
          self._num_downloads = 0
          self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
          self.params = params
  
          if '%(stitle)s' in self.params['outtmpl']:
-            self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
+            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
  
      @staticmethod
      def format_bytes(bytes):
  
      @staticmethod
      def format_bytes(bytes):
@@ -111,7 +125,7 @@ class FileDownloader(object):
              exponent = 0
          else:
              exponent = int(math.log(bytes, 1024.0))
              exponent = 0
          else:
              exponent = int(math.log(bytes, 1024.0))
-        suffix = 'bkMGTPEZY'[exponent]
+        suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
          converted = float(bytes) / float(1024 ** exponent)
          return '%.2f%s' % (converted, suffix)
  
          converted = float(bytes) / float(1024 ** exponent)
          return '%.2f%s' % (converted, suffix)
  
@@ -203,27 +217,66 @@ class FileDownloader(object):
              # already of type unicode()
              ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
          elif 'TERM' in os.environ:
              # already of type unicode()
              ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
          elif 'TERM' in os.environ:
-            sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
+            self.to_screen('\033]0;%s\007' % message, skip_eol=True)
  
      def fixed_template(self):
          """Checks if the output template is fixed."""
          return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
  
  
      def fixed_template(self):
          """Checks if the output template is fixed."""
          return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
  
-    def trouble(self, message=None):
+    def trouble(self, message=None, tb=None):
          """Determine action to take when a download problem appears.
  
          Depending on if the downloader has been configured to ignore
          download errors or not, this method may throw an exception or
          not when errors are found, after printing the message.
          """Determine action to take when a download problem appears.
  
          Depending on if the downloader has been configured to ignore
          download errors or not, this method may throw an exception or
          not when errors are found, after printing the message.
+
+        tb, if given, is additional traceback information.
          """
          if message is not None:
              self.to_stderr(message)
          if self.params.get('verbose'):
          """
          if message is not None:
              self.to_stderr(message)
          if self.params.get('verbose'):
-            self.to_stderr(u''.join(traceback.format_list(traceback.extract_stack())))
+            if tb is None:
+                if sys.exc_info()[0]:  # if .trouble has been called from an except block
+                    tb = u''
+                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
+                    tb += compat_str(traceback.format_exc())
+                else:
+                    tb_data = traceback.format_list(traceback.extract_stack())
+                    tb = u''.join(tb_data)
+            self.to_stderr(tb)
          if not self.params.get('ignoreerrors', False):
          if not self.params.get('ignoreerrors', False):
-            raise DownloadError(message)
+            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
+                exc_info = sys.exc_info()[1].exc_info
+            else:
+                exc_info = sys.exc_info()
+            raise DownloadError(message, exc_info)
          self._download_retcode = 1
  
          self._download_retcode = 1
  
+    def report_warning(self, message):
+        '''
+        Print the message to stderr, it will be prefixed with 'WARNING:'
+        If stderr is a tty file the 'WARNING:' will be colored
+        '''
+        if sys.stderr.isatty() and os.name != 'nt':
+            _msg_header=u'\033[0;33mWARNING:\033[0m'
+        else:
+            _msg_header=u'WARNING:'
+        warning_message=u'%s %s' % (_msg_header,message)
+        self.to_stderr(warning_message)
+
+    def report_error(self, message, tb=None):
+        '''
+        Do the same as trouble, but prefixes the message with 'ERROR:', colored
+        in red if stderr is a tty file.
+        '''
+        if sys.stderr.isatty() and os.name != 'nt':
+            _msg_header = u'\033[0;31mERROR:\033[0m'
+        else:
+            _msg_header = u'ERROR:'
+        error_message = u'%s %s' % (_msg_header, message)
+        self.trouble(error_message, tb)
+
      def slow_down(self, start_time, byte_counter):
          """Sleep if the download speed is over the rate limit."""
          rate_limit = self.params.get('ratelimit', None)
      def slow_down(self, start_time, byte_counter):
          """Sleep if the download speed is over the rate limit."""
          rate_limit = self.params.get('ratelimit', None)
@@ -255,7 +308,7 @@ class FileDownloader(object):
                  return
              os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
          except (IOError, OSError) as err:
                  return
              os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
          except (IOError, OSError) as err:
-            self.trouble(u'ERROR: unable to rename file')
+            self.report_error(u'unable to rename file')
  
      def try_utime(self, filename, last_modified_hdr):
          """Try to set the last-modified time of the given file."""
  
      def try_utime(self, filename, last_modified_hdr):
          """Try to set the last-modified time of the given file."""
@@ -269,6 +322,9 @@ class FileDownloader(object):
          filetime = timeconvert(timestr)
          if filetime is None:
              return filetime
          filetime = timeconvert(timestr)
          if filetime is None:
              return filetime
+        # Ignore obviously invalid dates
+        if filetime == 0:
+            return
          try:
              os.utime(filename, (time.time(), filetime))
          except:
          try:
              os.utime(filename, (time.time(), filetime))
          except:
@@ -279,9 +335,9 @@ class FileDownloader(object):
          """ Report that the description file is being written """
          self.to_screen(u'[info] Writing video description to: ' + descfn)
  
          """ Report that the description file is being written """
          self.to_screen(u'[info] Writing video description to: ' + descfn)
  
-    def report_writesubtitles(self, srtfn):
+    def report_writesubtitles(self, sub_filename):
          """ Report that the subtitles file is being written """
          """ Report that the subtitles file is being written """
-        self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
+        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
  
      def report_writeinfojson(self, infofn):
          """ Report that the metadata file has been written """
  
      def report_writeinfojson(self, infofn):
          """ Report that the metadata file has been written """
@@ -295,8 +351,13 @@ class FileDownloader(object):
          """Report download progress."""
          if self.params.get('noprogress', False):
              return
          """Report download progress."""
          if self.params.get('noprogress', False):
              return
-        self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
-                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
+        clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+        if self.params.get('progress_with_newline', False):
+            self.to_screen(u'[download] %s of %s at %s ETA %s' %
+                (percent_str, data_len_str, speed_str, eta_str))
+        else:
+            self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
+                (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
          self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                  (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
  
          self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                  (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
  
@@ -336,7 +397,13 @@ class FileDownloader(object):
              template_dict = dict(info_dict)
  
              template_dict['epoch'] = int(time.time())
              template_dict = dict(info_dict)
  
              template_dict['epoch'] = int(time.time())
-            template_dict['autonumber'] = u'%05d' % self._num_downloads
+            autonumber_size = self.params.get('autonumber_size')
+            if autonumber_size is None:
+                autonumber_size = 5
+            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
+            template_dict['autonumber'] = autonumber_templ % self._num_downloads
+            if template_dict['playlist_index'] is not None:
+                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
  
              sanitize = lambda k,v: sanitize_filename(
                  u'NA' if v is None else compat_str(v),
  
              sanitize = lambda k,v: sanitize_filename(
                  u'NA' if v is None else compat_str(v),
@@ -346,8 +413,11 @@ class FileDownloader(object):
  
              filename = self.params['outtmpl'] % template_dict
              return filename
  
              filename = self.params['outtmpl'] % template_dict
              return filename
-        except (ValueError, KeyError) as err:
-            self.trouble(u'ERROR: invalid system charset or erroneous output template')
+        except KeyError as err:
+            self.report_error(u'Erroneous output template')
+            return None
+        except ValueError as err:
+            self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
              return None
  
      def _match_entry(self, info_dict):
              return None
  
      def _match_entry(self, info_dict):
@@ -356,18 +426,155 @@ class FileDownloader(object):
          title = info_dict['title']
          matchtitle = self.params.get('matchtitle', False)
          if matchtitle:
          title = info_dict['title']
          matchtitle = self.params.get('matchtitle', False)
          if matchtitle:
-            matchtitle = matchtitle.decode('utf8')
              if not re.search(matchtitle, title, re.IGNORECASE):
                  return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
          rejecttitle = self.params.get('rejecttitle', False)
          if rejecttitle:
              if not re.search(matchtitle, title, re.IGNORECASE):
                  return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
          rejecttitle = self.params.get('rejecttitle', False)
          if rejecttitle:
-            rejecttitle = rejecttitle.decode('utf8')
              if re.search(rejecttitle, title, re.IGNORECASE):
                  return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
              if re.search(rejecttitle, title, re.IGNORECASE):
                  return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+        date = info_dict.get('upload_date', None)
+        if date is not None:
+            dateRange = self.params.get('daterange', DateRange())
+            if date not in dateRange:
+                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
          return None
          return None
+        
+    def extract_info(self, url, download=True, ie_key=None, extra_info={}):
+        '''
+        Returns the resolved result dictionary produced by process_ie_result.
+        If 'download', also downloads the videos.
+        extra_info is a dict containing the extra values to add to each result
+         '''
+        
+        if ie_key:
+            ie = get_info_extractor(ie_key)()
+            ie.set_downloader(self)
+            ies = [ie]
+        else:
+            ies = self._ies
+
+        for ie in ies:
+            if not ie.suitable(url):
+                continue
+
+            if not ie.working():
+                self.report_warning(u'The program functionality for this site has been marked as broken, '
+                                    u'and will probably not work.')
+
+            try:
+                ie_result = ie.extract(url)
+                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+                    break
+                if isinstance(ie_result, list):
+                    # Backwards compatibility: old IE result format
+                    for result in ie_result:
+                        result.update(extra_info)
+                    ie_result = {
+                        '_type': 'compat_list',
+                        'entries': ie_result,
+                    }
+                else:
+                    ie_result.update(extra_info)
+                if 'extractor' not in ie_result:
+                    ie_result['extractor'] = ie.IE_NAME
+                return self.process_ie_result(ie_result, download=download)
+            except ExtractorError as de: # An error we somewhat expected
+                self.report_error(compat_str(de), de.format_traceback())
+                break
+            except Exception as e:
+                if self.params.get('ignoreerrors', False):
+                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
+                    break
+                else:
+                    raise
+        else:
+            self.report_error(u'no suitable InfoExtractor: %s' % url)
+        
+    def process_ie_result(self, ie_result, download=True, extra_info={}):
+        """
+        Take the result of the IE (it may be modified) and resolve all unresolved
+        references (URLs, playlist items).
+
+        It will also download the videos if 'download'.
+        Returns the resolved ie_result.
+        """
+
+        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
+        if result_type == 'video':
+            if 'playlist' not in ie_result:
+                # It isn't part of a playlist
+                ie_result['playlist'] = None
+                ie_result['playlist_index'] = None
+            if download:
+                self.process_info(ie_result)
+            return ie_result
+        elif result_type == 'url':
+            # We have to add extra_info to the results because it may be
+            # contained in a playlist
+            return self.extract_info(ie_result['url'],
+                                     download,
+                                     ie_key=ie_result.get('ie_key'),
+                                     extra_info=extra_info)
+        elif result_type == 'playlist':
+            # We process each entry in the playlist
+            playlist = ie_result.get('title', None) or ie_result.get('id', None)
+            self.to_screen(u'[download] Downloading playlist: %s'  % playlist)
+
+            playlist_results = []
+
+            n_all_entries = len(ie_result['entries'])
+            playliststart = self.params.get('playliststart', 1) - 1
+            playlistend = self.params.get('playlistend', -1)
+
+            if playlistend == -1:
+                entries = ie_result['entries'][playliststart:]
+            else:
+                entries = ie_result['entries'][playliststart:playlistend]
+
+            n_entries = len(entries)
+
+            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
+                (ie_result['extractor'], playlist, n_all_entries, n_entries))
+
+            for i,entry in enumerate(entries,1):
+                self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
+                extra = {
+                         'playlist': playlist, 
+                         'playlist_index': i + playliststart,
+                         }
+                if not 'extractor' in entry:
+                    # We set the extractor, if it's an url it will be set then to
+                    # the new extractor, but if it's already a video we must make
+                    # sure it's present: see issue #877
+                    entry['extractor'] = ie_result['extractor']
+                entry_result = self.process_ie_result(entry,
+                                                      download=download,
+                                                      extra_info=extra)
+                playlist_results.append(entry_result)
+            ie_result['entries'] = playlist_results
+            return ie_result
+        elif result_type == 'compat_list':
+            def _fixup(r):
+                r.setdefault('extractor', ie_result['extractor'])
+                return r
+            ie_result['entries'] = [
+                self.process_ie_result(_fixup(r), download=download)
+                for r in ie_result['entries']
+            ]
+            return ie_result
+        else:
+            raise Exception('Invalid result type: %s' % result_type)
  
      def process_info(self, info_dict):
  
      def process_info(self, info_dict):
-        """Process a single dictionary returned by an InfoExtractor."""
+        """Process a single resolved IE result."""
+
+        assert info_dict.get('_type', 'video') == 'video'
+        # We increment the download count here to match the previous behaviour.
+        self.increment_downloads()
+
+        info_dict['fulltitle'] = info_dict['title']
+        if len(info_dict['title']) > 200:
+            info_dict['title'] = info_dict['title'][:197] + u'...'
  
          # Keep for backwards compatibility
          info_dict['stitle'] = info_dict['title']
  
          # Keep for backwards compatibility
          info_dict['stitle'] = info_dict['title']
@@ -390,6 +597,8 @@ class FileDownloader(object):
          # Forced printings
          if self.params.get('forcetitle', False):
              compat_print(info_dict['title'])
          # Forced printings
          if self.params.get('forcetitle', False):
              compat_print(info_dict['title'])
+        if self.params.get('forceid', False):
+            compat_print(info_dict['id'])
          if self.params.get('forceurl', False):
              compat_print(info_dict['url'])
          if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
          if self.params.get('forceurl', False):
              compat_print(info_dict['url'])
          if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
@@ -410,59 +619,81 @@ class FileDownloader(object):
  
          try:
              dn = os.path.dirname(encodeFilename(filename))
  
          try:
              dn = os.path.dirname(encodeFilename(filename))
-            if dn != '' and not os.path.exists(dn): # dn is already encoded
+            if dn != '' and not os.path.exists(dn):
                  os.makedirs(dn)
          except (OSError, IOError) as err:
                  os.makedirs(dn)
          except (OSError, IOError) as err:
-            self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
+            self.report_error(u'unable to create directory ' + compat_str(err))
              return
  
          if self.params.get('writedescription', False):
              try:
                  descfn = filename + u'.description'
                  self.report_writedescription(descfn)
              return
  
          if self.params.get('writedescription', False):
              try:
                  descfn = filename + u'.description'
                  self.report_writedescription(descfn)
-                descfile = open(encodeFilename(descfn), 'wb')
-                try:
-                    descfile.write(info_dict['description'].encode('utf-8'))
-                finally:
-                    descfile.close()
+                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                    descfile.write(info_dict['description'])
              except (OSError, IOError):
              except (OSError, IOError):
-                self.trouble(u'ERROR: Cannot write description file ' + descfn)
+                self.report_error(u'Cannot write description file ' + descfn)
                  return
  
          if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
              # subtitles download errors are already managed as troubles in relevant IE
              # that way it will silently go on when used with unsupporting IE
                  return
  
          if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
              # subtitles download errors are already managed as troubles in relevant IE
              # that way it will silently go on when used with unsupporting IE
-            try:
-                srtfn = filename.rsplit('.', 1)[0] + u'.srt'
-                self.report_writesubtitles(srtfn)
-                srtfile = open(encodeFilename(srtfn), 'wb')
+            subtitle = info_dict['subtitles'][0]
+            (sub_error, sub_lang, sub) = subtitle
+            sub_format = self.params.get('subtitlesformat')
+            if sub_error:
+                self.report_warning("Some error while getting the subtitles")
+            else:
                  try:
                  try:
-                    srtfile.write(info_dict['subtitles'].encode('utf-8'))
-                finally:
-                    srtfile.close()
-            except (OSError, IOError):
-                self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
-                return
+                    sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+                    self.report_writesubtitles(sub_filename)
+                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+                        subfile.write(sub)
+                except (OSError, IOError):
+                    self.report_error(u'Cannot write subtitles file ' + descfn)
+                    return
+
+        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
+            subtitles = info_dict['subtitles']
+            sub_format = self.params.get('subtitlesformat')
+            for subtitle in subtitles:
+                (sub_error, sub_lang, sub) = subtitle
+                if sub_error:
+                    self.report_warning("Some error while getting the subtitles")
+                else:
+                    try:
+                        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
+                        self.report_writesubtitles(sub_filename)
+                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
+                                subfile.write(sub)
+                    except (OSError, IOError):
+                        self.report_error(u'Cannot write subtitles file ' + descfn)
+                        return
  
          if self.params.get('writeinfojson', False):
              infofn = filename + u'.info.json'
              self.report_writeinfojson(infofn)
              try:
  
          if self.params.get('writeinfojson', False):
              infofn = filename + u'.info.json'
              self.report_writeinfojson(infofn)
              try:
-                json.dump
-            except (NameError,AttributeError):
-                self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
-                return
-            try:
-                infof = open(encodeFilename(infofn), 'wb')
-                try:
-                    json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
-                    json.dump(json_info_dict, infof)
-                finally:
-                    infof.close()
+                json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
+                write_json_file(json_info_dict, encodeFilename(infofn))
              except (OSError, IOError):
              except (OSError, IOError):
-                self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
+                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                  return
  
                  return
  
+        if self.params.get('writethumbnail', False):
+            if 'thumbnail' in info_dict:
+                thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
+                if not thumb_format:
+                    thumb_format = 'jpg'
+                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
+                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
+                               (info_dict['extractor'], info_dict['id']))
+                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
+                with open(thumb_filename, 'wb') as thumbf:
+                    shutil.copyfileobj(uf, thumbf)
+                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
+                               (info_dict['extractor'], info_dict['id'], thumb_filename))
+
          if not self.params.get('skip_download', False):
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                  success = True
          if not self.params.get('skip_download', False):
              if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                  success = True
@@ -472,17 +703,17 @@ class FileDownloader(object):
                  except (OSError, IOError) as err:
                      raise UnavailableVideoError()
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                  except (OSError, IOError) as err:
                      raise UnavailableVideoError()
                  except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
-                    self.trouble(u'ERROR: unable to download video data: %s' % str(err))
+                    self.report_error(u'unable to download video data: %s' % str(err))
                      return
                  except (ContentTooShortError, ) as err:
                      return
                  except (ContentTooShortError, ) as err:
-                    self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
+                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                      return
  
              if success:
                  try:
                      self.post_process(filename, info_dict)
                  except (PostProcessingError) as err:
                      return
  
              if success:
                  try:
                      self.post_process(filename, info_dict)
                  except (PostProcessingError) as err:
-                    self.trouble(u'ERROR: postprocessing: %s' % str(err))
+                    self.report_error(u'postprocessing: %s' % str(err))
                      return
  
      def download(self, url_list):
                      return
  
      def download(self, url_list):
@@ -491,63 +722,65 @@ class FileDownloader(object):
              raise SameFileError(self.params['outtmpl'])
  
          for url in url_list:
              raise SameFileError(self.params['outtmpl'])
  
          for url in url_list:
-            suitable_found = False
-            for ie in self._ies:
-                # Go to next InfoExtractor if not suitable
-                if not ie.suitable(url):
-                    continue
-
-                # Warn if the _WORKING attribute is False
-                if not ie.working():
-                    self.trouble(u'WARNING: the program functionality for this site has been marked as broken, '
-                                 u'and will probably not work. If you want to go on, use the -i option.')
-
-                # Suitable InfoExtractor found
-                suitable_found = True
-
-                # Extract information from URL and process it
-                videos = ie.extract(url)
-                for video in videos or []:
-                    video['extractor'] = ie.IE_NAME
-                    try:
-                        self.increment_downloads()
-                        self.process_info(video)
-                    except UnavailableVideoError:
-                        self.trouble(u'\nERROR: unable to download video')
-
-                # Suitable InfoExtractor had been found; go to next URL
-                break
-
-            if not suitable_found:
-                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
+            try:
+                # extract_info() also downloads the videos
+                videos = self.extract_info(url)
+            except UnavailableVideoError:
+                self.report_error(u'unable to download video')
+            except MaxDownloadsReached:
+                self.to_screen(u'[info] Maximum number of downloaded files reached.')
+                raise
  
          return self._download_retcode
  
      def post_process(self, filename, ie_info):
  
          return self._download_retcode
  
      def post_process(self, filename, ie_info):
-        """Run the postprocessing chain on the given file."""
+        """Run all the postprocessors on the given file."""
          info = dict(ie_info)
          info['filepath'] = filename
          info = dict(ie_info)
          info['filepath'] = filename
+        keep_video = None
          for pp in self._pps:
          for pp in self._pps:
-            info = pp.run(info)
-            if info is None:
-                break
+            try:
+                keep_video_wish,new_info = pp.run(info)
+                if keep_video_wish is not None:
+                    if keep_video_wish:
+                        keep_video = keep_video_wish
+                    elif keep_video is None:
+                        # No clear decision yet, so adopt this postprocessor's wish
+                        keep_video = keep_video_wish
+            except PostProcessingError as e:
+                self.to_stderr(u'ERROR: ' + e.msg)
+        if keep_video is False and not self.params.get('keepvideo', False):
+            try:
+                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
+                os.remove(encodeFilename(filename))
+            except (IOError, OSError):
+                self.report_warning(u'Unable to remove downloaded video file')
  
  
-    def _download_with_rtmpdump(self, filename, url, player_url):
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
  
          # Check for rtmpdump first
          try:
          self.report_destination(filename)
          tmpfilename = self.temp_name(filename)
  
          # Check for rtmpdump first
          try:
-            subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+            subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
          except (OSError, IOError):
          except (OSError, IOError):
-            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
+            self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
              return False
              return False
+        verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
  
          # Download using rtmpdump. rtmpdump returns exit code 2 when
          # the connection was interrupted and resuming appears to be
          # possible. This is part of rtmpdump's normal usage, AFAIK.
  
          # Download using rtmpdump. rtmpdump returns exit code 2 when
          # the connection was interrupted and resuming appears to be
          # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
-        args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
+        basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
+        if player_url is not None:
+            basic_args += ['--swfVfy', player_url]
+        if page_url is not None:
+            basic_args += ['--pageUrl', page_url]
+        if play_path is not None:
+            basic_args += ['--playpath', play_path]
+        if tc_url is not None:
+            basic_args += ['--tcUrl', tc_url]
+        args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
          if self.params.get('verbose', False):
              try:
                  import pipes
          if self.params.get('verbose', False):
              try:
                  import pipes
@@ -570,31 +803,83 @@ class FileDownloader(object):
                  retval = 0
                  break
          if retval == 0:
                  retval = 0
                  break
          if retval == 0:
-            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
+            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
              self.try_rename(tmpfilename, filename)
              self.try_rename(tmpfilename, filename)
+            self._hook_progress({
+                'downloaded_bytes': fsize,
+                'total_bytes': fsize,
+                'filename': filename,
+                'status': 'finished',
+            })
              return True
          else:
              return True
          else:
-            self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
+            self.to_stderr(u"\n")
+            self.report_error(u'rtmpdump exited with code %d' % retval)
+            return False
+
+    def _download_with_mplayer(self, filename, url):
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+
+        args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
+        # Check for mplayer first
+        try:
+            subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+        except (OSError, IOError):
+            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
              return False
  
              return False
  
+        # Download using mplayer. 
+        retval = subprocess.call(args)
+        if retval == 0:
+            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
+            self.try_rename(tmpfilename, filename)
+            self._hook_progress({
+                'downloaded_bytes': fsize,
+                'total_bytes': fsize,
+                'filename': filename,
+                'status': 'finished',
+            })
+            return True
+        else:
+            self.to_stderr(u"\n")
+            self.report_error(u'mplayer exited with code %d' % retval)
+            return False
+
+
      def _do_download(self, filename, info_dict):
          url = info_dict['url']
      def _do_download(self, filename, info_dict):
          url = info_dict['url']
-        player_url = info_dict.get('player_url', None)
  
          # Check file already present
          if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
              self.report_file_already_downloaded(filename)
  
          # Check file already present
          if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
              self.report_file_already_downloaded(filename)
+            self._hook_progress({
+                'filename': filename,
+                'status': 'finished',
+            })
              return True
  
          # Attempt to download using rtmpdump
          if url.startswith('rtmp'):
              return True
  
          # Attempt to download using rtmpdump
          if url.startswith('rtmp'):
-            return self._download_with_rtmpdump(filename, url, player_url)
+            return self._download_with_rtmpdump(filename, url,
+                                                info_dict.get('player_url', None),
+                                                info_dict.get('page_url', None),
+                                                info_dict.get('play_path', None),
+                                                info_dict.get('tc_url', None))
+
+        # Attempt to download using mplayer
+        if url.startswith('mms') or url.startswith('rtsp'):
+            return self._download_with_mplayer(filename, url)
  
          tmpfilename = self.temp_name(filename)
          stream = None
  
          # Do not include the Accept-Encoding header
          headers = {'Youtubedl-no-compression': 'True'}
  
          tmpfilename = self.temp_name(filename)
          stream = None
  
          # Do not include the Accept-Encoding header
          headers = {'Youtubedl-no-compression': 'True'}
+        if 'user_agent' in info_dict:
+            headers['Youtubedl-user-agent'] = info_dict['user_agent']
          basic_request = compat_urllib_request.Request(url, None, headers)
          request = compat_urllib_request.Request(url, None, headers)
  
          basic_request = compat_urllib_request.Request(url, None, headers)
          request = compat_urllib_request.Request(url, None, headers)
  
@@ -651,6 +936,10 @@ class FileDownloader(object):
                              # the one in the hard drive.
                              self.report_file_already_downloaded(filename)
                              self.try_rename(tmpfilename, filename)
                              # the one in the hard drive.
                              self.report_file_already_downloaded(filename)
                              self.try_rename(tmpfilename, filename)
+                            self._hook_progress({
+                                'filename': filename,
+                                'status': 'finished',
+                            })
                              return True
                          else:
                              # The length does not match, we start the download over
                              return True
                          else:
                              # The length does not match, we start the download over
@@ -663,12 +952,21 @@ class FileDownloader(object):
                  self.report_retry(count, retries)
  
          if count > retries:
                  self.report_retry(count, retries)
  
          if count > retries:
-            self.trouble(u'ERROR: giving up after %s retries' % retries)
+            self.report_error(u'giving up after %s retries' % retries)
              return False
  
          data_len = data.info().get('Content-length', None)
          if data_len is not None:
              data_len = int(data_len) + resume_len
              return False
  
          data_len = data.info().get('Content-length', None)
          if data_len is not None:
              data_len = int(data_len) + resume_len
+            min_data_len = self.params.get("min_filesize", None)
+            max_data_len =  self.params.get("max_filesize", None)
+            if min_data_len is not None and data_len < min_data_len:
+                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
+                return False
+            if max_data_len is not None and data_len > max_data_len:
+                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
+                return False
+
          data_len_str = self.format_bytes(data_len)
          byte_counter = 0 + resume_len
          block_size = self.params.get('buffersize', 1024)
          data_len_str = self.format_bytes(data_len)
          byte_counter = 0 + resume_len
          block_size = self.params.get('buffersize', 1024)
@@ -690,12 +988,13 @@ class FileDownloader(object):
                      filename = self.undo_temp_name(tmpfilename)
                      self.report_destination(filename)
                  except (OSError, IOError) as err:
                      filename = self.undo_temp_name(tmpfilename)
                      self.report_destination(filename)
                  except (OSError, IOError) as err:
-                    self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
+                    self.report_error(u'unable to open for writing: %s' % str(err))
                      return False
              try:
                  stream.write(data_block)
              except (IOError, OSError) as err:
                      return False
              try:
                  stream.write(data_block)
              except (IOError, OSError) as err:
-                self.trouble(u'\nERROR: unable to write data: %s' % str(err))
+                self.to_stderr(u"\n")
+                self.report_error(u'unable to write data: %s' % str(err))
                  return False
              if not self.params.get('noresizebuffer', False):
                  block_size = self.best_block_size(after - before, len(data_block))
                  return False
              if not self.params.get('noresizebuffer', False):
                  block_size = self.best_block_size(after - before, len(data_block))
@@ -709,11 +1008,20 @@ class FileDownloader(object):
                  eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                  self.report_progress(percent_str, data_len_str, speed_str, eta_str)
  
                  eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                  self.report_progress(percent_str, data_len_str, speed_str, eta_str)
  
+            self._hook_progress({
+                'downloaded_bytes': byte_counter,
+                'total_bytes': data_len,
+                'tmpfilename': tmpfilename,
+                'filename': filename,
+                'status': 'downloading',
+            })
+
              # Apply rate limit
              self.slow_down(start, byte_counter - resume_len)
  
          if stream is None:
              # Apply rate limit
              self.slow_down(start, byte_counter - resume_len)
  
          if stream is None:
-            self.trouble(u'\nERROR: Did not get any data blocks')
+            self.to_stderr(u"\n")
+            self.report_error(u'Did not get any data blocks')
              return False
          stream.close()
          self.report_finish()
              return False
          stream.close()
          self.report_finish()
@@ -725,4 +1033,31 @@ class FileDownloader(object):
          if self.params.get('updatetime', True):
              info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
  
          if self.params.get('updatetime', True):
              info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
  
+        self._hook_progress({
+            'downloaded_bytes': byte_counter,
+            'total_bytes': byte_counter,
+            'filename': filename,
+            'status': 'finished',
+        })
+
          return True
          return True
+
+    def _hook_progress(self, status):
+        for ph in self._progress_hooks:
+            ph(status)
+
+    def add_progress_hook(self, ph):
+        """ ph gets called on download progress, with a dictionary with the entries
+        * filename: The final filename
+        * status: One of "downloading" and "finished"
+
+        It can also have some of the following entries:
+
+        * downloaded_bytes: Bytes on disk
+        * total_bytes: Total bytes, None if unknown
+        * tmpfilename: The filename we're currently writing to
+
+        Hooks are guaranteed to be called at least once (with status "finished")
+        if the download is successful.
+        """
+        self._progress_hooks.append(ph)