]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/downloader/common.py
New upstream version 2018.11.07
[youtubedl] / youtube_dl / downloader / common.py
index f85f0c94e7544b6eabad3d1f4158f88b9e3c981d..5979833c08ab877973893ed9fa4d24cd12c3193b 100644 (file)
@@ -1,12 +1,18 @@
+from __future__ import division, unicode_literals
+
 import os
 import re
 import sys
 import time
 import os
 import re
 import sys
 import time
+import random
 
 
+from ..compat import compat_os_name
 from ..utils import (
 from ..utils import (
-    compat_str,
+    decodeArgument,
     encodeFilename,
     encodeFilename,
+    error_to_compat_str,
     format_bytes,
     format_bytes,
+    shell_quote,
     timeconvert,
 )
 
     timeconvert,
 )
 
@@ -23,21 +29,28 @@ class FileDownloader(object):
 
     Available options:
 
 
     Available options:
 
-    verbose:           Print additional info to stdout.
-    quiet:             Do not print messages to stdout.
-    ratelimit:         Download speed limit, in bytes/sec.
-    retries:           Number of times to retry for HTTP error 5xx
-    buffersize:        Size of download buffer in bytes.
-    noresizebuffer:    Do not automatically resize the download buffer.
-    continuedl:        Try to continue downloads if possible.
-    noprogress:        Do not print the progress bar.
-    logtostderr:       Log messages to stderr instead of stdout.
-    consoletitle:      Display progress in console window's titlebar.
-    nopart:            Do not use temporary .part files.
-    updatetime:        Use the Last-modified header to set output file timestamps.
-    test:              Download only first bytes to test the downloader.
-    min_filesize:      Skip files smaller than this size
-    max_filesize:      Skip files larger than this size
+    verbose:            Print additional info to stdout.
+    quiet:              Do not print messages to stdout.
+    ratelimit:          Download speed limit, in bytes/sec.
+    retries:            Number of times to retry for HTTP error 5xx
+    buffersize:         Size of download buffer in bytes.
+    noresizebuffer:     Do not automatically resize the download buffer.
+    continuedl:         Try to continue downloads if possible.
+    noprogress:         Do not print the progress bar.
+    logtostderr:        Log messages to stderr instead of stdout.
+    consoletitle:       Display progress in console window's titlebar.
+    nopart:             Do not use temporary .part files.
+    updatetime:         Use the Last-modified header to set output file timestamps.
+    test:               Download only first bytes to test the downloader.
+    min_filesize:       Skip files smaller than this size
+    max_filesize:       Skip files larger than this size
+    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
+    external_downloader_args:  A list of additional command-line arguments for the
+                        external downloader.
+    hls_use_mpegts:     Use the mpegts container for HLS videos.
+    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
+                        useful for bypassing bandwidth throttling imposed by
+                        a webserver (experimental)
 
     Subclasses of this one must re-define the real_download method.
     """
 
     Subclasses of this one must re-define the real_download method.
     """
@@ -50,6 +63,7 @@ class FileDownloader(object):
         self.ydl = ydl
         self._progress_hooks = []
         self.params = params
         self.ydl = ydl
         self._progress_hooks = []
         self.params = params
+        self.add_progress_hook(self.report_progress)
 
     @staticmethod
     def format_seconds(seconds):
 
     @staticmethod
     def format_seconds(seconds):
@@ -78,8 +92,10 @@ class FileDownloader(object):
     def calc_eta(start, now, total, current):
         if total is None:
             return None
     def calc_eta(start, now, total, current):
         if total is None:
             return None
+        if now is None:
+            now = time.time()
         dif = now - start
         dif = now - start
-        if current == 0 or dif < 0.001: # One millisecond
+        if current == 0 or dif < 0.001:  # One millisecond
             return None
         rate = float(current) / dif
         return int((float(total) - float(current)) / rate)
             return None
         rate = float(current) / dif
         return int((float(total) - float(current)) / rate)
@@ -93,7 +109,7 @@ class FileDownloader(object):
     @staticmethod
     def calc_speed(start, now, bytes):
         dif = now - start
     @staticmethod
     def calc_speed(start, now, bytes):
         dif = now - start
-        if bytes == 0 or dif < 0.001: # One millisecond
+        if bytes == 0 or dif < 0.001:  # One millisecond
             return None
         return float(bytes) / dif
 
             return None
         return float(bytes) / dif
 
@@ -103,10 +119,14 @@ class FileDownloader(object):
             return '%10s' % '---b/s'
         return '%10s' % ('%s/s' % format_bytes(speed))
 
             return '%10s' % '---b/s'
         return '%10s' % ('%s/s' % format_bytes(speed))
 
+    @staticmethod
+    def format_retries(retries):
+        return 'inf' if retries == float('inf') else '%.0f' % retries
+
     @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)
     @staticmethod
     def best_block_size(elapsed_time, bytes):
         new_min = max(bytes / 2.0, 1.0)
-        new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
+        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
         if elapsed_time < 0.001:
             return int(new_max)
         rate = bytes / elapsed_time
         if elapsed_time < 0.001:
             return int(new_max)
         rate = bytes / elapsed_time
@@ -144,38 +164,42 @@ class FileDownloader(object):
     def report_error(self, *args, **kargs):
         self.ydl.report_error(*args, **kargs)
 
     def report_error(self, *args, **kargs):
         self.ydl.report_error(*args, **kargs)
 
-    def slow_down(self, start_time, byte_counter):
+    def slow_down(self, start_time, now, byte_counter):
         """Sleep if the download speed is over the rate limit."""
         """Sleep if the download speed is over the rate limit."""
-        rate_limit = self.params.get('ratelimit', None)
+        rate_limit = self.params.get('ratelimit')
         if rate_limit is None or byte_counter == 0:
             return
         if rate_limit is None or byte_counter == 0:
             return
-        now = time.time()
+        if now is None:
+            now = time.time()
         elapsed = now - start_time
         if elapsed <= 0.0:
             return
         speed = float(byte_counter) / elapsed
         if speed > rate_limit:
         elapsed = now - start_time
         if elapsed <= 0.0:
             return
         speed = float(byte_counter) / elapsed
         if speed > rate_limit:
-            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
+            time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
 
     def temp_name(self, filename):
         """Returns a temporary filename for the given filename."""
 
     def temp_name(self, filename):
         """Returns a temporary filename for the given filename."""
-        if self.params.get('nopart', False) or filename == u'-' or \
+        if self.params.get('nopart', False) or filename == '-' or \
                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
             return filename
                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
             return filename
-        return filename + u'.part'
+        return filename + '.part'
 
     def undo_temp_name(self, filename):
 
     def undo_temp_name(self, filename):
-        if filename.endswith(u'.part'):
-            return filename[:-len(u'.part')]
+        if filename.endswith('.part'):
+            return filename[:-len('.part')]
         return filename
 
         return filename
 
+    def ytdl_filename(self, filename):
+        return filename + '.ytdl'
+
     def try_rename(self, old_filename, new_filename):
         try:
             if old_filename == new_filename:
                 return
             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
         except (IOError, OSError) as err:
     def try_rename(self, old_filename, new_filename):
         try:
             if old_filename == new_filename:
                 return
             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
         except (IOError, OSError) as err:
-            self.report_error(u'unable to rename file: %s' % compat_str(err))
+            self.report_error('unable to rename file: %s' % error_to_compat_str(err))
 
     def try_utime(self, filename, last_modified_hdr):
         """Try to set the last-modified time of the given file."""
 
     def try_utime(self, filename, last_modified_hdr):
         """Try to set the last-modified time of the given file."""
@@ -194,125 +218,172 @@ class FileDownloader(object):
             return
         try:
             os.utime(filename, (time.time(), filetime))
             return
         try:
             os.utime(filename, (time.time(), filetime))
-        except:
+        except Exception:
             pass
         return filetime
 
     def report_destination(self, filename):
         """Report destination filename."""
             pass
         return filetime
 
     def report_destination(self, filename):
         """Report destination filename."""
-        self.to_screen(u'[download] Destination: ' + filename)
+        self.to_screen('[download] Destination: ' + filename)
 
     def _report_progress_status(self, msg, is_last_line=False):
 
     def _report_progress_status(self, msg, is_last_line=False):
-        fullmsg = u'[download] ' + msg
+        fullmsg = '[download] ' + msg
         if self.params.get('progress_with_newline', False):
             self.to_screen(fullmsg)
         else:
         if self.params.get('progress_with_newline', False):
             self.to_screen(fullmsg)
         else:
-            if os.name == 'nt':
+            if compat_os_name == 'nt':
                 prev_len = getattr(self, '_report_progress_prev_line_length',
                                    0)
                 if prev_len > len(fullmsg):
                 prev_len = getattr(self, '_report_progress_prev_line_length',
                                    0)
                 if prev_len > len(fullmsg):
-                    fullmsg += u' ' * (prev_len - len(fullmsg))
+                    fullmsg += ' ' * (prev_len - len(fullmsg))
                 self._report_progress_prev_line_length = len(fullmsg)
                 self._report_progress_prev_line_length = len(fullmsg)
-                clear_line = u'\r'
+                clear_line = '\r'
             else:
             else:
-                clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
+                clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
             self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
             self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
-        self.to_console_title(u'youtube-dl ' + msg)
+        self.to_console_title('youtube-dl ' + msg)
 
 
-    def report_progress(self, percent, data_len_str, speed, eta):
-        """Report download progress."""
-        if self.params.get('noprogress', False):
+    def report_progress(self, s):
+        if s['status'] == 'finished':
+            if self.params.get('noprogress', False):
+                self.to_screen('[download] Download completed')
+            else:
+                msg_template = '100%%'
+                if s.get('total_bytes') is not None:
+                    s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+                    msg_template += ' of %(_total_bytes_str)s'
+                if s.get('elapsed') is not None:
+                    s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+                    msg_template += ' in %(_elapsed_str)s'
+                self._report_progress_status(
+                    msg_template % s, is_last_line=True)
+
+        if self.params.get('noprogress'):
             return
             return
-        if eta is not None:
-            eta_str = self.format_eta(eta)
-        else:
-            eta_str = 'Unknown ETA'
-        if percent is not None:
-            percent_str = self.format_percent(percent)
+
+        if s['status'] != 'downloading':
+            return
+
+        if s.get('eta') is not None:
+            s['_eta_str'] = self.format_eta(s['eta'])
         else:
         else:
-            percent_str = 'Unknown %'
-        speed_str = self.format_speed(speed)
+            s['_eta_str'] = 'Unknown ETA'
 
 
-        msg = (u'%s of %s at %s ETA %s' %
-               (percent_str, data_len_str, speed_str, eta_str))
-        self._report_progress_status(msg)
+        if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
+            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
+        elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
+            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
+        else:
+            if s.get('downloaded_bytes') == 0:
+                s['_percent_str'] = self.format_percent(0)
+            else:
+                s['_percent_str'] = 'Unknown %'
 
 
-    def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
-        if self.params.get('noprogress', False):
-            return
-        downloaded_str = format_bytes(downloaded_data_len)
-        speed_str = self.format_speed(speed)
-        elapsed_str = FileDownloader.format_seconds(elapsed)
-        msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
-        self._report_progress_status(msg)
-
-    def report_finish(self, data_len_str, tot_time):
-        """Report download finished."""
-        if self.params.get('noprogress', False):
-            self.to_screen(u'[download] Download completed')
+        if s.get('speed') is not None:
+            s['_speed_str'] = self.format_speed(s['speed'])
+        else:
+            s['_speed_str'] = 'Unknown speed'
+
+        if s.get('total_bytes') is not None:
+            s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+            msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
+        elif s.get('total_bytes_estimate') is not None:
+            s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
+            msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
         else:
         else:
-            self._report_progress_status(
-                (u'100%% of %s in %s' %
-                 (data_len_str, self.format_seconds(tot_time))),
-                is_last_line=True)
+            if s.get('downloaded_bytes') is not None:
+                s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
+                if s.get('elapsed'):
+                    s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
+                else:
+                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
+            else:
+                msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
+
+        self._report_progress_status(msg_template % s)
 
     def report_resuming_byte(self, resume_len):
         """Report attempt to resume at given byte."""
 
     def report_resuming_byte(self, resume_len):
         """Report attempt to resume at given byte."""
-        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
+        self.to_screen('[download] Resuming download at byte %s' % resume_len)
 
 
-    def report_retry(self, count, retries):
+    def report_retry(self, err, count, retries):
         """Report retry in case of HTTP error 5xx"""
         """Report retry in case of HTTP error 5xx"""
-        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
+        self.to_screen(
+            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s)...'
+            % (error_to_compat_str(err), count, self.format_retries(retries)))
 
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
         try:
 
     def report_file_already_downloaded(self, file_name):
         """Report file has already been fully downloaded."""
         try:
-            self.to_screen(u'[download] %s has already been downloaded' % file_name)
+            self.to_screen('[download] %s has already been downloaded' % file_name)
         except UnicodeEncodeError:
         except UnicodeEncodeError:
-            self.to_screen(u'[download] The file has already been downloaded')
+            self.to_screen('[download] The file has already been downloaded')
 
     def report_unable_to_resume(self):
         """Report it was impossible to resume download."""
 
     def report_unable_to_resume(self):
         """Report it was impossible to resume download."""
-        self.to_screen(u'[download] Unable to resume')
+        self.to_screen('[download] Unable to resume')
 
     def download(self, filename, info_dict):
         """Download to a filename using the info from info_dict
         Return True on success and False otherwise
         """
 
     def download(self, filename, info_dict):
         """Download to a filename using the info from info_dict
         Return True on success and False otherwise
         """
-        # Check file already present
-        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
-            self.report_file_already_downloaded(filename)
-            self._hook_progress({
-                'filename': filename,
-                'status': 'finished',
-                'total_bytes': os.path.getsize(encodeFilename(filename)),
-            })
-            return True
+
+        nooverwrites_and_exists = (
+            self.params.get('nooverwrites', False) and
+            os.path.exists(encodeFilename(filename))
+        )
+
+        if not hasattr(filename, 'write'):
+            continuedl_and_exists = (
+                self.params.get('continuedl', True) and
+                os.path.isfile(encodeFilename(filename)) and
+                not self.params.get('nopart', False)
+            )
+
+            # Check file already present
+            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
+                self.report_file_already_downloaded(filename)
+                self._hook_progress({
+                    'filename': filename,
+                    'status': 'finished',
+                    'total_bytes': os.path.getsize(encodeFilename(filename)),
+                })
+                return True
+
+        min_sleep_interval = self.params.get('sleep_interval')
+        if min_sleep_interval:
+            max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
+            sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
+            self.to_screen(
+                '[download] Sleeping %s seconds...' % (
+                    int(sleep_interval) if sleep_interval.is_integer()
+                    else '%.2f' % sleep_interval))
+            time.sleep(sleep_interval)
 
         return self.real_download(filename, info_dict)
 
     def real_download(self, filename, info_dict):
         """Real download process. Redefine in subclasses."""
 
         return self.real_download(filename, info_dict)
 
     def real_download(self, filename, info_dict):
         """Real download process. Redefine in subclasses."""
-        raise NotImplementedError(u'This method must be implemented by subclasses')
+        raise NotImplementedError('This method must be implemented by subclasses')
 
     def _hook_progress(self, status):
         for ph in self._progress_hooks:
             ph(status)
 
     def add_progress_hook(self, ph):
 
     def _hook_progress(self, status):
         for ph in self._progress_hooks:
             ph(status)
 
     def add_progress_hook(self, ph):
-        """ ph gets called on download progress, with a dictionary with the entries
-        * filename: The final filename
-        * status: One of "downloading" and "finished"
+        # See YoutubeDl.py (search for progress_hooks) for a description of
+        # this interface
+        self._progress_hooks.append(ph)
 
 
-        It can also have some of the following entries:
+    def _debug_cmd(self, args, exe=None):
+        if not self.params.get('verbose', False):
+            return
 
 
-        * downloaded_bytes: Bytes on disks
-        * total_bytes: Total bytes, None if unknown
-        * tmpfilename: The filename we're currently writing to
-        * eta: The estimated time in seconds, None if unknown
-        * speed: The download speed in bytes/second, None if unknown
+        str_args = [decodeArgument(a) for a in args]
 
 
-        Hooks are guaranteed to be called at least once (with status "finished")
-        if the download is successful.
-        """
-        self._progress_hooks.append(ph)
+        if exe is None:
+            exe = os.path.basename(str_args[0])
+
+        self.to_screen('[debug] %s command line: %s' % (
+            exe, shell_quote(str_args)))