Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/FileDownloader.py
debian/control: Update list of extractors.
[youtubedl] / youtube_dl / FileDownloader.py
index 445f3e85e6813fe82fc20bc74d3e33fc83d997b2..3ff9716b33b22e39a0a6d925bfa33aba8fa092f9 100644 (file)
@@ -1,15 +1,19 @@
-import math
 import os
 import re
 import subprocess
 import sys
 import time
 import os
 import re
 import subprocess
 import sys
 import time
-import traceback
 
 
-if os.name == 'nt':
-    import ctypes
-
-from .utils import *
+from .utils import (
+    compat_urllib_error,
+    compat_urllib_request,
+    ContentTooShortError,
+    determine_ext,
+    encodeFilename,
+    format_bytes,
+    sanitize_open,
+    timeconvert,
+)
 
 
 class FileDownloader(object):
 
 
 class FileDownloader(object):
@@ -50,45 +54,56 @@ class FileDownloader(object):
         self.params = params
 
     @staticmethod
         self.params = params
 
     @staticmethod
-    def format_bytes(bytes):
-        if bytes is None:
-            return 'N/A'
-        if type(bytes) is str:
-            bytes = float(bytes)
-        if bytes == 0.0:
-            exponent = 0
+    def format_seconds(seconds):
+        (mins, secs) = divmod(seconds, 60)
+        (hours, mins) = divmod(mins, 60)
+        if hours > 99:
+            return '--:--:--'
+        if hours == 0:
+            return '%02d:%02d' % (mins, secs)
         else:
         else:
-            exponent = int(math.log(bytes, 1024.0))
-        suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
-        converted = float(bytes) / float(1024 ** exponent)
-        return '%.2f%s' % (converted, suffix)
+            return '%02d:%02d:%02d' % (hours, mins, secs)
 
     @staticmethod
     def calc_percent(byte_counter, data_len):
         if data_len is None:
 
     @staticmethod
     def calc_percent(byte_counter, data_len):
         if data_len is None:
+            return None
+        return float(byte_counter) / float(data_len) * 100.0
+
+    @staticmethod
+    def format_percent(percent):
+        if percent is None:
             return '---.-%'
             return '---.-%'
-        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
+        return '%6s' % ('%3.1f%%' % percent)
 
     @staticmethod
     def calc_eta(start, now, total, current):
         if total is None:
 
     @staticmethod
     def calc_eta(start, now, total, current):
         if total is None:
-            return '--:--'
+            return None
         dif = now - start
         if current == 0 or dif < 0.001: # One millisecond
         dif = now - start
         if current == 0 or dif < 0.001: # One millisecond
-            return '--:--'
+            return None
         rate = float(current) / dif
         rate = float(current) / dif
-        eta = int((float(total) - float(current)) / rate)
-        (eta_mins, eta_secs) = divmod(eta, 60)
-        if eta_mins > 99:
+        return int((float(total) - float(current)) / rate)
+
+    @staticmethod
+    def format_eta(eta):
+        if eta is None:
             return '--:--'
             return '--:--'
-        return '%02d:%02d' % (eta_mins, eta_secs)
+        return FileDownloader.format_seconds(eta)
 
     @staticmethod
     def calc_speed(start, now, bytes):
         dif = now - start
         if bytes == 0 or dif < 0.001: # One millisecond
 
     @staticmethod
     def calc_speed(start, now, bytes):
         dif = now - start
         if bytes == 0 or dif < 0.001: # One millisecond
+            return None
+        return float(bytes) / dif
+
+    @staticmethod
+    def format_speed(speed):
+        if speed is None:
             return '%10s' % '---b/s'
             return '%10s' % '---b/s'
-        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
+        return '%10s' % ('%s/s' % format_bytes(speed))
 
     @staticmethod
     def best_block_size(elapsed_time, bytes):
 
     @staticmethod
     def best_block_size(elapsed_time, bytes):
@@ -119,16 +134,8 @@ class FileDownloader(object):
     def to_stderr(self, message):
         self.ydl.to_screen(message)
 
     def to_stderr(self, message):
         self.ydl.to_screen(message)
 
-    def to_cons_title(self, message):
-        """Set console/terminal window title to message."""
-        if not self.params.get('consoletitle', False):
-            return
-        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
-            # c_wchar_p() might not be necessary if `message` is
-            # already of type unicode()
-            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
-        elif 'TERM' in os.environ:
-            self.to_screen('\033]0;%s\007' % message, skip_eol=True)
+    def to_console_title(self, message):
+        self.ydl.to_console_title(message)
 
     def trouble(self, *args, **kargs):
         self.ydl.trouble(*args, **kargs)
 
     def trouble(self, *args, **kargs):
         self.ydl.trouble(*args, **kargs)
@@ -137,7 +144,7 @@ class FileDownloader(object):
         self.ydl.report_warning(*args, **kargs)
 
     def report_error(self, *args, **kargs):
         self.ydl.report_warning(*args, **kargs)
 
     def report_error(self, *args, **kargs):
-        self.ydl.error(*args, **kargs)
+        self.ydl.report_error(*args, **kargs)
 
     def slow_down(self, start_time, byte_counter):
         """Sleep if the download speed is over the rate limit."""
 
     def slow_down(self, start_time, byte_counter):
         """Sleep if the download speed is over the rate limit."""
@@ -169,7 +176,7 @@ class FileDownloader(object):
             if old_filename == new_filename:
                 return
             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
             if old_filename == new_filename:
                 return
             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
-        except (IOError, OSError) as err:
+        except (IOError, OSError):
             self.report_error(u'unable to rename file')
 
     def try_utime(self, filename, last_modified_hdr):
             self.report_error(u'unable to rename file')
 
     def try_utime(self, filename, last_modified_hdr):
@@ -197,18 +204,27 @@ class FileDownloader(object):
         """Report destination filename."""
         self.to_screen(u'[download] Destination: ' + filename)
 
         """Report destination filename."""
         self.to_screen(u'[download] Destination: ' + filename)
 
-    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
+    def report_progress(self, percent, data_len_str, speed, eta):
         """Report download progress."""
         if self.params.get('noprogress', False):
             return
         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
         """Report download progress."""
         if self.params.get('noprogress', False):
             return
         clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+        if eta is not None:
+            eta_str = self.format_eta(eta)
+        else:
+            eta_str = 'Unknown ETA'
+        if percent is not None:
+            percent_str = self.format_percent(percent)
+        else:
+            percent_str = 'Unknown %'
+        speed_str = self.format_speed(speed)
         if self.params.get('progress_with_newline', False):
             self.to_screen(u'[download] %s of %s at %s ETA %s' %
                 (percent_str, data_len_str, speed_str, eta_str))
         else:
             self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
                 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
         if self.params.get('progress_with_newline', False):
             self.to_screen(u'[download] %s of %s at %s ETA %s' %
                 (percent_str, data_len_str, speed_str, eta_str))
         else:
             self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
                 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
-        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
+        self.to_console_title(u'youtube-dl - %s of %s at %s ETA %s' %
                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 
     def report_resuming_byte(self, resume_len):
                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 
     def report_resuming_byte(self, resume_len):
@@ -223,23 +239,81 @@ class FileDownloader(object):
         """Report file has already been fully downloaded."""
         try:
             self.to_screen(u'[download] %s has already been downloaded' % file_name)
         """Report file has already been fully downloaded."""
         try:
             self.to_screen(u'[download] %s has already been downloaded' % file_name)
-        except (UnicodeEncodeError) as err:
+        except UnicodeEncodeError:
             self.to_screen(u'[download] The file has already been downloaded')
 
     def report_unable_to_resume(self):
         """Report it was impossible to resume download."""
         self.to_screen(u'[download] Unable to resume')
 
             self.to_screen(u'[download] The file has already been downloaded')
 
     def report_unable_to_resume(self):
         """Report it was impossible to resume download."""
         self.to_screen(u'[download] Unable to resume')
 
-    def report_finish(self):
+    def report_finish(self, data_len_str, tot_time):
         """Report download finished."""
         if self.params.get('noprogress', False):
             self.to_screen(u'[download] Download completed')
         else:
         """Report download finished."""
         if self.params.get('noprogress', False):
             self.to_screen(u'[download] Download completed')
         else:
-            self.to_screen(u'')
+            clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
+            self.to_screen(u'\r%s[download] 100%% of %s in %s' %
+                (clear_line, data_len_str, self.format_seconds(tot_time)))
+
+    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
+        def run_rtmpdump(args):
+            start = time.time()
+            resume_percent = None
+            resume_downloaded_data_len = None
+            proc = subprocess.Popen(args, stderr=subprocess.PIPE)
+            cursor_in_new_line = True
+            proc_stderr_closed = False
+            while not proc_stderr_closed:
+                # read line from stderr
+                line = u''
+                while True:
+                    char = proc.stderr.read(1)
+                    if not char:
+                        proc_stderr_closed = True
+                        break
+                    if char in [b'\r', b'\n']:
+                        break
+                    line += char.decode('ascii', 'replace')
+                if not line:
+                    # proc_stderr_closed is True
+                    continue
+                mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
+                if mobj:
+                    downloaded_data_len = int(float(mobj.group(1))*1024)
+                    percent = float(mobj.group(2))
+                    if not resume_percent:
+                        resume_percent = percent
+                        resume_downloaded_data_len = downloaded_data_len
+                    eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
+                    speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
+                    data_len = None
+                    if percent > 0:
+                        data_len = int(downloaded_data_len * 100 / percent)
+                    data_len_str = u'~' + format_bytes(data_len)
+                    self.report_progress(percent, data_len_str, speed, eta)
+                    cursor_in_new_line = False
+                    self._hook_progress({
+                        'downloaded_bytes': downloaded_data_len,
+                        'total_bytes': data_len,
+                        'tmpfilename': tmpfilename,
+                        'filename': filename,
+                        'status': 'downloading',
+                        'eta': eta,
+                        'speed': speed,
+                    })
+                elif self.params.get('verbose', False):
+                    if not cursor_in_new_line:
+                        self.to_screen(u'')
+                    cursor_in_new_line = True
+                    self.to_screen(u'[rtmpdump] '+line)
+            proc.wait()
+            if not cursor_in_new_line:
+                self.to_screen(u'')
+            return proc.returncode
 
 
-    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
         self.report_destination(filename)
         tmpfilename = self.temp_name(filename)
+        test = self.params.get('test', False)
 
         # Check for rtmpdump first
         try:
 
         # Check for rtmpdump first
         try:
@@ -247,12 +321,11 @@ class FileDownloader(object):
         except (OSError, IOError):
             self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
             return False
         except (OSError, IOError):
             self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
             return False
-        verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
 
         # Download using rtmpdump. rtmpdump returns exit code 2 when
         # the connection was interrupted and resuming appears to be
         # possible. This is part of rtmpdump's normal usage, AFAIK.
 
         # Download using rtmpdump. rtmpdump returns exit code 2 when
         # the connection was interrupted and resuming appears to be
         # possible. This is part of rtmpdump's normal usage, AFAIK.
-        basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
+        basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
         if player_url is not None:
             basic_args += ['--swfVfy', player_url]
         if page_url is not None:
         if player_url is not None:
             basic_args += ['--swfVfy', player_url]
         if page_url is not None:
@@ -261,31 +334,53 @@ class FileDownloader(object):
             basic_args += ['--playpath', play_path]
         if tc_url is not None:
             basic_args += ['--tcUrl', url]
             basic_args += ['--playpath', play_path]
         if tc_url is not None:
             basic_args += ['--tcUrl', url]
+        if test:
+            basic_args += ['--stop', '1']
+        if live:
+            basic_args += ['--live']
         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
         args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
+
+        if sys.platform == 'win32' and sys.version_info < (3, 0):
+            # Windows subprocess module does not actually support Unicode
+            # on Python 2.x
+            # See http://stackoverflow.com/a/9951851/35070
+            subprocess_encoding = sys.getfilesystemencoding()
+            args = [a.encode(subprocess_encoding, 'ignore') for a in args]
+        else:
+            subprocess_encoding = None
+
         if self.params.get('verbose', False):
         if self.params.get('verbose', False):
+            if subprocess_encoding:
+                str_args = [
+                    a.decode(subprocess_encoding) if isinstance(a, bytes) else a
+                    for a in args]
+            else:
+                str_args = args
             try:
                 import pipes
             try:
                 import pipes
-                shell_quote = lambda args: ' '.join(map(pipes.quote, args))
+                shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
             except ImportError:
                 shell_quote = repr
             except ImportError:
                 shell_quote = repr
-            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
-        retval = subprocess.call(args)
-        while retval == 2 or retval == 1:
+            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))
+
+        retval = run_rtmpdump(args)
+
+        while (retval == 2 or retval == 1) and not test:
             prevsize = os.path.getsize(encodeFilename(tmpfilename))
             prevsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
+            self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
             time.sleep(5.0) # This seems to be needed
             time.sleep(5.0) # This seems to be needed
-            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
+            retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
             cursize = os.path.getsize(encodeFilename(tmpfilename))
             if prevsize == cursize and retval == 1:
                 break
              # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
             if prevsize == cursize and retval == 2 and cursize > 1024:
             cursize = os.path.getsize(encodeFilename(tmpfilename))
             if prevsize == cursize and retval == 1:
                 break
              # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
             if prevsize == cursize and retval == 2 and cursize > 1024:
-                self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
+                self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                 retval = 0
                 break
                 retval = 0
                 break
-        if retval == 0:
+        if retval == 0 or (test and retval == 2):
             fsize = os.path.getsize(encodeFilename(tmpfilename))
             fsize = os.path.getsize(encodeFilename(tmpfilename))
-            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
+            self.to_screen(u'[rtmpdump] %s bytes' % fsize)
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,
             self.try_rename(tmpfilename, filename)
             self._hook_progress({
                 'downloaded_bytes': fsize,
@@ -329,6 +424,40 @@ class FileDownloader(object):
             self.report_error(u'mplayer exited with code %d' % retval)
             return False
 
             self.report_error(u'mplayer exited with code %d' % retval)
             return False
 
+    def _download_m3u8_with_ffmpeg(self, filename, url):
+        self.report_destination(filename)
+        tmpfilename = self.temp_name(filename)
+
+        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
+            '-bsf:a', 'aac_adtstoasc', tmpfilename]
+
+        for program in ['avconv', 'ffmpeg']:
+            try:
+                subprocess.call([program, '-version'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
+                break
+            except (OSError, IOError):
+                pass
+        else:
+            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
+        cmd = [program] + args
+
+        retval = subprocess.call(cmd)
+        if retval == 0:
+            fsize = os.path.getsize(encodeFilename(tmpfilename))
+            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
+            self.try_rename(tmpfilename, filename)
+            self._hook_progress({
+                'downloaded_bytes': fsize,
+                'total_bytes': fsize,
+                'filename': filename,
+                'status': 'finished',
+            })
+            return True
+        else:
+            self.to_stderr(u"\n")
+            self.report_error(u'ffmpeg exited with code %d' % retval)
+            return False
+
 
     def _do_download(self, filename, info_dict):
         url = info_dict['url']
 
     def _do_download(self, filename, info_dict):
         url = info_dict['url']
@@ -339,6 +468,7 @@ class FileDownloader(object):
             self._hook_progress({
                 'filename': filename,
                 'status': 'finished',
             self._hook_progress({
                 'filename': filename,
                 'status': 'finished',
+                'total_bytes': os.path.getsize(encodeFilename(filename)),
             })
             return True
 
             })
             return True
 
@@ -348,12 +478,17 @@ class FileDownloader(object):
                                                 info_dict.get('player_url', None),
                                                 info_dict.get('page_url', None),
                                                 info_dict.get('play_path', None),
                                                 info_dict.get('player_url', None),
                                                 info_dict.get('page_url', None),
                                                 info_dict.get('play_path', None),
-                                                info_dict.get('tc_url', None))
+                                                info_dict.get('tc_url', None),
+                                                info_dict.get('rtmp_live', False))
 
         # Attempt to download using mplayer
         if url.startswith('mms') or url.startswith('rtsp'):
             return self._download_with_mplayer(filename, url)
 
 
         # Attempt to download using mplayer
         if url.startswith('mms') or url.startswith('rtsp'):
             return self._download_with_mplayer(filename, url)
 
+        # m3u8 manifests are downloaded with ffmpeg
+        if determine_ext(url) == u'm3u8':
+            return self._download_m3u8_with_ffmpeg(filename, url)
+
         tmpfilename = self.temp_name(filename)
         stream = None
 
         tmpfilename = self.temp_name(filename)
         stream = None
 
@@ -448,7 +583,7 @@ class FileDownloader(object):
                 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                 return False
 
                 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                 return False
 
-        data_len_str = self.format_bytes(data_len)
+        data_len_str = format_bytes(data_len)
         byte_counter = 0 + resume_len
         block_size = self.params.get('buffersize', 1024)
         start = time.time()
         byte_counter = 0 + resume_len
         block_size = self.params.get('buffersize', 1024)
         start = time.time()
@@ -481,13 +616,13 @@ class FileDownloader(object):
                 block_size = self.best_block_size(after - before, len(data_block))
 
             # Progress message
                 block_size = self.best_block_size(after - before, len(data_block))
 
             # Progress message
-            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
+            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
             if data_len is None:
             if data_len is None:
-                self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
+                eta = percent = None
             else:
             else:
-                percent_str = self.calc_percent(byte_counter, data_len)
-                eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
-                self.report_progress(percent_str, data_len_str, speed_str, eta_str)
+                percent = self.calc_percent(byte_counter, data_len)
+                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
+            self.report_progress(percent, data_len_str, speed, eta)
 
             self._hook_progress({
                 'downloaded_bytes': byte_counter,
 
             self._hook_progress({
                 'downloaded_bytes': byte_counter,
@@ -495,6 +630,8 @@ class FileDownloader(object):
                 'tmpfilename': tmpfilename,
                 'filename': filename,
                 'status': 'downloading',
                 'tmpfilename': tmpfilename,
                 'filename': filename,
                 'status': 'downloading',
+                'eta': eta,
+                'speed': speed,
             })
 
             # Apply rate limit
             })
 
             # Apply rate limit
@@ -505,7 +642,7 @@ class FileDownloader(object):
             self.report_error(u'Did not get any data blocks')
             return False
         stream.close()
             self.report_error(u'Did not get any data blocks')
             return False
         stream.close()
-        self.report_finish()
+        self.report_finish(data_len_str, (time.time() - start))
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)
@@ -537,6 +674,8 @@ class FileDownloader(object):
         * downloaded_bytes: Bytes on disks
         * total_bytes: Total bytes, None if unknown
         * tmpfilename: The filename we're currently writing to
         * downloaded_bytes: Bytes on disks
         * total_bytes: Total bytes, None if unknown
         * tmpfilename: The filename we're currently writing to
+        * eta: The estimated time in seconds, None if unknown
+        * speed: The download speed in bytes/second, None if unknown
 
         Hooks are guaranteed to be called at least once (with status "finished")
         if the download is successful.
 
         Hooks are guaranteed to be called at least once (with status "finished")
         if the download is successful.