]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/downloader/http.py
Annotate changelog with bugs closed.
[youtubedl] / youtube_dl / downloader / http.py
index 49170cf9d47634602efe7832b235e4a751e25817..a29f5cf31fddf3f3b15f47311fafeef9ab4f3d7a 100644 (file)
@@ -1,10 +1,10 @@
 from __future__ import unicode_literals
 
+import errno
 import os
+import socket
 import time
-
-from socket import error as SocketError
-import errno
+import re
 
 from .common import FileDownloader
 from ..compat import (
@@ -15,7 +15,6 @@ from ..utils import (
     ContentTooShortError,
     encodeFilename,
     sanitize_open,
-    format_bytes,
 )
 
 
@@ -30,13 +29,8 @@ class HttpFD(FileDownloader):
         add_headers = info_dict.get('http_headers')
         if add_headers:
             headers.update(add_headers)
-        data = info_dict.get('http_post_data')
-        http_method = info_dict.get('http_method')
-        basic_request = compat_urllib_request.Request(url, data, headers)
-        request = compat_urllib_request.Request(url, data, headers)
-        if http_method is not None:
-            basic_request.get_method = lambda: http_method
-            request.get_method = lambda: http_method
+        basic_request = compat_urllib_request.Request(url, None, headers)
+        request = compat_urllib_request.Request(url, None, headers)
 
         is_test = self.params.get('test', False)
 
@@ -51,7 +45,7 @@ class HttpFD(FileDownloader):
 
         open_mode = 'wb'
         if resume_len != 0:
-            if self.params.get('continuedl', False):
+            if self.params.get('continuedl', True):
                 self.report_resuming_byte(resume_len)
                 request.add_header('Range', 'bytes=%d-' % resume_len)
                 open_mode = 'ab'
@@ -64,6 +58,24 @@ class HttpFD(FileDownloader):
             # Establish connection
             try:
                 data = self.ydl.urlopen(request)
+                # When trying to resume, Content-Range HTTP header of response has to be checked
+                # to match the value of requested Range HTTP header. This is due to a webservers
+                # that don't support resuming and serve a whole file with no Content-Range
+                # set in response despite of requested Range (see
+                # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
+                if resume_len > 0:
+                    content_range = data.headers.get('Content-Range')
+                    if content_range:
+                        content_range_m = re.search(r'bytes (\d+)-', content_range)
+                        # Content-Range is present and matches requested Range, resume is possible
+                        if content_range_m and resume_len == int(content_range_m.group(1)):
+                            break
+                    # Content-Range is either not present or invalid. Assuming remote webserver is
+                    # trying to send the whole file, resume is not possible, so wiping the local file
+                    # and performing entire redownload
+                    self.report_unable_to_resume()
+                    resume_len = 0
+                    open_mode = 'wb'
                 break
             except (compat_urllib_error.HTTPError, ) as err:
                 if (err.code < 500 or err.code >= 600) and err.code != 416:
@@ -94,6 +106,8 @@ class HttpFD(FileDownloader):
                             self._hook_progress({
                                 'filename': filename,
                                 'status': 'finished',
+                                'downloaded_bytes': resume_len,
+                                'total_bytes': resume_len,
                             })
                             return True
                         else:
@@ -102,7 +116,7 @@ class HttpFD(FileDownloader):
                             resume_len = 0
                             open_mode = 'wb'
                             break
-            except SocketError as e:
+            except socket.error as e:
                 if e.errno != errno.ECONNRESET:
                     # Connection reset is no problem, just retry
                     raise
@@ -137,7 +151,6 @@ class HttpFD(FileDownloader):
                 self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                 return False
 
-        data_len_str = format_bytes(data_len)
         byte_counter = 0 + resume_len
         block_size = self.params.get('buffersize', 1024)
         start = time.time()
@@ -196,20 +209,19 @@ class HttpFD(FileDownloader):
             # Progress message
             speed = self.calc_speed(start, now, byte_counter - resume_len)
             if data_len is None:
-                eta = percent = None
+                eta = None
             else:
-                percent = self.calc_percent(byte_counter, data_len)
                 eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
-            self.report_progress(percent, data_len_str, speed, eta)
 
             self._hook_progress({
+                'status': 'downloading',
                 'downloaded_bytes': byte_counter,
                 'total_bytes': data_len,
                 'tmpfilename': tmpfilename,
                 'filename': filename,
-                'status': 'downloading',
                 'eta': eta,
                 'speed': speed,
+                'elapsed': now - start,
             })
 
             if is_test and byte_counter == data_len:
@@ -221,7 +233,7 @@ class HttpFD(FileDownloader):
             return False
         if tmpfilename != '-':
             stream.close()
-        self.report_finish(data_len_str, (time.time() - start))
+
         if data_len is not None and byte_counter != data_len:
             raise ContentTooShortError(byte_counter, int(data_len))
         self.try_rename(tmpfilename, filename)
@@ -235,6 +247,7 @@ class HttpFD(FileDownloader):
             'total_bytes': byte_counter,
             'filename': filename,
             'status': 'finished',
+            'elapsed': time.time() - start,
         })
 
         return True