# youtube_dl/downloader/http.py

from __future__ import unicode_literals

import errno
import os
import re
import socket
import time

from .common import FileDownloader
from ..compat import compat_urllib_error
from ..utils import (
    ContentTooShortError,
    encodeFilename,
    sanitize_open,
    sanitized_Request,
    write_xattr,
    XAttrMetadataError,
    XAttrUnavailableError,
)
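# compat_urllib_error smooths over the urllib2 (Python 2) vs. urllib.error
# (Python 3) split; the remaining helpers come from youtube-dl's own utils.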


class HttpFD(FileDownloader):
    def real_download(self, filename, info_dict):
        url = info_dict['url']

        # A dict whose keys can also be accessed as attributes
        # (unset attributes simply read as None via dict.get)
        class DownloadContext(dict):
            __getattr__ = dict.get
            __setattr__ = dict.__setitem__
            __delattr__ = dict.__delitem__

        ctx = DownloadContext()
        ctx.filename = filename
        ctx.tmpfilename = self.temp_name(filename)
        ctx.stream = None
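        # ctx holds all per-download state (filename, tmpfilename, stream,
        # open_mode, resume_len, data) so that the nested helpers defined
        # below can read and update it without passing arguments around.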

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        add_headers = info_dict.get('http_headers')
        if add_headers:
            headers.update(add_headers)
        basic_request = sanitized_Request(url, None, headers)
        request = sanitized_Request(url, None, headers)

        is_test = self.params.get('test', False)

        if is_test:
            request.add_header('Range', 'bytes=0-%s' % str(self._TEST_FILE_SIZE - 1))
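            # HTTP Range is inclusive on both ends, so 'bytes=0-(N - 1)' requests
            # exactly the first N (= _TEST_FILE_SIZE) bytes of the resource.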

        ctx.open_mode = 'wb'
        ctx.resume_len = 0

        if self.params.get('continuedl', True):
            # Establish possible resume length
            if os.path.isfile(encodeFilename(ctx.tmpfilename)):
                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))

        count = 0
        retries = self.params.get('retries', 0)

        class SucceedDownload(Exception):
            # Raised when the file turns out to be fully downloaded already
            pass

        class RetryDownload(Exception):
            def __init__(self, source_error):
                self.source_error = source_error

        # establish_connection() opens the HTTP connection, negotiates resuming
        # via the Range / Content-Range headers and converts recoverable
        # failures into RetryDownload for the outer retry loop.
        def establish_connection():
            if ctx.resume_len != 0:
                self.report_resuming_byte(ctx.resume_len)
                request.add_header('Range', 'bytes=%d-' % ctx.resume_len)
                ctx.open_mode = 'ab'
            # Establish connection
            try:
                ctx.data = self.ydl.urlopen(request)
                # When trying to resume, the Content-Range HTTP header of the response
                # has to be checked to match the value of the requested Range header.
                # This is due to webservers that don't support resuming and serve the
                # whole file with no Content-Range set in the response despite the
                # requested Range (see
                # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
                if ctx.resume_len > 0:
                    content_range = ctx.data.headers.get('Content-Range')
                    if content_range:
                        content_range_m = re.search(r'bytes (\d+)-', content_range)
                        # Content-Range is present and matches requested Range, resume is possible
                        if content_range_m and ctx.resume_len == int(content_range_m.group(1)):
                            return
                    # Content-Range is either not present or invalid. Assuming the remote
                    # webserver is trying to send the whole file, resume is not possible,
                    # so the local file is wiped and the entire file is redownloaded
                    self.report_unable_to_resume()
                    ctx.resume_len = 0
                    ctx.open_mode = 'wb'
                return
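            # Error handling: HTTP errors outside the 5xx range are re-raised as
            # fatal, except 416 ("Requested range not satisfiable"), which usually
            # means resume_len already covers the whole file; 5xx errors and
            # connection resets become RetryDownload.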
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        ctx.data = self.ydl.urlopen(basic_request)
                        content_length = ctx.data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation of the above condition: in issue #175 it was revealed
                            # that YouTube sometimes adds or removes a few bytes from the end of
                            # the file, changing the file size slightly and causing problems for
                            # some users. So the file is considered completely downloaded if the
                            # reported size differs by less than 100 bytes from the one on the
                            # hard drive.
                            self.report_file_already_downloaded(ctx.filename)
                            self.try_rename(ctx.tmpfilename, ctx.filename)
                            self._hook_progress({
                                'filename': ctx.filename,
                                'status': 'finished',
                                'downloaded_bytes': ctx.resume_len,
                                'total_bytes': ctx.resume_len,
                            })
                            raise SucceedDownload()
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            ctx.resume_len = 0
                            ctx.open_mode = 'wb'
                            return
                raise RetryDownload(err)
            except socket.error as err:
                if err.errno != errno.ECONNRESET:
                    raise
                # Connection reset is no problem, just retry
                raise RetryDownload(err)

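        # download() performs the actual transfer: it reads blocks from
        # ctx.data, opens the destination file lazily on the first block,
        # enforces the filesize limits and the rate limit, and reports
        # progress through self._hook_progress(). Returns True on success.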
        def download():
            data_len = ctx.data.info().get('Content-length', None)

            # Range HTTP header may be ignored/unsupported by a webserver
            # (e.g. extractor/scivee.py, extractor/bambuser.py).
            # However, for a test we still would like to download just a piece of a file.
            # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control
            # the block size when downloading a file.
            if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE):
                data_len = self._TEST_FILE_SIZE

            if data_len is not None:
                data_len = int(data_len) + ctx.resume_len
                min_data_len = self.params.get('min_filesize')
                max_data_len = self.params.get('max_filesize')
                if min_data_len is not None and data_len < min_data_len:
                    self.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                    return False
                if max_data_len is not None and data_len > max_data_len:
                    self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                    return False

            byte_counter = 0 + ctx.resume_len
            block_size = self.params.get('buffersize', 1024)
            start = time.time()

            # measure time over whole while-loop, so slow_down() and best_block_size() work together properly
            now = None  # needed for slow_down() in the first loop run
            before = start  # start measuring

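            # retry() is used for transient socket errors inside the read loop:
            # it closes the partially written file (unless writing to stdout),
            # records the size already on disk as the new resume offset and
            # re-raises the error as RetryDownload.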
            def retry(e):
                if ctx.tmpfilename != '-':
                    ctx.stream.close()
                ctx.stream = None
                ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename))
                raise RetryDownload(e)

            while True:
                try:
                    # Download and write
                    data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter))
                # socket.timeout is a subclass of socket.error but may not have
                # errno set
                except socket.timeout as e:
                    retry(e)
                except socket.error as e:
                    if e.errno not in (errno.ECONNRESET, errno.ETIMEDOUT):
                        raise
                    retry(e)

                byte_counter += len(data_block)

                # exit loop when download is finished
                if len(data_block) == 0:
                    break

                # Open destination file just in time
                if ctx.stream is None:
                    try:
                        ctx.stream, ctx.tmpfilename = sanitize_open(
                            ctx.tmpfilename, ctx.open_mode)
                        assert ctx.stream is not None
                        ctx.filename = self.undo_temp_name(ctx.tmpfilename)
                        self.report_destination(ctx.filename)
                    except (OSError, IOError) as err:
                        self.report_error('unable to open for writing: %s' % str(err))
                        return False

                    if self.params.get('xattr_set_filesize', False) and data_len is not None:
                        try:
                            write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode('utf-8'))
                        except (XAttrUnavailableError, XAttrMetadataError) as err:
                            self.report_error('unable to set filesize xattr: %s' % str(err))

                try:
                    ctx.stream.write(data_block)
                except (IOError, OSError) as err:
                    self.to_stderr('\n')
                    self.report_error('unable to write data: %s' % str(err))
                    return False

                # Apply rate limit
                self.slow_down(start, now, byte_counter - ctx.resume_len)

                # end measuring of one loop run
                now = time.time()
                after = now

                # Adjust block size
                if not self.params.get('noresizebuffer', False):
                    block_size = self.best_block_size(after - before, len(data_block))
                before = after
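                # slow_down() above enforces the --limit-rate setting ('ratelimit'
                # in self.params); best_block_size() adapts the read size to the
                # observed throughput unless --no-resize-buffer is given.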

                # Progress message
                speed = self.calc_speed(start, now, byte_counter - ctx.resume_len)
                if data_len is None:
                    eta = None
                else:
                    eta = self.calc_eta(start, time.time(), data_len - ctx.resume_len, byte_counter - ctx.resume_len)

                self._hook_progress({
                    'status': 'downloading',
                    'downloaded_bytes': byte_counter,
                    'total_bytes': data_len,
                    'tmpfilename': ctx.tmpfilename,
                    'filename': ctx.filename,
                    'eta': eta,
                    'speed': speed,
                    'elapsed': now - start,
                })

                if is_test and byte_counter == data_len:
                    break

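            # The loop has ended: either the server closed the stream (empty read)
            # or the test-mode byte budget was reached. Validate what was written
            # before renaming the temporary file.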
            if ctx.stream is None:
                self.to_stderr('\n')
                self.report_error('Did not get any data blocks')
                return False
            if ctx.tmpfilename != '-':
                ctx.stream.close()

            if data_len is not None and byte_counter != data_len:
                err = ContentTooShortError(byte_counter, int(data_len))
                if count <= retries:
                    retry(err)
                raise err

            self.try_rename(ctx.tmpfilename, ctx.filename)

            # Update file modification time
            if self.params.get('updatetime', True):
                info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.info().get('last-modified', None))

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': byte_counter,
                'filename': ctx.filename,
                'status': 'finished',
                'elapsed': time.time() - start,
            })

            return True

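        # Outer retry loop: each attempt re-establishes the connection and runs
        # download(); RetryDownload restarts the attempt (up to --retries times),
        # SucceedDownload means the file was already complete.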
        while count <= retries:
            try:
                establish_connection()
                return download()
            except RetryDownload as e:
                count += 1
                if count <= retries:
                    self.report_retry(e.source_error, count, retries)
                continue
            except SucceedDownload:
                return True

        self.report_error('giving up after %s retries' % retries)
        return False
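
# Usage sketch (an illustration only, assuming the FileDownloader interface
# from .common): given a YoutubeDL instance `ydl` and an option dict `params`,
#
#     fd = HttpFD(ydl, params)                       # e.g. params = {'retries': 10}
#     ok = fd.download(filename, {'url': file_url})  # download() wraps real_download()
#
# where `filename`, `file_url` and the params shown are placeholders.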