]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/downloader/http.py
   1 from __future__ 
import unicode_literals
 
  10 from .common 
import FileDownloader
 
  11 from ..compat 
import ( 
  23     XAttrUnavailableError
, 
  27 class HttpFD(FileDownloader
): 
  28     def real_download(self
, filename
, info_dict
): 
  29         url 
= info_dict
['url'] 
  31         class DownloadContext(dict): 
  32             __getattr__ 
= dict.get
 
  33             __setattr__ 
= dict.__setitem
__ 
  34             __delattr__ 
= dict.__delitem
__ 
  36         ctx 
= DownloadContext() 
  37         ctx
.filename 
= filename
 
  38         ctx
.tmpfilename 
= self
.temp_name(filename
) 
  41         # Do not include the Accept-Encoding header 
  42         headers 
= {'Youtubedl-no-compression': 'True'} 
  43         add_headers 
= info_dict
.get('http_headers') 
  45             headers
.update(add_headers
) 
  47         is_test 
= self
.params
.get('test', False) 
  48         chunk_size 
= self
._TEST
_FILE
_SIZE 
if is_test 
else ( 
  49             info_dict
.get('downloader_options', {}).get('http_chunk_size') 
  50             or self
.params
.get('http_chunk_size') or 0) 
  55         ctx
.block_size 
= self
.params
.get('buffersize', 1024) 
  56         ctx
.start_time 
= time
.time() 
  59         if self
.params
.get('continuedl', True): 
  60             # Establish possible resume length 
  61             if os
.path
.isfile(encodeFilename(ctx
.tmpfilename
)): 
  62                 ctx
.resume_len 
= os
.path
.getsize( 
  63                     encodeFilename(ctx
.tmpfilename
)) 
  65         ctx
.is_resume 
= ctx
.resume_len 
> 0 
  68         retries 
= self
.params
.get('retries', 0) 
  70         class SucceedDownload(Exception): 
  73         class RetryDownload(Exception): 
  74             def __init__(self
, source_error
): 
  75                 self
.source_error 
= source_error
 
  77         class NextFragment(Exception): 
  80         def set_range(req
, start
, end
): 
  81             range_header 
= 'bytes=%d-' % start
 
  83                 range_header 
+= compat_str(end
) 
  84             req
.add_header('Range', range_header
) 
  86         def establish_connection(): 
  87             ctx
.chunk_size 
= (random
.randint(int(chunk_size 
* 0.95), chunk_size
) 
  88                               if not is_test 
and chunk_size 
else chunk_size
) 
  89             if ctx
.resume_len 
> 0: 
  90                 range_start 
= ctx
.resume_len
 
  92                     self
.report_resuming_byte(ctx
.resume_len
) 
  94             elif ctx
.chunk_size 
> 0: 
  99             range_end 
= range_start 
+ ctx
.chunk_size 
- 1 if ctx
.chunk_size 
else None 
 100             if range_end 
and ctx
.data_len 
is not None and range_end 
>= ctx
.data_len
: 
 101                 range_end 
= ctx
.data_len 
- 1 
 102             has_range 
= range_start 
is not None 
 103             ctx
.has_range 
= has_range
 
 104             request 
= sanitized_Request(url
, None, headers
) 
 106                 set_range(request
, range_start
, range_end
) 
 107             # Establish connection 
 109                 ctx
.data 
= self
.ydl
.urlopen(request
) 
 110                 # When trying to resume, Content-Range HTTP header of response has to be checked 
 111                 # to match the value of requested Range HTTP header. This is due to a webservers 
 112                 # that don't support resuming and serve a whole file with no Content-Range 
 113                 # set in response despite of requested Range (see 
 114                 # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) 
 116                     content_range 
= ctx
.data
.headers
.get('Content-Range') 
 118                         content_range_m 
= re
.search(r
'bytes (\d+)-(\d+)?(?:/(\d+))?', content_range
) 
 119                         # Content-Range is present and matches requested Range, resume is possible 
 121                             if range_start 
== int(content_range_m
.group(1)): 
 122                                 content_range_end 
= int_or_none(content_range_m
.group(2)) 
 123                                 content_len 
= int_or_none(content_range_m
.group(3)) 
 124                                 accept_content_len 
= ( 
 125                                     # Non-chunked download 
 127                                     # Chunked download and requested piece or 
 128                                     # its part is promised to be served 
 129                                     or content_range_end 
== range_end
 
 130                                     or content_len 
< range_end
) 
 131                                 if accept_content_len
: 
 132                                     ctx
.data_len 
= content_len
 
 134                     # Content-Range is either not present or invalid. Assuming remote webserver is 
 135                     # trying to send the whole file, resume is not possible, so wiping the local file 
 136                     # and performing entire redownload 
 137                     self
.report_unable_to_resume() 
 140                 ctx
.data_len 
= int_or_none(ctx
.data
.info().get('Content-length', None)) 
 142             except (compat_urllib_error
.HTTPError
, ) as err
: 
 144                     # Unable to resume (requested range not satisfiable) 
 146                         # Open the connection again without the range header 
 147                         ctx
.data 
= self
.ydl
.urlopen( 
 148                             sanitized_Request(url
, None, headers
)) 
 149                         content_length 
= ctx
.data
.info()['Content-Length'] 
 150                     except (compat_urllib_error
.HTTPError
, ) as err
: 
 151                         if err
.code 
< 500 or err
.code 
>= 600: 
 154                         # Examine the reported length 
 155                         if (content_length 
is not None 
 156                                 and (ctx
.resume_len 
- 100 < int(content_length
) < ctx
.resume_len 
+ 100)): 
 157                             # The file had already been fully downloaded. 
 158                             # Explanation to the above condition: in issue #175 it was revealed that 
 159                             # YouTube sometimes adds or removes a few bytes from the end of the file, 
 160                             # changing the file size slightly and causing problems for some users. So 
 161                             # I decided to implement a suggested change and consider the file 
 162                             # completely downloaded if the file size differs less than 100 bytes from 
 163                             # the one in the hard drive. 
 164                             self
.report_file_already_downloaded(ctx
.filename
) 
 165                             self
.try_rename(ctx
.tmpfilename
, ctx
.filename
) 
 166                             self
._hook
_progress
({ 
 167                                 'filename': ctx
.filename
, 
 168                                 'status': 'finished', 
 169                                 'downloaded_bytes': ctx
.resume_len
, 
 170                                 'total_bytes': ctx
.resume_len
, 
 172                             raise SucceedDownload() 
 174                             # The length does not match, we start the download over 
 175                             self
.report_unable_to_resume() 
 179                 elif err
.code 
< 500 or err
.code 
>= 600: 
 180                     # Unexpected HTTP error 
 182                 raise RetryDownload(err
) 
 183             except socket
.error 
as err
: 
 184                 if err
.errno 
!= errno
.ECONNRESET
: 
 185                     # Connection reset is no problem, just retry 
 187                 raise RetryDownload(err
) 
 190             data_len 
= ctx
.data
.info().get('Content-length', None) 
 192             # Range HTTP header may be ignored/unsupported by a webserver 
 193             # (e.g. extractor/scivee.py, extractor/bambuser.py). 
 194             # However, for a test we still would like to download just a piece of a file. 
 195             # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control 
 196             # block size when downloading a file. 
 197             if is_test 
and (data_len 
is None or int(data_len
) > self
._TEST
_FILE
_SIZE
): 
 198                 data_len 
= self
._TEST
_FILE
_SIZE
 
 200             if data_len 
is not None: 
 201                 data_len 
= int(data_len
) + ctx
.resume_len
 
 202                 min_data_len 
= self
.params
.get('min_filesize') 
 203                 max_data_len 
= self
.params
.get('max_filesize') 
 204                 if min_data_len 
is not None and data_len 
< min_data_len
: 
 205                     self
.to_screen('\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len
, min_data_len
)) 
 207                 if max_data_len 
is not None and data_len 
> max_data_len
: 
 208                     self
.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len
, max_data_len
)) 
 211             byte_counter 
= 0 + ctx
.resume_len
 
 212             block_size 
= ctx
.block_size
 
 215             # measure time over whole while-loop, so slow_down() and best_block_size() work together properly 
 216             now 
= None  # needed for slow_down() in the first loop run 
 217             before 
= start  
# start measuring 
 220                 to_stdout 
= ctx
.tmpfilename 
== '-' 
 224                 ctx
.resume_len 
= byte_counter 
if to_stdout 
else os
.path
.getsize(encodeFilename(ctx
.tmpfilename
)) 
 225                 raise RetryDownload(e
) 
 230                     data_block 
= ctx
.data
.read(block_size 
if data_len 
is None else min(block_size
, data_len 
- byte_counter
)) 
 231                 # socket.timeout is a subclass of socket.error but may not have 
 233                 except socket
.timeout 
as e
: 
 235                 except socket
.error 
as e
: 
 236                     if e
.errno 
not in (errno
.ECONNRESET
, errno
.ETIMEDOUT
): 
 240                 byte_counter 
+= len(data_block
) 
 242                 # exit loop when download is finished 
 243                 if len(data_block
) == 0: 
 246                 # Open destination file just in time 
 247                 if ctx
.stream 
is None: 
 249                         ctx
.stream
, ctx
.tmpfilename 
= sanitize_open( 
 250                             ctx
.tmpfilename
, ctx
.open_mode
) 
 251                         assert ctx
.stream 
is not None 
 252                         ctx
.filename 
= self
.undo_temp_name(ctx
.tmpfilename
) 
 253                         self
.report_destination(ctx
.filename
) 
 254                     except (OSError, IOError) as err
: 
 255                         self
.report_error('unable to open for writing: %s' % str(err
)) 
 258                     if self
.params
.get('xattr_set_filesize', False) and data_len 
is not None: 
 260                             write_xattr(ctx
.tmpfilename
, 'user.ytdl.filesize', str(data_len
).encode('utf-8')) 
 261                         except (XAttrUnavailableError
, XAttrMetadataError
) as err
: 
 262                             self
.report_error('unable to set filesize xattr: %s' % str(err
)) 
 265                     ctx
.stream
.write(data_block
) 
 266                 except (IOError, OSError) as err
: 
 268                     self
.report_error('unable to write data: %s' % str(err
)) 
 272                 self
.slow_down(start
, now
, byte_counter 
- ctx
.resume_len
) 
 274                 # end measuring of one loop run 
 279                 if not self
.params
.get('noresizebuffer', False): 
 280                     block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
 285                 speed 
= self
.calc_speed(start
, now
, byte_counter 
- ctx
.resume_len
) 
 286                 if ctx
.data_len 
is None: 
 289                     eta 
= self
.calc_eta(start
, time
.time(), ctx
.data_len 
- ctx
.resume_len
, byte_counter 
- ctx
.resume_len
) 
 291                 self
._hook
_progress
({ 
 292                     'status': 'downloading', 
 293                     'downloaded_bytes': byte_counter
, 
 294                     'total_bytes': ctx
.data_len
, 
 295                     'tmpfilename': ctx
.tmpfilename
, 
 296                     'filename': ctx
.filename
, 
 299                     'elapsed': now 
- ctx
.start_time
, 
 302                 if data_len 
is not None and byte_counter 
== data_len
: 
 305             if not is_test 
and ctx
.chunk_size 
and ctx
.data_len 
is not None and byte_counter 
< ctx
.data_len
: 
 306                 ctx
.resume_len 
= byte_counter
 
 307                 # ctx.block_size = block_size 
 310             if ctx
.stream 
is None: 
 312                 self
.report_error('Did not get any data blocks') 
 314             if ctx
.tmpfilename 
!= '-': 
 317             if data_len 
is not None and byte_counter 
!= data_len
: 
 318                 err 
= ContentTooShortError(byte_counter
, int(data_len
)) 
 323             self
.try_rename(ctx
.tmpfilename
, ctx
.filename
) 
 325             # Update file modification time 
 326             if self
.params
.get('updatetime', True): 
 327                 info_dict
['filetime'] = self
.try_utime(ctx
.filename
, ctx
.data
.info().get('last-modified', None)) 
 329             self
._hook
_progress
({ 
 330                 'downloaded_bytes': byte_counter
, 
 331                 'total_bytes': byte_counter
, 
 332                 'filename': ctx
.filename
, 
 333                 'status': 'finished', 
 334                 'elapsed': time
.time() - ctx
.start_time
, 
 339         while count 
<= retries
: 
 341                 establish_connection() 
 343             except RetryDownload 
as e
: 
 346                     self
.report_retry(e
.source_error
, count
, retries
) 
 350             except SucceedDownload
: 
 353         self
.report_error('giving up after %s retries' % retries
)