]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/FileDownloader.py
445f3e85e6813fe82fc20bc74d3e33fc83d997b2
  15 class FileDownloader(object): 
  16     """File Downloader class. 
  18     File downloader objects are the ones responsible of downloading the 
  19     actual video file and writing it to disk. 
  21     File downloaders accept a lot of parameters. In order not to saturate 
  22     the object constructor with arguments, it receives a dictionary of 
  27     verbose:           Print additional info to stdout. 
  28     quiet:             Do not print messages to stdout. 
  29     ratelimit:         Download speed limit, in bytes/sec. 
  30     retries:           Number of times to retry for HTTP error 5xx 
  31     buffersize:        Size of download buffer in bytes. 
  32     noresizebuffer:    Do not automatically resize the download buffer. 
  33     continuedl:        Try to continue downloads if possible. 
  34     noprogress:        Do not print the progress bar. 
  35     logtostderr:       Log messages to stderr instead of stdout. 
  36     consoletitle:      Display progress in console window's titlebar. 
  37     nopart:            Do not use temporary .part files. 
  38     updatetime:        Use the Last-modified header to set output file timestamps. 
  39     test:              Download only first bytes to test the downloader. 
  40     min_filesize:      Skip files smaller than this size 
  41     max_filesize:      Skip files larger than this size 
  46     def __init__(self
, ydl
, params
): 
  47         """Create a FileDownloader object with the given options.""" 
  49         self
._progress
_hooks 
= [] 
  53     def format_bytes(bytes): 
  56         if type(bytes) is str: 
  61             exponent 
= int(math
.log(bytes, 1024.0)) 
  62         suffix 
= ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent
] 
  63         converted 
= float(bytes) / float(1024 ** exponent
) 
  64         return '%.2f%s' % (converted
, suffix
) 
  67     def calc_percent(byte_counter
, data_len
): 
  70         return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0)) 
  73     def calc_eta(start
, now
, total
, current
): 
  77         if current 
== 0 or dif 
< 0.001: # One millisecond 
  79         rate 
= float(current
) / dif
 
  80         eta 
= int((float(total
) - float(current
)) / rate
) 
  81         (eta_mins
, eta_secs
) = divmod(eta
, 60) 
  84         return '%02d:%02d' % (eta_mins
, eta_secs
) 
  87     def calc_speed(start
, now
, bytes): 
  89         if bytes == 0 or dif 
< 0.001: # One millisecond 
  90             return '%10s' % '---b/s' 
  91         return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
)) 
  94     def best_block_size(elapsed_time
, bytes): 
  95         new_min 
= max(bytes / 2.0, 1.0) 
  96         new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
  97         if elapsed_time 
< 0.001: 
  99         rate 
= bytes / elapsed_time
 
 107     def parse_bytes(bytestr
): 
 108         """Parse a string indicating a byte quantity into an integer.""" 
 109         matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 112         number 
= float(matchobj
.group(1)) 
 113         multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 114         return int(round(number 
* multiplier
)) 
 116     def to_screen(self
, *args
, **kargs
): 
 117         self
.ydl
.to_screen(*args
, **kargs
) 
 119     def to_stderr(self
, message
): 
 120         self
.ydl
.to_screen(message
) 
 122     def to_cons_title(self
, message
): 
 123         """Set console/terminal window title to message.""" 
 124         if not self
.params
.get('consoletitle', False): 
 126         if os
.name 
== 'nt' and ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 127             # c_wchar_p() might not be necessary if `message` is 
 128             # already of type unicode() 
 129             ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 130         elif 'TERM' in os
.environ
: 
 131             self
.to_screen('\033]0;%s\007' % message
, skip_eol
=True) 
 133     def trouble(self
, *args
, **kargs
): 
 134         self
.ydl
.trouble(*args
, **kargs
) 
 136     def report_warning(self
, *args
, **kargs
): 
 137         self
.ydl
.report_warning(*args
, **kargs
) 
 139     def report_error(self
, *args
, **kargs
): 
 140         self
.ydl
.error(*args
, **kargs
) 
 142     def slow_down(self
, start_time
, byte_counter
): 
 143         """Sleep if the download speed is over the rate limit.""" 
 144         rate_limit 
= self
.params
.get('ratelimit', None) 
 145         if rate_limit 
is None or byte_counter 
== 0: 
 148         elapsed 
= now 
- start_time
 
 151         speed 
= float(byte_counter
) / elapsed
 
 152         if speed 
> rate_limit
: 
 153             time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 155     def temp_name(self
, filename
): 
 156         """Returns a temporary filename for the given filename.""" 
 157         if self
.params
.get('nopart', False) or filename 
== u
'-' or \
 
 158                 (os
.path
.exists(encodeFilename(filename
)) and not os
.path
.isfile(encodeFilename(filename
))): 
 160         return filename 
+ u
'.part' 
 162     def undo_temp_name(self
, filename
): 
 163         if filename
.endswith(u
'.part'): 
 164             return filename
[:-len(u
'.part')] 
 167     def try_rename(self
, old_filename
, new_filename
): 
 169             if old_filename 
== new_filename
: 
 171             os
.rename(encodeFilename(old_filename
), encodeFilename(new_filename
)) 
 172         except (IOError, OSError) as err
: 
 173             self
.report_error(u
'unable to rename file') 
 175     def try_utime(self
, filename
, last_modified_hdr
): 
 176         """Try to set the last-modified time of the given file.""" 
 177         if last_modified_hdr 
is None: 
 179         if not os
.path
.isfile(encodeFilename(filename
)): 
 181         timestr 
= last_modified_hdr
 
 184         filetime 
= timeconvert(timestr
) 
 187         # Ignore obviously invalid dates 
 191             os
.utime(filename
, (time
.time(), filetime
)) 
 196     def report_destination(self
, filename
): 
 197         """Report destination filename.""" 
 198         self
.to_screen(u
'[download] Destination: ' + filename
) 
 200     def report_progress(self
, percent_str
, data_len_str
, speed_str
, eta_str
): 
 201         """Report download progress.""" 
 202         if self
.params
.get('noprogress', False): 
 204         clear_line 
= (u
'\x1b[K' if sys
.stderr
.isatty() and os
.name 
!= 'nt' else u
'') 
 205         if self
.params
.get('progress_with_newline', False): 
 206             self
.to_screen(u
'[download] %s of %s at %s ETA %s' % 
 207                 (percent_str
, data_len_str
, speed_str
, eta_str
)) 
 209             self
.to_screen(u
'\r%s[download] %s of %s at %s ETA %s' % 
 210                 (clear_line
, percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 211         self
.to_cons_title(u
'youtube-dl - %s of %s at %s ETA %s' % 
 212                 (percent_str
.strip(), data_len_str
.strip(), speed_str
.strip(), eta_str
.strip())) 
 214     def report_resuming_byte(self
, resume_len
): 
 215         """Report attempt to resume at given byte.""" 
 216         self
.to_screen(u
'[download] Resuming download at byte %s' % resume_len
) 
 218     def report_retry(self
, count
, retries
): 
 219         """Report retry in case of HTTP error 5xx""" 
 220         self
.to_screen(u
'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count
, retries
)) 
 222     def report_file_already_downloaded(self
, file_name
): 
 223         """Report file has already been fully downloaded.""" 
 225             self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 226         except (UnicodeEncodeError) as err
: 
 227             self
.to_screen(u
'[download] The file has already been downloaded') 
 229     def report_unable_to_resume(self
): 
 230         """Report it was impossible to resume download.""" 
 231         self
.to_screen(u
'[download] Unable to resume') 
 233     def report_finish(self
): 
 234         """Report download finished.""" 
 235         if self
.params
.get('noprogress', False): 
 236             self
.to_screen(u
'[download] Download completed') 
 240     def _download_with_rtmpdump(self
, filename
, url
, player_url
, page_url
, play_path
, tc_url
): 
 241         self
.report_destination(filename
) 
 242         tmpfilename 
= self
.temp_name(filename
) 
 244         # Check for rtmpdump first 
 246             subprocess
.call(['rtmpdump', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 247         except (OSError, IOError): 
 248             self
.report_error(u
'RTMP download detected but "rtmpdump" could not be run') 
 250         verbosity_option 
= '--verbose' if self
.params
.get('verbose', False) else '--quiet' 
 252         # Download using rtmpdump. rtmpdump returns exit code 2 when 
 253         # the connection was interrumpted and resuming appears to be 
 254         # possible. This is part of rtmpdump's normal usage, AFAIK. 
 255         basic_args 
= ['rtmpdump', verbosity_option
, '-r', url
, '-o', tmpfilename
] 
 256         if player_url 
is not None: 
 257             basic_args 
+= ['--swfVfy', player_url
] 
 258         if page_url 
is not None: 
 259             basic_args 
+= ['--pageUrl', page_url
] 
 260         if play_path 
is not None: 
 261             basic_args 
+= ['--playpath', play_path
] 
 262         if tc_url 
is not None: 
 263             basic_args 
+= ['--tcUrl', url
] 
 264         args 
= basic_args 
+ [[], ['--resume', '--skip', '1']][self
.params
.get('continuedl', False)] 
 265         if self
.params
.get('verbose', False): 
 268                 shell_quote 
= lambda args
: ' '.join(map(pipes
.quote
, args
)) 
 271             self
.to_screen(u
'[debug] rtmpdump command line: ' + shell_quote(args
)) 
 272         retval 
= subprocess
.call(args
) 
 273         while retval 
== 2 or retval 
== 1: 
 274             prevsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 275             self
.to_screen(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 276             time
.sleep(5.0) # This seems to be needed 
 277             retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 278             cursize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 279             if prevsize 
== cursize 
and retval 
== 1: 
 281              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those 
 282             if prevsize 
== cursize 
and retval 
== 2 and cursize 
> 1024: 
 283                 self
.to_screen(u
'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') 
 287             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 288             self
.to_screen(u
'\r[rtmpdump] %s bytes' % fsize
) 
 289             self
.try_rename(tmpfilename
, filename
) 
 290             self
._hook
_progress
({ 
 291                 'downloaded_bytes': fsize
, 
 292                 'total_bytes': fsize
, 
 293                 'filename': filename
, 
 294                 'status': 'finished', 
 298             self
.to_stderr(u
"\n") 
 299             self
.report_error(u
'rtmpdump exited with code %d' % retval
) 
 302     def _download_with_mplayer(self
, filename
, url
): 
 303         self
.report_destination(filename
) 
 304         tmpfilename 
= self
.temp_name(filename
) 
 306         args 
= ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename
, url
] 
 307         # Check for mplayer first 
 309             subprocess
.call(['mplayer', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 310         except (OSError, IOError): 
 311             self
.report_error(u
'MMS or RTSP download detected but "%s" could not be run' % args
[0] ) 
 314         # Download using mplayer.  
 315         retval 
= subprocess
.call(args
) 
 317             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 318             self
.to_screen(u
'\r[%s] %s bytes' % (args
[0], fsize
)) 
 319             self
.try_rename(tmpfilename
, filename
) 
 320             self
._hook
_progress
({ 
 321                 'downloaded_bytes': fsize
, 
 322                 'total_bytes': fsize
, 
 323                 'filename': filename
, 
 324                 'status': 'finished', 
 328             self
.to_stderr(u
"\n") 
 329             self
.report_error(u
'mplayer exited with code %d' % retval
) 
 333     def _do_download(self
, filename
, info_dict
): 
 334         url 
= info_dict
['url'] 
 336         # Check file already present 
 337         if self
.params
.get('continuedl', False) and os
.path
.isfile(encodeFilename(filename
)) and not self
.params
.get('nopart', False): 
 338             self
.report_file_already_downloaded(filename
) 
 339             self
._hook
_progress
({ 
 340                 'filename': filename
, 
 341                 'status': 'finished', 
 345         # Attempt to download using rtmpdump 
 346         if url
.startswith('rtmp'): 
 347             return self
._download
_with
_rtmpdump
(filename
, url
, 
 348                                                 info_dict
.get('player_url', None), 
 349                                                 info_dict
.get('page_url', None), 
 350                                                 info_dict
.get('play_path', None), 
 351                                                 info_dict
.get('tc_url', None)) 
 353         # Attempt to download using mplayer 
 354         if url
.startswith('mms') or url
.startswith('rtsp'): 
 355             return self
._download
_with
_mplayer
(filename
, url
) 
 357         tmpfilename 
= self
.temp_name(filename
) 
 360         # Do not include the Accept-Encoding header 
 361         headers 
= {'Youtubedl-no-compression': 'True'} 
 362         if 'user_agent' in info_dict
: 
 363             headers
['Youtubedl-user-agent'] = info_dict
['user_agent'] 
 364         basic_request 
= compat_urllib_request
.Request(url
, None, headers
) 
 365         request 
= compat_urllib_request
.Request(url
, None, headers
) 
 367         if self
.params
.get('test', False): 
 368             request
.add_header('Range','bytes=0-10240') 
 370         # Establish possible resume length 
 371         if os
.path
.isfile(encodeFilename(tmpfilename
)): 
 372             resume_len 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 378             if self
.params
.get('continuedl', False): 
 379                 self
.report_resuming_byte(resume_len
) 
 380                 request
.add_header('Range','bytes=%d-' % resume_len
) 
 386         retries 
= self
.params
.get('retries', 0) 
 387         while count 
<= retries
: 
 388             # Establish connection 
 390                 if count 
== 0 and 'urlhandle' in info_dict
: 
 391                     data 
= info_dict
['urlhandle'] 
 392                 data 
= compat_urllib_request
.urlopen(request
) 
 394             except (compat_urllib_error
.HTTPError
, ) as err
: 
 395                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 396                     # Unexpected HTTP error 
 398                 elif err
.code 
== 416: 
 399                     # Unable to resume (requested range not satisfiable) 
 401                         # Open the connection again without the range header 
 402                         data 
= compat_urllib_request
.urlopen(basic_request
) 
 403                         content_length 
= data
.info()['Content-Length'] 
 404                     except (compat_urllib_error
.HTTPError
, ) as err
: 
 405                         if err
.code 
< 500 or err
.code 
>= 600: 
 408                         # Examine the reported length 
 409                         if (content_length 
is not None and 
 410                                 (resume_len 
- 100 < int(content_length
) < resume_len 
+ 100)): 
 411                             # The file had already been fully downloaded. 
 412                             # Explanation to the above condition: in issue #175 it was revealed that 
 413                             # YouTube sometimes adds or removes a few bytes from the end of the file, 
 414                             # changing the file size slightly and causing problems for some users. So 
 415                             # I decided to implement a suggested change and consider the file 
 416                             # completely downloaded if the file size differs less than 100 bytes from 
 417                             # the one in the hard drive. 
 418                             self
.report_file_already_downloaded(filename
) 
 419                             self
.try_rename(tmpfilename
, filename
) 
 420                             self
._hook
_progress
({ 
 421                                 'filename': filename
, 
 422                                 'status': 'finished', 
 426                             # The length does not match, we start the download over 
 427                             self
.report_unable_to_resume() 
 433                 self
.report_retry(count
, retries
) 
 436             self
.report_error(u
'giving up after %s retries' % retries
) 
 439         data_len 
= data
.info().get('Content-length', None) 
 440         if data_len 
is not None: 
 441             data_len 
= int(data_len
) + resume_len
 
 442             min_data_len 
= self
.params
.get("min_filesize", None) 
 443             max_data_len 
=  self
.params
.get("max_filesize", None) 
 444             if min_data_len 
is not None and data_len 
< min_data_len
: 
 445                 self
.to_screen(u
'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len
, min_data_len
)) 
 447             if max_data_len 
is not None and data_len 
> max_data_len
: 
 448                 self
.to_screen(u
'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len
, max_data_len
)) 
 451         data_len_str 
= self
.format_bytes(data_len
) 
 452         byte_counter 
= 0 + resume_len
 
 453         block_size 
= self
.params
.get('buffersize', 1024) 
 458             data_block 
= data
.read(block_size
) 
 460             if len(data_block
) == 0: 
 462             byte_counter 
+= len(data_block
) 
 464             # Open file just in time 
 467                     (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
 468                     assert stream 
is not None 
 469                     filename 
= self
.undo_temp_name(tmpfilename
) 
 470                     self
.report_destination(filename
) 
 471                 except (OSError, IOError) as err
: 
 472                     self
.report_error(u
'unable to open for writing: %s' % str(err
)) 
 475                 stream
.write(data_block
) 
 476             except (IOError, OSError) as err
: 
 477                 self
.to_stderr(u
"\n") 
 478                 self
.report_error(u
'unable to write data: %s' % str(err
)) 
 480             if not self
.params
.get('noresizebuffer', False): 
 481                 block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
 484             speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
 486                 self
.report_progress('Unknown %', data_len_str
, speed_str
, 'Unknown ETA') 
 488                 percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
 489                 eta_str 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
 490                 self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
 492             self
._hook
_progress
({ 
 493                 'downloaded_bytes': byte_counter
, 
 494                 'total_bytes': data_len
, 
 495                 'tmpfilename': tmpfilename
, 
 496                 'filename': filename
, 
 497                 'status': 'downloading', 
 501             self
.slow_down(start
, byte_counter 
- resume_len
) 
 504             self
.to_stderr(u
"\n") 
 505             self
.report_error(u
'Did not get any data blocks') 
 509         if data_len 
is not None and byte_counter 
!= data_len
: 
 510             raise ContentTooShortError(byte_counter
, int(data_len
)) 
 511         self
.try_rename(tmpfilename
, filename
) 
 513         # Update file modification time 
 514         if self
.params
.get('updatetime', True): 
 515             info_dict
['filetime'] = self
.try_utime(filename
, data
.info().get('last-modified', None)) 
 517         self
._hook
_progress
({ 
 518             'downloaded_bytes': byte_counter
, 
 519             'total_bytes': byte_counter
, 
 520             'filename': filename
, 
 521             'status': 'finished', 
 526     def _hook_progress(self
, status
): 
 527         for ph 
in self
._progress
_hooks
: 
 530     def add_progress_hook(self
, ph
): 
 531         """ ph gets called on download progress, with a dictionary with the entries 
 532         * filename: The final filename 
 533         * status: One of "downloading" and "finished" 
 535         It can also have some of the following entries: 
 537         * downloaded_bytes: Bytes on disks 
 538         * total_bytes: Total bytes, None if unknown 
 539         * tmpfilename: The filename we're currently writing to 
 541         Hooks are guaranteed to be called at least once (with status "finished") 
 542         if the download is successful. 
 544         self
._progress
_hooks
.append(ph
)