]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/FileDownloader.py
d6673fd3ab88d543086417f903cf2ff7d4019944
  15 class FileDownloader(object): 
  16     """File Downloader class. 
  18     File downloader objects are the ones responsible for downloading the 
  19     actual video file and writing it to disk. 
  21     File downloaders accept a lot of parameters. In order not to saturate 
  22     the object constructor with arguments, it receives a dictionary of 
  27     verbose:           Print additional info to stdout. 
  28     quiet:             Do not print messages to stdout. 
  29     ratelimit:         Download speed limit, in bytes/sec. 
  30     retries:           Number of times to retry for HTTP error 5xx 
  31     buffersize:        Size of download buffer in bytes. 
  32     noresizebuffer:    Do not automatically resize the download buffer. 
  33     continuedl:        Try to continue downloads if possible. 
  34     noprogress:        Do not print the progress bar. 
  35     logtostderr:       Log messages to stderr instead of stdout. 
  36     consoletitle:      Display progress in console window's titlebar. 
  37     nopart:            Do not use temporary .part files. 
  38     updatetime:        Use the Last-modified header to set output file timestamps. 
  39     test:              Download only first bytes to test the downloader. 
  40     min_filesize:      Skip files smaller than this size 
  41     max_filesize:      Skip files larger than this size 
  46     def __init__(self
, ydl
, params
): 
  47         """Create a FileDownloader object with the given options.""" 
  49         self
._progress
_hooks 
= [] 
  53     def format_bytes(bytes): 
  56         if type(bytes) is str: 
  61             exponent 
= int(math
.log(bytes, 1024.0)) 
  62         suffix 
= ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent
] 
  63         converted 
= float(bytes) / float(1024 ** exponent
) 
  64         return '%.2f%s' % (converted
, suffix
) 
  67     def format_seconds(seconds
): 
  68         (mins
, secs
) = divmod(seconds
, 60) 
  69         (hours
, mins
) = divmod(mins
, 60) 
  73             return '%02d:%02d' % (mins
, secs
) 
  75             return '%02d:%02d:%02d' % (hours
, mins
, secs
) 
  78     def calc_percent(byte_counter
, data_len
): 
  81         return float(byte_counter
) / float(data_len
) * 100.0 
  84     def format_percent(percent
): 
  87         return '%6s' % ('%3.1f%%' % percent
) 
  90     def calc_eta(start
, now
, total
, current
): 
  94         if current 
== 0 or dif 
< 0.001: # One millisecond 
  96         rate 
= float(current
) / dif
 
  97         return int((float(total
) - float(current
)) / rate
) 
 103         return FileDownloader
.format_seconds(eta
) 
 106     def calc_speed(start
, now
, bytes): 
 108         if bytes == 0 or dif 
< 0.001: # One millisecond 
 110         return float(bytes) / dif
 
 113     def format_speed(speed
): 
 115             return '%10s' % '---b/s' 
 116         return '%10s' % ('%s/s' % FileDownloader
.format_bytes(speed
)) 
 119     def best_block_size(elapsed_time
, bytes): 
 120         new_min 
= max(bytes / 2.0, 1.0) 
 121         new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
 122         if elapsed_time 
< 0.001: 
 124         rate 
= bytes / elapsed_time
 
 132     def parse_bytes(bytestr
): 
 133         """Parse a string indicating a byte quantity into an integer.""" 
 134         matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 137         number 
= float(matchobj
.group(1)) 
 138         multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 139         return int(round(number 
* multiplier
)) 
 def to_screen(self, *args, **kargs):
     """Forward a screen message to ``self.ydl.to_screen``.

     Every positional and keyword argument is passed through unchanged,
     so the accepted arguments are whatever ``self.ydl.to_screen`` takes.
     """
     self.ydl.to_screen(*args, **kargs)
 def to_stderr(self, message):
     """Emit *message* by calling ``self.ydl.to_screen(message)``.

     NOTE(review): despite the method name, the message is routed through
     ``to_screen`` of ``self.ydl``; confirm whether a dedicated stderr
     writer was intended before changing this.
     """
     self.ydl.to_screen(message)
 147     def to_cons_title(self
, message
): 
 148         """Set console/terminal window title to message.""" 
 149         if not self
.params
.get('consoletitle', False): 
 151         if os
.name 
== 'nt' and ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 152             # c_wchar_p() might not be necessary if `message` is 
 153             # already of type unicode() 
 154             ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 155         elif 'TERM' in os
.environ
: 
 156             self
.to_screen('\033]0;%s\007' % message
, skip_eol
=True) 
 def trouble(self, *args, **kargs):
     """Forward a trouble report to ``self.ydl.trouble``.

     All positional and keyword arguments are passed through unchanged.
     """
     self.ydl.trouble(*args, **kargs)
 def report_warning(self, *args, **kargs):
     """Forward a warning to ``self.ydl.report_warning``.

     All positional and keyword arguments are passed through unchanged.
     """
     self.ydl.report_warning(*args, **kargs)
 def report_error(self, *args, **kargs):
     """Forward an error report to ``self.ydl.report_error``.

     All positional and keyword arguments are passed through unchanged.
     """
     self.ydl.report_error(*args, **kargs)
 167     def slow_down(self
, start_time
, byte_counter
): 
 168         """Sleep if the download speed is over the rate limit.""" 
 169         rate_limit 
= self
.params
.get('ratelimit', None) 
 170         if rate_limit 
is None or byte_counter 
== 0: 
 173         elapsed 
= now 
- start_time
 
 176         speed 
= float(byte_counter
) / elapsed
 
 177         if speed 
> rate_limit
: 
 178             time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 180     def temp_name(self
, filename
): 
 181         """Returns a temporary filename for the given filename.""" 
 182         if self
.params
.get('nopart', False) or filename 
== u
'-' or \
 
 183                 (os
.path
.exists(encodeFilename(filename
)) and not os
.path
.isfile(encodeFilename(filename
))): 
 185         return filename 
+ u
'.part' 
 187     def undo_temp_name(self
, filename
): 
 188         if filename
.endswith(u
'.part'): 
 189             return filename
[:-len(u
'.part')] 
 192     def try_rename(self
, old_filename
, new_filename
): 
 194             if old_filename 
== new_filename
: 
 196             os
.rename(encodeFilename(old_filename
), encodeFilename(new_filename
)) 
 197         except (IOError, OSError) as err
: 
 198             self
.report_error(u
'unable to rename file') 
 200     def try_utime(self
, filename
, last_modified_hdr
): 
 201         """Try to set the last-modified time of the given file.""" 
 202         if last_modified_hdr 
is None: 
 204         if not os
.path
.isfile(encodeFilename(filename
)): 
 206         timestr 
= last_modified_hdr
 
 209         filetime 
= timeconvert(timestr
) 
 212         # Ignore obviously invalid dates 
 216             os
.utime(filename
, (time
.time(), filetime
)) 
 def report_destination(self, filename):
     """Report destination filename.

     Sends ``'[download] Destination: '`` followed by *filename* to
     ``self.to_screen``.
     """
     self.to_screen(u'[download] Destination: ' + filename)
 225     def report_progress(self
, percent
, data_len_str
, speed
, eta
): 
 226         """Report download progress.""" 
 227         if self
.params
.get('noprogress', False): 
 229         clear_line 
= (u
'\x1b[K' if sys
.stderr
.isatty() and os
.name 
!= 'nt' else u
'') 
 230         eta_str 
= self
.format_eta(eta
) 
 231         percent_str 
= self
.format_percent(percent
) 
 232         speed_str 
= self
.format_speed(speed
) 
 233         if self
.params
.get('progress_with_newline', False): 
 234             self
.to_screen(u
'[download] %s of %s at %s ETA %s' % 
 235                 (percent_str
, data_len_str
, speed_str
, eta_str
)) 
 237             self
.to_screen(u
'\r%s[download] %s of %s at %s ETA %s' % 
 238                 (clear_line
, percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 239         self
.to_cons_title(u
'youtube-dl - %s of %s at %s ETA %s' % 
 240                 (percent_str
.strip(), data_len_str
.strip(), speed_str
.strip(), eta_str
.strip())) 
 def report_resuming_byte(self, resume_len):
     """Report attempt to resume at given byte.

     *resume_len* is interpolated with ``%s`` into the message sent to
     ``self.to_screen``.
     """
     self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 def report_retry(self, count, retries):
     """Report retry in case of HTTP error 5xx.

     *count* is the current attempt and *retries* the maximum; both are
     formatted with ``%d`` into the message sent to ``self.to_screen``.
     """
     self.to_screen(
         u'[download] Got server HTTP error. Retrying (attempt %d of %d)...'
         % (count, retries))
 def report_file_already_downloaded(self, file_name):
     """Report file has already been fully downloaded.

     The message normally includes *file_name*. If emitting it raises
     UnicodeEncodeError, a generic message without the file name is
     emitted instead.
     """
     try:
         self.to_screen(u'[download] %s has already been downloaded' % file_name)
     except UnicodeEncodeError:
         # Fall back to a message that cannot fail on the file name.
         self.to_screen(u'[download] The file has already been downloaded')
 def report_unable_to_resume(self):
     """Report it was impossible to resume download."""
     self.to_screen(u'[download] Unable to resume')
 261     def report_finish(self
, data_len_str
, tot_time
): 
 262         """Report download finished.""" 
 263         if self
.params
.get('noprogress', False): 
 264             self
.to_screen(u
'[download] Download completed') 
 266             clear_line 
= (u
'\x1b[K' if sys
.stderr
.isatty() and os
.name 
!= 'nt' else u
'') 
 267             self
.to_screen(u
'\r%s[download] 100%% of %s in %s' % 
 268                 (clear_line
, data_len_str
, self
.format_seconds(tot_time
))) 
 270     def _download_with_rtmpdump(self
, filename
, url
, player_url
, page_url
, play_path
, tc_url
): 
 271         self
.report_destination(filename
) 
 272         tmpfilename 
= self
.temp_name(filename
) 
 274         # Check for rtmpdump first 
 276             subprocess
.call(['rtmpdump', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 277         except (OSError, IOError): 
 278             self
.report_error(u
'RTMP download detected but "rtmpdump" could not be run') 
 280         verbosity_option 
= '--verbose' if self
.params
.get('verbose', False) else '--quiet' 
 282         # Download using rtmpdump. rtmpdump returns exit code 2 when 
 283         # the connection was interrupted and resuming appears to be 
 284         # possible. This is part of rtmpdump's normal usage, AFAIK. 
 285         basic_args 
= ['rtmpdump', verbosity_option
, '-r', url
, '-o', tmpfilename
] 
 286         if player_url 
is not None: 
 287             basic_args 
+= ['--swfVfy', player_url
] 
 288         if page_url 
is not None: 
 289             basic_args 
+= ['--pageUrl', page_url
] 
 290         if play_path 
is not None: 
 291             basic_args 
+= ['--playpath', play_path
] 
 292         if tc_url 
is not None: 
 293             basic_args 
+= ['--tcUrl', url
] 
 294         args 
= basic_args 
+ [[], ['--resume', '--skip', '1']][self
.params
.get('continuedl', False)] 
 295         if self
.params
.get('verbose', False): 
 298                 shell_quote 
= lambda args
: ' '.join(map(pipes
.quote
, args
)) 
 301             self
.to_screen(u
'[debug] rtmpdump command line: ' + shell_quote(args
)) 
 302         retval 
= subprocess
.call(args
) 
 303         while retval 
== 2 or retval 
== 1: 
 304             prevsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 305             self
.to_screen(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 306             time
.sleep(5.0) # This seems to be needed 
 307             retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 308             cursize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 309             if prevsize 
== cursize 
and retval 
== 1: 
 311              # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those 
 312             if prevsize 
== cursize 
and retval 
== 2 and cursize 
> 1024: 
 313                 self
.to_screen(u
'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') 
 317             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 318             self
.to_screen(u
'\r[rtmpdump] %s bytes' % fsize
) 
 319             self
.try_rename(tmpfilename
, filename
) 
 320             self
._hook
_progress
({ 
 321                 'downloaded_bytes': fsize
, 
 322                 'total_bytes': fsize
, 
 323                 'filename': filename
, 
 324                 'status': 'finished', 
 328             self
.to_stderr(u
"\n") 
 329             self
.report_error(u
'rtmpdump exited with code %d' % retval
) 
 332     def _download_with_mplayer(self
, filename
, url
): 
 333         self
.report_destination(filename
) 
 334         tmpfilename 
= self
.temp_name(filename
) 
 336         args 
= ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename
, url
] 
 337         # Check for mplayer first 
 339             subprocess
.call(['mplayer', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 340         except (OSError, IOError): 
 341             self
.report_error(u
'MMS or RTSP download detected but "%s" could not be run' % args
[0] ) 
 344         # Download using mplayer.  
 345         retval 
= subprocess
.call(args
) 
 347             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 348             self
.to_screen(u
'\r[%s] %s bytes' % (args
[0], fsize
)) 
 349             self
.try_rename(tmpfilename
, filename
) 
 350             self
._hook
_progress
({ 
 351                 'downloaded_bytes': fsize
, 
 352                 'total_bytes': fsize
, 
 353                 'filename': filename
, 
 354                 'status': 'finished', 
 358             self
.to_stderr(u
"\n") 
 359             self
.report_error(u
'mplayer exited with code %d' % retval
) 
 362     def _download_m3u8_with_ffmpeg(self
, filename
, url
): 
 363         self
.report_destination(filename
) 
 364         tmpfilename 
= self
.temp_name(filename
) 
 366         args 
= ['ffmpeg', '-y', '-i', url
, '-f', 'mp4', tmpfilename
] 
 367         # Check for ffmpeg first 
 369             subprocess
.call(['ffmpeg', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 370         except (OSError, IOError): 
 371             self
.report_error(u
'm3u8 download detected but "%s" could not be run' % args
[0] ) 
 374         retval 
= subprocess
.call(args
) 
 376             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 377             self
.to_screen(u
'\r[%s] %s bytes' % (args
[0], fsize
)) 
 378             self
.try_rename(tmpfilename
, filename
) 
 379             self
._hook
_progress
({ 
 380                 'downloaded_bytes': fsize
, 
 381                 'total_bytes': fsize
, 
 382                 'filename': filename
, 
 383                 'status': 'finished', 
 387             self
.to_stderr(u
"\n") 
 388             self
.report_error(u
'ffmpeg exited with code %d' % retval
) 
 392     def _do_download(self
, filename
, info_dict
): 
 393         url 
= info_dict
['url'] 
 395         # Check file already present 
 396         if self
.params
.get('continuedl', False) and os
.path
.isfile(encodeFilename(filename
)) and not self
.params
.get('nopart', False): 
 397             self
.report_file_already_downloaded(filename
) 
 398             self
._hook
_progress
({ 
 399                 'filename': filename
, 
 400                 'status': 'finished', 
 401                 'total_bytes': os
.path
.getsize(encodeFilename(filename
)), 
 405         # Attempt to download using rtmpdump 
 406         if url
.startswith('rtmp'): 
 407             return self
._download
_with
_rtmpdump
(filename
, url
, 
 408                                                 info_dict
.get('player_url', None), 
 409                                                 info_dict
.get('page_url', None), 
 410                                                 info_dict
.get('play_path', None), 
 411                                                 info_dict
.get('tc_url', None)) 
 413         # Attempt to download using mplayer 
 414         if url
.startswith('mms') or url
.startswith('rtsp'): 
 415             return self
._download
_with
_mplayer
(filename
, url
) 
 417         # m3u8 manifest are downloaded with ffmpeg 
 418         if determine_ext(url
) == u
'm3u8': 
 419             return self
._download
_m
3u8_with
_ffmpeg
(filename
, url
) 
 421         tmpfilename 
= self
.temp_name(filename
) 
 424         # Do not include the Accept-Encoding header 
 425         headers 
= {'Youtubedl-no-compression': 'True'} 
 426         if 'user_agent' in info_dict
: 
 427             headers
['Youtubedl-user-agent'] = info_dict
['user_agent'] 
 428         basic_request 
= compat_urllib_request
.Request(url
, None, headers
) 
 429         request 
= compat_urllib_request
.Request(url
, None, headers
) 
 431         if self
.params
.get('test', False): 
 432             request
.add_header('Range','bytes=0-10240') 
 434         # Establish possible resume length 
 435         if os
.path
.isfile(encodeFilename(tmpfilename
)): 
 436             resume_len 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 442             if self
.params
.get('continuedl', False): 
 443                 self
.report_resuming_byte(resume_len
) 
 444                 request
.add_header('Range','bytes=%d-' % resume_len
) 
 450         retries 
= self
.params
.get('retries', 0) 
 451         while count 
<= retries
: 
 452             # Establish connection 
 454                 if count 
== 0 and 'urlhandle' in info_dict
: 
 455                     data 
= info_dict
['urlhandle'] 
 456                 data 
= compat_urllib_request
.urlopen(request
) 
 458             except (compat_urllib_error
.HTTPError
, ) as err
: 
 459                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 460                     # Unexpected HTTP error 
 462                 elif err
.code 
== 416: 
 463                     # Unable to resume (requested range not satisfiable) 
 465                         # Open the connection again without the range header 
 466                         data 
= compat_urllib_request
.urlopen(basic_request
) 
 467                         content_length 
= data
.info()['Content-Length'] 
 468                     except (compat_urllib_error
.HTTPError
, ) as err
: 
 469                         if err
.code 
< 500 or err
.code 
>= 600: 
 472                         # Examine the reported length 
 473                         if (content_length 
is not None and 
 474                                 (resume_len 
- 100 < int(content_length
) < resume_len 
+ 100)): 
 475                             # The file had already been fully downloaded. 
 476                             # Explanation to the above condition: in issue #175 it was revealed that 
 477                             # YouTube sometimes adds or removes a few bytes from the end of the file, 
 478                             # changing the file size slightly and causing problems for some users. So 
 479                             # I decided to implement a suggested change and consider the file 
 480                             # completely downloaded if the file size differs less than 100 bytes from 
 481                             # the one in the hard drive. 
 482                             self
.report_file_already_downloaded(filename
) 
 483                             self
.try_rename(tmpfilename
, filename
) 
 484                             self
._hook
_progress
({ 
 485                                 'filename': filename
, 
 486                                 'status': 'finished', 
 490                             # The length does not match, we start the download over 
 491                             self
.report_unable_to_resume() 
 497                 self
.report_retry(count
, retries
) 
 500             self
.report_error(u
'giving up after %s retries' % retries
) 
 503         data_len 
= data
.info().get('Content-length', None) 
 504         if data_len 
is not None: 
 505             data_len 
= int(data_len
) + resume_len
 
 506             min_data_len 
= self
.params
.get("min_filesize", None) 
 507             max_data_len 
=  self
.params
.get("max_filesize", None) 
 508             if min_data_len 
is not None and data_len 
< min_data_len
: 
 509                 self
.to_screen(u
'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len
, min_data_len
)) 
 511             if max_data_len 
is not None and data_len 
> max_data_len
: 
 512                 self
.to_screen(u
'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len
, max_data_len
)) 
 515         data_len_str 
= self
.format_bytes(data_len
) 
 516         byte_counter 
= 0 + resume_len
 
 517         block_size 
= self
.params
.get('buffersize', 1024) 
 522             data_block 
= data
.read(block_size
) 
 524             if len(data_block
) == 0: 
 526             byte_counter 
+= len(data_block
) 
 528             # Open file just in time 
 531                     (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
 532                     assert stream 
is not None 
 533                     filename 
= self
.undo_temp_name(tmpfilename
) 
 534                     self
.report_destination(filename
) 
 535                 except (OSError, IOError) as err
: 
 536                     self
.report_error(u
'unable to open for writing: %s' % str(err
)) 
 539                 stream
.write(data_block
) 
 540             except (IOError, OSError) as err
: 
 541                 self
.to_stderr(u
"\n") 
 542                 self
.report_error(u
'unable to write data: %s' % str(err
)) 
 544             if not self
.params
.get('noresizebuffer', False): 
 545                 block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
 548             speed 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
 550                 self
.report_progress('Unknown %', data_len_str
, speed_str
, 'Unknown ETA') 
 553                 percent 
= self
.calc_percent(byte_counter
, data_len
) 
 554                 eta 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
 555                 self
.report_progress(percent
, data_len_str
, speed
, eta
) 
 557             self
._hook
_progress
({ 
 558                 'downloaded_bytes': byte_counter
, 
 559                 'total_bytes': data_len
, 
 560                 'tmpfilename': tmpfilename
, 
 561                 'filename': filename
, 
 562                 'status': 'downloading', 
 568             self
.slow_down(start
, byte_counter 
- resume_len
) 
 571             self
.to_stderr(u
"\n") 
 572             self
.report_error(u
'Did not get any data blocks') 
 575         self
.report_finish(data_len_str
, (time
.time() - start
)) 
 576         if data_len 
is not None and byte_counter 
!= data_len
: 
 577             raise ContentTooShortError(byte_counter
, int(data_len
)) 
 578         self
.try_rename(tmpfilename
, filename
) 
 580         # Update file modification time 
 581         if self
.params
.get('updatetime', True): 
 582             info_dict
['filetime'] = self
.try_utime(filename
, data
.info().get('last-modified', None)) 
 584         self
._hook
_progress
({ 
 585             'downloaded_bytes': byte_counter
, 
 586             'total_bytes': byte_counter
, 
 587             'filename': filename
, 
 588             'status': 'finished', 
 593     def _hook_progress(self
, status
): 
 594         for ph 
in self
._progress
_hooks
: 
 def add_progress_hook(self, ph):
     """ ph gets called on download progress, with a dictionary with the entries
     * filename: The final filename
     * status: One of "downloading" and "finished"

     It can also have some of the following entries:

     * downloaded_bytes: Bytes on disks
     * total_bytes: Total bytes, None if unknown
     * tmpfilename: The filename we're currently writing to
     * eta: The estimated time in seconds, None if unknown
     * speed: The download speed in bytes/second, None if unknown

     Hooks are guaranteed to be called at least once (with status "finished")
     if the download is successful.
     """
     # Hooks are kept in registration order in self._progress_hooks.
     self._progress_hooks.append(ph)