]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/FileDownloader.py
088f595866372e360e425a3aeee196374f504b7c
  13     compat_urllib_request
, 
  22 class FileDownloader(object): 
  23     """File Downloader class. 
  25     File downloader objects are the ones responsible of downloading the 
  26     actual video file and writing it to disk. 
  28     File downloaders accept a lot of parameters. In order not to saturate 
  29     the object constructor with arguments, it receives a dictionary of 
  34     verbose:           Print additional info to stdout. 
  35     quiet:             Do not print messages to stdout. 
  36     ratelimit:         Download speed limit, in bytes/sec. 
  37     retries:           Number of times to retry for HTTP error 5xx 
  38     buffersize:        Size of download buffer in bytes. 
  39     noresizebuffer:    Do not automatically resize the download buffer. 
  40     continuedl:        Try to continue downloads if possible. 
  41     noprogress:        Do not print the progress bar. 
  42     logtostderr:       Log messages to stderr instead of stdout. 
  43     consoletitle:      Display progress in console window's titlebar. 
  44     nopart:            Do not use temporary .part files. 
  45     updatetime:        Use the Last-modified header to set output file timestamps. 
  46     test:              Download only first bytes to test the downloader. 
  47     min_filesize:      Skip files smaller than this size 
  48     max_filesize:      Skip files larger than this size 
  53     def __init__(self
, ydl
, params
): 
  54         """Create a FileDownloader object with the given options.""" 
  56         self
._progress
_hooks 
= [] 
  60     def format_bytes(bytes): 
  63         if type(bytes) is str: 
  68             exponent 
= int(math
.log(bytes, 1024.0)) 
  69         suffix 
= ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent
] 
  70         converted 
= float(bytes) / float(1024 ** exponent
) 
  71         return '%.2f%s' % (converted
, suffix
) 
  74     def format_seconds(seconds
): 
  75         (mins
, secs
) = divmod(seconds
, 60) 
  76         (hours
, mins
) = divmod(mins
, 60) 
  80             return '%02d:%02d' % (mins
, secs
) 
  82             return '%02d:%02d:%02d' % (hours
, mins
, secs
) 
  85     def calc_percent(byte_counter
, data_len
): 
  88         return float(byte_counter
) / float(data_len
) * 100.0 
  91     def format_percent(percent
): 
  94         return '%6s' % ('%3.1f%%' % percent
) 
  97     def calc_eta(start
, now
, total
, current
): 
 101         if current 
== 0 or dif 
< 0.001: # One millisecond 
 103         rate 
= float(current
) / dif
 
 104         return int((float(total
) - float(current
)) / rate
) 
 110         return FileDownloader
.format_seconds(eta
) 
 113     def calc_speed(start
, now
, bytes): 
 115         if bytes == 0 or dif 
< 0.001: # One millisecond 
 117         return float(bytes) / dif
 
 120     def format_speed(speed
): 
 122             return '%10s' % '---b/s' 
 123         return '%10s' % ('%s/s' % FileDownloader
.format_bytes(speed
)) 
 126     def best_block_size(elapsed_time
, bytes): 
 127         new_min 
= max(bytes / 2.0, 1.0) 
 128         new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
 129         if elapsed_time 
< 0.001: 
 131         rate 
= bytes / elapsed_time
 
 139     def parse_bytes(bytestr
): 
 140         """Parse a string indicating a byte quantity into an integer.""" 
 141         matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 144         number 
= float(matchobj
.group(1)) 
 145         multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 146         return int(round(number 
* multiplier
)) 
 148     def to_screen(self
, *args
, **kargs
): 
 149         self
.ydl
.to_screen(*args
, **kargs
) 
 151     def to_stderr(self
, message
): 
 152         self
.ydl
.to_screen(message
) 
 154     def to_cons_title(self
, message
): 
 155         """Set console/terminal window title to message.""" 
 156         if not self
.params
.get('consoletitle', False): 
 158         if os
.name 
== 'nt' and ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 159             # c_wchar_p() might not be necessary if `message` is 
 160             # already of type unicode() 
 161             ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 162         elif 'TERM' in os
.environ
: 
 163             self
.to_screen('\033]0;%s\007' % message
, skip_eol
=True) 
 165     def trouble(self
, *args
, **kargs
): 
 166         self
.ydl
.trouble(*args
, **kargs
) 
 168     def report_warning(self
, *args
, **kargs
): 
 169         self
.ydl
.report_warning(*args
, **kargs
) 
 171     def report_error(self
, *args
, **kargs
): 
 172         self
.ydl
.report_error(*args
, **kargs
) 
 174     def slow_down(self
, start_time
, byte_counter
): 
 175         """Sleep if the download speed is over the rate limit.""" 
 176         rate_limit 
= self
.params
.get('ratelimit', None) 
 177         if rate_limit 
is None or byte_counter 
== 0: 
 180         elapsed 
= now 
- start_time
 
 183         speed 
= float(byte_counter
) / elapsed
 
 184         if speed 
> rate_limit
: 
 185             time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 187     def temp_name(self
, filename
): 
 188         """Returns a temporary filename for the given filename.""" 
 189         if self
.params
.get('nopart', False) or filename 
== u
'-' or \
 
 190                 (os
.path
.exists(encodeFilename(filename
)) and not os
.path
.isfile(encodeFilename(filename
))): 
 192         return filename 
+ u
'.part' 
 194     def undo_temp_name(self
, filename
): 
 195         if filename
.endswith(u
'.part'): 
 196             return filename
[:-len(u
'.part')] 
 199     def try_rename(self
, old_filename
, new_filename
): 
 201             if old_filename 
== new_filename
: 
 203             os
.rename(encodeFilename(old_filename
), encodeFilename(new_filename
)) 
 204         except (IOError, OSError): 
 205             self
.report_error(u
'unable to rename file') 
 207     def try_utime(self
, filename
, last_modified_hdr
): 
 208         """Try to set the last-modified time of the given file.""" 
 209         if last_modified_hdr 
is None: 
 211         if not os
.path
.isfile(encodeFilename(filename
)): 
 213         timestr 
= last_modified_hdr
 
 216         filetime 
= timeconvert(timestr
) 
 219         # Ignore obviously invalid dates 
 223             os
.utime(filename
, (time
.time(), filetime
)) 
 228     def report_destination(self
, filename
): 
 229         """Report destination filename.""" 
 230         self
.to_screen(u
'[download] Destination: ' + filename
) 
 232     def report_progress(self
, percent
, data_len_str
, speed
, eta
): 
 233         """Report download progress.""" 
 234         if self
.params
.get('noprogress', False): 
 236         clear_line 
= (u
'\x1b[K' if sys
.stderr
.isatty() and os
.name 
!= 'nt' else u
'') 
 238             eta_str 
= self
.format_eta(eta
) 
 240             eta_str 
= 'Unknown ETA' 
 241         if percent 
is not None: 
 242             percent_str 
= self
.format_percent(percent
) 
 244             percent_str 
= 'Unknown %' 
 245         speed_str 
= self
.format_speed(speed
) 
 246         if self
.params
.get('progress_with_newline', False): 
 247             self
.to_screen(u
'[download] %s of %s at %s ETA %s' % 
 248                 (percent_str
, data_len_str
, speed_str
, eta_str
)) 
 250             self
.to_screen(u
'\r%s[download] %s of %s at %s ETA %s' % 
 251                 (clear_line
, percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 252         self
.to_cons_title(u
'youtube-dl - %s of %s at %s ETA %s' % 
 253                 (percent_str
.strip(), data_len_str
.strip(), speed_str
.strip(), eta_str
.strip())) 
 255     def report_resuming_byte(self
, resume_len
): 
 256         """Report attempt to resume at given byte.""" 
 257         self
.to_screen(u
'[download] Resuming download at byte %s' % resume_len
) 
 259     def report_retry(self
, count
, retries
): 
 260         """Report retry in case of HTTP error 5xx""" 
 261         self
.to_screen(u
'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count
, retries
)) 
 263     def report_file_already_downloaded(self
, file_name
): 
 264         """Report file has already been fully downloaded.""" 
 266             self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 267         except UnicodeEncodeError: 
 268             self
.to_screen(u
'[download] The file has already been downloaded') 
 270     def report_unable_to_resume(self
): 
 271         """Report it was impossible to resume download.""" 
 272         self
.to_screen(u
'[download] Unable to resume') 
 274     def report_finish(self
, data_len_str
, tot_time
): 
 275         """Report download finished.""" 
 276         if self
.params
.get('noprogress', False): 
 277             self
.to_screen(u
'[download] Download completed') 
 279             clear_line 
= (u
'\x1b[K' if sys
.stderr
.isatty() and os
.name 
!= 'nt' else u
'') 
 280             self
.to_screen(u
'\r%s[download] 100%% of %s in %s' % 
 281                 (clear_line
, data_len_str
, self
.format_seconds(tot_time
))) 
 283     def _download_with_rtmpdump(self
, filename
, url
, player_url
, page_url
, play_path
, tc_url
, live
): 
 284         self
.report_destination(filename
) 
 285         tmpfilename 
= self
.temp_name(filename
) 
 286         test 
= self
.params
.get('test', False) 
 288         # Check for rtmpdump first 
 290             subprocess
.call(['rtmpdump', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 291         except (OSError, IOError): 
 292             self
.report_error(u
'RTMP download detected but "rtmpdump" could not be run') 
 294         verbosity_option 
= '--verbose' if self
.params
.get('verbose', False) else '--quiet' 
 296         # Download using rtmpdump. rtmpdump returns exit code 2 when 
 297         # the connection was interrumpted and resuming appears to be 
 298         # possible. This is part of rtmpdump's normal usage, AFAIK. 
 299         basic_args 
= ['rtmpdump', verbosity_option
, '-r', url
, '-o', tmpfilename
] 
 300         if player_url 
is not None: 
 301             basic_args 
+= ['--swfVfy', player_url
] 
 302         if page_url 
is not None: 
 303             basic_args 
+= ['--pageUrl', page_url
] 
 304         if play_path 
is not None: 
 305             basic_args 
+= ['--playpath', play_path
] 
 306         if tc_url 
is not None: 
 307             basic_args 
+= ['--tcUrl', url
] 
 309             basic_args 
+= ['--stop', '1'] 
 311             basic_args 
+= ['--live'] 
 312         args 
= basic_args 
+ [[], ['--resume', '--skip', '1']][self
.params
.get('continuedl', False)] 
 313         if self
.params
.get('verbose', False): 
 316                 shell_quote 
= lambda args
: ' '.join(map(pipes
.quote
, args
)) 
 319             self
.to_screen(u
'[debug] rtmpdump command line: ' + shell_quote(args
)) 
 320         retval 
= subprocess
.call(args
) 
 321         while (retval 
== 2 or retval 
== 1) and not test
: 
 322             prevsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 323             self
.to_screen(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 324             time
.sleep(5.0) # This seems to be needed 
 325             retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 326             cursize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 327             if prevsize 
== cursize 
and retval 
== 1: 
 329              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those 
 330             if prevsize 
== cursize 
and retval 
== 2 and cursize 
> 1024: 
 331                 self
.to_screen(u
'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') 
 334         if retval 
== 0 or (test 
and retval 
== 2): 
 335             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 336             self
.to_screen(u
'\r[rtmpdump] %s bytes' % fsize
) 
 337             self
.try_rename(tmpfilename
, filename
) 
 338             self
._hook
_progress
({ 
 339                 'downloaded_bytes': fsize
, 
 340                 'total_bytes': fsize
, 
 341                 'filename': filename
, 
 342                 'status': 'finished', 
 346             self
.to_stderr(u
"\n") 
 347             self
.report_error(u
'rtmpdump exited with code %d' % retval
) 
 350     def _download_with_mplayer(self
, filename
, url
): 
 351         self
.report_destination(filename
) 
 352         tmpfilename 
= self
.temp_name(filename
) 
 354         args 
= ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename
, url
] 
 355         # Check for mplayer first 
 357             subprocess
.call(['mplayer', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 358         except (OSError, IOError): 
 359             self
.report_error(u
'MMS or RTSP download detected but "%s" could not be run' % args
[0] ) 
 362         # Download using mplayer.  
 363         retval 
= subprocess
.call(args
) 
 365             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 366             self
.to_screen(u
'\r[%s] %s bytes' % (args
[0], fsize
)) 
 367             self
.try_rename(tmpfilename
, filename
) 
 368             self
._hook
_progress
({ 
 369                 'downloaded_bytes': fsize
, 
 370                 'total_bytes': fsize
, 
 371                 'filename': filename
, 
 372                 'status': 'finished', 
 376             self
.to_stderr(u
"\n") 
 377             self
.report_error(u
'mplayer exited with code %d' % retval
) 
 380     def _download_m3u8_with_ffmpeg(self
, filename
, url
): 
 381         self
.report_destination(filename
) 
 382         tmpfilename 
= self
.temp_name(filename
) 
 384         args 
= ['-y', '-i', url
, '-f', 'mp4', '-c', 'copy', 
 385             '-bsf:a', 'aac_adtstoasc', tmpfilename
] 
 387         for program 
in ['avconv', 'ffmpeg']: 
 389                 subprocess
.call([program
, '-version'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 391             except (OSError, IOError): 
 394             self
.report_error(u
'm3u8 download detected but ffmpeg or avconv could not be found') 
 395         cmd 
= [program
] + args
 
 397         retval 
= subprocess
.call(cmd
) 
 399             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 400             self
.to_screen(u
'\r[%s] %s bytes' % (args
[0], fsize
)) 
 401             self
.try_rename(tmpfilename
, filename
) 
 402             self
._hook
_progress
({ 
 403                 'downloaded_bytes': fsize
, 
 404                 'total_bytes': fsize
, 
 405                 'filename': filename
, 
 406                 'status': 'finished', 
 410             self
.to_stderr(u
"\n") 
 411             self
.report_error(u
'ffmpeg exited with code %d' % retval
) 
 415     def _do_download(self
, filename
, info_dict
): 
 416         url 
= info_dict
['url'] 
 418         # Check file already present 
 419         if self
.params
.get('continuedl', False) and os
.path
.isfile(encodeFilename(filename
)) and not self
.params
.get('nopart', False): 
 420             self
.report_file_already_downloaded(filename
) 
 421             self
._hook
_progress
({ 
 422                 'filename': filename
, 
 423                 'status': 'finished', 
 424                 'total_bytes': os
.path
.getsize(encodeFilename(filename
)), 
 428         # Attempt to download using rtmpdump 
 429         if url
.startswith('rtmp'): 
 430             return self
._download
_with
_rtmpdump
(filename
, url
, 
 431                                                 info_dict
.get('player_url', None), 
 432                                                 info_dict
.get('page_url', None), 
 433                                                 info_dict
.get('play_path', None), 
 434                                                 info_dict
.get('tc_url', None), 
 435                                                 info_dict
.get('rtmp_live', False)) 
 437         # Attempt to download using mplayer 
 438         if url
.startswith('mms') or url
.startswith('rtsp'): 
 439             return self
._download
_with
_mplayer
(filename
, url
) 
 441         # m3u8 manifest are downloaded with ffmpeg 
 442         if determine_ext(url
) == u
'm3u8': 
 443             return self
._download
_m
3u8_with
_ffmpeg
(filename
, url
) 
 445         tmpfilename 
= self
.temp_name(filename
) 
 448         # Do not include the Accept-Encoding header 
 449         headers 
= {'Youtubedl-no-compression': 'True'} 
 450         if 'user_agent' in info_dict
: 
 451             headers
['Youtubedl-user-agent'] = info_dict
['user_agent'] 
 452         basic_request 
= compat_urllib_request
.Request(url
, None, headers
) 
 453         request 
= compat_urllib_request
.Request(url
, None, headers
) 
 455         if self
.params
.get('test', False): 
 456             request
.add_header('Range','bytes=0-10240') 
 458         # Establish possible resume length 
 459         if os
.path
.isfile(encodeFilename(tmpfilename
)): 
 460             resume_len 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 466             if self
.params
.get('continuedl', False): 
 467                 self
.report_resuming_byte(resume_len
) 
 468                 request
.add_header('Range','bytes=%d-' % resume_len
) 
 474         retries 
= self
.params
.get('retries', 0) 
 475         while count 
<= retries
: 
 476             # Establish connection 
 478                 if count 
== 0 and 'urlhandle' in info_dict
: 
 479                     data 
= info_dict
['urlhandle'] 
 480                 data 
= compat_urllib_request
.urlopen(request
) 
 482             except (compat_urllib_error
.HTTPError
, ) as err
: 
 483                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 484                     # Unexpected HTTP error 
 486                 elif err
.code 
== 416: 
 487                     # Unable to resume (requested range not satisfiable) 
 489                         # Open the connection again without the range header 
 490                         data 
= compat_urllib_request
.urlopen(basic_request
) 
 491                         content_length 
= data
.info()['Content-Length'] 
 492                     except (compat_urllib_error
.HTTPError
, ) as err
: 
 493                         if err
.code 
< 500 or err
.code 
>= 600: 
 496                         # Examine the reported length 
 497                         if (content_length 
is not None and 
 498                                 (resume_len 
- 100 < int(content_length
) < resume_len 
+ 100)): 
 499                             # The file had already been fully downloaded. 
 500                             # Explanation to the above condition: in issue #175 it was revealed that 
 501                             # YouTube sometimes adds or removes a few bytes from the end of the file, 
 502                             # changing the file size slightly and causing problems for some users. So 
 503                             # I decided to implement a suggested change and consider the file 
 504                             # completely downloaded if the file size differs less than 100 bytes from 
 505                             # the one in the hard drive. 
 506                             self
.report_file_already_downloaded(filename
) 
 507                             self
.try_rename(tmpfilename
, filename
) 
 508                             self
._hook
_progress
({ 
 509                                 'filename': filename
, 
 510                                 'status': 'finished', 
 514                             # The length does not match, we start the download over 
 515                             self
.report_unable_to_resume() 
 521                 self
.report_retry(count
, retries
) 
 524             self
.report_error(u
'giving up after %s retries' % retries
) 
 527         data_len 
= data
.info().get('Content-length', None) 
 528         if data_len 
is not None: 
 529             data_len 
= int(data_len
) + resume_len
 
 530             min_data_len 
= self
.params
.get("min_filesize", None) 
 531             max_data_len 
=  self
.params
.get("max_filesize", None) 
 532             if min_data_len 
is not None and data_len 
< min_data_len
: 
 533                 self
.to_screen(u
'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len
, min_data_len
)) 
 535             if max_data_len 
is not None and data_len 
> max_data_len
: 
 536                 self
.to_screen(u
'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len
, max_data_len
)) 
 539         data_len_str 
= self
.format_bytes(data_len
) 
 540         byte_counter 
= 0 + resume_len
 
 541         block_size 
= self
.params
.get('buffersize', 1024) 
 546             data_block 
= data
.read(block_size
) 
 548             if len(data_block
) == 0: 
 550             byte_counter 
+= len(data_block
) 
 552             # Open file just in time 
 555                     (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
 556                     assert stream 
is not None 
 557                     filename 
= self
.undo_temp_name(tmpfilename
) 
 558                     self
.report_destination(filename
) 
 559                 except (OSError, IOError) as err
: 
 560                     self
.report_error(u
'unable to open for writing: %s' % str(err
)) 
 563                 stream
.write(data_block
) 
 564             except (IOError, OSError) as err
: 
 565                 self
.to_stderr(u
"\n") 
 566                 self
.report_error(u
'unable to write data: %s' % str(err
)) 
 568             if not self
.params
.get('noresizebuffer', False): 
 569                 block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
 572             speed 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
 576                 percent 
= self
.calc_percent(byte_counter
, data_len
) 
 577                 eta 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
 578             self
.report_progress(percent
, data_len_str
, speed
, eta
) 
 580             self
._hook
_progress
({ 
 581                 'downloaded_bytes': byte_counter
, 
 582                 'total_bytes': data_len
, 
 583                 'tmpfilename': tmpfilename
, 
 584                 'filename': filename
, 
 585                 'status': 'downloading', 
 591             self
.slow_down(start
, byte_counter 
- resume_len
) 
 594             self
.to_stderr(u
"\n") 
 595             self
.report_error(u
'Did not get any data blocks') 
 598         self
.report_finish(data_len_str
, (time
.time() - start
)) 
 599         if data_len 
is not None and byte_counter 
!= data_len
: 
 600             raise ContentTooShortError(byte_counter
, int(data_len
)) 
 601         self
.try_rename(tmpfilename
, filename
) 
 603         # Update file modification time 
 604         if self
.params
.get('updatetime', True): 
 605             info_dict
['filetime'] = self
.try_utime(filename
, data
.info().get('last-modified', None)) 
 607         self
._hook
_progress
({ 
 608             'downloaded_bytes': byte_counter
, 
 609             'total_bytes': byte_counter
, 
 610             'filename': filename
, 
 611             'status': 'finished', 
 616     def _hook_progress(self
, status
): 
 617         for ph 
in self
._progress
_hooks
: 
 620     def add_progress_hook(self
, ph
): 
 621         """ ph gets called on download progress, with a dictionary with the entries 
 622         * filename: The final filename 
 623         * status: One of "downloading" and "finished" 
 625         It can also have some of the following entries: 
 627         * downloaded_bytes: Bytes on disks 
 628         * total_bytes: Total bytes, None if unknown 
 629         * tmpfilename: The filename we're currently writing to 
 630         * eta: The estimated time in seconds, None if unknown 
 631         * speed: The download speed in bytes/second, None if unknown 
 633         Hooks are guaranteed to be called at least once (with status "finished") 
 634         if the download is successful. 
 636         self
._progress
_hooks
.append(ph
)