]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/FileDownloader.py
ea6b9d626efa7a18eafe20afa8c473d1afee315b
  15 class FileDownloader(object): 
  16     """File Downloader class. 
  18     File downloader objects are the ones responsible of downloading the 
  19     actual video file and writing it to disk. 
  21     File downloaders accept a lot of parameters. In order not to saturate 
  22     the object constructor with arguments, it receives a dictionary of 
  27     verbose:           Print additional info to stdout. 
  28     quiet:             Do not print messages to stdout. 
  29     ratelimit:         Download speed limit, in bytes/sec. 
  30     retries:           Number of times to retry for HTTP error 5xx 
  31     buffersize:        Size of download buffer in bytes. 
  32     noresizebuffer:    Do not automatically resize the download buffer. 
  33     continuedl:        Try to continue downloads if possible. 
  34     noprogress:        Do not print the progress bar. 
  35     logtostderr:       Log messages to stderr instead of stdout. 
  36     consoletitle:      Display progress in console window's titlebar. 
  37     nopart:            Do not use temporary .part files. 
  38     updatetime:        Use the Last-modified header to set output file timestamps. 
  39     test:              Download only first bytes to test the downloader. 
  40     min_filesize:      Skip files smaller than this size 
  41     max_filesize:      Skip files larger than this size 
  46     def __init__(self
, ydl
, params
): 
  47         """Create a FileDownloader object with the given options.""" 
  49         self
._progress
_hooks 
= [] 
  53     def format_bytes(bytes): 
  56         if type(bytes) is str: 
  61             exponent 
= int(math
.log(bytes, 1024.0)) 
  62         suffix 
= ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent
] 
  63         converted 
= float(bytes) / float(1024 ** exponent
) 
  64         return '%.2f%s' % (converted
, suffix
) 
  67     def calc_percent(byte_counter
, data_len
): 
  70         return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0)) 
  73     def calc_eta(start
, now
, total
, current
): 
  77         if current 
== 0 or dif 
< 0.001: # One millisecond 
  79         rate 
= float(current
) / dif
 
  80         eta 
= int((float(total
) - float(current
)) / rate
) 
  81         (eta_mins
, eta_secs
) = divmod(eta
, 60) 
  84         return '%02d:%02d' % (eta_mins
, eta_secs
) 
  87     def calc_speed(start
, now
, bytes): 
  89         if bytes == 0 or dif 
< 0.001: # One millisecond 
  90             return '%10s' % '---b/s' 
  91         return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
)) 
  94     def best_block_size(elapsed_time
, bytes): 
  95         new_min 
= max(bytes / 2.0, 1.0) 
  96         new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
  97         if elapsed_time 
< 0.001: 
  99         rate 
= bytes / elapsed_time
 
 107     def parse_bytes(bytestr
): 
 108         """Parse a string indicating a byte quantity into an integer.""" 
 109         matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 112         number 
= float(matchobj
.group(1)) 
 113         multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 114         return int(round(number 
* multiplier
)) 
 116     def to_screen(self
, *args
, **kargs
): 
 117         self
.ydl
.to_screen(*args
, **kargs
) 
 119     def to_stderr(self
, message
): 
 120         self
.ydl
.to_screen(message
) 
 122     def to_cons_title(self
, message
): 
 123         """Set console/terminal window title to message.""" 
 124         if not self
.params
.get('consoletitle', False): 
 126         if os
.name 
== 'nt' and ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 127             # c_wchar_p() might not be necessary if `message` is 
 128             # already of type unicode() 
 129             ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 130         elif 'TERM' in os
.environ
: 
 131             self
.to_screen('\033]0;%s\007' % message
, skip_eol
=True) 
 133     def trouble(self
, *args
, **kargs
): 
 134         self
.ydl
.trouble(*args
, **kargs
) 
 136     def report_warning(self
, *args
, **kargs
): 
 137         self
.ydl
.report_warning(*args
, **kargs
) 
 139     def report_error(self
, *args
, **kargs
): 
 140         self
.ydl
.report_error(*args
, **kargs
) 
 142     def slow_down(self
, start_time
, byte_counter
): 
 143         """Sleep if the download speed is over the rate limit.""" 
 144         rate_limit 
= self
.params
.get('ratelimit', None) 
 145         if rate_limit 
is None or byte_counter 
== 0: 
 148         elapsed 
= now 
- start_time
 
 151         speed 
= float(byte_counter
) / elapsed
 
 152         if speed 
> rate_limit
: 
 153             time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 155     def temp_name(self
, filename
): 
 156         """Returns a temporary filename for the given filename.""" 
 157         if self
.params
.get('nopart', False) or filename 
== u
'-' or \
 
 158                 (os
.path
.exists(encodeFilename(filename
)) and not os
.path
.isfile(encodeFilename(filename
))): 
 160         return filename 
+ u
'.part' 
 162     def undo_temp_name(self
, filename
): 
 163         if filename
.endswith(u
'.part'): 
 164             return filename
[:-len(u
'.part')] 
 167     def try_rename(self
, old_filename
, new_filename
): 
 169             if old_filename 
== new_filename
: 
 171             os
.rename(encodeFilename(old_filename
), encodeFilename(new_filename
)) 
 172         except (IOError, OSError) as err
: 
 173             self
.report_error(u
'unable to rename file') 
 175     def try_utime(self
, filename
, last_modified_hdr
): 
 176         """Try to set the last-modified time of the given file.""" 
 177         if last_modified_hdr 
is None: 
 179         if not os
.path
.isfile(encodeFilename(filename
)): 
 181         timestr 
= last_modified_hdr
 
 184         filetime 
= timeconvert(timestr
) 
 187         # Ignore obviously invalid dates 
 191             os
.utime(filename
, (time
.time(), filetime
)) 
 196     def report_destination(self
, filename
): 
 197         """Report destination filename.""" 
 198         self
.to_screen(u
'[download] Destination: ' + filename
) 
 200     def report_progress(self
, percent_str
, data_len_str
, speed_str
, eta_str
): 
 201         """Report download progress.""" 
 202         if self
.params
.get('noprogress', False): 
 204         clear_line 
= (u
'\x1b[K' if sys
.stderr
.isatty() and os
.name 
!= 'nt' else u
'') 
 205         if self
.params
.get('progress_with_newline', False): 
 206             self
.to_screen(u
'[download] %s of %s at %s ETA %s' % 
 207                 (percent_str
, data_len_str
, speed_str
, eta_str
)) 
 209             self
.to_screen(u
'\r%s[download] %s of %s at %s ETA %s' % 
 210                 (clear_line
, percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 211         self
.to_cons_title(u
'youtube-dl - %s of %s at %s ETA %s' % 
 212                 (percent_str
.strip(), data_len_str
.strip(), speed_str
.strip(), eta_str
.strip())) 
 214     def report_resuming_byte(self
, resume_len
): 
 215         """Report attempt to resume at given byte.""" 
 216         self
.to_screen(u
'[download] Resuming download at byte %s' % resume_len
) 
 218     def report_retry(self
, count
, retries
): 
 219         """Report retry in case of HTTP error 5xx""" 
 220         self
.to_screen(u
'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count
, retries
)) 
 222     def report_file_already_downloaded(self
, file_name
): 
 223         """Report file has already been fully downloaded.""" 
 225             self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 226         except (UnicodeEncodeError) as err
: 
 227             self
.to_screen(u
'[download] The file has already been downloaded') 
 229     def report_unable_to_resume(self
): 
 230         """Report it was impossible to resume download.""" 
 231         self
.to_screen(u
'[download] Unable to resume') 
 233     def report_finish(self
): 
 234         """Report download finished.""" 
 235         if self
.params
.get('noprogress', False): 
 236             self
.to_screen(u
'[download] Download completed') 
 240     def _download_with_rtmpdump(self
, filename
, url
, player_url
, page_url
, play_path
, tc_url
): 
 241         self
.report_destination(filename
) 
 242         tmpfilename 
= self
.temp_name(filename
) 
 244         # Check for rtmpdump first 
 246             subprocess
.call(['rtmpdump', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 247         except (OSError, IOError): 
 248             self
.report_error(u
'RTMP download detected but "rtmpdump" could not be run') 
 250         verbosity_option 
= '--verbose' if self
.params
.get('verbose', False) else '--quiet' 
 252         # Download using rtmpdump. rtmpdump returns exit code 2 when 
 253         # the connection was interrupted and resuming appears to be 
 254         # possible. This is part of rtmpdump's normal usage, AFAIK. 
 255         basic_args 
= ['rtmpdump', verbosity_option
, '-r', url
, '-o', tmpfilename
] 
 256         if player_url 
is not None: 
 257             basic_args 
+= ['--swfVfy', player_url
] 
 258         if page_url 
is not None: 
 259             basic_args 
+= ['--pageUrl', page_url
] 
 260         if play_path 
is not None: 
 261             basic_args 
+= ['--playpath', play_path
] 
 262         if tc_url 
is not None: 
 263             basic_args 
+= ['--tcUrl', url
] 
 264         args 
= basic_args 
+ [[], ['--resume', '--skip', '1']][self
.params
.get('continuedl', False)] 
 265         if self
.params
.get('verbose', False): 
 268                 shell_quote 
= lambda args
: ' '.join(map(pipes
.quote
, args
)) 
 271             self
.to_screen(u
'[debug] rtmpdump command line: ' + shell_quote(args
)) 
 272         retval 
= subprocess
.call(args
) 
 273         while retval 
== 2 or retval 
== 1: 
 274             prevsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 275             self
.to_screen(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 276             time
.sleep(5.0) # This seems to be needed 
 277             retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 278             cursize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 279             if prevsize 
== cursize 
and retval 
== 1: 
 281              # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those 
 282             if prevsize 
== cursize 
and retval 
== 2 and cursize 
> 1024: 
 283                 self
.to_screen(u
'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') 
 287             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 288             self
.to_screen(u
'\r[rtmpdump] %s bytes' % fsize
) 
 289             self
.try_rename(tmpfilename
, filename
) 
 290             self
._hook
_progress
({ 
 291                 'downloaded_bytes': fsize
, 
 292                 'total_bytes': fsize
, 
 293                 'filename': filename
, 
 294                 'status': 'finished', 
 298             self
.to_stderr(u
"\n") 
 299             self
.report_error(u
'rtmpdump exited with code %d' % retval
) 
 302     def _download_with_mplayer(self
, filename
, url
): 
 303         self
.report_destination(filename
) 
 304         tmpfilename 
= self
.temp_name(filename
) 
 306         args 
= ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename
, url
] 
 307         # Check for mplayer first 
 309             subprocess
.call(['mplayer', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 310         except (OSError, IOError): 
 311             self
.report_error(u
'MMS or RTSP download detected but "%s" could not be run' % args
[0] ) 
 314         # Download using mplayer.  
 315         retval 
= subprocess
.call(args
) 
 317             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 318             self
.to_screen(u
'\r[%s] %s bytes' % (args
[0], fsize
)) 
 319             self
.try_rename(tmpfilename
, filename
) 
 320             self
._hook
_progress
({ 
 321                 'downloaded_bytes': fsize
, 
 322                 'total_bytes': fsize
, 
 323                 'filename': filename
, 
 324                 'status': 'finished', 
 328             self
.to_stderr(u
"\n") 
 329             self
.report_error(u
'mplayer exited with code %d' % retval
) 
 332     def _download_m3u8_with_ffmpeg(self
, filename
, url
): 
 333         self
.report_destination(filename
) 
 334         tmpfilename 
= self
.temp_name(filename
) 
 336         args 
= ['ffmpeg', '-y', '-i', url
, '-f', 'mp4', tmpfilename
] 
 337         # Check for ffmpeg first 
 339             subprocess
.call(['ffmpeg', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 340         except (OSError, IOError): 
 341             self
.report_error(u
'm3u8 download detected but "%s" could not be run' % args
[0] ) 
 344         retval 
= subprocess
.call(args
) 
 346             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 347             self
.to_screen(u
'\r[%s] %s bytes' % (args
[0], fsize
)) 
 348             self
.try_rename(tmpfilename
, filename
) 
 349             self
._hook
_progress
({ 
 350                 'downloaded_bytes': fsize
, 
 351                 'total_bytes': fsize
, 
 352                 'filename': filename
, 
 353                 'status': 'finished', 
 357             self
.to_stderr(u
"\n") 
 358             self
.report_error(u
'ffmpeg exited with code %d' % retval
) 
 362     def _do_download(self
, filename
, info_dict
): 
 363         url 
= info_dict
['url'] 
 365         # Check file already present 
 366         if self
.params
.get('continuedl', False) and os
.path
.isfile(encodeFilename(filename
)) and not self
.params
.get('nopart', False): 
 367             self
.report_file_already_downloaded(filename
) 
 368             self
._hook
_progress
({ 
 369                 'filename': filename
, 
 370                 'status': 'finished', 
 374         # Attempt to download using rtmpdump 
 375         if url
.startswith('rtmp'): 
 376             return self
._download
_with
_rtmpdump
(filename
, url
, 
 377                                                 info_dict
.get('player_url', None), 
 378                                                 info_dict
.get('page_url', None), 
 379                                                 info_dict
.get('play_path', None), 
 380                                                 info_dict
.get('tc_url', None)) 
 382         # Attempt to download using mplayer 
 383         if url
.startswith('mms') or url
.startswith('rtsp'): 
 384             return self
._download
_with
_mplayer
(filename
, url
) 
 386         # m3u8 manifest are downloaded with ffmpeg 
 387         if determine_ext(url
) == u
'm3u8': 
 388             return self
._download
_m
3u8_with
_ffmpeg
(filename
, url
) 
 390         tmpfilename 
= self
.temp_name(filename
) 
 393         # Do not include the Accept-Encoding header 
 394         headers 
= {'Youtubedl-no-compression': 'True'} 
 395         if 'user_agent' in info_dict
: 
 396             headers
['Youtubedl-user-agent'] = info_dict
['user_agent'] 
 397         basic_request 
= compat_urllib_request
.Request(url
, None, headers
) 
 398         request 
= compat_urllib_request
.Request(url
, None, headers
) 
 400         if self
.params
.get('test', False): 
 401             request
.add_header('Range','bytes=0-10240') 
 403         # Establish possible resume length 
 404         if os
.path
.isfile(encodeFilename(tmpfilename
)): 
 405             resume_len 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 411             if self
.params
.get('continuedl', False): 
 412                 self
.report_resuming_byte(resume_len
) 
 413                 request
.add_header('Range','bytes=%d-' % resume_len
) 
 419         retries 
= self
.params
.get('retries', 0) 
 420         while count 
<= retries
: 
 421             # Establish connection 
 423                 if count 
== 0 and 'urlhandle' in info_dict
: 
 424                     data 
= info_dict
['urlhandle'] 
 425                 data 
= compat_urllib_request
.urlopen(request
) 
 427             except (compat_urllib_error
.HTTPError
, ) as err
: 
 428                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 429                     # Unexpected HTTP error 
 431                 elif err
.code 
== 416: 
 432                     # Unable to resume (requested range not satisfiable) 
 434                         # Open the connection again without the range header 
 435                         data 
= compat_urllib_request
.urlopen(basic_request
) 
 436                         content_length 
= data
.info()['Content-Length'] 
 437                     except (compat_urllib_error
.HTTPError
, ) as err
: 
 438                         if err
.code 
< 500 or err
.code 
>= 600: 
 441                         # Examine the reported length 
 442                         if (content_length 
is not None and 
 443                                 (resume_len 
- 100 < int(content_length
) < resume_len 
+ 100)): 
 444                             # The file had already been fully downloaded. 
 445                             # Explanation to the above condition: in issue #175 it was revealed that 
 446                             # YouTube sometimes adds or removes a few bytes from the end of the file, 
 447                             # changing the file size slightly and causing problems for some users. So 
 448                             # I decided to implement a suggested change and consider the file 
 449                             # completely downloaded if the file size differs less than 100 bytes from 
 450                             # the one in the hard drive. 
 451                             self
.report_file_already_downloaded(filename
) 
 452                             self
.try_rename(tmpfilename
, filename
) 
 453                             self
._hook
_progress
({ 
 454                                 'filename': filename
, 
 455                                 'status': 'finished', 
 459                             # The length does not match, we start the download over 
 460                             self
.report_unable_to_resume() 
 466                 self
.report_retry(count
, retries
) 
 469             self
.report_error(u
'giving up after %s retries' % retries
) 
 472         data_len 
= data
.info().get('Content-length', None) 
 473         if data_len 
is not None: 
 474             data_len 
= int(data_len
) + resume_len
 
 475             min_data_len 
= self
.params
.get("min_filesize", None) 
 476             max_data_len 
=  self
.params
.get("max_filesize", None) 
 477             if min_data_len 
is not None and data_len 
< min_data_len
: 
 478                 self
.to_screen(u
'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len
, min_data_len
)) 
 480             if max_data_len 
is not None and data_len 
> max_data_len
: 
 481                 self
.to_screen(u
'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len
, max_data_len
)) 
 484         data_len_str 
= self
.format_bytes(data_len
) 
 485         byte_counter 
= 0 + resume_len
 
 486         block_size 
= self
.params
.get('buffersize', 1024) 
 491             data_block 
= data
.read(block_size
) 
 493             if len(data_block
) == 0: 
 495             byte_counter 
+= len(data_block
) 
 497             # Open file just in time 
 500                     (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
 501                     assert stream 
is not None 
 502                     filename 
= self
.undo_temp_name(tmpfilename
) 
 503                     self
.report_destination(filename
) 
 504                 except (OSError, IOError) as err
: 
 505                     self
.report_error(u
'unable to open for writing: %s' % str(err
)) 
 508                 stream
.write(data_block
) 
 509             except (IOError, OSError) as err
: 
 510                 self
.to_stderr(u
"\n") 
 511                 self
.report_error(u
'unable to write data: %s' % str(err
)) 
 513             if not self
.params
.get('noresizebuffer', False): 
 514                 block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
 517             speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
 519                 self
.report_progress('Unknown %', data_len_str
, speed_str
, 'Unknown ETA') 
 521                 percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
 522                 eta_str 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
 523                 self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
 525             self
._hook
_progress
({ 
 526                 'downloaded_bytes': byte_counter
, 
 527                 'total_bytes': data_len
, 
 528                 'tmpfilename': tmpfilename
, 
 529                 'filename': filename
, 
 530                 'status': 'downloading', 
 534             self
.slow_down(start
, byte_counter 
- resume_len
) 
 537             self
.to_stderr(u
"\n") 
 538             self
.report_error(u
'Did not get any data blocks') 
 542         if data_len 
is not None and byte_counter 
!= data_len
: 
 543             raise ContentTooShortError(byte_counter
, int(data_len
)) 
 544         self
.try_rename(tmpfilename
, filename
) 
 546         # Update file modification time 
 547         if self
.params
.get('updatetime', True): 
 548             info_dict
['filetime'] = self
.try_utime(filename
, data
.info().get('last-modified', None)) 
 550         self
._hook
_progress
({ 
 551             'downloaded_bytes': byte_counter
, 
 552             'total_bytes': byte_counter
, 
 553             'filename': filename
, 
 554             'status': 'finished', 
 559     def _hook_progress(self
, status
): 
 560         for ph 
in self
._progress
_hooks
: 
 563     def add_progress_hook(self
, ph
): 
 564         """ ph gets called on download progress, with a dictionary with the entries 
 565         * filename: The final filename 
 566         * status: One of "downloading" and "finished" 
 568         It can also have some of the following entries: 
 570         * downloaded_bytes: Bytes on disks 
 571         * total_bytes: Total bytes, None if unknown 
 572         * tmpfilename: The filename we're currently writing to 
 574         Hooks are guaranteed to be called at least once (with status "finished") 
 575         if the download is successful. 
 577         self
._progress
_hooks
.append(ph
)