Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/FileDownloader.py
b43acd19b42a1b7ad8b4c8329657864b35b365b3
[youtubedl] / youtube_dl / FileDownloader.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import math
7 import os
8 import re
9 import socket
10 import subprocess
11 import sys
12 import time
13 import traceback
14
15 if os.name == 'nt':
16 import ctypes
17
18 from .utils import *
19
20
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    usenetrc:          Use netrc for authentication instead.
    quiet:             Do not print messages to stdout.
    verbose:           Print a stack trace on errors and the rtmpdump
                       command line.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing format.
    simulate:          Do not download the video files.
    skip_download:     Write the side files but skip the video download.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    ratelimit:         Download speed limit, in bytes/sec.
    nooverwrites:      Prevent overwriting files.
    retries:           Number of times to retry for HTTP error 5xx
    buffersize:        Size of download buffer in bytes.
    noresizebuffer:    Do not automatically resize the download buffer.
    continuedl:        Try to continue downloads if possible.
    noprogress:        Do not print the progress bar.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    max_downloads:     Stop once this many files have been processed.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    nopart:            Do not use temporary .part files.
    updatetime:        Use the Last-modified header to set output file timestamps.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writesubtitles:    Write the video subtitles to a .srt file
    subtitleslang:     Language of the subtitles to download
    test:              Download only first bytes to test the downloader.
    """

    # Class-level placeholders; every instance gets its own values in __init__.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._pps = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Regular messages go to stderr only when explicitly requested.
        self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
        self.params = params

        if '%(stitle)s' in self.params['outtmpl']:
            self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    @staticmethod
    def format_bytes(bytes):
        """Format a byte quantity as a short string such as '1.50k'.

        Returns 'N/A' for None. A str argument is converted with float().
        """
        if bytes is None:
            return 'N/A'
        if isinstance(bytes, str):
            bytes = float(bytes)
        suffixes = 'bkMGTPEZY'
        if bytes == 0.0:
            exponent = 0
        else:
            exponent = int(math.log(bytes, 1024.0))
        # Clamp: tiny fractions yield a negative exponent (which would index
        # the suffix table from the end) and huge values would overflow it.
        exponent = max(0, min(exponent, len(suffixes) - 1))
        suffix = suffixes[exponent]
        converted = float(bytes) / float(1024 ** exponent)
        return '%.2f%s' % (converted, suffix)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage as a right-aligned 6-char string.

        An unknown (None) or zero total yields the placeholder '---.-%'.
        """
        if data_len is None or data_len == 0:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

    @staticmethod
    def calc_eta(start, now, total, current):
        """Return the estimated remaining time as 'MM:SS'.

        Returns '--:--' when the total is unknown, when nothing has been
        measured yet, or when the estimate exceeds 99 minutes.
        """
        if total is None:
            return '--:--'
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return '--:--'
        rate = float(current) / dif
        eta = int((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        if eta_mins > 99:
            return '--:--'
        return '%02d:%02d' % (eta_mins, eta_secs)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed as a right-aligned 10-char string."""
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from how fast the last block arrived.

        The result stays between half and double the last block size and
        never exceeds 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer.

        Accepts a number with an optional, case-insensitive suffix out of
        kMGTPEZY; returns None when the string does not match.
        """
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix is found at index 0, i.e. a multiplier of 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False):
        """Print message to the screen file if not in quiet mode."""
        assert type(message) == type(u'')
        if self.params.get('quiet', False):
            return
        terminator = u'' if skip_eol else u'\n'
        output = message + terminator
        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
            output = output.encode(preferredencoding(), 'ignore')
        self._screen_file.write(output)
        self._screen_file.flush()

    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        output = message + u'\n'
        # Inspect the stream that is actually written to (sys.stderr).
        if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
            output = output.encode(preferredencoding())
        sys.stderr.write(output)

    def to_cons_title(self, message):
        """Set console/terminal window title to message."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            title = message
            if sys.version_info[0] < 3:
                # Only Python 2 needs a byte string here; on Python 3 the
                # encoded value would be rendered as "b'...'".
                title = message.encode(preferredencoding())
            sys.stderr.write('\033]0;%s\007' % title)

    def fixed_template(self):
        """Checks if the output template is fixed."""
        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)

    def trouble(self, message=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            self.to_stderr(u''.join(traceback.format_list(traceback.extract_stack())))
        if not self.params.get('ignoreerrors', False):
            raise DownloadError(message)
        self._download_retcode = 1

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep long enough for the average speed to fall to the limit.
            time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + u'.part'

    def undo_temp_name(self, filename):
        """Strip the '.part' suffix added by temp_name(), if present."""
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename

    def try_rename(self, old_filename, new_filename):
        """Rename a file, reporting trouble instead of raising on failure."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError):
            self.trouble(u'ERROR: unable to rename file')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the parsed timestamp, or None when the header is missing,
        the file does not exist or the header cannot be parsed.
        """
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        try:
            # Use the same encoded name as the existence check above.
            os.utime(encodeFilename(filename), (time.time(), filetime))
        except Exception:
            # Best effort only: a failed timestamp update must not fail
            # the download.
            pass
        return filetime

    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)

    def report_writesubtitles(self, srtfn):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file is being written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                       (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                           (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            # Terminate the progress line, which is printed without an EOL.
            self.to_screen(u'')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Returns None (after reporting trouble) when the template cannot
        be expanded.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            template_dict['autonumber'] = u'%05d' % self._num_downloads

            restricted = self.params.get('restrictfilenames')
            # Every value is sanitized; missing (None) values become 'NA'.
            template_dict = dict(
                (k, sanitize_filename(
                    u'NA' if v is None else compat_str(v),
                    restricted=restricted,
                    is_id=(k == u'id')))
                for k, v in template_dict.items())

            return self.params['outtmpl'] % template_dict
        except (ValueError, KeyError):
            self.trouble(u'ERROR: invalid system charset or erroneous output template')
            return None

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded.

        Otherwise returns the reason for skipping, without the
        '[download] ' prefix (the caller adds it).
        """
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            # Only byte strings need decoding (text has no decode() on Python 3).
            if isinstance(matchtitle, bytes):
                matchtitle = matchtitle.decode('utf8')
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if isinstance(rejecttitle, bytes):
                rejecttitle = rejecttitle.decode('utf8')
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        return None

    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor."""

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            return

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
        if self.params.get('forceurl', False):
            compat_print(info_dict['url'])
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            compat_print(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            compat_print(filename)
        if self.params.get('forceformat', False):
            compat_print(info_dict['format'])

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):  # dn is already encoded
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            try:
                self.report_writedescription(descfn)
                descfile = open(encodeFilename(descfn), 'wb')
                try:
                    descfile.write(info_dict['description'].encode('utf-8'))
                finally:
                    descfile.close()
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write description file ' + descfn)
                return

        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            srtfn = filename.rsplit('.', 1)[0] + u'.srt'
            try:
                self.report_writesubtitles(srtfn)
                srtfile = open(encodeFilename(srtfn), 'wb')
                try:
                    srtfile.write(info_dict['subtitles'].encode('utf-8'))
                finally:
                    srtfile.close()
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write subtitles file ' + srtfn)
                return

        if self.params.get('writeinfojson', False):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
                json.dump
            except (NameError, AttributeError):
                self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
                return
            try:
                # Text mode: json.dump() emits str, which a binary file
                # rejects on Python 3.
                infof = open(encodeFilename(infofn), 'w')
                try:
                    # The open URL handle is not serializable; leave it out.
                    json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ('urlhandle',))
                    json.dump(json_info_dict, infof)
                finally:
                    infof.close()
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
                return

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    success = self._do_download(filename, info_dict)
                except (OSError, IOError):
                    raise UnavailableVideoError()
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.trouble(u'ERROR: unable to download video data: %s' % str(err))
                    return
                except (ContentTooShortError, ) as err:
                    self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.trouble(u'ERROR: postprocessing: %s' % str(err))
                    return

    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated return code (0, or 1 if an ignored error
        occurred). Raises SameFileError when several URLs would be
        written to one fixed output file.
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            suitable_found = False
            for ie in self._ies:
                # Go to next InfoExtractor if not suitable
                if not ie.suitable(url):
                    continue

                # Warn if the _WORKING attribute is False
                if not ie.working():
                    self.trouble(u'WARNING: the program functionality for this site has been marked as broken, '
                                 u'and will probably not work. If you want to go on, use the -i option.')

                # Suitable InfoExtractor found
                suitable_found = True

                # Extract information from URL and process it
                videos = ie.extract(url)
                for video in videos or []:
                    video['extractor'] = ie.IE_NAME
                    try:
                        self.increment_downloads()
                        self.process_info(video)
                    except UnavailableVideoError:
                        self.trouble(u'\nERROR: unable to download video')

                # Suitable InfoExtractor had been found; go to next URL
                break

            if not suitable_found:
                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run the postprocessing chain on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        for pp in self._pps:
            info = pp.run(info)
            # A post processor returning None stops the chain.
            if info is None:
                break

    def _download_with_rtmpdump(self, filename, url, player_url):
        """Download an RTMP stream with the external rtmpdump program.

        Returns True on success and False (after reporting trouble)
        otherwise.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            devnull = open(os.path.devnull, 'w')
            try:
                subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
            finally:
                devnull.close()
        except (OSError, IOError):
            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '-q']
        if player_url is not None:
            basic_args += ['-W', player_url]
        basic_args += ['-r', url, '-o', tmpfilename]
        args = basic_args + (['-e', '-k', '1'] if self.params.get('continuedl', False) else [])
        if self.params.get('verbose', False):
            try:
                import pipes
                shell_quote = lambda args: ' '.join(map(pipes.quote, args))
            except ImportError:
                shell_quote = repr
            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
        retval = subprocess.call(args)
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0)  # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + (['-k', '1'] if retval == 1 else []))
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            # No progress after an error exit: give up.
            if prevsize == cursize and retval == 1:
                break
            # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        if retval == 0:
            self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
            self.try_rename(tmpfilename, filename)
            return True
        else:
            self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
            return False

    def _do_download(self, filename, info_dict):
        """Download info_dict['url'] into filename.

        Returns True on success (or if the file is already complete) and
        False after a reported failure. Raises ContentTooShortError when
        fewer bytes than announced were received; unexpected HTTP errors
        propagate to the caller.
        """
        url = info_dict['url']
        player_url = info_dict.get('player_url', None)

        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            return True

        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url, player_url)

        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        # basic_request never gets a Range header; it is used to start over.
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

        if self.params.get('test', False):
            request.add_header('Range', 'bytes=0-10240')

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                # NOTE: a pre-opened info_dict['urlhandle'] is not reused.
                # The previous code read it and immediately overwrote it
                # with this call, so a fresh connection (carrying the
                # Range header set above) has always been what is used.
                data = compat_urllib_request.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as inner_err:
                        if inner_err.code < 500 or inner_err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            return True
                        else:
                            # The length does not match, we start the download over.
                            # Forget the partial length so totals and progress
                            # reflect the full download.
                            self.report_unable_to_resume()
                            resume_len = 0
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.trouble(u'ERROR: giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            # The server reports only the remaining bytes when resuming.
            data_len = int(data_len) + resume_len
        data_len_str = self.format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.trouble(u'\nERROR: unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
            else:
                percent_str = self.calc_percent(byte_counter, data_len)
                eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        if stream is None:
            self.trouble(u'\nERROR: Did not get any data blocks')
            return False
        stream.close()
        self.report_finish()
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        return True