Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/FileDownloader.py

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3
   4 from __future__ import absolute_import
   5
   6 import math
   7 import os
   8 import re
   9 import socket
  10 import subprocess
  11 import sys
  12 import time
  13 import traceback
  14
  15 if os.name == 'nt':
  16     import ctypes
  17
  18 from .utils import *
  19
  20
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    usenetrc:          Use netrc for authentication instead.
    quiet:             Do not print messages to stdout.
    verbose:           Print a stack trace on errors and the rtmpdump
                       command line.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing the format.
    simulate:          Do not download the video files.
    skip_download:     Write the requested side files but skip the
                       download itself.
    max_downloads:     Raise MaxDownloadsReached once the download counter
                       exceeds this number.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names.
    ignoreerrors:      Do not stop on download errors.
    ratelimit:         Download speed limit, in bytes/sec.
    nooverwrites:      Prevent overwriting files.
    retries:           Number of times to retry for HTTP error 5xx.
    buffersize:        Size of download buffer in bytes.
    noresizebuffer:    Do not automatically resize the download buffer.
    continuedl:        Try to continue downloads if possible.
    noprogress:        Do not print the progress bar.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    nopart:            Do not use temporary .part files.
    updatetime:        Use the Last-modified header to set output file timestamps.
    writedescription:  Write the video description to a .description file.
    writeinfojson:     Write the video metadata to a .info.json file.
    writesubtitles:    Write the video subtitles to a .srt file.
    subtitleslang:     Language of the subtitles to download.
    test:              Download only first bytes to test the downloader.
    """

    # Class-level placeholders; __init__ rebinds every one of them on the
    # instance, so the two list objects below are not shared by instances
    # that were created through the constructor.
    params = None              # options dictionary
    _ies = []                  # registered InfoExtractors, in order
    _pps = []                  # registered PostProcessors, in order
    _download_retcode = None   # value returned by download()
    _num_downloads = None      # counter used for %(autonumber)s
    _screen_file = None        # stream that to_screen() writes to
  91
  92     def __init__(self, params):
  93         """Create a FileDownloader object with the given options."""
  94         self._ies = []
  95         self._pps = []
  96         self._download_retcode = 0
  97         self._num_downloads = 0
  98         self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
  99         self.params = params
 100
 101         if '%(stitle)s' in self.params['outtmpl']:
 102             self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
 103
 104     @staticmethod
 105     def format_bytes(bytes):
 106         if bytes is None:
 107             return 'N/A'
 108         if type(bytes) is str:
 109             bytes = float(bytes)
 110         if bytes == 0.0:
 111             exponent = 0
 112         else:
 113             exponent = int(math.log(bytes, 1024.0))
 114         suffix = 'bkMGTPEZY'[exponent]
 115         converted = float(bytes) / float(1024 ** exponent)
 116         return '%.2f%s' % (converted, suffix)
 117
 118     @staticmethod
 119     def calc_percent(byte_counter, data_len):
 120         if data_len is None:
 121             return '---.-%'
 122         return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 123
 124     @staticmethod
 125     def calc_eta(start, now, total, current):
 126         if total is None:
 127             return '--:--'
 128         dif = now - start
 129         if current == 0 or dif < 0.001: # One millisecond
 130             return '--:--'
 131         rate = float(current) / dif
 132         eta = int((float(total) - float(current)) / rate)
 133         (eta_mins, eta_secs) = divmod(eta, 60)
 134         if eta_mins > 99:
 135             return '--:--'
 136         return '%02d:%02d' % (eta_mins, eta_secs)
 137
 138     @staticmethod
 139     def calc_speed(start, now, bytes):
 140         dif = now - start
 141         if bytes == 0 or dif < 0.001: # One millisecond
 142             return '%10s' % '---b/s'
 143         return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 144
 145     @staticmethod
 146     def best_block_size(elapsed_time, bytes):
 147         new_min = max(bytes / 2.0, 1.0)
 148         new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 149         if elapsed_time < 0.001:
 150             return int(new_max)
 151         rate = bytes / elapsed_time
 152         if rate > new_max:
 153             return int(new_max)
 154         if rate < new_min:
 155             return int(new_min)
 156         return int(rate)
 157
 158     @staticmethod
 159     def parse_bytes(bytestr):
 160         """Parse a string indicating a byte quantity into an integer."""
 161         matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 162         if matchobj is None:
 163             return None
 164         number = float(matchobj.group(1))
 165         multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 166         return int(round(number * multiplier))
 167
 168     def add_info_extractor(self, ie):
 169         """Add an InfoExtractor object to the end of the list."""
 170         self._ies.append(ie)
 171         ie.set_downloader(self)
 172
 173     def add_post_processor(self, pp):
 174         """Add a PostProcessor object to the end of the chain."""
 175         self._pps.append(pp)
 176         pp.set_downloader(self)
 177
 178     def to_screen(self, message, skip_eol=False):
 179         """Print message to stdout if not in quiet mode."""
 180         assert type(message) == type(u'')
 181         if not self.params.get('quiet', False):
 182             terminator = [u'\n', u''][skip_eol]
 183             output = message + terminator
 184             if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 185                 output = output.encode(preferredencoding(), 'ignore')
 186             self._screen_file.write(output)
 187             self._screen_file.flush()
 188
 189     def to_stderr(self, message):
 190         """Print message to stderr."""
 191         assert type(message) == type(u'')
 192         output = message + u'\n'
 193         if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
 194             output = output.encode(preferredencoding())
 195         sys.stderr.write(output)
 196
 197     def to_cons_title(self, message):
 198         """Set console/terminal window title to message."""
 199         if not self.params.get('consoletitle', False):
 200             return
 201         if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
 202             # c_wchar_p() might not be necessary if `message` is
 203             # already of type unicode()
 204             ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
 205         elif 'TERM' in os.environ:
 206             sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 207
 208     def fixed_template(self):
 209         """Checks if the output template is fixed."""
 210         return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
 211
 212     def trouble(self, message=None):
 213         """Determine action to take when a download problem appears.
 214
 215         Depending on if the downloader has been configured to ignore
 216         download errors or not, this method may throw an exception or
 217         not when errors are found, after printing the message.
 218         """
 219         if message is not None:
 220             self.to_stderr(message)
 221         if self.params.get('verbose'):
 222             self.to_stderr(u''.join(traceback.format_list(traceback.extract_stack())))
 223         if not self.params.get('ignoreerrors', False):
 224             raise DownloadError(message)
 225         self._download_retcode = 1
 226
 227     def slow_down(self, start_time, byte_counter):
 228         """Sleep if the download speed is over the rate limit."""
 229         rate_limit = self.params.get('ratelimit', None)
 230         if rate_limit is None or byte_counter == 0:
 231             return
 232         now = time.time()
 233         elapsed = now - start_time
 234         if elapsed <= 0.0:
 235             return
 236         speed = float(byte_counter) / elapsed
 237         if speed > rate_limit:
 238             time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 239
 240     def temp_name(self, filename):
 241         """Returns a temporary filename for the given filename."""
 242         if self.params.get('nopart', False) or filename == u'-' or \
 243                 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
 244             return filename
 245         return filename + u'.part'
 246
 247     def undo_temp_name(self, filename):
 248         if filename.endswith(u'.part'):
 249             return filename[:-len(u'.part')]
 250         return filename
 251
 252     def try_rename(self, old_filename, new_filename):
 253         try:
 254             if old_filename == new_filename:
 255                 return
 256             os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
 257         except (IOError, OSError) as err:
 258             self.trouble(u'ERROR: unable to rename file')
 259
 260     def try_utime(self, filename, last_modified_hdr):
 261         """Try to set the last-modified time of the given file."""
 262         if last_modified_hdr is None:
 263             return
 264         if not os.path.isfile(encodeFilename(filename)):
 265             return
 266         timestr = last_modified_hdr
 267         if timestr is None:
 268             return
 269         filetime = timeconvert(timestr)
 270         if filetime is None:
 271             return filetime
 272         try:
 273             os.utime(filename, (time.time(), filetime))
 274         except:
 275             pass
 276         return filetime
 277
 278     def report_writedescription(self, descfn):
 279         """ Report that the description file is being written """
 280         self.to_screen(u'[info] Writing video description to: ' + descfn)
 281
 282     def report_writesubtitles(self, srtfn):
 283         """ Report that the subtitles file is being written """
 284         self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
 285
 286     def report_writeinfojson(self, infofn):
 287         """ Report that the metadata file has been written """
 288         self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
 289
 290     def report_destination(self, filename):
 291         """Report destination filename."""
 292         self.to_screen(u'[download] Destination: ' + filename)
 293
 294     def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 295         """Report download progress."""
 296         if self.params.get('noprogress', False):
 297             return
 298         self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
 299                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 300         self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
 301                 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
 302
 303     def report_resuming_byte(self, resume_len):
 304         """Report attempt to resume at given byte."""
 305         self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
 306
 307     def report_retry(self, count, retries):
 308         """Report retry in case of HTTP error 5xx"""
 309         self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 310
 311     def report_file_already_downloaded(self, file_name):
 312         """Report file has already been fully downloaded."""
 313         try:
 314             self.to_screen(u'[download] %s has already been downloaded' % file_name)
 315         except (UnicodeEncodeError) as err:
 316             self.to_screen(u'[download] The file has already been downloaded')
 317
 318     def report_unable_to_resume(self):
 319         """Report it was impossible to resume download."""
 320         self.to_screen(u'[download] Unable to resume')
 321
 322     def report_finish(self):
 323         """Report download finished."""
 324         if self.params.get('noprogress', False):
 325             self.to_screen(u'[download] Download completed')
 326         else:
 327             self.to_screen(u'')
 328
 329     def increment_downloads(self):
 330         """Increment the ordinal that assigns a number to each file."""
 331         self._num_downloads += 1
 332
 333     def prepare_filename(self, info_dict):
 334         """Generate the output filename."""
 335         try:
 336             template_dict = dict(info_dict)
 337
 338             template_dict['epoch'] = int(time.time())
 339             template_dict['autonumber'] = u'%05d' % self._num_downloads
 340
 341             sanitize = lambda k,v: sanitize_filename(
 342                 u'NA' if v is None else compat_str(v),
 343                 restricted=self.params.get('restrictfilenames'),
 344                 is_id=(k==u'id'))
 345             template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
 346
 347             filename = self.params['outtmpl'] % template_dict
 348             return filename
 349         except (ValueError, KeyError) as err:
 350             self.trouble(u'ERROR: invalid system charset or erroneous output template')
 351             return None
 352
 353     def _match_entry(self, info_dict):
 354         """ Returns None iff the file should be downloaded """
 355
 356         title = info_dict['title']
 357         matchtitle = self.params.get('matchtitle', False)
 358         if matchtitle:
 359             matchtitle = matchtitle.decode('utf8')
 360             if not re.search(matchtitle, title, re.IGNORECASE):
 361                 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
 362         rejecttitle = self.params.get('rejecttitle', False)
 363         if rejecttitle:
 364             rejecttitle = rejecttitle.decode('utf8')
 365             if re.search(rejecttitle, title, re.IGNORECASE):
 366                 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
 367         return None
 368
 369     def process_info(self, info_dict):
 370         """Process a single dictionary returned by an InfoExtractor."""
 371
 372         # Keep for backwards compatibility
 373         info_dict['stitle'] = info_dict['title']
 374
 375         if not 'format' in info_dict:
 376             info_dict['format'] = info_dict['ext']
 377
 378         reason = self._match_entry(info_dict)
 379         if reason is not None:
 380             self.to_screen(u'[download] ' + reason)
 381             return
 382
 383         max_downloads = self.params.get('max_downloads')
 384         if max_downloads is not None:
 385             if self._num_downloads > int(max_downloads):
 386                 raise MaxDownloadsReached()
 387
 388         filename = self.prepare_filename(info_dict)
 389
 390         # Forced printings
 391         if self.params.get('forcetitle', False):
 392             compat_print(info_dict['title'])
 393         if self.params.get('forceurl', False):
 394             compat_print(info_dict['url'])
 395         if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
 396             compat_print(info_dict['thumbnail'])
 397         if self.params.get('forcedescription', False) and 'description' in info_dict:
 398             compat_print(info_dict['description'])
 399         if self.params.get('forcefilename', False) and filename is not None:
 400             compat_print(filename)
 401         if self.params.get('forceformat', False):
 402             compat_print(info_dict['format'])
 403
 404         # Do nothing else if in simulate mode
 405         if self.params.get('simulate', False):
 406             return
 407
 408         if filename is None:
 409             return
 410
 411         try:
 412             dn = os.path.dirname(encodeFilename(filename))
 413             if dn != '' and not os.path.exists(dn): # dn is already encoded
 414                 os.makedirs(dn)
 415         except (OSError, IOError) as err:
 416             self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
 417             return
 418
 419         if self.params.get('writedescription', False):
 420             try:
 421                 descfn = filename + u'.description'
 422                 self.report_writedescription(descfn)
 423                 descfile = open(encodeFilename(descfn), 'wb')
 424                 try:
 425                     descfile.write(info_dict['description'].encode('utf-8'))
 426                 finally:
 427                     descfile.close()
 428             except (OSError, IOError):
 429                 self.trouble(u'ERROR: Cannot write description file ' + descfn)
 430                 return
 431
 432         if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
 433             # subtitles download errors are already managed as troubles in relevant IE
 434             # that way it will silently go on when used with unsupporting IE
 435             try:
 436                 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
 437                 self.report_writesubtitles(srtfn)
 438                 srtfile = open(encodeFilename(srtfn), 'wb')
 439                 try:
 440                     srtfile.write(info_dict['subtitles'].encode('utf-8'))
 441                 finally:
 442                     srtfile.close()
 443             except (OSError, IOError):
 444                 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
 445                 return
 446
 447         if self.params.get('writeinfojson', False):
 448             infofn = filename + u'.info.json'
 449             self.report_writeinfojson(infofn)
 450             try:
 451                 json.dump
 452             except (NameError,AttributeError):
 453                 self.trouble(u'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.')
 454                 return
 455             try:
 456                 infof = open(encodeFilename(infofn), 'wb')
 457                 try:
 458                     json_info_dict = dict((k,v) for k,v in info_dict.iteritems() if not k in ('urlhandle',))
 459                     json.dump(json_info_dict, infof)
 460                 finally:
 461                     infof.close()
 462             except (OSError, IOError):
 463                 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
 464                 return
 465
 466         if not self.params.get('skip_download', False):
 467             if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
 468                 success = True
 469             else:
 470                 try:
 471                     success = self._do_download(filename, info_dict)
 472                 except (OSError, IOError) as err:
 473                     raise UnavailableVideoError()
 474                 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
 475                     self.trouble(u'ERROR: unable to download video data: %s' % str(err))
 476                     return
 477                 except (ContentTooShortError, ) as err:
 478                     self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 479                     return
 480
 481             if success:
 482                 try:
 483                     self.post_process(filename, info_dict)
 484                 except (PostProcessingError) as err:
 485                     self.trouble(u'ERROR: postprocessing: %s' % str(err))
 486                     return
 487
 488     def download(self, url_list):
 489         """Download a given list of URLs."""
 490         if len(url_list) > 1 and self.fixed_template():
 491             raise SameFileError(self.params['outtmpl'])
 492
 493         for url in url_list:
 494             suitable_found = False
 495             for ie in self._ies:
 496                 # Go to next InfoExtractor if not suitable
 497                 if not ie.suitable(url):
 498                     continue
 499
 500                 # Warn if the _WORKING attribute is False
 501                 if not ie.working():
 502                     self.trouble(u'WARNING: the program functionality for this site has been marked as broken, '
 503                                  u'and will probably not work. If you want to go on, use the -i option.')
 504
 505                 # Suitable InfoExtractor found
 506                 suitable_found = True
 507
 508                 # Extract information from URL and process it
 509                 videos = ie.extract(url)
 510                 for video in videos or []:
 511                     video['extractor'] = ie.IE_NAME
 512                     try:
 513                         self.increment_downloads()
 514                         self.process_info(video)
 515                     except UnavailableVideoError:
 516                         self.trouble(u'\nERROR: unable to download video')
 517
 518                 # Suitable InfoExtractor had been found; go to next URL
 519                 break
 520
 521             if not suitable_found:
 522                 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
 523
 524         return self._download_retcode
 525
 526     def post_process(self, filename, ie_info):
 527         """Run the postprocessing chain on the given file."""
 528         info = dict(ie_info)
 529         info['filepath'] = filename
 530         for pp in self._pps:
 531             info = pp.run(info)
 532             if info is None:
 533                 break
 534
 535     def _download_with_rtmpdump(self, filename, url, player_url):
 536         self.report_destination(filename)
 537         tmpfilename = self.temp_name(filename)
 538
 539         # Check for rtmpdump first
 540         try:
 541             subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 542         except (OSError, IOError):
 543             self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 544             return False
 545
 546         # Download using rtmpdump. rtmpdump returns exit code 2 when
 547         # the connection was interrumpted and resuming appears to be
 548         # possible. This is part of rtmpdump's normal usage, AFAIK.
 549         basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
 550         args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
 551         if self.params.get('verbose', False):
 552             try:
 553                 import pipes
 554                 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
 555             except ImportError:
 556                 shell_quote = repr
 557             self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
 558         retval = subprocess.call(args)
 559         while retval == 2 or retval == 1:
 560             prevsize = os.path.getsize(encodeFilename(tmpfilename))
 561             self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
 562             time.sleep(5.0) # This seems to be needed
 563             retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 564             cursize = os.path.getsize(encodeFilename(tmpfilename))
 565             if prevsize == cursize and retval == 1:
 566                 break
 567              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
 568             if prevsize == cursize and retval == 2 and cursize > 1024:
 569                 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
 570                 retval = 0
 571                 break
 572         if retval == 0:
 573             self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(encodeFilename(tmpfilename)))
 574             self.try_rename(tmpfilename, filename)
 575             return True
 576         else:
 577             self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
 578             return False
 579
 580     def _do_download(self, filename, info_dict):
 581         url = info_dict['url']
 582         player_url = info_dict.get('player_url', None)
 583
 584         # Check file already present
 585         if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
 586             self.report_file_already_downloaded(filename)
 587             return True
 588
 589         # Attempt to download using rtmpdump
 590         if url.startswith('rtmp'):
 591             return self._download_with_rtmpdump(filename, url, player_url)
 592
 593         tmpfilename = self.temp_name(filename)
 594         stream = None
 595
 596         # Do not include the Accept-Encoding header
 597         headers = {'Youtubedl-no-compression': 'True'}
 598         basic_request = compat_urllib_request.Request(url, None, headers)
 599         request = compat_urllib_request.Request(url, None, headers)
 600
 601         if self.params.get('test', False):
 602             request.add_header('Range','bytes=0-10240')
 603
 604         # Establish possible resume length
 605         if os.path.isfile(encodeFilename(tmpfilename)):
 606             resume_len = os.path.getsize(encodeFilename(tmpfilename))
 607         else:
 608             resume_len = 0
 609
 610         open_mode = 'wb'
 611         if resume_len != 0:
 612             if self.params.get('continuedl', False):
 613                 self.report_resuming_byte(resume_len)
 614                 request.add_header('Range','bytes=%d-' % resume_len)
 615                 open_mode = 'ab'
 616             else:
 617                 resume_len = 0
 618
 619         count = 0
 620         retries = self.params.get('retries', 0)
 621         while count <= retries:
 622             # Establish connection
 623             try:
 624                 if count == 0 and 'urlhandle' in info_dict:
 625                     data = info_dict['urlhandle']
 626                 data = compat_urllib_request.urlopen(request)
 627                 break
 628             except (compat_urllib_error.HTTPError, ) as err:
 629                 if (err.code < 500 or err.code >= 600) and err.code != 416:
 630                     # Unexpected HTTP error
 631                     raise
 632                 elif err.code == 416:
 633                     # Unable to resume (requested range not satisfiable)
 634                     try:
 635                         # Open the connection again without the range header
 636                         data = compat_urllib_request.urlopen(basic_request)
 637                         content_length = data.info()['Content-Length']
 638                     except (compat_urllib_error.HTTPError, ) as err:
 639                         if err.code < 500 or err.code >= 600:
 640                             raise
 641                     else:
 642                         # Examine the reported length
 643                         if (content_length is not None and
 644                                 (resume_len - 100 < int(content_length) < resume_len + 100)):
 645                             # The file had already been fully downloaded.
 646                             # Explanation to the above condition: in issue #175 it was revealed that
 647                             # YouTube sometimes adds or removes a few bytes from the end of the file,
 648                             # changing the file size slightly and causing problems for some users. So
 649                             # I decided to implement a suggested change and consider the file
 650                             # completely downloaded if the file size differs less than 100 bytes from
 651                             # the one in the hard drive.
 652                             self.report_file_already_downloaded(filename)
 653                             self.try_rename(tmpfilename, filename)
 654                             return True
 655                         else:
 656                             # The length does not match, we start the download over
 657                             self.report_unable_to_resume()
 658                             open_mode = 'wb'
 659                             break
 660             # Retry
 661             count += 1
 662             if count <= retries:
 663                 self.report_retry(count, retries)
 664
 665         if count > retries:
 666             self.trouble(u'ERROR: giving up after %s retries' % retries)
 667             return False
 668
 669         data_len = data.info().get('Content-length', None)
 670         if data_len is not None:
 671             data_len = int(data_len) + resume_len
 672         data_len_str = self.format_bytes(data_len)
 673         byte_counter = 0 + resume_len
 674         block_size = self.params.get('buffersize', 1024)
 675         start = time.time()
 676         while True:
 677             # Download and write
 678             before = time.time()
 679             data_block = data.read(block_size)
 680             after = time.time()
 681             if len(data_block) == 0:
 682                 break
 683             byte_counter += len(data_block)
 684
 685             # Open file just in time
 686             if stream is None:
 687                 try:
 688                     (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
 689                     assert stream is not None
 690                     filename = self.undo_temp_name(tmpfilename)
 691                     self.report_destination(filename)
 692                 except (OSError, IOError) as err:
 693                     self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
 694                     return False
 695             try:
 696                 stream.write(data_block)
 697             except (IOError, OSError) as err:
 698                 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
 699                 return False
 700             if not self.params.get('noresizebuffer', False):
 701                 block_size = self.best_block_size(after - before, len(data_block))
 702
 703             # Progress message
 704             speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
 705             if data_len is None:
 706                 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
 707             else:
 708                 percent_str = self.calc_percent(byte_counter, data_len)
 709                 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
 710                 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 711
 712             # Apply rate limit
 713             self.slow_down(start, byte_counter - resume_len)
 714
 715         if stream is None:
 716             self.trouble(u'\nERROR: Did not get any data blocks')
 717             return False
 718         stream.close()
 719         self.report_finish()
 720         if data_len is not None and byte_counter != data_len:
 721             raise ContentTooShortError(byte_counter, int(data_len))
 722         self.try_rename(tmpfilename, filename)
 723
 724         # Update file modification time
 725         if self.params.get('updatetime', True):
 726             info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
 727
 728         return True