]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/FileDownloader.py
8a3bdf21b55ac39942cd1c421079750e9c44f9be
   2 # -*- coding: utf-8 -*- 
   4 from __future__ 
import absolute_import
 
  21 from .InfoExtractors 
import get_info_extractor
 
  24 class FileDownloader(object): 
  25     """File Downloader class. 
  27     File downloader objects are the ones responsible of downloading the 
  28     actual video file and writing it to disk if the user has requested 
  29     it, among some other tasks. In most cases there should be one per 
  30     program. As, given a video URL, the downloader doesn't know how to 
  31     extract all the needed information, task that InfoExtractors do, it 
  32     has to pass the URL to one of them. 
  34     For this, file downloader objects have a method that allows 
  35     InfoExtractors to be registered in a given order. When it is passed 
  36     a URL, the file downloader handles it to the first InfoExtractor it 
  37     finds that reports being able to handle it. The InfoExtractor extracts 
  38     all the information about the video or videos the URL refers to, and 
  39     asks the FileDownloader to process the video information, possibly 
  40     downloading the video. 
  42     File downloaders accept a lot of parameters. In order not to saturate 
  43     the object constructor with arguments, it receives a dictionary of 
  44     options instead. These options are available through the params 
  45     attribute for the InfoExtractors to use. The FileDownloader also 
  46     registers itself as the downloader in charge for the InfoExtractors 
  47     that are added to it, so this is a "mutual registration". 
  51     username:          Username for authentication purposes. 
  52     password:          Password for authentication purposes. 
  53     usenetrc:          Use netrc for authentication instead. 
  54     quiet:             Do not print messages to stdout. 
  55     forceurl:          Force printing final URL. 
  56     forcetitle:        Force printing title. 
  57     forceid:           Force printing ID. 
  58     forcethumbnail:    Force printing thumbnail URL. 
  59     forcedescription:  Force printing description. 
  60     forcefilename:     Force printing final filename. 
  61     simulate:          Do not download the video files. 
  62     format:            Video format code. 
  63     format_limit:      Highest quality format to try. 
  64     outtmpl:           Template for output names. 
  65     restrictfilenames: Do not allow "&" and spaces in file names 
  66     ignoreerrors:      Do not stop on download errors. 
  67     ratelimit:         Download speed limit, in bytes/sec. 
  68     nooverwrites:      Prevent overwriting files. 
  69     retries:           Number of times to retry for HTTP error 5xx 
  70     buffersize:        Size of download buffer in bytes. 
  71     noresizebuffer:    Do not automatically resize the download buffer. 
  72     continuedl:        Try to continue downloads if possible. 
  73     noprogress:        Do not print the progress bar. 
  74     playliststart:     Playlist item to start at. 
  75     playlistend:       Playlist item to end at. 
  76     matchtitle:        Download only matching titles. 
  77     rejecttitle:       Reject downloads for matching titles. 
  78     logtostderr:       Log messages to stderr instead of stdout. 
  79     consoletitle:      Display progress in console window's titlebar. 
  80     nopart:            Do not use temporary .part files. 
  81     updatetime:        Use the Last-modified header to set output file timestamps. 
  82     writedescription:  Write the video description to a .description file 
  83     writeinfojson:     Write the video description to a .info.json file 
  84     writethumbnail:    Write the thumbnail image to a file 
  85     writesubtitles:    Write the video subtitles to a file 
  86     allsubtitles:      Downloads all the subtitles of the video 
  87     listsubtitles:     Lists all available subtitles for the video 
  88     subtitlesformat:   Subtitle format [sbv/srt] (default=srt) 
  89     subtitleslang:     Language of the subtitles to download 
  90     test:              Download only first bytes to test the downloader. 
  91     keepvideo:         Keep the video file after post-processing 
  92     min_filesize:      Skip files smaller than this size 
  93     max_filesize:      Skip files larger than this size 
  94     daterange:         A DateRange object, download only if the upload_date is in the range. 
  95     skip_download:     Skip the actual download of the video file 
 101     _download_retcode 
= None 
 102     _num_downloads 
= None 
 105     def __init__(self
, params
): 
 106         """Create a FileDownloader object with the given options.""" 
 109         self
._progress
_hooks 
= [] 
 110         self
._download
_retcode 
= 0 
 111         self
._num
_downloads 
= 0 
 112         self
._screen
_file 
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)] 
 115         if '%(stitle)s' in self
.params
['outtmpl']: 
 116             self
.report_warning(u
'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') 
 119     def format_bytes(bytes): 
 122         if type(bytes) is str: 
 127             exponent 
= int(math
.log(bytes, 1024.0)) 
 128         suffix 
= ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent
] 
 129         converted 
= float(bytes) / float(1024 ** exponent
) 
 130         return '%.2f%s' % (converted
, suffix
) 
 133     def calc_percent(byte_counter
, data_len
): 
 136         return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0)) 
 139     def calc_eta(start
, now
, total
, current
): 
 143         if current 
== 0 or dif 
< 0.001: # One millisecond 
 145         rate 
= float(current
) / dif
 
 146         eta 
= int((float(total
) - float(current
)) / rate
) 
 147         (eta_mins
, eta_secs
) = divmod(eta
, 60) 
 150         return '%02d:%02d' % (eta_mins
, eta_secs
) 
 153     def calc_speed(start
, now
, bytes): 
 155         if bytes == 0 or dif 
< 0.001: # One millisecond 
 156             return '%10s' % '---b/s' 
 157         return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
)) 
 160     def best_block_size(elapsed_time
, bytes): 
 161         new_min 
= max(bytes / 2.0, 1.0) 
 162         new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
 163         if elapsed_time 
< 0.001: 
 165         rate 
= bytes / elapsed_time
 
 173     def parse_bytes(bytestr
): 
 174         """Parse a string indicating a byte quantity into an integer.""" 
 175         matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 178         number 
= float(matchobj
.group(1)) 
 179         multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 180         return int(round(number 
* multiplier
)) 
 182     def add_info_extractor(self
, ie
): 
 183         """Add an InfoExtractor object to the end of the list.""" 
 185         ie
.set_downloader(self
) 
 187     def add_post_processor(self
, pp
): 
 188         """Add a PostProcessor object to the end of the chain.""" 
 190         pp
.set_downloader(self
) 
 192     def to_screen(self
, message
, skip_eol
=False): 
 193         """Print message to stdout if not in quiet mode.""" 
 194         assert type(message
) == type(u
'') 
 195         if not self
.params
.get('quiet', False): 
 196             terminator 
= [u
'\n', u
''][skip_eol
] 
 197             output 
= message 
+ terminator
 
 198             if 'b' in getattr(self
._screen
_file
, 'mode', '') or sys
.version_info
[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr 
 199                 output 
= output
.encode(preferredencoding(), 'ignore') 
 200             self
._screen
_file
.write(output
) 
 201             self
._screen
_file
.flush() 
 203     def to_stderr(self
, message
): 
 204         """Print message to stderr.""" 
 205         assert type(message
) == type(u
'') 
 206         output 
= message 
+ u
'\n' 
 207         if 'b' in getattr(self
._screen
_file
, 'mode', '') or sys
.version_info
[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr 
 208             output 
= output
.encode(preferredencoding()) 
 209         sys
.stderr
.write(output
) 
 211     def to_cons_title(self
, message
): 
 212         """Set console/terminal window title to message.""" 
 213         if not self
.params
.get('consoletitle', False): 
 215         if os
.name 
== 'nt' and ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 216             # c_wchar_p() might not be necessary if `message` is 
 217             # already of type unicode() 
 218             ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 219         elif 'TERM' in os
.environ
: 
 220             self
.to_screen('\033]0;%s\007' % message
, skip_eol
=True) 
 222     def fixed_template(self
): 
 223         """Checks if the output template is fixed.""" 
 224         return (re
.search(u
'(?u)%\\(.+?\\)s', self
.params
['outtmpl']) is None) 
 226     def trouble(self
, message
=None, tb
=None): 
 227         """Determine action to take when a download problem appears. 
 229         Depending on if the downloader has been configured to ignore 
 230         download errors or not, this method may throw an exception or 
 231         not when errors are found, after printing the message. 
 233         tb, if given, is additional traceback information. 
 235         if message 
is not None: 
 236             self
.to_stderr(message
) 
 237         if self
.params
.get('verbose'): 
 239                 if sys
.exc_info()[0]:  # if .trouble has been called from an except block 
 241                     if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]: 
 242                         tb 
+= u
''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
)) 
 243                     tb 
+= compat_str(traceback
.format_exc()) 
 245                     tb_data 
= traceback
.format_list(traceback
.extract_stack()) 
 246                     tb 
= u
''.join(tb_data
) 
 248         if not self
.params
.get('ignoreerrors', False): 
 249             if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]: 
 250                 exc_info 
= sys
.exc_info()[1].exc_info
 
 252                 exc_info 
= sys
.exc_info() 
 253             raise DownloadError(message
, exc_info
) 
 254         self
._download
_retcode 
= 1 
 256     def report_warning(self
, message
): 
 258         Print the message to stderr, it will be prefixed with 'WARNING:' 
 259         If stderr is a tty file the 'WARNING:' will be colored 
 261         if sys
.stderr
.isatty() and os
.name 
!= 'nt': 
 262             _msg_header
=u
'\033[0;33mWARNING:\033[0m' 
 264             _msg_header
=u
'WARNING:' 
 265         warning_message
=u
'%s %s' % (_msg_header
,message
) 
 266         self
.to_stderr(warning_message
) 
 268     def report_error(self
, message
, tb
=None): 
 270         Do the same as trouble, but prefixes the message with 'ERROR:', colored 
 271         in red if stderr is a tty file. 
 273         if sys
.stderr
.isatty() and os
.name 
!= 'nt': 
 274             _msg_header 
= u
'\033[0;31mERROR:\033[0m' 
 276             _msg_header 
= u
'ERROR:' 
 277         error_message 
= u
'%s %s' % (_msg_header
, message
) 
 278         self
.trouble(error_message
, tb
) 
 280     def slow_down(self
, start_time
, byte_counter
): 
 281         """Sleep if the download speed is over the rate limit.""" 
 282         rate_limit 
= self
.params
.get('ratelimit', None) 
 283         if rate_limit 
is None or byte_counter 
== 0: 
 286         elapsed 
= now 
- start_time
 
 289         speed 
= float(byte_counter
) / elapsed
 
 290         if speed 
> rate_limit
: 
 291             time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 293     def temp_name(self
, filename
): 
 294         """Returns a temporary filename for the given filename.""" 
 295         if self
.params
.get('nopart', False) or filename 
== u
'-' or \
 
 296                 (os
.path
.exists(encodeFilename(filename
)) and not os
.path
.isfile(encodeFilename(filename
))): 
 298         return filename 
+ u
'.part' 
 300     def undo_temp_name(self
, filename
): 
 301         if filename
.endswith(u
'.part'): 
 302             return filename
[:-len(u
'.part')] 
 305     def try_rename(self
, old_filename
, new_filename
): 
 307             if old_filename 
== new_filename
: 
 309             os
.rename(encodeFilename(old_filename
), encodeFilename(new_filename
)) 
 310         except (IOError, OSError) as err
: 
 311             self
.report_error(u
'unable to rename file') 
 313     def try_utime(self
, filename
, last_modified_hdr
): 
 314         """Try to set the last-modified time of the given file.""" 
 315         if last_modified_hdr 
is None: 
 317         if not os
.path
.isfile(encodeFilename(filename
)): 
 319         timestr 
= last_modified_hdr
 
 322         filetime 
= timeconvert(timestr
) 
 326             os
.utime(filename
, (time
.time(), filetime
)) 
 331     def report_writedescription(self
, descfn
): 
 332         """ Report that the description file is being written """ 
 333         self
.to_screen(u
'[info] Writing video description to: ' + descfn
) 
 335     def report_writesubtitles(self
, sub_filename
): 
 336         """ Report that the subtitles file is being written """ 
 337         self
.to_screen(u
'[info] Writing video subtitles to: ' + sub_filename
) 
 339     def report_writeinfojson(self
, infofn
): 
 340         """ Report that the metadata file has been written """ 
 341         self
.to_screen(u
'[info] Video description metadata as JSON to: ' + infofn
) 
 343     def report_destination(self
, filename
): 
 344         """Report destination filename.""" 
 345         self
.to_screen(u
'[download] Destination: ' + filename
) 
 347     def report_progress(self
, percent_str
, data_len_str
, speed_str
, eta_str
): 
 348         """Report download progress.""" 
 349         if self
.params
.get('noprogress', False): 
 351         clear_line 
= (u
'\x1b[K' if sys
.stderr
.isatty() and os
.name 
!= 'nt' else u
'') 
 352         if self
.params
.get('progress_with_newline', False): 
 353             self
.to_screen(u
'[download] %s of %s at %s ETA %s' % 
 354                 (percent_str
, data_len_str
, speed_str
, eta_str
)) 
 356             self
.to_screen(u
'\r%s[download] %s of %s at %s ETA %s' % 
 357                 (clear_line
, percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 358         self
.to_cons_title(u
'youtube-dl - %s of %s at %s ETA %s' % 
 359                 (percent_str
.strip(), data_len_str
.strip(), speed_str
.strip(), eta_str
.strip())) 
 361     def report_resuming_byte(self
, resume_len
): 
 362         """Report attempt to resume at given byte.""" 
 363         self
.to_screen(u
'[download] Resuming download at byte %s' % resume_len
) 
 365     def report_retry(self
, count
, retries
): 
 366         """Report retry in case of HTTP error 5xx""" 
 367         self
.to_screen(u
'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count
, retries
)) 
 369     def report_file_already_downloaded(self
, file_name
): 
 370         """Report file has already been fully downloaded.""" 
 372             self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 373         except (UnicodeEncodeError) as err
: 
 374             self
.to_screen(u
'[download] The file has already been downloaded') 
 376     def report_unable_to_resume(self
): 
 377         """Report it was impossible to resume download.""" 
 378         self
.to_screen(u
'[download] Unable to resume') 
 380     def report_finish(self
): 
 381         """Report download finished.""" 
 382         if self
.params
.get('noprogress', False): 
 383             self
.to_screen(u
'[download] Download completed') 
 387     def increment_downloads(self
): 
 388         """Increment the ordinal that assigns a number to each file.""" 
 389         self
._num
_downloads 
+= 1 
 391     def prepare_filename(self
, info_dict
): 
 392         """Generate the output filename.""" 
 394             template_dict 
= dict(info_dict
) 
 396             template_dict
['epoch'] = int(time
.time()) 
 397             autonumber_size 
= self
.params
.get('autonumber_size') 
 398             if autonumber_size 
is None: 
 400             autonumber_templ 
= u
'%0' + str(autonumber_size
) + u
'd' 
 401             template_dict
['autonumber'] = autonumber_templ 
% self
._num
_downloads
 
 402             if template_dict
['playlist_index'] is not None: 
 403                 template_dict
['playlist_index'] = u
'%05d' % template_dict
['playlist_index'] 
 405             sanitize 
= lambda k
,v
: sanitize_filename( 
 406                 u
'NA' if v 
is None else compat_str(v
), 
 407                 restricted
=self
.params
.get('restrictfilenames'), 
 409             template_dict 
= dict((k
, sanitize(k
, v
)) for k
,v 
in template_dict
.items()) 
 411             filename 
= self
.params
['outtmpl'] % template_dict
 
 413         except KeyError as err
: 
 414             self
.report_error(u
'Erroneous output template') 
 416         except ValueError as err
: 
 417             self
.report_error(u
'Insufficient system charset ' + repr(preferredencoding())) 
 420     def _match_entry(self
, info_dict
): 
 421         """ Returns None iff the file should be downloaded """ 
 423         title 
= info_dict
['title'] 
 424         matchtitle 
= self
.params
.get('matchtitle', False) 
 426             if not re
.search(matchtitle
, title
, re
.IGNORECASE
): 
 427                 return u
'[download] "' + title 
+ '" title did not match pattern "' + matchtitle 
+ '"' 
 428         rejecttitle 
= self
.params
.get('rejecttitle', False) 
 430             if re
.search(rejecttitle
, title
, re
.IGNORECASE
): 
 431                 return u
'"' + title 
+ '" title matched reject pattern "' + rejecttitle 
+ '"' 
 432         date 
= info_dict
.get('upload_date', None) 
 434             dateRange 
= self
.params
.get('daterange', DateRange()) 
 435             if date 
not in dateRange
: 
 436                 return u
'[download] %s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
) 
 439     def extract_info(self
, url
, download
=True, ie_key
=None): 
 441         Returns a list with a dictionary for each video we find. 
 442         If 'download', also downloads the videos. 
 446             ie 
= get_info_extractor(ie_key
)() 
 447             ie
.set_downloader(self
) 
 453             if not ie
.suitable(url
): 
 457                 self
.report_warning(u
'The program functionality for this site has been marked as broken, ' 
 458                                     u
'and will probably not work.') 
 461                 ie_result 
= ie
.extract(url
) 
 462                 if ie_result 
is None: # Finished already (backwards compatibility; listformats and friends should be moved here) 
 464                 if isinstance(ie_result
, list): 
 465                     # Backwards compatibility: old IE result format 
 467                         '_type': 'compat_list', 
 468                         'entries': ie_result
, 
 470                 if 'extractor' not in ie_result
: 
 471                     ie_result
['extractor'] = ie
.IE_NAME
 
 472                 return self
.process_ie_result(ie_result
, download
=download
) 
 473             except ExtractorError 
as de
: # An error we somewhat expected 
 474                 self
.report_error(compat_str(de
), de
.format_traceback()) 
 476             except Exception as e
: 
 477                 if self
.params
.get('ignoreerrors', False): 
 478                     self
.report_error(compat_str(e
), tb
=compat_str(traceback
.format_exc())) 
 483             self
.report_error(u
'no suitable InfoExtractor: %s' % url
) 
 485     def process_ie_result(self
, ie_result
, download
=True): 
 487         Take the result of the ie(may be modified) and resolve all unresolved 
 488         references (URLs, playlist items). 
 490         It will also download the videos if 'download'. 
 491         Returns the resolved ie_result. 
 494         result_type 
= ie_result
.get('_type', 'video') # If not given we suppose it's a video, support the default old system 
 495         if result_type 
== 'video': 
 496             if 'playlist' not in ie_result
: 
 497                 # It isn't part of a playlist 
 498                 ie_result
['playlist'] = None 
 499                 ie_result
['playlist_index'] = None 
 501                 self
.process_info(ie_result
) 
 503         elif result_type 
== 'url': 
 504             return self
.extract_info(ie_result
['url'], download
, ie_key
=ie_result
.get('ie_key')) 
 505         elif result_type 
== 'playlist': 
 506             # We process each entry in the playlist 
 507             playlist 
= ie_result
.get('title', None) or ie_result
.get('id', None) 
 508             self
.to_screen(u
'[download] Downloading playlist: %s'  % playlist
) 
 510             playlist_results 
= [] 
 512             n_all_entries 
= len(ie_result
['entries']) 
 513             playliststart 
= self
.params
.get('playliststart', 1) - 1 
 514             playlistend 
= self
.params
.get('playlistend', -1) 
 516             if playlistend 
== -1: 
 517                 entries 
= ie_result
['entries'][playliststart
:] 
 519                 entries 
= ie_result
['entries'][playliststart
:playlistend
] 
 521             n_entries 
= len(entries
) 
 523             self
.to_screen(u
"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % 
 524                 (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
)) 
 526             for i
,entry 
in enumerate(entries
,1): 
 527                 self
.to_screen(u
'[download] Downloading video #%s of %s' %(i
, n_entries
)) 
 528                 entry
['playlist'] = playlist
 
 529                 entry
['playlist_index'] = i 
+ playliststart
 
 530                 entry_result 
= self
.process_ie_result(entry
, download
=download
) 
 531                 playlist_results
.append(entry_result
) 
 532             ie_result
['entries'] = playlist_results
 
 534         elif result_type 
== 'compat_list': 
 536                 r
.setdefault('extractor', ie_result
['extractor']) 
 538             ie_result
['entries'] = [ 
 539                 self
.process_ie_result(_fixup(r
), download
=download
) 
 540                 for r 
in ie_result
['entries'] 
 544             raise Exception('Invalid result type: %s' % result_type
) 
 546     def process_info(self
, info_dict
): 
 547         """Process a single resolved IE result.""" 
 549         assert info_dict
.get('_type', 'video') == 'video' 
 550         #We increment the download the download count here to match the previous behaviour. 
 551         self
.increment_downloads() 
 553         info_dict
['fulltitle'] = info_dict
['title'] 
 554         if len(info_dict
['title']) > 200: 
 555             info_dict
['title'] = info_dict
['title'][:197] + u
'...' 
 557         # Keep for backwards compatibility 
 558         info_dict
['stitle'] = info_dict
['title'] 
 560         if not 'format' in info_dict
: 
 561             info_dict
['format'] = info_dict
['ext'] 
 563         reason 
= self
._match
_entry
(info_dict
) 
 564         if reason 
is not None: 
 565             self
.to_screen(u
'[download] ' + reason
) 
 568         max_downloads 
= self
.params
.get('max_downloads') 
 569         if max_downloads 
is not None: 
 570             if self
._num
_downloads 
> int(max_downloads
): 
 571                 raise MaxDownloadsReached() 
 573         filename 
= self
.prepare_filename(info_dict
) 
 576         if self
.params
.get('forcetitle', False): 
 577             compat_print(info_dict
['title']) 
 578         if self
.params
.get('forceid', False): 
 579             compat_print(info_dict
['id']) 
 580         if self
.params
.get('forceurl', False): 
 581             compat_print(info_dict
['url']) 
 582         if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
: 
 583             compat_print(info_dict
['thumbnail']) 
 584         if self
.params
.get('forcedescription', False) and 'description' in info_dict
: 
 585             compat_print(info_dict
['description']) 
 586         if self
.params
.get('forcefilename', False) and filename 
is not None: 
 587             compat_print(filename
) 
 588         if self
.params
.get('forceformat', False): 
 589             compat_print(info_dict
['format']) 
 591         # Do nothing else if in simulate mode 
 592         if self
.params
.get('simulate', False): 
 599             dn 
= os
.path
.dirname(encodeFilename(filename
)) 
 600             if dn 
!= '' and not os
.path
.exists(dn
): 
 602         except (OSError, IOError) as err
: 
 603             self
.report_error(u
'unable to create directory ' + compat_str(err
)) 
 606         if self
.params
.get('writedescription', False): 
 608                 descfn 
= filename 
+ u
'.description' 
 609                 self
.report_writedescription(descfn
) 
 610                 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
: 
 611                     descfile
.write(info_dict
['description']) 
 612             except (OSError, IOError): 
 613                 self
.report_error(u
'Cannot write description file ' + descfn
) 
 616         if self
.params
.get('writesubtitles', False) and 'subtitles' in info_dict 
and info_dict
['subtitles']: 
 617             # subtitles download errors are already managed as troubles in relevant IE 
 618             # that way it will silently go on when used with unsupporting IE 
 619             subtitle 
= info_dict
['subtitles'][0] 
 620             (sub_error
, sub_lang
, sub
) = subtitle
 
 621             sub_format 
= self
.params
.get('subtitlesformat') 
 623                 self
.report_warning("Some error while getting the subtitles") 
 626                     sub_filename 
= filename
.rsplit('.', 1)[0] + u
'.' + sub_lang 
+ u
'.' + sub_format
 
 627                     self
.report_writesubtitles(sub_filename
) 
 628                     with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8') as subfile
: 
 630                 except (OSError, IOError): 
 631                     self
.report_error(u
'Cannot write subtitles file ' + descfn
) 
 634         if self
.params
.get('allsubtitles', False) and 'subtitles' in info_dict 
and info_dict
['subtitles']: 
 635             subtitles 
= info_dict
['subtitles'] 
 636             sub_format 
= self
.params
.get('subtitlesformat') 
 637             for subtitle 
in subtitles
: 
 638                 (sub_error
, sub_lang
, sub
) = subtitle
 
 640                     self
.report_warning("Some error while getting the subtitles") 
 643                         sub_filename 
= filename
.rsplit('.', 1)[0] + u
'.' + sub_lang 
+ u
'.' + sub_format
 
 644                         self
.report_writesubtitles(sub_filename
) 
 645                         with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8') as subfile
: 
 647                     except (OSError, IOError): 
 648                         self
.report_error(u
'Cannot write subtitles file ' + descfn
) 
 651         if self
.params
.get('writeinfojson', False): 
 652             infofn 
= filename 
+ u
'.info.json' 
 653             self
.report_writeinfojson(infofn
) 
 655                 json_info_dict 
= dict((k
, v
) for k
,v 
in info_dict
.items() if not k 
in ['urlhandle']) 
 656                 write_json_file(json_info_dict
, encodeFilename(infofn
)) 
 657             except (OSError, IOError): 
 658                 self
.report_error(u
'Cannot write metadata to JSON file ' + infofn
) 
 661         if self
.params
.get('writethumbnail', False): 
 662             if 'thumbnail' in info_dict
: 
 663                 thumb_format 
= info_dict
['thumbnail'].rpartition(u
'/')[2].rpartition(u
'.')[2] 
 666                 thumb_filename 
= filename
.rpartition('.')[0] + u
'.' + thumb_format
 
 667                 self
.to_screen(u
'[%s] %s: Downloading thumbnail ...' % 
 668                                (info_dict
['extractor'], info_dict
['id'])) 
 669                 uf 
= compat_urllib_request
.urlopen(info_dict
['thumbnail']) 
 670                 with open(thumb_filename
, 'wb') as thumbf
: 
 671                     shutil
.copyfileobj(uf
, thumbf
) 
 672                 self
.to_screen(u
'[%s] %s: Writing thumbnail to: %s' % 
 673                                (info_dict
['extractor'], info_dict
['id'], thumb_filename
)) 
 675         if not self
.params
.get('skip_download', False): 
 676             if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(filename
)): 
 680                     success 
= self
._do
_download
(filename
, info_dict
) 
 681                 except (OSError, IOError) as err
: 
 682                     raise UnavailableVideoError() 
 683                 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
 684                     self
.report_error(u
'unable to download video data: %s' % str(err
)) 
 686                 except (ContentTooShortError
, ) as err
: 
 687                     self
.report_error(u
'content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
)) 
 692                     self
.post_process(filename
, info_dict
) 
 693                 except (PostProcessingError
) as err
: 
 694                     self
.report_error(u
'postprocessing: %s' % str(err
)) 
 697     def download(self
, url_list
): 
 698         """Download a given list of URLs.""" 
 699         if len(url_list
) > 1 and self
.fixed_template(): 
 700             raise SameFileError(self
.params
['outtmpl']) 
 704                 #It also downloads the videos 
 705                 videos 
= self
.extract_info(url
) 
 706             except UnavailableVideoError
: 
 707                 self
.report_error(u
'unable to download video') 
 708             except MaxDownloadsReached
: 
 709                 self
.to_screen(u
'[info] Maximum number of downloaded files reached.') 
 712         return self
._download
_retcode
 
 714     def post_process(self
, filename
, ie_info
): 
 715         """Run all the postprocessors on the given file.""" 
 717         info
['filepath'] = filename
 
 721                 keep_video_wish
,new_info 
= pp
.run(info
) 
 722                 if keep_video_wish 
is not None: 
 724                         keep_video 
= keep_video_wish
 
 725                     elif keep_video 
is None: 
 726                         # No clear decision yet, let IE decide 
 727                         keep_video 
= keep_video_wish
 
 728             except PostProcessingError 
as e
: 
 729                 self
.to_stderr(u
'ERROR: ' + e
.msg
) 
 730         if keep_video 
is False and not self
.params
.get('keepvideo', False): 
 732                 self
.to_screen(u
'Deleting original file %s (pass -k to keep)' % filename
) 
 733                 os
.remove(encodeFilename(filename
)) 
 734             except (IOError, OSError): 
 735                 self
.report_warning(u
'Unable to remove downloaded video file') 
 737     def _download_with_rtmpdump(self
, filename
, url
, player_url
, page_url
, play_path
): 
 738         self
.report_destination(filename
) 
 739         tmpfilename 
= self
.temp_name(filename
) 
 741         # Check for rtmpdump first 
 743             subprocess
.call(['rtmpdump', '-h'], stdout
=(open(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 744         except (OSError, IOError): 
 745             self
.report_error(u
'RTMP download detected but "rtmpdump" could not be run') 
 748         # Download using rtmpdump. rtmpdump returns exit code 2 when 
 749         # the connection was interrumpted and resuming appears to be 
 750         # possible. This is part of rtmpdump's normal usage, AFAIK. 
 751         basic_args 
= ['rtmpdump', '-q', '-r', url
, '-o', tmpfilename
] 
 752         if player_url 
is not None: 
 753             basic_args 
+= ['-W', player_url
] 
 754         if page_url 
is not None: 
 755             basic_args 
+= ['--pageUrl', page_url
] 
 756         if play_path 
is not None: 
 757             basic_args 
+= ['-y', play_path
] 
 758         args 
= basic_args 
+ [[], ['-e', '-k', '1']][self
.params
.get('continuedl', False)] 
 759         if self
.params
.get('verbose', False): 
 762                 shell_quote 
= lambda args
: ' '.join(map(pipes
.quote
, args
)) 
 765             self
.to_screen(u
'[debug] rtmpdump command line: ' + shell_quote(args
)) 
 766         retval 
= subprocess
.call(args
) 
 767         while retval 
== 2 or retval 
== 1: 
 768             prevsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 769             self
.to_screen(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 770             time
.sleep(5.0) # This seems to be needed 
 771             retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 772             cursize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 773             if prevsize 
== cursize 
and retval 
== 1: 
 775              # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those 
 776             if prevsize 
== cursize 
and retval 
== 2 and cursize 
> 1024: 
 777                 self
.to_screen(u
'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') 
 781             fsize 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 782             self
.to_screen(u
'\r[rtmpdump] %s bytes' % fsize
) 
 783             self
.try_rename(tmpfilename
, filename
) 
 784             self
._hook
_progress
({ 
 785                 'downloaded_bytes': fsize
, 
 786                 'total_bytes': fsize
, 
 787                 'filename': filename
, 
 788                 'status': 'finished', 
 792             self
.to_stderr(u
"\n") 
 793             self
.report_error(u
'rtmpdump exited with code %d' % retval
) 
 796     def _do_download(self
, filename
, info_dict
): 
 797         url 
= info_dict
['url'] 
 799         # Check file already present 
 800         if self
.params
.get('continuedl', False) and os
.path
.isfile(encodeFilename(filename
)) and not self
.params
.get('nopart', False): 
 801             self
.report_file_already_downloaded(filename
) 
 802             self
._hook
_progress
({ 
 803                 'filename': filename
, 
 804                 'status': 'finished', 
 808         # Attempt to download using rtmpdump 
 809         if url
.startswith('rtmp'): 
 810             return self
._download
_with
_rtmpdump
(filename
, url
, 
 811                                                 info_dict
.get('player_url', None), 
 812                                                 info_dict
.get('page_url', None), 
 813                                                 info_dict
.get('play_path', None)) 
 815         tmpfilename 
= self
.temp_name(filename
) 
 818         # Do not include the Accept-Encoding header 
 819         headers 
= {'Youtubedl-no-compression': 'True'} 
 820         if 'user_agent' in info_dict
: 
 821             headers
['Youtubedl-user-agent'] = info_dict
['user_agent'] 
 822         basic_request 
= compat_urllib_request
.Request(url
, None, headers
) 
 823         request 
= compat_urllib_request
.Request(url
, None, headers
) 
 825         if self
.params
.get('test', False): 
 826             request
.add_header('Range','bytes=0-10240') 
 828         # Establish possible resume length 
 829         if os
.path
.isfile(encodeFilename(tmpfilename
)): 
 830             resume_len 
= os
.path
.getsize(encodeFilename(tmpfilename
)) 
 836             if self
.params
.get('continuedl', False): 
 837                 self
.report_resuming_byte(resume_len
) 
 838                 request
.add_header('Range','bytes=%d-' % resume_len
) 
 844         retries 
= self
.params
.get('retries', 0) 
 845         while count 
<= retries
: 
 846             # Establish connection 
 848                 if count 
== 0 and 'urlhandle' in info_dict
: 
 849                     data 
= info_dict
['urlhandle'] 
 850                 data 
= compat_urllib_request
.urlopen(request
) 
 852             except (compat_urllib_error
.HTTPError
, ) as err
: 
 853                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 854                     # Unexpected HTTP error 
 856                 elif err
.code 
== 416: 
 857                     # Unable to resume (requested range not satisfiable) 
 859                         # Open the connection again without the range header 
 860                         data 
= compat_urllib_request
.urlopen(basic_request
) 
 861                         content_length 
= data
.info()['Content-Length'] 
 862                     except (compat_urllib_error
.HTTPError
, ) as err
: 
 863                         if err
.code 
< 500 or err
.code 
>= 600: 
 866                         # Examine the reported length 
 867                         if (content_length 
is not None and 
 868                                 (resume_len 
- 100 < int(content_length
) < resume_len 
+ 100)): 
 869                             # The file had already been fully downloaded. 
 870                             # Explanation to the above condition: in issue #175 it was revealed that 
 871                             # YouTube sometimes adds or removes a few bytes from the end of the file, 
 872                             # changing the file size slightly and causing problems for some users. So 
 873                             # I decided to implement a suggested change and consider the file 
 874                             # completely downloaded if the file size differs less than 100 bytes from 
 875                             # the one in the hard drive. 
 876                             self
.report_file_already_downloaded(filename
) 
 877                             self
.try_rename(tmpfilename
, filename
) 
 878                             self
._hook
_progress
({ 
 879                                 'filename': filename
, 
 880                                 'status': 'finished', 
 884                             # The length does not match, we start the download over 
 885                             self
.report_unable_to_resume() 
 891                 self
.report_retry(count
, retries
) 
 894             self
.report_error(u
'giving up after %s retries' % retries
) 
 897         data_len 
= data
.info().get('Content-length', None) 
 898         if data_len 
is not None: 
 899             data_len 
= int(data_len
) + resume_len
 
 900             min_data_len 
= self
.params
.get("min_filesize", None) 
 901             max_data_len 
=  self
.params
.get("max_filesize", None) 
 902             if min_data_len 
is not None and data_len 
< min_data_len
: 
 903                 self
.to_screen(u
'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len
, min_data_len
)) 
 905             if max_data_len 
is not None and data_len 
> max_data_len
: 
 906                 self
.to_screen(u
'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len
, max_data_len
)) 
 909         data_len_str 
= self
.format_bytes(data_len
) 
 910         byte_counter 
= 0 + resume_len
 
 911         block_size 
= self
.params
.get('buffersize', 1024) 
 916             data_block 
= data
.read(block_size
) 
 918             if len(data_block
) == 0: 
 920             byte_counter 
+= len(data_block
) 
 922             # Open file just in time 
 925                     (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
 926                     assert stream 
is not None 
 927                     filename 
= self
.undo_temp_name(tmpfilename
) 
 928                     self
.report_destination(filename
) 
 929                 except (OSError, IOError) as err
: 
 930                     self
.report_error(u
'unable to open for writing: %s' % str(err
)) 
 933                 stream
.write(data_block
) 
 934             except (IOError, OSError) as err
: 
 935                 self
.to_stderr(u
"\n") 
 936                 self
.report_error(u
'unable to write data: %s' % str(err
)) 
 938             if not self
.params
.get('noresizebuffer', False): 
 939                 block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
 942             speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
 944                 self
.report_progress('Unknown %', data_len_str
, speed_str
, 'Unknown ETA') 
 946                 percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
 947                 eta_str 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
 948                 self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
 950             self
._hook
_progress
({ 
 951                 'downloaded_bytes': byte_counter
, 
 952                 'total_bytes': data_len
, 
 953                 'tmpfilename': tmpfilename
, 
 954                 'filename': filename
, 
 955                 'status': 'downloading', 
 959             self
.slow_down(start
, byte_counter 
- resume_len
) 
 962             self
.to_stderr(u
"\n") 
 963             self
.report_error(u
'Did not get any data blocks') 
 967         if data_len 
is not None and byte_counter 
!= data_len
: 
 968             raise ContentTooShortError(byte_counter
, int(data_len
)) 
 969         self
.try_rename(tmpfilename
, filename
) 
 971         # Update file modification time 
 972         if self
.params
.get('updatetime', True): 
 973             info_dict
['filetime'] = self
.try_utime(filename
, data
.info().get('last-modified', None)) 
 975         self
._hook
_progress
({ 
 976             'downloaded_bytes': byte_counter
, 
 977             'total_bytes': byte_counter
, 
 978             'filename': filename
, 
 979             'status': 'finished', 
 984     def _hook_progress(self
, status
): 
 985         for ph 
in self
._progress
_hooks
: 
 988     def add_progress_hook(self
, ph
): 
 989         """ ph gets called on download progress, with a dictionary with the entries 
 990         * filename: The final filename 
 991         * status: One of "downloading" and "finished" 
 993         It can also have some of the following entries: 
 995         * downloaded_bytes: Bytes on disks 
 996         * total_bytes: Total bytes, None if unknown 
 997         * tmpfilename: The filename we're currently writing to 
 999         Hooks are guaranteed to be called at least once (with status "finished") 
1000         if the download is successful. 
1002         self
._progress
_hooks
.append(ph
)