Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/FileDownloader.py
Imported Upstream version 2013.05.14
[youtubedl] / youtube_dl / FileDownloader.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import math
7 import io
8 import os
9 import re
10 import shutil
11 import socket
12 import subprocess
13 import sys
14 import time
15 import traceback
16
17 if os.name == 'nt':
18 import ctypes
19
20 from .utils import *
21 from .InfoExtractors import get_info_extractor
22
23
24 class FileDownloader(object):
25 """File Downloader class.
26
27 File downloader objects are the ones responsible of downloading the
28 actual video file and writing it to disk if the user has requested
29 it, among some other tasks. In most cases there should be one per
30 program. As, given a video URL, the downloader doesn't know how to
31 extract all the needed information, task that InfoExtractors do, it
32 has to pass the URL to one of them.
33
34 For this, file downloader objects have a method that allows
35 InfoExtractors to be registered in a given order. When it is passed
36 a URL, the file downloader handles it to the first InfoExtractor it
37 finds that reports being able to handle it. The InfoExtractor extracts
38 all the information about the video or videos the URL refers to, and
39 asks the FileDownloader to process the video information, possibly
40 downloading the video.
41
42 File downloaders accept a lot of parameters. In order not to saturate
43 the object constructor with arguments, it receives a dictionary of
44 options instead. These options are available through the params
45 attribute for the InfoExtractors to use. The FileDownloader also
46 registers itself as the downloader in charge for the InfoExtractors
47 that are added to it, so this is a "mutual registration".
48
49 Available options:
50
51 username: Username for authentication purposes.
52 password: Password for authentication purposes.
53 usenetrc: Use netrc for authentication instead.
54 quiet: Do not print messages to stdout.
55 forceurl: Force printing final URL.
56 forcetitle: Force printing title.
57 forceid: Force printing ID.
58 forcethumbnail: Force printing thumbnail URL.
59 forcedescription: Force printing description.
60 forcefilename: Force printing final filename.
61 simulate: Do not download the video files.
62 format: Video format code.
63 format_limit: Highest quality format to try.
64 outtmpl: Template for output names.
65 restrictfilenames: Do not allow "&" and spaces in file names
66 ignoreerrors: Do not stop on download errors.
67 ratelimit: Download speed limit, in bytes/sec.
68 nooverwrites: Prevent overwriting files.
69 retries: Number of times to retry for HTTP error 5xx
70 buffersize: Size of download buffer in bytes.
71 noresizebuffer: Do not automatically resize the download buffer.
72 continuedl: Try to continue downloads if possible.
73 noprogress: Do not print the progress bar.
74 playliststart: Playlist item to start at.
75 playlistend: Playlist item to end at.
76 matchtitle: Download only matching titles.
77 rejecttitle: Reject downloads for matching titles.
78 logtostderr: Log messages to stderr instead of stdout.
79 consoletitle: Display progress in console window's titlebar.
80 nopart: Do not use temporary .part files.
81 updatetime: Use the Last-modified header to set output file timestamps.
82 writedescription: Write the video description to a .description file
83 writeinfojson: Write the video description to a .info.json file
84 writethumbnail: Write the thumbnail image to a file
85 writesubtitles: Write the video subtitles to a file
86 allsubtitles: Downloads all the subtitles of the video
87 listsubtitles: Lists all available subtitles for the video
88 subtitlesformat: Subtitle format [sbv/srt] (default=srt)
89 subtitleslang: Language of the subtitles to download
90 test: Download only first bytes to test the downloader.
91 keepvideo: Keep the video file after post-processing
92 min_filesize: Skip files smaller than this size
93 max_filesize: Skip files larger than this size
94 daterange: A DateRange object, download only if the upload_date is in the range.
95 skip_download: Skip the actual download of the video file
96 """
97
98 params = None
99 _ies = []
100 _pps = []
101 _download_retcode = None
102 _num_downloads = None
103 _screen_file = None
104
def __init__(self, params):
    """Build a downloader configured by the ``params`` option dictionary.

    The extractor, post-processor and progress-hook lists start out empty,
    the return code and download counter start at zero, and the stream
    used by to_screen() is stdout unless ``logtostderr`` selects stderr.
    A warning is emitted when the output template still uses the
    deprecated ``%(stitle)s`` field.
    """
    # Indexing (rather than a conditional) is kept on purpose: the option
    # is used as a 0/1 index into the pair of streams.
    streams = (sys.stdout, sys.stderr)
    self._screen_file = streams[params.get('logtostderr', False)]
    self.params = params

    self._progress_hooks = []
    self._ies, self._pps = [], []
    self._num_downloads = 0
    self._download_retcode = 0

    template = self.params['outtmpl']
    if '%(stitle)s' in template:
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
117
@staticmethod
def format_bytes(bytes):
    """Render a byte quantity as a string with a binary (1024-based) suffix.

    ``bytes`` may be None (returns 'N/A'), a number, or a numeric string.
    The result has two decimals followed by the suffix, e.g. '1.50MiB'.

    The exponent is clamped to the range of known suffixes so that
    quantities below one byte are shown in 'B' (instead of wrapping
    around to 'YiB' through a negative index) and quantities beyond the
    largest suffix are shown in 'YiB' (instead of raising IndexError).
    """
    if bytes is None:
        return 'N/A'
    if type(bytes) is str:
        bytes = float(bytes)
    suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
    if bytes == 0.0:
        exponent = 0
    else:
        exponent = int(math.log(bytes, 1024.0))
    # Keep the index inside the suffix table for tiny and huge values.
    exponent = max(0, min(exponent, len(suffixes) - 1))
    converted = float(bytes) / float(1024 ** exponent)
    return '%.2f%s' % (converted, suffixes[exponent])
131
@staticmethod
def calc_percent(byte_counter, data_len):
    """Return the download progress as a right-aligned 6-character percentage.

    When the total length is unknown (None) or zero, no meaningful
    percentage exists and the placeholder '---.-%' is returned; the zero
    case previously raised ZeroDivisionError.
    """
    if data_len is None:
        return '---.-%'
    total = float(data_len)
    if total == 0.0:
        return '---.-%'
    return '%6s' % ('%3.1f%%' % (float(byte_counter) / total * 100.0))
137
@staticmethod
def calc_eta(start, now, total, current):
    """Estimate the remaining download time as an 'MM:SS' string.

    start/now are timestamps in seconds, total is the expected size (or
    None when unknown) and current the amount received so far.  The
    placeholder '--:--' is returned when no estimate can be made: unknown
    total, nothing received yet, less than a millisecond elapsed, more
    than 99 minutes remaining, or more data received than announced
    (which would otherwise yield a negative time such as '-1:55').
    """
    if total is None:
        return '--:--'
    dif = now - start
    if current == 0 or dif < 0.001:  # One millisecond
        return '--:--'
    rate = float(current) / dif
    eta = int((float(total) - float(current)) / rate)
    if eta < 0:
        # The server sent more than it announced; no sensible ETA exists.
        return '--:--'
    (eta_mins, eta_secs) = divmod(eta, 60)
    if eta_mins > 99:
        return '--:--'
    return '%02d:%02d' % (eta_mins, eta_secs)
151
@staticmethod
def calc_speed(start, now, bytes):
    """Return the average transfer speed as a right-aligned 10-char string.

    The speed is ``bytes`` divided by the time elapsed between ``start``
    and ``now``; '---b/s' is shown when nothing was transferred or less
    than a millisecond has passed.
    """
    elapsed = now - start
    measurable = not (bytes == 0 or elapsed < 0.001)  # One millisecond
    if measurable:
        speed_text = '%s/s' % FileDownloader.format_bytes(float(bytes) / elapsed)
    else:
        speed_text = '---b/s'
    return '%10s' % speed_text
158
@staticmethod
def best_block_size(elapsed_time, bytes):
    """Pick the next read size from the last block's size and duration.

    The result is the observed rate (bytes per elapsed_time unit) limited
    to the range [bytes / 2, bytes * 2], never below 1 and never above
    4 MB.  When the elapsed time is under a millisecond the upper bound
    is returned directly.
    """
    floor = max(bytes / 2.0, 1.0)
    ceiling = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
    if elapsed_time < 0.001:
        return int(ceiling)
    rate = bytes / elapsed_time
    if rate > ceiling:
        chosen = ceiling
    elif rate < floor:
        chosen = floor
    else:
        chosen = rate
    return int(chosen)
171
@staticmethod
def parse_bytes(bytestr):
    """Convert a size string such as '500', '1.5M' or '2g' to an int.

    An optional one-letter suffix (k, M, G, T, P, E, Z, Y, any case)
    scales the number by the matching power of 1024.  Returns None when
    the string does not have that shape.
    """
    parsed = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
    if parsed is None:
        return None
    digits, unit = parsed.group(1), parsed.group(2)
    # An empty unit is found at index 0, i.e. a multiplier of 1.
    power = 'bkmgtpezy'.index(unit.lower())
    return int(round(float(digits) * 1024.0 ** power))
181
def add_info_extractor(self, ie):
    """Register ``ie`` as the last InfoExtractor and hand it this downloader."""
    extractors = self._ies
    extractors.append(ie)
    # Mutual registration: the extractor learns which downloader owns it.
    ie.set_downloader(self)
186
def add_post_processor(self, pp):
    """Register ``pp`` as the last PostProcessor and hand it this downloader."""
    chain = self._pps
    chain.append(pp)
    # Mutual registration: the post-processor learns which downloader owns it.
    pp.set_downloader(self)
191
def to_screen(self, message, skip_eol=False):
    """Write ``message`` to the configured screen stream unless quiet.

    ``message`` must be a unicode string.  A newline is appended unless
    ``skip_eol`` is set.  The text is encoded with the preferred encoding
    (dropping unencodable characters) when the stream is binary or when
    running on Python 2; the stream is flushed after writing.
    """
    assert type(message) == type(u'')
    if self.params.get('quiet', False):
        return
    line = message + (u'\n', u'')[skip_eol]
    stream = self._screen_file
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    wants_bytes = 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3
    if wants_bytes:
        line = line.encode(preferredencoding(), 'ignore')
    stream.write(line)
    stream.flush()
202
def to_stderr(self, message):
    """Write ``message`` followed by a newline to sys.stderr.

    ``message`` must be a unicode string.  The text is encoded with the
    preferred encoding when sys.stderr is a binary stream or when running
    on Python 2.

    The decision is based on sys.stderr itself, the stream that is
    actually written to; it used to look at the screen stream, which may
    be a different object (stdout) with a different mode.
    """
    assert type(message) == type(u'')
    output = message + u'\n'
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
210
def to_cons_title(self, message):
    """Show ``message`` in the console/terminal title bar when enabled.

    Does nothing unless the ``consoletitle`` option is set.  On Windows
    with a console window the title is set through kernel32; otherwise,
    when a TERM environment variable exists, an escape sequence carrying
    the title is written through to_screen().
    """
    if not self.params.get('consoletitle', False):
        return
    on_windows_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if on_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        wide_title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(wide_title)
    elif 'TERM' in os.environ:
        escape_sequence = '\033]0;%s\007' % message
        self.to_screen(escape_sequence, skip_eol=True)
221
def fixed_template(self):
    """Tell whether the output template has no ``%(name)s`` placeholders."""
    template = self.params['outtmpl']
    placeholder = re.search(u'(?u)%\\(.+?\\)s', template)
    return placeholder is None
225
def trouble(self, message=None, tb=None):
    """Report a download problem and decide whether to abort.

    ``message``, when given, is printed to stderr.  In verbose mode a
    traceback is printed as well: ``tb`` if supplied, otherwise the
    traceback of the exception currently being handled (preceded by the
    traceback stored in that exception's own ``exc_info`` attribute, if
    it has one), or the current call stack when no exception is active.

    Unless the ``ignoreerrors`` option is set, a DownloadError carrying
    the message and the most specific exception info is raised.  When
    errors are ignored the downloader's return code is set to 1 instead.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if active[0]:  # if .trouble has been called from an except block
                pieces = []
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    pieces.append(u''.join(traceback.format_exception(*active[1].exc_info)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = u''.join(pieces)
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    active = sys.exc_info()
    if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
        # Prefer the original exception wrapped inside the active one.
        exc_info = active[1].exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
255
def report_warning(self, message):
    '''
    Send the message to stderr with a 'WARNING:' prefix.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    use_color = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;33mWARNING:\033[0m' if use_color else u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
267
def report_error(self, message, tb=None):
    '''
    Forward the message to trouble() with an 'ERROR:' prefix.
    The prefix is colored red when stderr is a tty and the OS is not
    Windows; tb is passed through unchanged.
    '''
    use_color = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;31mERROR:\033[0m' if use_color else u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
279
def slow_down(self, start_time, byte_counter):
    """Pause long enough to bring the average speed down to ``ratelimit``.

    Nothing happens when no limit is configured, nothing has been
    transferred, or no time has elapsed since ``start_time``.
    """
    rate_limit = self.params.get('ratelimit', None)
    if rate_limit is None or byte_counter == 0:
        return
    now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    if float(byte_counter) / elapsed > rate_limit:
        # Sleep for the time by which the transfer is ahead of the limit.
        time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
292
def temp_name(self, filename):
    """Return the name to download into: ``filename`` plus '.part'.

    The name is returned unchanged when .part files are disabled
    (``nopart``), when it is u'-', or when it already exists on disk as
    something other than a regular file.
    """
    if self.params.get('nopart', False) or filename == u'-':
        return filename
    exists_as_non_file = (os.path.exists(encodeFilename(filename))
                          and not os.path.isfile(encodeFilename(filename)))
    if exists_as_non_file:
        return filename
    return filename + u'.part'
299
def undo_temp_name(self, filename):
    """Strip a trailing '.part' from ``filename``; other names pass through."""
    suffix = u'.part'
    if not filename.endswith(suffix):
        return filename
    return filename[:-len(suffix)]
304
def try_rename(self, old_filename, new_filename):
    """Rename a file, reporting an error instead of raising on failure.

    Identical source and target names are a no-op.
    """
    if old_filename == new_filename:
        return
    source = encodeFilename(old_filename)
    target = encodeFilename(new_filename)
    try:
        os.rename(source, target)
    except (IOError, OSError):
        self.report_error(u'unable to rename file')
312
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    ``last_modified_hdr`` is the raw header value; it is converted with
    timeconvert().  Returns the converted timestamp, or None when there
    is no header, the file does not exist as a regular file, or the
    header cannot be converted.  Failure to actually change the
    timestamp is ignored: this is a best-effort operation.
    """
    if last_modified_hdr is None:
        return None
    encoded_name = encodeFilename(filename)
    if not os.path.isfile(encoded_name):
        return None
    filetime = timeconvert(last_modified_hdr)
    if filetime is None:
        return filetime
    try:
        # Use the same encoded name that was checked with isfile() above.
        os.utime(encoded_name, (time.time(), filetime))
    except Exception:
        # Best effort only; unlike a bare except this still lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    return filetime
330
def report_writedescription(self, descfn):
    """ Announce that the description is being written to descfn """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
334
def report_writesubtitles(self, sub_filename):
    """ Announce that the subtitles are being written to sub_filename """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
338
def report_writeinfojson(self, infofn):
    """ Announce the JSON metadata file name infofn """
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
342
def report_destination(self, filename):
    """Announce the file name the download is written to."""
    notice = u'[download] Destination: ' + filename
    self.to_screen(notice)
346
def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
    """Print one progress update and mirror it to the console title.

    Suppressed entirely by the ``noprogress`` option.  By default the
    line is redrawn in place (carriage return, no newline, and a
    clear-to-end-of-line sequence when stderr is a tty outside Windows);
    with ``progress_with_newline`` every update gets its own line.
    """
    if self.params.get('noprogress', False):
        return
    if sys.stderr.isatty() and os.name != 'nt':
        clear_line = u'\x1b[K'
    else:
        clear_line = u''
    fields = (percent_str, data_len_str, speed_str, eta_str)
    if self.params.get('progress_with_newline', False):
        self.to_screen(u'[download] %s of %s at %s ETA %s' % fields)
    else:
        self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
            ((clear_line,) + fields), skip_eol=True)
    stripped = tuple(field.strip() for field in fields)
    self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % stripped)
360
def report_resuming_byte(self, resume_len):
    """Announce the byte offset at which the download is resumed."""
    notice = u'[download] Resuming download at byte %s' % resume_len
    self.to_screen(notice)
364
def report_retry(self, count, retries):
    """Announce retry number ``count`` out of ``retries`` after an HTTP error."""
    attempt = (count, retries)
    self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % attempt)
368
def report_file_already_downloaded(self, file_name):
    """Announce that ``file_name`` is already complete.

    Falls back to a message without the name when printing the name
    raises UnicodeEncodeError.
    """
    named_notice = u'[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(named_notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
375
def report_unable_to_resume(self):
    """Announce that the download could not be resumed."""
    notice = u'[download] Unable to resume'
    self.to_screen(notice)
379
def report_finish(self):
    """Announce the end of a download.

    With ``noprogress`` an explicit completion message is printed;
    otherwise an empty message is printed, which ends the progress line.
    """
    progress_hidden = self.params.get('noprogress', False)
    closing = u'[download] Download completed' if progress_hidden else u''
    self.to_screen(closing)
386
def increment_downloads(self):
    """Advance the per-file ordinal counter by one."""
    self._num_downloads = self._num_downloads + 1
390
def prepare_filename(self, info_dict):
    """Generate the output filename.

    A copy of ``info_dict`` is extended with ``epoch`` (current time) and
    ``autonumber`` (the download counter, zero-padded to the
    ``autonumber_size`` option, default 5); a present ``playlist_index``
    is zero-padded to five digits.  Every value is then passed through
    sanitize_filename() (None becomes u'NA') and substituted into the
    ``outtmpl`` template.

    Returns the filename, or None after reporting an error when the
    template references an unknown field (KeyError) or a ValueError is
    raised while building the name.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # .get(): a dict without 'playlist_index' must not be reported as
        # an erroneous template unless the template really uses the field.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        sanitize = lambda k, v: sanitize_filename(
            u'NA' if v is None else compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == u'id'))
        template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

        filename = self.params['outtmpl'] % template_dict
        return filename
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError:
        self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
        return None
419
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a unicode string explaining why the entry is
    skipped: its title does not match ``matchtitle``, its title matches
    ``rejecttitle``, or its ``upload_date`` is outside ``daterange``.
    Title patterns are searched case-insensitively.

    The reason carries no '[download] ' prefix: the caller adds it when
    printing, so including it here produced a doubled prefix for two of
    the three reasons.
    """

    title = info_dict['title']
    matchtitle = self.params.get('matchtitle', False)
    if matchtitle:
        if not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
    rejecttitle = self.params.get('rejecttitle', False)
    if rejecttitle:
        if re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    return None
438
def extract_info(self, url, download=True, ie_key=None):
    '''
    Run the first suitable InfoExtractor on url and process its result.
    With ie_key only the extractor registered under that key is tried,
    otherwise the extractors added to this downloader, in order.
    Returns what process_ie_result() returns (downloading the videos if
    'download'); returns None when extraction finished early, failed, or
    no extractor accepted the url.
    '''

    if not ie_key:
        candidates = self._ies
    else:
        forced = get_info_extractor(ie_key)()
        forced.set_downloader(self)
        candidates = [forced]

    for ie in candidates:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:
                # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {'_type': 'compat_list', 'entries': ie_result}
            if 'extractor' not in ie_result:
                ie_result['extractor'] = ie.IE_NAME
            return self.process_ie_result(ie_result, download=download)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            break
    else:
        # The loop ran out of candidates without any of them taking the url.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
484
def process_ie_result(self, ie_result, download=True):
    """
    Resolve an InfoExtractor result (which may be modified in place)
    according to its '_type':

    video        -- gets empty playlist fields if it has none and is
                    handed to process_info() if 'download'.
    url          -- is extracted again through extract_info().
    playlist     -- the entries selected by the playliststart/playlistend
                    options are processed recursively.
    compat_list  -- every entry inherits the extractor name and is
                    processed recursively.

    Returns the resolved ie_result; raises Exception for any other type.
    """

    # If not given we suppose it's a video, support the default old system
    result_type = ie_result.get('_type', 'video')

    if result_type == 'video':
        if 'playlist' not in ie_result:
            # It isn't part of a playlist
            ie_result['playlist'] = None
            ie_result['playlist_index'] = None
        if download:
            self.process_info(ie_result)
        return ie_result

    if result_type == 'url':
        return self.extract_info(ie_result['url'], download, ie_key=ie_result.get('ie_key'))

    if result_type == 'playlist':
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        # -1 means "up to the last entry".
        stop = None if playlistend == -1 else playlistend
        entries = ie_result['entries'][playliststart:stop]
        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        playlist_results = []
        for position, entry in enumerate(entries):
            ordinal = position + 1
            self.to_screen(u'[download] Downloading video #%s of %s' % (ordinal, n_entries))
            entry['playlist'] = playlist
            entry['playlist_index'] = ordinal + playliststart
            playlist_results.append(self.process_ie_result(entry, download=download))
        ie_result['entries'] = playlist_results
        return ie_result

    if result_type == 'compat_list':
        resolved = []
        for r in ie_result['entries']:
            r.setdefault('extractor', ie_result['extractor'])
            resolved.append(self.process_ie_result(r, download=download))
        ie_result['entries'] = resolved
        return ie_result

    raise Exception('Invalid result type: %s' % result_type)
545
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Steps, in order: count the download, normalise the title fields
    (``fulltitle`` keeps the original, ``title`` is cut to 200
    characters, ``stitle`` mirrors ``title``), default ``format`` to the
    extension, apply the title/date filters, enforce ``max_downloads``
    (raises MaxDownloadsReached), build the filename, print the forced
    fields, and -- unless simulating -- create the target directory,
    write the requested side files (description, subtitles, info JSON,
    thumbnail), download the video and run the post-processors.

    Problems are reported through report_error() and end the processing
    of this entry; an OSError/IOError during the download is turned into
    UnavailableVideoError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        compat_print(info_dict['url'])
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitle = info_dict['subtitles'][0]
        (sub_error, sub_lang, sub) = subtitle
        sub_format = self.params.get('subtitlesformat')
        if sub_error:
            self.report_warning("Some error while getting the subtitles")
        else:
            # Computed before the try so the error handler can name the file.
            sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Report the subtitles file, not the description file name
                # (which may not even be defined at this point).
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat')
        for subtitle in subtitles:
            (sub_error, sub_lang, sub) = subtitle
            if sub_error:
                self.report_warning("Some error while getting the subtitles")
            else:
                sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
                try:
                    self.report_writesubtitles(sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
                except (OSError, IOError):
                    self.report_error(u'Cannot write subtitles file ' + sub_filename)
                    return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # The open URL handle is not serialisable and is left out.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if 'thumbnail' in info_dict:
            # The extension is whatever follows the last dot of the URL's last path segment.
            thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
            if not thumb_format:
                thumb_format = 'jpg'
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                (info_dict['extractor'], info_dict['id']))
            uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
            with open(thumb_filename, 'wb') as thumbf:
                shutil.copyfileobj(uf, thumbf)
            self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                (info_dict['extractor'], info_dict['id'], thumb_filename))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # An existing file counts as a successful download.
            success = True
        else:
            try:
                success = self._do_download(filename, info_dict)
            except (OSError, IOError):
                raise UnavailableVideoError()
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return
696
def download(self, url_list):
    """Extract and download every URL in ``url_list``; return the exit code.

    Raises SameFileError when several URLs would be written to one fixed
    output name.  An unavailable video is reported and the loop goes on;
    reaching the download limit is announced and re-raised.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
713
def post_process(self, filename, ie_info):
    """Feed the downloaded file to every registered postprocessor.

    Each postprocessor receives a copy of ``ie_info`` extended with
    'filepath' and returns a (keep_video_wish, new_info) pair.  A true
    wish always wins; a false wish only counts while nothing has been
    decided yet.  A PostProcessingError from one postprocessor is printed
    and the chain continues.  If the outcome is an explicit False and the
    ``keepvideo`` option is not set, the original file is removed.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    keep_video = None
    for pp in self._pps:
        try:
            wish, new_info = pp.run(info)
            if wish is None:
                continue
            if wish or keep_video is None:
                keep_video = wish
        except PostProcessingError as e:
            self.to_stderr(u'ERROR: ' + e.msg)

    must_delete = keep_video is False and not self.params.get('keepvideo', False)
    if not must_delete:
        return
    try:
        self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
        os.remove(encodeFilename(filename))
    except (IOError, OSError):
        self.report_warning(u'Unable to remove downloaded video file')
736
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path):
    """Download an RTMP stream to ``filename`` with the rtmpdump program.

    player_url, page_url and play_path are passed to rtmpdump when not
    None.  The data goes to the temporary name first and is renamed on
    success.  Returns True on success (after notifying the progress
    hooks), False when rtmpdump cannot be run or exits with an error.
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    # Check for rtmpdump first
    try:
        # The null device is closed again as soon as the probe finished.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
        return False

    # Download using rtmpdump. rtmpdump returns exit code 2 when
    # the connection was interrupted and resuming appears to be
    # possible. This is part of rtmpdump's normal usage, AFAIK.
    basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
    if player_url is not None:
        basic_args += ['-W', player_url]
    if page_url is not None:
        basic_args += ['--pageUrl', page_url]
    if play_path is not None:
        basic_args += ['-y', play_path]
    args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
    if self.params.get('verbose', False):
        try:
            import pipes
            shell_quote = lambda args: ' '.join(map(pipes.quote, args))
        except ImportError:
            shell_quote = repr
        self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
    retval = subprocess.call(args)
    while retval == 2 or retval == 1:
        prevsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
        time.sleep(5.0)  # This seems to be needed
        # Resume; after exit code 1 additionally pass '-k 1'.
        retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
        cursize = os.path.getsize(encodeFilename(tmpfilename))
        if prevsize == cursize and retval == 1:
            # No progress and still failing: give up.
            break
        # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
        if prevsize == cursize and retval == 2 and cursize > 1024:
            self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
            retval = 0
            break
    if retval == 0:
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'rtmpdump exited with code %d' % retval)
        return False
795
def _do_download(self, filename, info_dict):
    """Download the media described by info_dict and store it as filename.

    URLs starting with 'rtmp' are handed to _download_with_rtmpdump().
    Every other URL is fetched with compat_urllib_request into the
    temporary name given by self.temp_name(filename) and renamed to
    filename once the transfer is complete.  When the 'continuedl' option
    is set and a temporary file already exists, the transfer is resumed
    with a Range request.

    Arguments:
    filename:  name of the final file.
    info_dict: dictionary that must contain 'url'.  'user_agent',
               'player_url', 'page_url' and 'play_path' are read when
               present.  'filetime' is stored into it after a successful
               HTTP download when the 'updatetime' option is on.

    Returns True on success (including the "already downloaded" cases)
    and False when the download is aborted after reporting the problem.

    Raises ContentTooShortError when the byte count does not match the
    announced length, and re-raises HTTP errors that are neither 5xx
    nor 416.
    """
    url = info_dict['url']

    # Check file already present
    if (self.params.get('continuedl', False)
            and os.path.isfile(encodeFilename(filename))
            and not self.params.get('nopart', False)):
        self.report_file_already_downloaded(filename)
        self._hook_progress({
            'filename': filename,
            'status': 'finished',
        })
        return True

    # Attempt to download using rtmpdump
    if url.startswith('rtmp'):
        return self._download_with_rtmpdump(filename, url,
                                            info_dict.get('player_url', None),
                                            info_dict.get('page_url', None),
                                            info_dict.get('play_path', None))

    tmpfilename = self.temp_name(filename)
    stream = None

    # Do not include the Accept-Encoding header
    headers = {'Youtubedl-no-compression': 'True'}
    if 'user_agent' in info_dict:
        headers['Youtubedl-user-agent'] = info_dict['user_agent']
    # basic_request never gets a Range header: it is the fallback used
    # when the server answers the ranged request with 416.
    basic_request = compat_urllib_request.Request(url, None, headers)
    request = compat_urllib_request.Request(url, None, headers)

    if self.params.get('test', False):
        request.add_header('Range', 'bytes=0-10240')

    # Establish possible resume length
    if os.path.isfile(encodeFilename(tmpfilename)):
        resume_len = os.path.getsize(encodeFilename(tmpfilename))
    else:
        resume_len = 0

    open_mode = 'wb'
    if resume_len != 0:
        if self.params.get('continuedl', False):
            self.report_resuming_byte(resume_len)
            request.add_header('Range', 'bytes=%d-' % resume_len)
            open_mode = 'ab'
        else:
            resume_len = 0

    count = 0
    retries = self.params.get('retries', 0)
    while count <= retries:
        # Establish connection
        try:
            # NOTE: the connection is always opened from `request`.  The
            # previous code also read info_dict['urlhandle'] here, but
            # overwrote it on the very next line, so it was never used.
            data = compat_urllib_request.urlopen(request)
            break
        except (compat_urllib_error.HTTPError, ) as err:
            if (err.code < 500 or err.code >= 600) and err.code != 416:
                # Unexpected HTTP error
                raise
            elif err.code == 416:
                # Unable to resume (requested range not satisfiable)
                try:
                    # Open the connection again without the range header
                    data = compat_urllib_request.urlopen(basic_request)
                    # .get() so that a missing header yields None instead
                    # of depending on the message class' __getitem__.
                    content_length = data.info().get('Content-Length', None)
                except (compat_urllib_error.HTTPError, ) as retry_err:
                    if retry_err.code < 500 or retry_err.code >= 600:
                        raise
                else:
                    # Examine the reported length
                    if (content_length is not None and
                            (resume_len - 100 < int(content_length) < resume_len + 100)):
                        # The file had already been fully downloaded.
                        # Explanation to the above condition: in issue #175 it was revealed that
                        # YouTube sometimes adds or removes a few bytes from the end of the file,
                        # changing the file size slightly and causing problems for some users. So
                        # I decided to implement a suggested change and consider the file
                        # completely downloaded if the file size differs less than 100 bytes from
                        # the one in the hard drive.
                        self.report_file_already_downloaded(filename)
                        self.try_rename(tmpfilename, filename)
                        self._hook_progress({
                            'filename': filename,
                            'status': 'finished',
                        })
                        return True
                    else:
                        # The length does not match, we start the download over.
                        # The partial file is truncated ('wb'), so nothing of it
                        # counts as already downloaded any more.
                        self.report_unable_to_resume()
                        resume_len = 0
                        open_mode = 'wb'
                        break
        # Retry
        count += 1
        if count <= retries:
            self.report_retry(count, retries)

    if count > retries:
        self.report_error(u'giving up after %s retries' % retries)
        return False

    data_len = data.info().get('Content-length', None)
    if data_len is not None:
        # Content-length only covers what is still to be sent; add the
        # part that is already on disk to get the size of the whole file.
        data_len = int(data_len) + resume_len
        min_data_len = self.params.get("min_filesize", None)
        max_data_len = self.params.get("max_filesize", None)
        if min_data_len is not None and data_len < min_data_len:
            self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
            return False
        if max_data_len is not None and data_len > max_data_len:
            self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
            return False

    data_len_str = self.format_bytes(data_len)
    byte_counter = resume_len
    block_size = self.params.get('buffersize', 1024)
    start = time.time()
    while True:
        # Download and write
        before = time.time()
        data_block = data.read(block_size)
        after = time.time()
        if not data_block:
            break
        byte_counter += len(data_block)

        # Open file just in time
        if stream is None:
            try:
                (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                assert stream is not None
                filename = self.undo_temp_name(tmpfilename)
                self.report_destination(filename)
            except (OSError, IOError) as err:
                self.report_error(u'unable to open for writing: %s' % str(err))
                return False
        try:
            stream.write(data_block)
        except (IOError, OSError) as err:
            self.to_stderr(u"\n")
            self.report_error(u'unable to write data: %s' % str(err))
            # Do not leak the handle of the partial file.  The write error
            # has already been reported, so a failure while closing is
            # deliberately ignored.
            try:
                stream.close()
            except (IOError, OSError):
                pass
            return False
        if not self.params.get('noresizebuffer', False):
            block_size = self.best_block_size(after - before, len(data_block))

        # Progress message (speed and ETA only consider this session's bytes)
        speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
        if data_len is None:
            self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
        else:
            percent_str = self.calc_percent(byte_counter, data_len)
            eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            self.report_progress(percent_str, data_len_str, speed_str, eta_str)

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': data_len,
            'tmpfilename': tmpfilename,
            'filename': filename,
            'status': 'downloading',
        })

        # Apply rate limit
        self.slow_down(start, byte_counter - resume_len)

    if stream is None:
        # The stream is only opened once the first block arrives
        self.to_stderr(u"\n")
        self.report_error(u'Did not get any data blocks')
        return False
    stream.close()
    self.report_finish()
    if data_len is not None and byte_counter != data_len:
        raise ContentTooShortError(byte_counter, int(data_len))
    self.try_rename(tmpfilename, filename)

    # Update file modification time
    if self.params.get('updatetime', True):
        info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

    self._hook_progress({
        'downloaded_bytes': byte_counter,
        'total_bytes': byte_counter,
        'filename': filename,
        'status': 'finished',
    })

    return True
983
def _hook_progress(self, status):
    """Pass the status dictionary to every registered progress hook.

    The hooks are called one after another, in the order in which they
    appear in self._progress_hooks.
    """
    registered = self._progress_hooks
    for notify in registered:
        notify(status)
987
def add_progress_hook(self, ph):
    """Register ph as a callback that is notified about download progress.

    ph is called with a single dictionary that always has the entries
    * filename: The final filename
    * status: Either "downloading" or "finished"

    and that may additionally have some of these entries:

    * downloaded_bytes: Bytes on disk
    * total_bytes: Total bytes, None if unknown
    * tmpfilename: The filename we're currently writing to

    If the download is successful, every hook is guaranteed to be called
    at least once (with status "finished").
    """
    hooks = self._progress_hooks
    hooks.append(ph)