]> Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/FileDownloader.py
Imported Upstream version 2013.06.21
[youtubedl] / youtube_dl / FileDownloader.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import math
7 import io
8 import os
9 import re
10 import shutil
11 import socket
12 import subprocess
13 import sys
14 import time
15 import traceback
16
17 if os.name == 'nt':
18 import ctypes
19
20 from .utils import *
21 from .InfoExtractors import get_info_extractor
22
23
24 class FileDownloader(object):
25 """File Downloader class.
26
27 File downloader objects are the ones responsible for downloading the
28 actual video file and writing it to disk if the user has requested
29 it, among some other tasks. In most cases there should be one per
30 program. As, given a video URL, the downloader doesn't know how to
31 extract all the needed information, task that InfoExtractors do, it
32 has to pass the URL to one of them.
33
34 For this, file downloader objects have a method that allows
35 InfoExtractors to be registered in a given order. When it is passed
36 a URL, the file downloader hands it to the first InfoExtractor it
37 finds that reports being able to handle it. The InfoExtractor extracts
38 all the information about the video or videos the URL refers to, and
39 asks the FileDownloader to process the video information, possibly
40 downloading the video.
41
42 File downloaders accept a lot of parameters. In order not to saturate
43 the object constructor with arguments, it receives a dictionary of
44 options instead. These options are available through the params
45 attribute for the InfoExtractors to use. The FileDownloader also
46 registers itself as the downloader in charge for the InfoExtractors
47 that are added to it, so this is a "mutual registration".
48
49 Available options:
50
51 username: Username for authentication purposes.
52 password: Password for authentication purposes.
53 usenetrc: Use netrc for authentication instead.
54 quiet: Do not print messages to stdout.
55 forceurl: Force printing final URL.
56 forcetitle: Force printing title.
57 forceid: Force printing ID.
58 forcethumbnail: Force printing thumbnail URL.
59 forcedescription: Force printing description.
60 forcefilename: Force printing final filename.
61 simulate: Do not download the video files.
62 format: Video format code.
63 format_limit: Highest quality format to try.
64 outtmpl: Template for output names.
65 restrictfilenames: Do not allow "&" and spaces in file names
66 ignoreerrors: Do not stop on download errors.
67 ratelimit: Download speed limit, in bytes/sec.
68 nooverwrites: Prevent overwriting files.
69 retries: Number of times to retry for HTTP error 5xx
70 buffersize: Size of download buffer in bytes.
71 noresizebuffer: Do not automatically resize the download buffer.
72 continuedl: Try to continue downloads if possible.
73 noprogress: Do not print the progress bar.
74 playliststart: Playlist item to start at.
75 playlistend: Playlist item to end at.
76 matchtitle: Download only matching titles.
77 rejecttitle: Reject downloads for matching titles.
78 logtostderr: Log messages to stderr instead of stdout.
79 consoletitle: Display progress in console window's titlebar.
80 nopart: Do not use temporary .part files.
81 updatetime: Use the Last-modified header to set output file timestamps.
82 writedescription: Write the video description to a .description file
83 writeinfojson: Write the video metadata to a .info.json file
84 writethumbnail: Write the thumbnail image to a file
85 writesubtitles: Write the video subtitles to a file
86 allsubtitles: Downloads all the subtitles of the video
87 listsubtitles: Lists all available subtitles for the video
88 subtitlesformat: Subtitle format [sbv/srt] (default=srt)
89 subtitleslang: Language of the subtitles to download
90 test: Download only first bytes to test the downloader.
91 keepvideo: Keep the video file after post-processing
92 min_filesize: Skip files smaller than this size
93 max_filesize: Skip files larger than this size
94 daterange: A DateRange object, download only if the upload_date is in the range.
95 skip_download: Skip the actual download of the video file
96 """
97
98 params = None
99 _ies = []
100 _pps = []
101 _download_retcode = None
102 _num_downloads = None
103 _screen_file = None
104
105 def __init__(self, params):
106 """Create a FileDownloader object with the given options."""
107 self._ies = []
108 self._pps = []
109 self._progress_hooks = []
110 self._download_retcode = 0
111 self._num_downloads = 0
112 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
113 self.params = params
114
115 if '%(stitle)s' in self.params['outtmpl']:
116 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
117
118 @staticmethod
119 def format_bytes(bytes):
120 if bytes is None:
121 return 'N/A'
122 if type(bytes) is str:
123 bytes = float(bytes)
124 if bytes == 0.0:
125 exponent = 0
126 else:
127 exponent = int(math.log(bytes, 1024.0))
128 suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent]
129 converted = float(bytes) / float(1024 ** exponent)
130 return '%.2f%s' % (converted, suffix)
131
132 @staticmethod
133 def calc_percent(byte_counter, data_len):
134 if data_len is None:
135 return '---.-%'
136 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
137
138 @staticmethod
139 def calc_eta(start, now, total, current):
140 if total is None:
141 return '--:--'
142 dif = now - start
143 if current == 0 or dif < 0.001: # One millisecond
144 return '--:--'
145 rate = float(current) / dif
146 eta = int((float(total) - float(current)) / rate)
147 (eta_mins, eta_secs) = divmod(eta, 60)
148 if eta_mins > 99:
149 return '--:--'
150 return '%02d:%02d' % (eta_mins, eta_secs)
151
152 @staticmethod
153 def calc_speed(start, now, bytes):
154 dif = now - start
155 if bytes == 0 or dif < 0.001: # One millisecond
156 return '%10s' % '---b/s'
157 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
158
159 @staticmethod
160 def best_block_size(elapsed_time, bytes):
161 new_min = max(bytes / 2.0, 1.0)
162 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
163 if elapsed_time < 0.001:
164 return int(new_max)
165 rate = bytes / elapsed_time
166 if rate > new_max:
167 return int(new_max)
168 if rate < new_min:
169 return int(new_min)
170 return int(rate)
171
172 @staticmethod
173 def parse_bytes(bytestr):
174 """Parse a string indicating a byte quantity into an integer."""
175 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
176 if matchobj is None:
177 return None
178 number = float(matchobj.group(1))
179 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
180 return int(round(number * multiplier))
181
182 def add_info_extractor(self, ie):
183 """Add an InfoExtractor object to the end of the list."""
184 self._ies.append(ie)
185 ie.set_downloader(self)
186
187 def add_post_processor(self, pp):
188 """Add a PostProcessor object to the end of the chain."""
189 self._pps.append(pp)
190 pp.set_downloader(self)
191
192 def to_screen(self, message, skip_eol=False):
193 """Print message to stdout if not in quiet mode."""
194 assert type(message) == type(u'')
195 if not self.params.get('quiet', False):
196 terminator = [u'\n', u''][skip_eol]
197 output = message + terminator
198 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
199 output = output.encode(preferredencoding(), 'ignore')
200 self._screen_file.write(output)
201 self._screen_file.flush()
202
203 def to_stderr(self, message):
204 """Print message to stderr."""
205 assert type(message) == type(u'')
206 output = message + u'\n'
207 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
208 output = output.encode(preferredencoding())
209 sys.stderr.write(output)
210
211 def to_cons_title(self, message):
212 """Set console/terminal window title to message."""
213 if not self.params.get('consoletitle', False):
214 return
215 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
216 # c_wchar_p() might not be necessary if `message` is
217 # already of type unicode()
218 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
219 elif 'TERM' in os.environ:
220 self.to_screen('\033]0;%s\007' % message, skip_eol=True)
221
222 def fixed_template(self):
223 """Checks if the output template is fixed."""
224 return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
225
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    Depending on if the downloader has been configured to ignore
    download errors or not, this method may throw an exception or
    not when errors are found, after printing the message.

    message, if given, is written to stderr first.
    tb, if given, is additional traceback information; it is only
    printed when the 'verbose' option is set.  When tb is omitted in
    verbose mode, a traceback is built from the exception currently
    being handled, or from the current call stack if there is none.

    Raises DownloadError(message, exc_info) unless the 'ignoreerrors'
    option is set; in that case the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)
    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                tb = u''
                # An exception that carries its own exc_info attribute wraps
                # an earlier failure: print that inner traceback first.
                if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                    tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                # No active exception: show where trouble() was called from.
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
        self.to_stderr(tb)
    if not self.params.get('ignoreerrors', False):
        # Prefer the wrapped exception's exc_info, when present, as the
        # cause attached to the DownloadError.
        if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
            exc_info = sys.exc_info()[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    # Only reached when errors are ignored: remember that something failed.
    self._download_retcode = 1
255
256 def report_warning(self, message):
257 '''
258 Print the message to stderr, it will be prefixed with 'WARNING:'
259 If stderr is a tty file the 'WARNING:' will be colored
260 '''
261 if sys.stderr.isatty() and os.name != 'nt':
262 _msg_header=u'\033[0;33mWARNING:\033[0m'
263 else:
264 _msg_header=u'WARNING:'
265 warning_message=u'%s %s' % (_msg_header,message)
266 self.to_stderr(warning_message)
267
268 def report_error(self, message, tb=None):
269 '''
270 Do the same as trouble, but prefixes the message with 'ERROR:', colored
271 in red if stderr is a tty file.
272 '''
273 if sys.stderr.isatty() and os.name != 'nt':
274 _msg_header = u'\033[0;31mERROR:\033[0m'
275 else:
276 _msg_header = u'ERROR:'
277 error_message = u'%s %s' % (_msg_header, message)
278 self.trouble(error_message, tb)
279
280 def slow_down(self, start_time, byte_counter):
281 """Sleep if the download speed is over the rate limit."""
282 rate_limit = self.params.get('ratelimit', None)
283 if rate_limit is None or byte_counter == 0:
284 return
285 now = time.time()
286 elapsed = now - start_time
287 if elapsed <= 0.0:
288 return
289 speed = float(byte_counter) / elapsed
290 if speed > rate_limit:
291 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
292
293 def temp_name(self, filename):
294 """Returns a temporary filename for the given filename."""
295 if self.params.get('nopart', False) or filename == u'-' or \
296 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
297 return filename
298 return filename + u'.part'
299
300 def undo_temp_name(self, filename):
301 if filename.endswith(u'.part'):
302 return filename[:-len(u'.part')]
303 return filename
304
305 def try_rename(self, old_filename, new_filename):
306 try:
307 if old_filename == new_filename:
308 return
309 os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
310 except (IOError, OSError) as err:
311 self.report_error(u'unable to rename file')
312
313 def try_utime(self, filename, last_modified_hdr):
314 """Try to set the last-modified time of the given file."""
315 if last_modified_hdr is None:
316 return
317 if not os.path.isfile(encodeFilename(filename)):
318 return
319 timestr = last_modified_hdr
320 if timestr is None:
321 return
322 filetime = timeconvert(timestr)
323 if filetime is None:
324 return filetime
325 # Ignore obviously invalid dates
326 if filetime == 0:
327 return
328 try:
329 os.utime(filename, (time.time(), filetime))
330 except:
331 pass
332 return filetime
333
334 def report_writedescription(self, descfn):
335 """ Report that the description file is being written """
336 self.to_screen(u'[info] Writing video description to: ' + descfn)
337
338 def report_writesubtitles(self, sub_filename):
339 """ Report that the subtitles file is being written """
340 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
341
342 def report_writeinfojson(self, infofn):
343 """ Report that the metadata file has been written """
344 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
345
346 def report_destination(self, filename):
347 """Report destination filename."""
348 self.to_screen(u'[download] Destination: ' + filename)
349
350 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
351 """Report download progress."""
352 if self.params.get('noprogress', False):
353 return
354 clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
355 if self.params.get('progress_with_newline', False):
356 self.to_screen(u'[download] %s of %s at %s ETA %s' %
357 (percent_str, data_len_str, speed_str, eta_str))
358 else:
359 self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
360 (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
361 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
362 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
363
364 def report_resuming_byte(self, resume_len):
365 """Report attempt to resume at given byte."""
366 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
367
368 def report_retry(self, count, retries):
369 """Report retry in case of HTTP error 5xx"""
370 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
371
372 def report_file_already_downloaded(self, file_name):
373 """Report file has already been fully downloaded."""
374 try:
375 self.to_screen(u'[download] %s has already been downloaded' % file_name)
376 except (UnicodeEncodeError) as err:
377 self.to_screen(u'[download] The file has already been downloaded')
378
379 def report_unable_to_resume(self):
380 """Report it was impossible to resume download."""
381 self.to_screen(u'[download] Unable to resume')
382
383 def report_finish(self):
384 """Report download finished."""
385 if self.params.get('noprogress', False):
386 self.to_screen(u'[download] Download completed')
387 else:
388 self.to_screen(u'')
389
390 def increment_downloads(self):
391 """Increment the ordinal that assigns a number to each file."""
392 self._num_downloads += 1
393
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' option with the fields of ``info_dict`` plus
    'epoch' (current time) and 'autonumber' (zero-padded download
    counter; width from the 'autonumber_size' option, default 5).  Every
    value is passed through sanitize_filename(); None becomes 'NA'.

    Returns the filename, or None after reporting an error when the
    template references an unknown field (KeyError) or formatting raises
    ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # Use .get(): a dict without 'playlist_index' must not be reported
        # as an erroneous template unless the template really uses the field.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        def sanitize(key, value):
            return sanitize_filename(
                u'NA' if value is None else compat_str(value),
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == u'id'))
        template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

        return self.params['outtmpl'] % template_dict
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError:
        self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
        return None
419 except ValueError as err:
420 self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
421 return None
422
423 def _match_entry(self, info_dict):
424 """ Returns None iff the file should be downloaded """
425
426 title = info_dict['title']
427 matchtitle = self.params.get('matchtitle', False)
428 if matchtitle:
429 if not re.search(matchtitle, title, re.IGNORECASE):
430 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
431 rejecttitle = self.params.get('rejecttitle', False)
432 if rejecttitle:
433 if re.search(rejecttitle, title, re.IGNORECASE):
434 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
435 date = info_dict.get('upload_date', None)
436 if date is not None:
437 dateRange = self.params.get('daterange', DateRange())
438 if date not in dateRange:
439 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
440 return None
441
442 def extract_info(self, url, download=True, ie_key=None, extra_info={}):
443 '''
444 Returns a list with a dictionary for each video we find.
445 If 'download', also downloads the videos.
446 extra_info is a dict containing the extra values to add to each result
447 '''
448
449 if ie_key:
450 ie = get_info_extractor(ie_key)()
451 ie.set_downloader(self)
452 ies = [ie]
453 else:
454 ies = self._ies
455
456 for ie in ies:
457 if not ie.suitable(url):
458 continue
459
460 if not ie.working():
461 self.report_warning(u'The program functionality for this site has been marked as broken, '
462 u'and will probably not work.')
463
464 try:
465 ie_result = ie.extract(url)
466 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
467 break
468 if isinstance(ie_result, list):
469 # Backwards compatibility: old IE result format
470 for result in ie_result:
471 result.update(extra_info)
472 ie_result = {
473 '_type': 'compat_list',
474 'entries': ie_result,
475 }
476 else:
477 ie_result.update(extra_info)
478 if 'extractor' not in ie_result:
479 ie_result['extractor'] = ie.IE_NAME
480 return self.process_ie_result(ie_result, download=download)
481 except ExtractorError as de: # An error we somewhat expected
482 self.report_error(compat_str(de), de.format_traceback())
483 break
484 except Exception as e:
485 if self.params.get('ignoreerrors', False):
486 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
487 break
488 else:
489 raise
490 else:
491 self.report_error(u'no suitable InfoExtractor: %s' % url)
492
493 def process_ie_result(self, ie_result, download=True, extra_info={}):
494 """
495 Take the result of the ie(may be modified) and resolve all unresolved
496 references (URLs, playlist items).
497
498 It will also download the videos if 'download'.
499 Returns the resolved ie_result.
500 """
501
502 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
503 if result_type == 'video':
504 if 'playlist' not in ie_result:
505 # It isn't part of a playlist
506 ie_result['playlist'] = None
507 ie_result['playlist_index'] = None
508 if download:
509 self.process_info(ie_result)
510 return ie_result
511 elif result_type == 'url':
512 # We have to add extra_info to the results because it may be
513 # contained in a playlist
514 return self.extract_info(ie_result['url'],
515 download,
516 ie_key=ie_result.get('ie_key'),
517 extra_info=extra_info)
518 elif result_type == 'playlist':
519 # We process each entry in the playlist
520 playlist = ie_result.get('title', None) or ie_result.get('id', None)
521 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
522
523 playlist_results = []
524
525 n_all_entries = len(ie_result['entries'])
526 playliststart = self.params.get('playliststart', 1) - 1
527 playlistend = self.params.get('playlistend', -1)
528
529 if playlistend == -1:
530 entries = ie_result['entries'][playliststart:]
531 else:
532 entries = ie_result['entries'][playliststart:playlistend]
533
534 n_entries = len(entries)
535
536 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
537 (ie_result['extractor'], playlist, n_all_entries, n_entries))
538
539 for i,entry in enumerate(entries,1):
540 self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries))
541 extra = {
542 'playlist': playlist,
543 'playlist_index': i + playliststart,
544 }
545 if not 'extractor' in entry:
546 # We set the extractor, if it's an url it will be set then to
547 # the new extractor, but if it's already a video we must make
548 # sure it's present: see issue #877
549 entry['extractor'] = ie_result['extractor']
550 entry_result = self.process_ie_result(entry,
551 download=download,
552 extra_info=extra)
553 playlist_results.append(entry_result)
554 ie_result['entries'] = playlist_results
555 return ie_result
556 elif result_type == 'compat_list':
557 def _fixup(r):
558 r.setdefault('extractor', ie_result['extractor'])
559 return r
560 ie_result['entries'] = [
561 self.process_ie_result(_fixup(r), download=download)
562 for r in ie_result['entries']
563 ]
564 return ie_result
565 else:
566 raise Exception('Invalid result type: %s' % result_type)
567
568 def process_info(self, info_dict):
569 """Process a single resolved IE result."""
570
571 assert info_dict.get('_type', 'video') == 'video'
572 #We increment the download the download count here to match the previous behaviour.
573 self.increment_downloads()
574
575 info_dict['fulltitle'] = info_dict['title']
576 if len(info_dict['title']) > 200:
577 info_dict['title'] = info_dict['title'][:197] + u'...'
578
579 # Keep for backwards compatibility
580 info_dict['stitle'] = info_dict['title']
581
582 if not 'format' in info_dict:
583 info_dict['format'] = info_dict['ext']
584
585 reason = self._match_entry(info_dict)
586 if reason is not None:
587 self.to_screen(u'[download] ' + reason)
588 return
589
590 max_downloads = self.params.get('max_downloads')
591 if max_downloads is not None:
592 if self._num_downloads > int(max_downloads):
593 raise MaxDownloadsReached()
594
595 filename = self.prepare_filename(info_dict)
596
597 # Forced printings
598 if self.params.get('forcetitle', False):
599 compat_print(info_dict['title'])
600 if self.params.get('forceid', False):
601 compat_print(info_dict['id'])
602 if self.params.get('forceurl', False):
603 compat_print(info_dict['url'])
604 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
605 compat_print(info_dict['thumbnail'])
606 if self.params.get('forcedescription', False) and 'description' in info_dict:
607 compat_print(info_dict['description'])
608 if self.params.get('forcefilename', False) and filename is not None:
609 compat_print(filename)
610 if self.params.get('forceformat', False):
611 compat_print(info_dict['format'])
612
613 # Do nothing else if in simulate mode
614 if self.params.get('simulate', False):
615 return
616
617 if filename is None:
618 return
619
620 try:
621 dn = os.path.dirname(encodeFilename(filename))
622 if dn != '' and not os.path.exists(dn):
623 os.makedirs(dn)
624 except (OSError, IOError) as err:
625 self.report_error(u'unable to create directory ' + compat_str(err))
626 return
627
628 if self.params.get('writedescription', False):
629 try:
630 descfn = filename + u'.description'
631 self.report_writedescription(descfn)
632 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
633 descfile.write(info_dict['description'])
634 except (OSError, IOError):
635 self.report_error(u'Cannot write description file ' + descfn)
636 return
637
638 if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
639 # subtitles download errors are already managed as troubles in relevant IE
640 # that way it will silently go on when used with unsupporting IE
641 subtitle = info_dict['subtitles'][0]
642 (sub_error, sub_lang, sub) = subtitle
643 sub_format = self.params.get('subtitlesformat')
644 if sub_error:
645 self.report_warning("Some error while getting the subtitles")
646 else:
647 try:
648 sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
649 self.report_writesubtitles(sub_filename)
650 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
651 subfile.write(sub)
652 except (OSError, IOError):
653 self.report_error(u'Cannot write subtitles file ' + descfn)
654 return
655
656 if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
657 subtitles = info_dict['subtitles']
658 sub_format = self.params.get('subtitlesformat')
659 for subtitle in subtitles:
660 (sub_error, sub_lang, sub) = subtitle
661 if sub_error:
662 self.report_warning("Some error while getting the subtitles")
663 else:
664 try:
665 sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
666 self.report_writesubtitles(sub_filename)
667 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
668 subfile.write(sub)
669 except (OSError, IOError):
670 self.report_error(u'Cannot write subtitles file ' + descfn)
671 return
672
673 if self.params.get('writeinfojson', False):
674 infofn = filename + u'.info.json'
675 self.report_writeinfojson(infofn)
676 try:
677 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
678 write_json_file(json_info_dict, encodeFilename(infofn))
679 except (OSError, IOError):
680 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
681 return
682
683 if self.params.get('writethumbnail', False):
684 if 'thumbnail' in info_dict:
685 thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
686 if not thumb_format:
687 thumb_format = 'jpg'
688 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
689 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
690 (info_dict['extractor'], info_dict['id']))
691 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
692 with open(thumb_filename, 'wb') as thumbf:
693 shutil.copyfileobj(uf, thumbf)
694 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
695 (info_dict['extractor'], info_dict['id'], thumb_filename))
696
697 if not self.params.get('skip_download', False):
698 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
699 success = True
700 else:
701 try:
702 success = self._do_download(filename, info_dict)
703 except (OSError, IOError) as err:
704 raise UnavailableVideoError()
705 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
706 self.report_error(u'unable to download video data: %s' % str(err))
707 return
708 except (ContentTooShortError, ) as err:
709 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
710 return
711
712 if success:
713 try:
714 self.post_process(filename, info_dict)
715 except (PostProcessingError) as err:
716 self.report_error(u'postprocessing: %s' % str(err))
717 return
718
719 def download(self, url_list):
720 """Download a given list of URLs."""
721 if len(url_list) > 1 and self.fixed_template():
722 raise SameFileError(self.params['outtmpl'])
723
724 for url in url_list:
725 try:
726 #It also downloads the videos
727 videos = self.extract_info(url)
728 except UnavailableVideoError:
729 self.report_error(u'unable to download video')
730 except MaxDownloadsReached:
731 self.to_screen(u'[info] Maximum number of downloaded files reached.')
732 raise
733
734 return self._download_retcode
735
736 def post_process(self, filename, ie_info):
737 """Run all the postprocessors on the given file."""
738 info = dict(ie_info)
739 info['filepath'] = filename
740 keep_video = None
741 for pp in self._pps:
742 try:
743 keep_video_wish,new_info = pp.run(info)
744 if keep_video_wish is not None:
745 if keep_video_wish:
746 keep_video = keep_video_wish
747 elif keep_video is None:
748 # No clear decision yet, let IE decide
749 keep_video = keep_video_wish
750 except PostProcessingError as e:
751 self.to_stderr(u'ERROR: ' + e.msg)
752 if keep_video is False and not self.params.get('keepvideo', False):
753 try:
754 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
755 os.remove(encodeFilename(filename))
756 except (IOError, OSError):
757 self.report_warning(u'Unable to remove downloaded video file')
758
759 def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
760 self.report_destination(filename)
761 tmpfilename = self.temp_name(filename)
762
763 # Check for rtmpdump first
764 try:
765 subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
766 except (OSError, IOError):
767 self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
768 return False
769 verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'
770
771 # Download using rtmpdump. rtmpdump returns exit code 2 when
772 # the connection was interrumpted and resuming appears to be
773 # possible. This is part of rtmpdump's normal usage, AFAIK.
774 basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
775 if player_url is not None:
776 basic_args += ['--swfVfy', player_url]
777 if page_url is not None:
778 basic_args += ['--pageUrl', page_url]
779 if play_path is not None:
780 basic_args += ['--playpath', play_path]
781 if tc_url is not None:
782 basic_args += ['--tcUrl', url]
783 args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)]
784 if self.params.get('verbose', False):
785 try:
786 import pipes
787 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
788 except ImportError:
789 shell_quote = repr
790 self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
791 retval = subprocess.call(args)
792 while retval == 2 or retval == 1:
793 prevsize = os.path.getsize(encodeFilename(tmpfilename))
794 self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
795 time.sleep(5.0) # This seems to be needed
796 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
797 cursize = os.path.getsize(encodeFilename(tmpfilename))
798 if prevsize == cursize and retval == 1:
799 break
800 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
801 if prevsize == cursize and retval == 2 and cursize > 1024:
802 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
803 retval = 0
804 break
805 if retval == 0:
806 fsize = os.path.getsize(encodeFilename(tmpfilename))
807 self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
808 self.try_rename(tmpfilename, filename)
809 self._hook_progress({
810 'downloaded_bytes': fsize,
811 'total_bytes': fsize,
812 'filename': filename,
813 'status': 'finished',
814 })
815 return True
816 else:
817 self.to_stderr(u"\n")
818 self.report_error(u'rtmpdump exited with code %d' % retval)
819 return False
820
def _download_with_mplayer(self, filename, url):
    """Download an MMS or RTSP stream by running the external mplayer binary.

    mplayer is asked to dump the stream into the name returned by
    self.temp_name(filename); when it exits with status 0 that file is
    handed to self.try_rename() and the progress hooks are notified with
    status 'finished'.

    Returns True when mplayer exits with status 0, False when mplayer
    cannot be started or exits with a non-zero status (an error is
    reported in both failure cases).
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
    # Check for mplayer first
    try:
        # The context manager guarantees the handle on os.devnull is closed
        # once the probe has finished instead of being leaked.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['mplayer', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
        return False

    # Download using mplayer.
    retval = subprocess.call(args)
    if retval != 0:
        self.to_stderr(u"\n")
        self.report_error(u'mplayer exited with code %d' % retval)
        return False

    fsize = os.path.getsize(encodeFilename(tmpfilename))
    self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
    self.try_rename(tmpfilename, filename)
    self._hook_progress({
        'downloaded_bytes': fsize,
        'total_bytes': fsize,
        'filename': filename,
        'status': 'finished',
    })
    return True
850
851
def _do_download(self, filename, info_dict):
    """Download the media described by info_dict into filename.

    URLs starting with 'rtmp' are delegated to _download_with_rtmpdump and
    URLs starting with 'mms' or 'rtsp' to _download_with_mplayer. Anything
    else is fetched with compat_urllib_request into the name returned by
    self.temp_name(filename), which is passed to self.try_rename() once
    the transfer is complete.

    Options read from self.params: continuedl, nopart, test, retries,
    min_filesize, max_filesize, buffersize, noresizebuffer, updatetime.

    Returns True on success (including when the file is detected as
    already downloaded) and False after a reported failure or when the
    size limits reject the file. HTTP errors other than 416 and 5xx are
    re-raised; ContentTooShortError is raised when the number of bytes
    received differs from the announced length.
    """
    url = info_dict['url']

    # Check file already present
    if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
        self.report_file_already_downloaded(filename)
        self._hook_progress({
            'filename': filename,
            'status': 'finished',
        })
        return True

    # Attempt to download using rtmpdump
    if url.startswith('rtmp'):
        return self._download_with_rtmpdump(filename, url,
                                            info_dict.get('player_url', None),
                                            info_dict.get('page_url', None),
                                            info_dict.get('play_path', None),
                                            info_dict.get('tc_url', None))

    # Attempt to download using mplayer
    if url.startswith(('mms', 'rtsp')):
        return self._download_with_mplayer(filename, url)

    tmpfilename = self.temp_name(filename)
    stream = None

    # Do not include the Accept-Encoding header
    headers = {'Youtubedl-no-compression': 'True'}
    if 'user_agent' in info_dict:
        headers['Youtubedl-user-agent'] = info_dict['user_agent']
    # basic_request never receives a Range header; it is used to start over
    # when the server refuses the ranged request.
    basic_request = compat_urllib_request.Request(url, None, headers)
    request = compat_urllib_request.Request(url, None, headers)

    if self.params.get('test', False):
        request.add_header('Range', 'bytes=0-10240')

    # Establish possible resume length
    if os.path.isfile(encodeFilename(tmpfilename)):
        resume_len = os.path.getsize(encodeFilename(tmpfilename))
    else:
        resume_len = 0

    open_mode = 'wb'
    if resume_len != 0:
        if self.params.get('continuedl', False):
            self.report_resuming_byte(resume_len)
            request.add_header('Range', 'bytes=%d-' % resume_len)
            open_mode = 'ab'
        else:
            resume_len = 0

    count = 0
    retries = self.params.get('retries', 0)
    while count <= retries:
        # Establish connection
        try:
            # Always open a fresh connection. info_dict may carry a
            # pre-opened 'urlhandle', but it has never actually been used
            # here (it was assigned and overwritten right away), and it
            # presumably would not carry the Range header set above.
            data = compat_urllib_request.urlopen(request)
            break
        except (compat_urllib_error.HTTPError, ) as err:
            if (err.code < 500 or err.code >= 600) and err.code != 416:
                # Unexpected HTTP error
                raise
            elif err.code == 416:
                # Unable to resume (requested range not satisfiable)
                try:
                    # Open the connection again without the range header
                    data = compat_urllib_request.urlopen(basic_request)
                    # .get() yields None for a missing header on every
                    # message type, which the check below expects.
                    content_length = data.info().get('Content-Length', None)
                except (compat_urllib_error.HTTPError, ) as err:
                    if err.code < 500 or err.code >= 600:
                        raise
                else:
                    # Examine the reported length
                    if (content_length is not None and
                            (resume_len - 100 < int(content_length) < resume_len + 100)):
                        # The file had already been fully downloaded.
                        # Explanation to the above condition: in issue #175 it was revealed that
                        # YouTube sometimes adds or removes a few bytes from the end of the file,
                        # changing the file size slightly and causing problems for some users. So
                        # I decided to implement a suggested change and consider the file
                        # completely downloaded if the file size differs less than 100 bytes from
                        # the one in the hard drive.
                        self.report_file_already_downloaded(filename)
                        self.try_rename(tmpfilename, filename)
                        self._hook_progress({
                            'filename': filename,
                            'status': 'finished',
                        })
                        return True
                    else:
                        # The length does not match, we start the download over.
                        # The partial file is overwritten ('wb') and the new
                        # response starts at byte 0, so nothing is resumed any
                        # more: forget the old offset, otherwise the total
                        # size and progress figures would be inflated by it.
                        self.report_unable_to_resume()
                        resume_len = 0
                        open_mode = 'wb'
                        break
        # Retry
        count += 1
        if count <= retries:
            self.report_retry(count, retries)

    if count > retries:
        self.report_error(u'giving up after %s retries' % retries)
        return False

    data_len = data.info().get('Content-length', None)
    if data_len is not None:
        # The server only reports the remaining bytes; add what is on disk.
        data_len = int(data_len) + resume_len
        min_data_len = self.params.get("min_filesize", None)
        max_data_len = self.params.get("max_filesize", None)
        if min_data_len is not None and data_len < min_data_len:
            self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
            return False
        if max_data_len is not None and data_len > max_data_len:
            self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
            return False

    data_len_str = self.format_bytes(data_len)
    byte_counter = resume_len
    block_size = self.params.get('buffersize', 1024)
    start = time.time()
    while True:
        # Download and write
        before = time.time()
        data_block = data.read(block_size)
        after = time.time()
        if len(data_block) == 0:
            break
        byte_counter += len(data_block)

        # Open file just in time
        if stream is None:
            try:
                (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                assert stream is not None
                filename = self.undo_temp_name(tmpfilename)
                self.report_destination(filename)
            except (OSError, IOError) as err:
                self.report_error(u'unable to open for writing: %s' % str(err))
                return False
        try:
            stream.write(data_block)
        except (IOError, OSError) as err:
            self.to_stderr(u"\n")
            self.report_error(u'unable to write data: %s' % str(err))
            return False
        if not self.params.get('noresizebuffer', False):
            block_size = self.best_block_size(after - before, len(data_block))

        # Progress message (speed and ETA only count bytes fetched in this run)
        speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
        if data_len is None:
            self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
        else:
            percent_str = self.calc_percent(byte_counter, data_len)
            eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            self.report_progress(percent_str, data_len_str, speed_str, eta_str)

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': data_len,
            'tmpfilename': tmpfilename,
            'filename': filename,
            'status': 'downloading',
        })

        # Apply rate limit
        self.slow_down(start, byte_counter - resume_len)

    if stream is None:
        self.to_stderr(u"\n")
        self.report_error(u'Did not get any data blocks')
        return False
    stream.close()
    self.report_finish()
    if data_len is not None and byte_counter != data_len:
        raise ContentTooShortError(byte_counter, int(data_len))
    self.try_rename(tmpfilename, filename)

    # Update file modification time
    if self.params.get('updatetime', True):
        info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

    self._hook_progress({
        'downloaded_bytes': byte_counter,
        'total_bytes': byte_counter,
        'filename': filename,
        'status': 'finished',
    })

    return True
1044
1045 def _hook_progress(self, status):
1046 for ph in self._progress_hooks:
1047 ph(status)
1048
def add_progress_hook(self, ph):
    """Register ph as a download progress callback.

    ph is called with a single dictionary that always contains:
     * filename: The final filename
     * status: One of "downloading" and "finished"

    and may additionally contain:
     * downloaded_bytes: Bytes on disk
     * total_bytes: Total bytes, None if unknown
     * tmpfilename: The filename we're currently writing to

    A successful download is guaranteed to invoke the hooks at least
    once, with status "finished".
    """
    hooks = self._progress_hooks
    hooks.append(ph)