]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
2 # -*- coding: utf-8 -*-
4 from __future__
import absolute_import
16 from .extractor
import get_info_extractor
17 from .FileDownloader
import FileDownloader
20 class YoutubeDL(object):
23 YoutubeDL objects are the ones responsible for downloading the
24 actual video file and writing it to disk if the user has requested
25 it, among some other tasks. In most cases there should be one per
26 program. As, given a video URL, the downloader doesn't know how to
27 extract all the needed information, task that InfoExtractors do, it
28 has to pass the URL to one of them.
30 For this, YoutubeDL objects have a method that allows
31 InfoExtractors to be registered in a given order. When it is passed
32 a URL, the YoutubeDL object hands it to the first InfoExtractor it
33 finds that reports being able to handle it. The InfoExtractor extracts
34 all the information about the video or videos the URL refers to, and
35 YoutubeDL processes the extracted information, possibly using a File
36 Downloader to download the video.
38 YoutubeDL objects accept a lot of parameters. In order not to saturate
39 the object constructor with arguments, it receives a dictionary of
40 options instead. These options are available through the params
41 attribute for the InfoExtractors to use. The YoutubeDL also
42 registers itself as the downloader in charge for the InfoExtractors
43 that are added to it, so this is a "mutual registration".
47 username: Username for authentication purposes.
48 password: Password for authentication purposes.
49 videopassword: Password for accessing a video.
50 usenetrc: Use netrc for authentication instead.
51 verbose: Print additional info to stdout.
52 quiet: Do not print messages to stdout.
53 forceurl: Force printing final URL.
54 forcetitle: Force printing title.
55 forceid: Force printing ID.
56 forcethumbnail: Force printing thumbnail URL.
57 forcedescription: Force printing description.
58 forcefilename: Force printing final filename.
59 simulate: Do not download the video files.
60 format: Video format code.
61 format_limit: Highest quality format to try.
62 outtmpl: Template for output names.
63 restrictfilenames: Do not allow "&" and spaces in file names
64 ignoreerrors: Do not stop on download errors.
65 nooverwrites: Prevent overwriting files.
66 playliststart: Playlist item to start at.
67 playlistend: Playlist item to end at.
68 matchtitle: Download only matching titles.
69 rejecttitle: Reject downloads for matching titles.
70 logtostderr: Log messages to stderr instead of stdout.
71 writedescription: Write the video description to a .description file
72 writeinfojson: Write the video metadata to a .info.json file
73 writethumbnail: Write the thumbnail image to a file
74 writesubtitles: Write the video subtitles to a file
75 allsubtitles: Downloads all the subtitles of the video
76 listsubtitles: Lists all available subtitles for the video
77 subtitlesformat: Subtitle format [sbv/srt] (default=srt)
78 subtitleslang: Language of the subtitles to download
79 keepvideo: Keep the video file after post-processing
80 daterange: A DateRange object, download only if the upload_date is in the range.
81 skip_download: Skip the actual download of the video file
83 The following parameters are not used by YoutubeDL itself, they are used by
85 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
86 noresizebuffer, retries, continuedl, noprogress, consoletitle
92 _download_retcode
= None
96 def __init__(self
, params
):
97 """Create a YoutubeDL object with the given options."""
100 self
._progress
_hooks
= []
101 self
._download
_retcode
= 0
102 self
._num
_downloads
= 0
103 self
._screen
_file
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)]
105 self
.fd
= FileDownloader(self
, self
.params
)
107 if '%(stitle)s' in self
.params
['outtmpl']:
108 self
.report_warning(u
'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
110 def add_info_extractor(self
, ie
):
111 """Add an InfoExtractor object to the end of the list."""
113 ie
.set_downloader(self
)
115 def add_post_processor(self
, pp
):
116 """Add a PostProcessor object to the end of the chain."""
118 pp
.set_downloader(self
)
def to_screen(self, message, skip_eol=False):
    """Print message to the screen file if not in quiet mode.

    message must be a text (unicode) string.  It is written to
    self._screen_file, followed by a newline unless skip_eol is true,
    and the stream is flushed afterwards.  Nothing is written when the
    'quiet' parameter is set.
    """
    assert isinstance(message, type(u''))
    if self.params.get('quiet', False):
        return
    terminator = u'' if skip_eol else u'\n'
    output = message + terminator
    # Byte-oriented streams need encoded output.  Python 2 lies about the
    # mode of sys.stdout/sys.stderr, so always encode there.
    if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding(), 'ignore')
    self._screen_file.write(output)
    self._screen_file.flush()
def to_stderr(self, message):
    """Print message, followed by a newline, to sys.stderr.

    message must be a text (unicode) string.
    """
    assert isinstance(message, type(u''))
    output = message + u'\n'
    # Decide whether to encode from the stream that is actually written
    # to (sys.stderr), not from self._screen_file.  Python 2 lies about
    # the mode of sys.stdout/sys.stderr, so always encode there.
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
def fixed_template(self):
    """Check if the output template is fixed.

    Returns True when the 'outtmpl' parameter contains no %(...)s
    field, i.e. every download would produce the same file name.
    """
    return re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None
143 def trouble(self
, message
=None, tb
=None):
144 """Determine action to take when a download problem appears.
146 Depending on if the downloader has been configured to ignore
147 download errors or not, this method may throw an exception or
148 not when errors are found, after printing the message.
150 tb, if given, is additional traceback information.
152 if message
is not None:
153 self
.to_stderr(message
)
154 if self
.params
.get('verbose'):
156 if sys
.exc_info()[0]: # if .trouble has been called from an except block
158 if hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
159 tb
+= u
''.join(traceback
.format_exception(*sys
.exc_info()[1].exc_info
))
160 tb
+= compat_str(traceback
.format_exc())
162 tb_data
= traceback
.format_list(traceback
.extract_stack())
163 tb
= u
''.join(tb_data
)
165 if not self
.params
.get('ignoreerrors', False):
166 if sys
.exc_info()[0] and hasattr(sys
.exc_info()[1], 'exc_info') and sys
.exc_info()[1].exc_info
[0]:
167 exc_info
= sys
.exc_info()[1].exc_info
169 exc_info
= sys
.exc_info()
170 raise DownloadError(message
, exc_info
)
171 self
._download
_retcode
= 1
def report_warning(self, message):
    '''
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file (and the OS is not Windows) the 'WARNING:'
    will be colored
    '''
    if sys.stderr.isatty() and os.name != 'nt':
        # ANSI escape sequence: yellow foreground, then reset.
        _msg_header = u'\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = u'WARNING:'
    warning_message = u'%s %s' % (_msg_header, message)
    self.to_stderr(warning_message)
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefixes the message with 'ERROR:', colored
    in red if stderr is a tty file (and the OS is not Windows).

    tb is passed through to trouble unchanged.
    '''
    if sys.stderr.isatty() and os.name != 'nt':
        # ANSI escape sequence: red foreground, then reset.
        _msg_header = u'\033[0;31mERROR:\033[0m'
    else:
        _msg_header = u'ERROR:'
    error_message = u'%s %s' % (_msg_header, message)
    self.trouble(error_message, tb)
def slow_down(self, start_time, byte_counter):
    """Sleep if the download speed is over the rate limit.

    start_time is the time.time() value at which the transfer began and
    byte_counter the number of bytes received since then.  Nothing
    happens when no 'ratelimit' parameter is set or no bytes have been
    counted yet.
    """
    rate_limit = self.params.get('ratelimit', None)
    if rate_limit is None or byte_counter == 0:
        return
    now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        # No measurable time has passed; avoid dividing by zero.
        return
    speed = float(byte_counter) / elapsed
    if speed > rate_limit:
        # Sleep for the extra time byte_counter bytes would have needed
        # at exactly rate_limit bytes per second.
        time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)
def report_writedescription(self, descfn):
    """Report that the description file is being written.

    descfn is the name of the description file; it is appended to the
    message passed to to_screen.
    """
    self.to_screen(u'[info] Writing video description to: ' + descfn)
def report_writesubtitles(self, sub_filename):
    """Report that the subtitles file is being written.

    sub_filename is the name of the subtitles file; it is appended to
    the message passed to to_screen.
    """
    self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
def report_writeinfojson(self, infofn):
    """Report the name of the JSON metadata file.

    infofn is the name of the metadata file; it is appended to the
    message passed to to_screen.
    """
    self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    If printing the message with file_name in it raises
    UnicodeEncodeError, a generic message without the name is printed
    instead.
    """
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
def increment_downloads(self):
    """Increment the ordinal that assigns a number to each file."""
    self._num_downloads += 1
233 def prepare_filename(self
, info_dict
):
234 """Generate the output filename."""
236 template_dict
= dict(info_dict
)
238 template_dict
['epoch'] = int(time
.time())
239 autonumber_size
= self
.params
.get('autonumber_size')
240 if autonumber_size
is None:
242 autonumber_templ
= u
'%0' + str(autonumber_size
) + u
'd'
243 template_dict
['autonumber'] = autonumber_templ
% self
._num
_downloads
244 if template_dict
['playlist_index'] is not None:
245 template_dict
['playlist_index'] = u
'%05d' % template_dict
['playlist_index']
247 sanitize
= lambda k
,v
: sanitize_filename(
248 u
'NA' if v
is None else compat_str(v
),
249 restricted
=self
.params
.get('restrictfilenames'),
251 template_dict
= dict((k
, sanitize(k
, v
)) for k
,v
in template_dict
.items())
253 filename
= self
.params
['outtmpl'] % template_dict
255 except KeyError as err
:
256 self
.report_error(u
'Erroneous output template')
258 except ValueError as err
:
259 self
.report_error(u
'Insufficient system charset ' + repr(preferredencoding()))
262 def _match_entry(self
, info_dict
):
263 """ Returns None iff the file should be downloaded """
265 title
= info_dict
['title']
266 matchtitle
= self
.params
.get('matchtitle', False)
268 if not re
.search(matchtitle
, title
, re
.IGNORECASE
):
269 return u
'[download] "' + title
+ '" title did not match pattern "' + matchtitle
+ '"'
270 rejecttitle
= self
.params
.get('rejecttitle', False)
272 if re
.search(rejecttitle
, title
, re
.IGNORECASE
):
273 return u
'"' + title
+ '" title matched reject pattern "' + rejecttitle
+ '"'
274 date
= info_dict
.get('upload_date', None)
276 dateRange
= self
.params
.get('daterange', DateRange())
277 if date
not in dateRange
:
278 return u
'[download] %s upload date is not in range %s' % (date_from_str(date
).isoformat(), dateRange
)
281 def extract_info(self
, url
, download
=True, ie_key
=None, extra_info
={}):
283 Returns a list with a dictionary for each video we find.
284 If 'download', also downloads the videos.
285 extra_info is a dict containing the extra values to add to each result
289 ie
= get_info_extractor(ie_key
)()
290 ie
.set_downloader(self
)
296 if not ie
.suitable(url
):
300 self
.report_warning(u
'The program functionality for this site has been marked as broken, '
301 u
'and will probably not work.')
304 ie_result
= ie
.extract(url
)
305 if ie_result
is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
307 if isinstance(ie_result
, list):
308 # Backwards compatibility: old IE result format
309 for result
in ie_result
:
310 result
.update(extra_info
)
312 '_type': 'compat_list',
313 'entries': ie_result
,
316 ie_result
.update(extra_info
)
317 if 'extractor' not in ie_result
:
318 ie_result
['extractor'] = ie
.IE_NAME
319 return self
.process_ie_result(ie_result
, download
=download
)
320 except ExtractorError
as de
: # An error we somewhat expected
321 self
.report_error(compat_str(de
), de
.format_traceback())
323 except Exception as e
:
324 if self
.params
.get('ignoreerrors', False):
325 self
.report_error(compat_str(e
), tb
=compat_str(traceback
.format_exc()))
330 self
.report_error(u
'no suitable InfoExtractor: %s' % url
)
332 def process_ie_result(self
, ie_result
, download
=True, extra_info
={}):
334 Take the result of the ie(may be modified) and resolve all unresolved
335 references (URLs, playlist items).
337 It will also download the videos if 'download'.
338 Returns the resolved ie_result.
341 result_type
= ie_result
.get('_type', 'video') # If not given we suppose it's a video, support the default old system
342 if result_type
== 'video':
343 if 'playlist' not in ie_result
:
344 # It isn't part of a playlist
345 ie_result
['playlist'] = None
346 ie_result
['playlist_index'] = None
348 self
.process_info(ie_result
)
350 elif result_type
== 'url':
351 # We have to add extra_info to the results because it may be
352 # contained in a playlist
353 return self
.extract_info(ie_result
['url'],
355 ie_key
=ie_result
.get('ie_key'),
356 extra_info
=extra_info
)
357 elif result_type
== 'playlist':
358 # We process each entry in the playlist
359 playlist
= ie_result
.get('title', None) or ie_result
.get('id', None)
360 self
.to_screen(u
'[download] Downloading playlist: %s' % playlist
)
362 playlist_results
= []
364 n_all_entries
= len(ie_result
['entries'])
365 playliststart
= self
.params
.get('playliststart', 1) - 1
366 playlistend
= self
.params
.get('playlistend', -1)
368 if playlistend
== -1:
369 entries
= ie_result
['entries'][playliststart
:]
371 entries
= ie_result
['entries'][playliststart
:playlistend
]
373 n_entries
= len(entries
)
375 self
.to_screen(u
"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
376 (ie_result
['extractor'], playlist
, n_all_entries
, n_entries
))
378 for i
,entry
in enumerate(entries
,1):
379 self
.to_screen(u
'[download] Downloading video #%s of %s' %(i
, n_entries
))
381 'playlist': playlist
,
382 'playlist_index': i
+ playliststart
,
384 if not 'extractor' in entry
:
385 # We set the extractor, if it's an url it will be set then to
386 # the new extractor, but if it's already a video we must make
387 # sure it's present: see issue #877
388 entry
['extractor'] = ie_result
['extractor']
389 entry_result
= self
.process_ie_result(entry
,
392 playlist_results
.append(entry_result
)
393 ie_result
['entries'] = playlist_results
395 elif result_type
== 'compat_list':
397 r
.setdefault('extractor', ie_result
['extractor'])
399 ie_result
['entries'] = [
400 self
.process_ie_result(_fixup(r
), download
=download
)
401 for r
in ie_result
['entries']
405 raise Exception('Invalid result type: %s' % result_type
)
407 def process_info(self
, info_dict
):
408 """Process a single resolved IE result."""
410 assert info_dict
.get('_type', 'video') == 'video'
411 # We increment the download count here to match the previous behaviour.
412 self
.increment_downloads()
414 info_dict
['fulltitle'] = info_dict
['title']
415 if len(info_dict
['title']) > 200:
416 info_dict
['title'] = info_dict
['title'][:197] + u
'...'
418 # Keep for backwards compatibility
419 info_dict
['stitle'] = info_dict
['title']
421 if not 'format' in info_dict
:
422 info_dict
['format'] = info_dict
['ext']
424 reason
= self
._match
_entry
(info_dict
)
425 if reason
is not None:
426 self
.to_screen(u
'[download] ' + reason
)
429 max_downloads
= self
.params
.get('max_downloads')
430 if max_downloads
is not None:
431 if self
._num
_downloads
> int(max_downloads
):
432 raise MaxDownloadsReached()
434 filename
= self
.prepare_filename(info_dict
)
437 if self
.params
.get('forcetitle', False):
438 compat_print(info_dict
['title'])
439 if self
.params
.get('forceid', False):
440 compat_print(info_dict
['id'])
441 if self
.params
.get('forceurl', False):
442 compat_print(info_dict
['url'])
443 if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
:
444 compat_print(info_dict
['thumbnail'])
445 if self
.params
.get('forcedescription', False) and 'description' in info_dict
:
446 compat_print(info_dict
['description'])
447 if self
.params
.get('forcefilename', False) and filename
is not None:
448 compat_print(filename
)
449 if self
.params
.get('forceformat', False):
450 compat_print(info_dict
['format'])
452 # Do nothing else if in simulate mode
453 if self
.params
.get('simulate', False):
460 dn
= os
.path
.dirname(encodeFilename(filename
))
461 if dn
!= '' and not os
.path
.exists(dn
):
463 except (OSError, IOError) as err
:
464 self
.report_error(u
'unable to create directory ' + compat_str(err
))
467 if self
.params
.get('writedescription', False):
469 descfn
= filename
+ u
'.description'
470 self
.report_writedescription(descfn
)
471 with io
.open(encodeFilename(descfn
), 'w', encoding
='utf-8') as descfile
:
472 descfile
.write(info_dict
['description'])
473 except (OSError, IOError):
474 self
.report_error(u
'Cannot write description file ' + descfn
)
477 if self
.params
.get('writesubtitles', False) and 'subtitles' in info_dict
and info_dict
['subtitles']:
478 # subtitles download errors are already managed as troubles in relevant IE
479 # that way it will silently go on when used with unsupporting IE
480 subtitle
= info_dict
['subtitles'][0]
481 (sub_error
, sub_lang
, sub
) = subtitle
482 sub_format
= self
.params
.get('subtitlesformat')
484 self
.report_warning("Some error while getting the subtitles")
487 sub_filename
= filename
.rsplit('.', 1)[0] + u
'.' + sub_lang
+ u
'.' + sub_format
488 self
.report_writesubtitles(sub_filename
)
489 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8') as subfile
:
491 except (OSError, IOError):
492 self
.report_error(u
'Cannot write subtitles file ' + descfn
)
495 if self
.params
.get('allsubtitles', False) and 'subtitles' in info_dict
and info_dict
['subtitles']:
496 subtitles
= info_dict
['subtitles']
497 sub_format
= self
.params
.get('subtitlesformat')
498 for subtitle
in subtitles
:
499 (sub_error
, sub_lang
, sub
) = subtitle
501 self
.report_warning("Some error while getting the subtitles")
504 sub_filename
= filename
.rsplit('.', 1)[0] + u
'.' + sub_lang
+ u
'.' + sub_format
505 self
.report_writesubtitles(sub_filename
)
506 with io
.open(encodeFilename(sub_filename
), 'w', encoding
='utf-8') as subfile
:
508 except (OSError, IOError):
509 self
.report_error(u
'Cannot write subtitles file ' + descfn
)
512 if self
.params
.get('writeinfojson', False):
513 infofn
= filename
+ u
'.info.json'
514 self
.report_writeinfojson(infofn
)
516 json_info_dict
= dict((k
, v
) for k
,v
in info_dict
.items() if not k
in ['urlhandle'])
517 write_json_file(json_info_dict
, encodeFilename(infofn
))
518 except (OSError, IOError):
519 self
.report_error(u
'Cannot write metadata to JSON file ' + infofn
)
522 if self
.params
.get('writethumbnail', False):
523 if 'thumbnail' in info_dict
:
524 thumb_format
= info_dict
['thumbnail'].rpartition(u
'/')[2].rpartition(u
'.')[2]
527 thumb_filename
= filename
.rpartition('.')[0] + u
'.' + thumb_format
528 self
.to_screen(u
'[%s] %s: Downloading thumbnail ...' %
529 (info_dict
['extractor'], info_dict
['id']))
530 uf
= compat_urllib_request
.urlopen(info_dict
['thumbnail'])
531 with open(thumb_filename
, 'wb') as thumbf
:
532 shutil
.copyfileobj(uf
, thumbf
)
533 self
.to_screen(u
'[%s] %s: Writing thumbnail to: %s' %
534 (info_dict
['extractor'], info_dict
['id'], thumb_filename
))
536 if not self
.params
.get('skip_download', False):
537 if self
.params
.get('nooverwrites', False) and os
.path
.exists(encodeFilename(filename
)):
541 success
= self
.fd
._do
_download
(filename
, info_dict
)
542 except (OSError, IOError) as err
:
543 raise UnavailableVideoError()
544 except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
:
545 self
.report_error(u
'unable to download video data: %s' % str(err
))
547 except (ContentTooShortError
, ) as err
:
548 self
.report_error(u
'content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
553 self
.post_process(filename
, info_dict
)
554 except (PostProcessingError
) as err
:
555 self
.report_error(u
'postprocessing: %s' % str(err
))
558 def download(self
, url_list
):
559 """Download a given list of URLs."""
560 if len(url_list
) > 1 and self
.fixed_template():
561 raise SameFileError(self
.params
['outtmpl'])
565 #It also downloads the videos
566 videos
= self
.extract_info(url
)
567 except UnavailableVideoError
:
568 self
.report_error(u
'unable to download video')
569 except MaxDownloadsReached
:
570 self
.to_screen(u
'[info] Maximum number of downloaded files reached.')
573 return self
._download
_retcode
575 def post_process(self
, filename
, ie_info
):
576 """Run all the postprocessors on the given file."""
578 info
['filepath'] = filename
582 keep_video_wish
,new_info
= pp
.run(info
)
583 if keep_video_wish
is not None:
585 keep_video
= keep_video_wish
586 elif keep_video
is None:
587 # No clear decision yet, let IE decide
588 keep_video
= keep_video_wish
589 except PostProcessingError
as e
:
590 self
.to_stderr(u
'ERROR: ' + e
.msg
)
591 if keep_video
is False and not self
.params
.get('keepvideo', False):
593 self
.to_screen(u
'Deleting original file %s (pass -k to keep)' % filename
)
594 os
.remove(encodeFilename(filename
))
595 except (IOError, OSError):
596 self
.report_warning(u
'Unable to remove downloaded video file')