# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/YoutubeDL.py
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import io
import os
import re
import shutil
import socket
import sys
import time
import traceback

from .extractor import get_info_extractor
from .FileDownloader import FileDownloader

# NOTE(review): the class below uses helpers such as compat_str, compat_print,
# encodeFilename, preferredencoding, sanitize_filename, DateRange and the
# DownloadError family unqualified; the import that provides them is not
# present in this listing -- confirm where they come from.
# NOTE(review): this class was recovered from a rendered web listing in which
# statements were split across lines and some lines (bare try/else/return
# statements, list initialisers, a few defaults) were dropped.  The control
# flow below was restored from the surviving line numbers and from context --
# confirm against the upstream file before relying on the restored parts.
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, a task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslang:     Language of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date
                       is in the range.
    skip_download:     Skip the actual download of the video file

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    params = None
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a YoutubeDL object with the given options.

        params is the options dictionary described in the class docstring.
        It is kept as self.params and is also handed to the FileDownloader
        created here.
        """
        self._ies = []
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        if params.get('logtostderr', False):
            self._screen_file = sys.stderr
        else:
            self._screen_file = sys.stdout
        self.params = params
        self.fd = FileDownloader(self, self.params)

        if '%(stitle)s' in self.params['outtmpl']:
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    @staticmethod
    def _needs_encoding(stream):
        """Tell whether text must be encoded to bytes before writing to stream."""
        # Python 2 lies about the mode of sys.stdout/sys.stderr, so always
        # encode there; on Python 3 only binary-mode streams need bytes.
        return 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3

    @staticmethod
    def _message_header(label, ansi_color):
        """Return label, wrapped in an ANSI colour when stderr is a non-Windows tty."""
        if sys.stderr.isatty() and os.name != 'nt':
            return u'\033[%sm%s\033[0m' % (ansi_color, label)
        return label

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False):
        """Print message to the screen file (stdout or stderr) if not in quiet mode.

        message must be a unicode string; a newline is appended unless
        skip_eol is true.
        """
        assert isinstance(message, type(u''))
        if self.params.get('quiet', False):
            return
        output = message if skip_eol else message + u'\n'
        if self._needs_encoding(self._screen_file):
            output = output.encode(preferredencoding(), 'ignore')
        self._screen_file.write(output)
        self._screen_file.flush()

    def to_stderr(self, message):
        """Print message, followed by a newline, to stderr."""
        assert isinstance(message, type(u''))
        output = message + u'\n'
        # Decide from the stream actually written to (sys.stderr), not from
        # the screen file, which may be a different stream.
        if self._needs_encoding(sys.stderr):
            output = output.encode(preferredencoding())
        sys.stderr.write(output)

    def fixed_template(self):
        """Checks if the output template is fixed (has no %(...)s fields)."""
        return re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.

        Raises DownloadError unless the 'ignoreerrors' option is set, in
        which case the download return code is set to 1 instead.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                if sys.exc_info()[0]:  # if .trouble has been called from an except block
                    tb = u''
                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
                    tb += compat_str(traceback.format_exc())
                else:
                    tb_data = traceback.format_list(traceback.extract_stack())
                    tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            # Prefer the exc_info carried by the exception being handled, if
            # it wraps another one, so the original cause is propagated.
            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
                exc_info = sys.exc_info()[1].exc_info
            else:
                exc_info = sys.exc_info()
            raise DownloadError(message, exc_info)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        warning_message = u'%s %s' % (self._message_header(u'WARNING:', u'0;33'), message)
        self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        error_message = u'%s %s' % (self._message_header(u'ERROR:', u'0;31'), message)
        self.trouble(error_message, tb)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep for the time the transfer is ahead of the allowed rate.
            time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)

    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)

    def report_writesubtitles(self, sub_filename):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file is being written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            # Fall back to a message without the name if it cannot be encoded.
            self.to_screen(u'[download] The file has already been downloaded')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Returns the 'outtmpl' template filled in with the sanitized values of
        info_dict (plus 'epoch' and 'autonumber'), or None after reporting an
        error when the template cannot be expanded.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            autonumber_size = self.params.get('autonumber_size')
            if autonumber_size is None:
                autonumber_size = 5
            autonumber_templ = u'%0' + str(autonumber_size) + u'd'
            template_dict['autonumber'] = autonumber_templ % self._num_downloads
            # .get(): a missing index must not be reported as a bad template.
            if template_dict.get('playlist_index') is not None:
                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

            restricted = self.params.get('restrictfilenames')
            template_dict = dict(
                (k, sanitize_filename(
                    u'NA' if v is None else compat_str(v),
                    restricted=restricted,
                    is_id=(k == u'id')))
                for k, v in template_dict.items())

            return self.params['outtmpl'] % template_dict
        except KeyError:
            self.report_error(u'Erroneous output template')
            return None
        except ValueError:
            self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
            return None

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded """

        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        date = info_dict.get('upload_date', None)
        if date is not None:
            dateRange = self.params.get('daterange', DateRange())
            if date not in dateRange:
                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
        return None

    def extract_info(self, url, download=True, ie_key=None, extra_info=None):
        '''
        Extract the information for url with the first suitable InfoExtractor.

        If 'download', also downloads the videos.
        ie_key, if given, selects the InfoExtractor instead of trying the
        registered ones in order.
        extra_info is a dict containing the extra values to add to each result

        Returns the result of process_ie_result, or None when no extractor
        handled the url or an error was reported.
        '''
        if extra_info is None:
            extra_info = {}

        if ie_key:
            ie = get_info_extractor(ie_key)()
            ie.set_downloader(self)
            ies = [ie]
        else:
            ies = self._ies

        for ie in ies:
            if not ie.suitable(url):
                continue

            if not ie.working():
                self.report_warning(u'The program functionality for this site has been marked as broken, '
                                    u'and will probably not work.')

            try:
                ie_result = ie.extract(url)
                if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                    break
                if isinstance(ie_result, list):
                    # Backwards compatibility: old IE result format
                    for result in ie_result:
                        result.update(extra_info)
                    ie_result = {
                        '_type': 'compat_list',
                        'entries': ie_result,
                    }
                else:
                    ie_result.update(extra_info)
                if 'extractor' not in ie_result:
                    ie_result['extractor'] = ie.IE_NAME
                return self.process_ie_result(ie_result, download=download)
            except ExtractorError as de:  # An error we somewhat expected
                self.report_error(compat_str(de), de.format_traceback())
                break
            except Exception as e:
                if self.params.get('ignoreerrors', False):
                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                    break
                else:
                    raise
        else:
            # The loop ended without any extractor accepting the URL.
            self.report_error(u'no suitable InfoExtractor: %s' % url)

    def process_ie_result(self, ie_result, download=True, extra_info=None):
        """
        Take the result of the ie(may be modified) and resolve all unresolved
        references (URLs, playlist items).

        It will also download the videos if 'download'.
        extra_info holds extra values (such as the playlist name and index)
        to add to the resolved video results.
        Returns the resolved ie_result.
        """
        if extra_info is None:
            extra_info = {}

        result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
        if result_type == 'video':
            # Entries that are already videos never pass through
            # extract_info, so the playlist data has to be merged in here.
            ie_result.update(extra_info)
            if 'playlist' not in ie_result:
                # It isn't part of a playlist
                ie_result['playlist'] = None
                ie_result['playlist_index'] = None
            if download:
                self.process_info(ie_result)
            return ie_result
        elif result_type == 'url':
            # We have to add extra_info to the results because it may be
            # contained in a playlist
            return self.extract_info(ie_result['url'],
                                     download,
                                     ie_key=ie_result.get('ie_key'),
                                     extra_info=extra_info)
        elif result_type == 'playlist':
            # We process each entry in the playlist
            playlist = ie_result.get('title', None) or ie_result.get('id', None)
            self.to_screen(u'[download] Downloading playlist: %s' % playlist)

            playlist_results = []

            n_all_entries = len(ie_result['entries'])
            playliststart = self.params.get('playliststart', 1) - 1
            playlistend = self.params.get('playlistend', -1)

            if playlistend == -1:
                entries = ie_result['entries'][playliststart:]
            else:
                entries = ie_result['entries'][playliststart:playlistend]

            n_entries = len(entries)

            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                           (ie_result['extractor'], playlist, n_all_entries, n_entries))

            for i, entry in enumerate(entries, 1):
                self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
                extra = {
                    'playlist': playlist,
                    'playlist_index': i + playliststart,
                }
                if 'extractor' not in entry:
                    # We set the extractor, if it's an url it will be set then to
                    # the new extractor, but if it's already a video we must make
                    # sure it's present: see issue #877
                    entry['extractor'] = ie_result['extractor']
                entry_result = self.process_ie_result(entry,
                                                      download=download,
                                                      extra_info=extra)
                playlist_results.append(entry_result)
            ie_result['entries'] = playlist_results
            return ie_result
        elif result_type == 'compat_list':
            def _fixup(r):
                r.setdefault('extractor', ie_result['extractor'])
                return r
            ie_result['entries'] = [
                self.process_ie_result(_fixup(r), download=download)
                for r in ie_result['entries']
            ]
            return ie_result
        else:
            raise Exception('Invalid result type: %s' % result_type)

    def _write_subtitle(self, filename, subtitle, sub_format):
        """Write one (error, language, data) subtitle tuple next to filename.

        A subtitle flagged with an error only produces a warning.  Returns
        False when the subtitle file could not be written (the error has
        already been reported), True otherwise.
        """
        (sub_error, sub_lang, sub) = subtitle
        if sub_error:
            self.report_warning("Some error while getting the subtitles")
            return True
        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
        try:
            self.report_writesubtitles(sub_filename)
            with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                subfile.write(sub)
        except (OSError, IOError):
            # Name the subtitles file itself in the error message.
            self.report_error(u'Cannot write subtitles file ' + sub_filename)
            return False
        return True

    def process_info(self, info_dict):
        """Process a single resolved IE result.

        Applies the title/date filters and the download limit, prints the
        forced fields, writes the requested side files (description,
        subtitles, JSON metadata, thumbnail) and finally downloads and
        post-processes the video, unless simulating or skipping the download.

        Raises MaxDownloadsReached when the 'max_downloads' limit is exceeded
        and UnavailableVideoError when the download fails with an OS/IO error.
        """

        assert info_dict.get('_type', 'video') == 'video'
        # We increment the download count here to match the previous behaviour.
        self.increment_downloads()

        info_dict['fulltitle'] = info_dict['title']
        if len(info_dict['title']) > 200:
            info_dict['title'] = info_dict['title'][:197] + u'...'

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            return

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
        if self.params.get('forceid', False):
            compat_print(info_dict['id'])
        if self.params.get('forceurl', False):
            compat_print(info_dict['url'])
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            compat_print(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            compat_print(filename)
        if self.params.get('forceformat', False):
            compat_print(info_dict['format'])

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            descfn = filename + u'.description'
            try:
                self.report_writedescription(descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                return

        has_subtitles = 'subtitles' in info_dict and info_dict['subtitles']

        if (self.params.get('writesubtitles', False) or self.params.get('writeautomaticsub')) and has_subtitles:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            sub_format = self.params.get('subtitlesformat')
            if not self._write_subtitle(filename, info_dict['subtitles'][0], sub_format):
                return

        if self.params.get('allsubtitles', False) and has_subtitles:
            sub_format = self.params.get('subtitlesformat')
            for subtitle in info_dict['subtitles']:
                if not self._write_subtitle(filename, subtitle, sub_format):
                    return

        if self.params.get('writeinfojson', False):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
                # 'urlhandle' is an open handle, not JSON-serializable data.
                json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                return

        if self.params.get('writethumbnail', False):
            if 'thumbnail' in info_dict:
                thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
                if not thumb_format:
                    thumb_format = 'jpg'
                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
                self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    # Encode the path like every other file written here.
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Always release the network handle.
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    success = self.fd._do_download(filename, info_dict)
                except (OSError, IOError):
                    raise UnavailableVideoError()
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    return
                except (ContentTooShortError, ) as err:
                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except PostProcessingError as err:
                    self.report_error(u'postprocessing: %s' % str(err))
                    return

    def download(self, url_list):
        """Download a given list of URLs.

        Raises SameFileError when several URLs would be written to one fixed
        output name, and re-raises MaxDownloadsReached after reporting it.
        Returns the download return code (0 unless an ignored error occurred).
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            try:
                # It also downloads the videos
                self.extract_info(url)
            except UnavailableVideoError:
                self.report_error(u'unable to download video')
            except MaxDownloadsReached:
                self.to_screen(u'[info] Maximum number of downloaded files reached.')
                raise

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file.

        Each postprocessor receives a copy of ie_info with 'filepath' set to
        filename.  The original file is deleted afterwards when the
        postprocessors asked for it and the 'keepvideo' option is not set.
        """
        info = dict(ie_info)
        info['filepath'] = filename
        keep_video = None
        for pp in self._pps:
            try:
                keep_video_wish, _new_info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        # A wish to keep the file always wins.
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.to_stderr(u'ERROR: ' + e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')