Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/__init__.py
Imported Upstream version 2012.09.27
[youtubedl] / youtube_dl / __init__.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Program authors.
__authors__ = (
    "Ricardo Garcia Gonzalez",
    "Danny Colligan",
    "Benjamin Johnson",
    "Vasyl' Vavrychuk",
    "Witold Baryluk",
    "Paweł Paprota",
    "Gergely Imreh",
    "Rogério Brito",
    "Philipp Hagemeister",
    "Sören Schulze",
    "Kevin Ngo",
    "Ori Avtalion",
    "shizeeg",
    "Filippo Valsorda",
)

__license__ = "Public Domain"
# Compared by updateSelf() against the content of UPDATE_URL_VERSION.
__version__ = "2012.09.27"

# Resources fetched by updateSelf():
#   UPDATE_URL         -- the script itself (non-frozen installs)
#   UPDATE_URL_VERSION -- text file holding the latest version string
#   UPDATE_URL_EXE     -- the executable (frozen / py2exe installs)
UPDATE_URL = "https://raw.github.com/rg3/youtube-dl/master/youtube-dl"
UPDATE_URL_VERSION = "https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION"
UPDATE_URL_EXE = "https://raw.github.com/rg3/youtube-dl/master/youtube-dl.exe"
27
28
29 import cookielib
30 import getpass
31 import optparse
32 import os
33 import re
34 import shlex
35 import socket
36 import subprocess
37 import sys
38 import urllib2
39 import warnings
40
41 from utils import *
42 from FileDownloader import *
43 from InfoExtractors import *
44 from PostProcessor import *
45
def updateSelf(downloader, filename):
    """Update the program file with the latest version from the repository.

    downloader -- only used for its to_screen() output method
    filename   -- path of the program file to replace (the script, or the
                  executable when running frozen under py2exe)

    Returns None without modifying anything when the version string published
    at UPDATE_URL_VERSION equals __version__.

    Calls sys.exit() with an error message when the target (or, for frozen
    builds, its directory) is not writable, when the new version cannot be
    downloaded, or when it cannot be written.  Errors raised while fetching
    the version string itself are not caught here.
    """
    if not os.access(filename, os.W_OK):
        sys.exit('ERROR: no write permissions on %s' % filename)

    downloader.to_screen(u'Updating to latest version...')

    urlv = urllib2.urlopen(UPDATE_URL_VERSION)
    try:
        newversion = urlv.read().strip()
    finally:
        # Release the connection on every path, including the
        # "already up-to-date" early return below.
        urlv.close()
    if newversion == __version__:
        downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')')
        return

    if hasattr(sys, "frozen"): #py2exe
        exe = os.path.abspath(filename)
        directory = os.path.dirname(exe)
        if not os.access(directory, os.W_OK):
            sys.exit('ERROR: no write permissions on %s' % directory)

        # Download next to the current executable as "<exe>.new"; the batch
        # file below moves it into place afterwards.
        try:
            urlh = urllib2.urlopen(UPDATE_URL_EXE)
            try:
                newcontent = urlh.read()
            finally:
                urlh.close()
            with open(exe + '.new', 'wb') as outf:
                outf.write(newcontent)
        except (IOError, OSError):
            sys.exit('ERROR: unable to download latest version')

        try:
            bat = os.path.join(directory, 'youtube-dl-updater.bat')
            # The script waits (ping), moves the new executable over the old
            # one, then deletes itself.
            script = (
                '\n'
                'echo Updating youtube-dl...\n'
                'ping 127.0.0.1 -n 5 -w 1000 > NUL\n'
                'move /Y "%s.new" "%s"\n'
                'del "%s"\n'
                '\n'
            ) % (exe, exe, bat)
            with open(bat, 'w') as b:
                b.write(script)

            os.startfile(bat)
        except (IOError, OSError):
            sys.exit('ERROR: unable to overwrite current version')

    else:
        try:
            urlh = urllib2.urlopen(UPDATE_URL)
            try:
                newcontent = urlh.read()
            finally:
                urlh.close()
        except (IOError, OSError):
            sys.exit('ERROR: unable to download latest version')

        try:
            with open(filename, 'wb') as outf:
                outf.write(newcontent)
        except (IOError, OSError):
            sys.exit('ERROR: unable to overwrite current version')

    downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.')
109
def parseOpts():
    """Build the command-line parser and parse the effective argument list.

    The argument list is the concatenation of the options read from
    /etc/youtube-dl.conf, the options read from the per-user configuration
    file ($XDG_CONFIG_HOME/youtube-dl.conf, or ~/.config/youtube-dl.conf when
    XDG_CONFIG_HOME is unset or empty) and sys.argv[1:], in that order.

    Returns a (parser, opts, args) tuple: the optparse.OptionParser, the
    parsed option values and the remaining positional arguments.
    """
    def _readOptions(filename_bytes):
        """Return the shell-style tokens found in a configuration file.

        Each line is split with shlex (comments allowed).  A file that cannot
        be opened yields an empty list.
        """
        try:
            optionf = open(filename_bytes)
        except IOError:
            return [] # silently skip if file is not present
        try:
            res = []
            for l in optionf:
                res += shlex.split(l, comments=True)
        finally:
            optionf.close()
        return res

    def _format_option_string(option):
        ''' ('-o', '--option') -> -o, --option METAVAR'''

        opts = []

        if option._short_opts: opts.append(option._short_opts[0])
        if option._long_opts: opts.append(option._long_opts[0])
        if len(opts) > 1: opts.insert(1, ', ')

        if option.takes_value(): opts.append(' %s' % option.metavar)

        return "".join(opts)

    def _find_term_columns():
        """Return the terminal width in columns, or None when it is unknown."""
        columns = os.environ.get('COLUMNS', None)
        if columns:
            try:
                return int(columns)
            except ValueError:
                # A non-numeric COLUMNS must not abort option parsing;
                # fall back to asking stty.
                pass

        try:
            sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out,err = sp.communicate()
            return int(out.split()[1])
        except Exception:
            # Best effort only (stty missing, no tty, unexpected output...),
            # but let KeyboardInterrupt/SystemExit propagate.
            pass
        return None

    max_width = 80
    max_help_position = 80

    # No need to wrap help messages if we're on a wide console
    columns = _find_term_columns()
    if columns: max_width = columns

    fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
    fmt.format_option_strings = _format_option_string

    kw = {
        'version'   : __version__,
        'formatter' : fmt,
        'usage' : '%prog [options] url [url...]',
        # 'resolve' lets a later option silently take over a flag that an
        # earlier option already uses (here: -v, defined twice below).
        'conflict_handler' : 'resolve',
    }

    parser = optparse.OptionParser(**kw)

    # option groups
    general        = optparse.OptionGroup(parser, 'General Options')
    selection      = optparse.OptionGroup(parser, 'Video Selection')
    authentication = optparse.OptionGroup(parser, 'Authentication Options')
    video_format   = optparse.OptionGroup(parser, 'Video Format Options')
    postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
    filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
    verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

    general.add_option('-h', '--help',
            action='help', help='print this help text and exit')
    general.add_option('-v', '--version',
            action='version', help='print program version and exit')
    general.add_option('-U', '--update',
            action='store_true', dest='update_self', help='update this program to latest version')
    general.add_option('-i', '--ignore-errors',
            action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
    general.add_option('-r', '--rate-limit',
            dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
    general.add_option('-R', '--retries',
            dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
    general.add_option('--dump-user-agent',
            action='store_true', dest='dump_user_agent',
            help='display the current browser identification', default=False)
    general.add_option('--list-extractors',
            action='store_true', dest='list_extractors',
            help='List all supported extractors and the URLs they would handle', default=False)

    selection.add_option('--playlist-start',
            dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
    selection.add_option('--playlist-end',
            dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
    selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)')
    selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)')
    selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None)

    authentication.add_option('-u', '--username',
            dest='username', metavar='USERNAME', help='account username')
    authentication.add_option('-p', '--password',
            dest='password', metavar='PASSWORD', help='account password')
    authentication.add_option('-n', '--netrc',
            action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)


    video_format.add_option('-f', '--format',
            action='store', dest='format', metavar='FORMAT', help='video format code')
    video_format.add_option('--all-formats',
            action='store_const', dest='format', help='download all available video formats', const='all')
    video_format.add_option('--prefer-free-formats',
            action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested')
    video_format.add_option('--max-quality',
            action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
    video_format.add_option('-F', '--list-formats',
            action='store_true', dest='listformats', help='list all available formats (currently youtube only)')
    video_format.add_option('--write-srt',
            action='store_true', dest='writesubtitles',
            help='write video closed captions to a .srt file (currently youtube only)', default=False)
    video_format.add_option('--srt-lang',
            action='store', dest='subtitleslang', metavar='LANG',
            help='language of the closed captions to download (optional) use IETF language tags like \'en\'')


    verbosity.add_option('-q', '--quiet',
            action='store_true', dest='quiet', help='activates quiet mode', default=False)
    verbosity.add_option('-s', '--simulate',
            action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False)
    verbosity.add_option('--skip-download',
            action='store_true', dest='skip_download', help='do not download the video', default=False)
    verbosity.add_option('-g', '--get-url',
            action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
    verbosity.add_option('-e', '--get-title',
            action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
    verbosity.add_option('--get-thumbnail',
            action='store_true', dest='getthumbnail',
            help='simulate, quiet but print thumbnail URL', default=False)
    verbosity.add_option('--get-description',
            action='store_true', dest='getdescription',
            help='simulate, quiet but print video description', default=False)
    verbosity.add_option('--get-filename',
            action='store_true', dest='getfilename',
            help='simulate, quiet but print output filename', default=False)
    verbosity.add_option('--get-format',
            action='store_true', dest='getformat',
            help='simulate, quiet but print output format', default=False)
    verbosity.add_option('--no-progress',
            action='store_true', dest='noprogress', help='do not print progress bar', default=False)
    verbosity.add_option('--console-title',
            action='store_true', dest='consoletitle',
            help='display progress in console titlebar', default=False)
    verbosity.add_option('-v', '--verbose',
            action='store_true', dest='verbose', help='print various debugging information', default=False)


    filesystem.add_option('-t', '--title',
            action='store_true', dest='usetitle', help='use title in file name', default=False)
    filesystem.add_option('-l', '--literal',
            action='store_true', dest='useliteral', help='use literal title in file name', default=False)
    filesystem.add_option('-A', '--auto-number',
            action='store_true', dest='autonumber',
            help='number downloaded files starting from 00000', default=False)
    filesystem.add_option('-o', '--output',
            dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.')
    filesystem.add_option('-a', '--batch-file',
            dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
    filesystem.add_option('-w', '--no-overwrites',
            action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
    filesystem.add_option('-c', '--continue',
            action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True)
    filesystem.add_option('--no-continue',
            action='store_false', dest='continue_dl',
            help='do not resume partially downloaded files (restart from beginning)')
    filesystem.add_option('--cookies',
            dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in')
    filesystem.add_option('--no-part',
            action='store_true', dest='nopart', help='do not use .part files', default=False)
    filesystem.add_option('--no-mtime',
            action='store_false', dest='updatetime',
            help='do not use the Last-modified header to set the file modification time', default=True)
    filesystem.add_option('--write-description',
            action='store_true', dest='writedescription',
            help='write video description to a .description file', default=False)
    filesystem.add_option('--write-info-json',
            action='store_true', dest='writeinfojson',
            help='write video metadata to a .info.json file', default=False)


    postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
            help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)')
    postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
            help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default')
    postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K',
            help='ffmpeg/avconv audio bitrate specification, 128k by default')
    postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False,
            help='keeps the video file on disk after the post-processing; the video is erased by default')


    # The order here is the order of the groups in the --help output
    parser.add_option_group(general)
    parser.add_option_group(selection)
    parser.add_option_group(filesystem)
    parser.add_option_group(verbosity)
    parser.add_option_group(video_format)
    parser.add_option_group(authentication)
    parser.add_option_group(postproc)

    xdg_config_home = os.environ.get('XDG_CONFIG_HOME')
    if xdg_config_home:
        userConf = os.path.join(xdg_config_home, 'youtube-dl.conf')
    else:
        userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf')
    # Command-line arguments come last in the combined list
    argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:]
    opts, args = parser.parse_args(argv)

    return parser, opts, args
322
def gen_extractors():
    """Return a list holding one fresh instance of every supported extractor.

    The order does matter; the first extractor matched is the one handling
    the URL.
    """
    extractor_classes = (
        YoutubePlaylistIE,
        YoutubeUserIE,
        YoutubeSearchIE,
        YoutubeIE,
        MetacafeIE,
        DailymotionIE,
        GoogleIE,
        GoogleSearchIE,
        PhotobucketIE,
        YahooIE,
        YahooSearchIE,
        DepositFilesIE,
        FacebookIE,
        BlipTVUserIE,
        BlipTVIE,
        VimeoIE,
        MyVideoIE,
        ComedyCentralIE,
        EscapistIE,
        CollegeHumorIE,
        XVideosIE,
        SoundcloudIE,
        InfoQIE,
        MixcloudIE,
        StanfordOpenClassroomIE,
        MTVIE,

        GenericIE,
    )
    return [extractor_class() for extractor_class in extractor_classes]
357
def _real_main():
    """Run the program: parse options, configure the downloader and download.

    Steps, in order: open the cookie jar, handle --dump-user-agent, collect
    the URLs from the batch file and the command line, install the global
    urllib2 opener and socket timeout, handle --list-extractors, validate
    the options, build the FileDownloader, optionally self-update, download
    every URL and finally save the cookie jar.

    Does not return normally: every path ends in sys.exit() or
    parser.error().  The final exit status is the value returned by
    fd.download(), or 101 when MaxDownloadsReached was raised.
    """
    parser, opts, args = parseOpts()

    # Open appropriate CookieJar
    if opts.cookiefile is None:
        jar = cookielib.CookieJar()
    else:
        try:
            jar = cookielib.MozillaCookieJar(opts.cookiefile)
            if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                jar.load()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to open cookie file')

    # Dump user agent
    if opts.dump_user_agent:
        print(std_headers['User-Agent'])
        sys.exit(0)

    # Batch file verification
    batchurls = []
    if opts.batchfile is not None:
        try:
            if opts.batchfile == '-':
                batchfd = sys.stdin
            else:
                batchfd = open(opts.batchfile, 'r')
            try:
                batchurls = batchfd.readlines()
            finally:
                # Close the file we opened ourselves; never close stdin.
                if batchfd is not sys.stdin:
                    batchfd.close()
            batchurls = [x.strip() for x in batchurls]
            # Drop blank lines and lines starting with '#', '/' or ';'
            batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
        except IOError:
            sys.exit(u'ERROR: batch file could not be read')
    all_urls = [url.strip() for url in batchurls + args]

    # General configuration
    cookie_processor = urllib2.HTTPCookieProcessor(jar)
    proxy_handler = urllib2.ProxyHandler()
    opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
    urllib2.install_opener(opener)
    socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

    extractors = gen_extractors()

    if opts.list_extractors:
        for ie in extractors:
            print(ie.IE_NAME)
            matchedUrls = [url for url in all_urls if ie.suitable(url)]
            # A URL is listed only under the first extractor that accepts it
            all_urls = [url for url in all_urls if url not in matchedUrls]
            for mu in matchedUrls:
                print(u'  ' + mu)
        sys.exit(0)

    # Conflicting, missing and erroneous options
    if opts.usenetrc and (opts.username is not None or opts.password is not None):
        parser.error(u'using .netrc conflicts with giving username/password')
    if opts.password is not None and opts.username is None:
        parser.error(u'account username missing')
    if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
        parser.error(u'using output template conflicts with using title, literal title  or auto number')
    if opts.usetitle and opts.useliteral:
        parser.error(u'using title conflicts with using literal title')
    if opts.username is not None and opts.password is None:
        opts.password = getpass.getpass(u'Type account password and press return:')
    if opts.ratelimit is not None:
        numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
        if numeric_limit is None:
            parser.error(u'invalid rate limit specified')
        opts.ratelimit = numeric_limit
    if opts.retries is not None:
        try:
            opts.retries = long(opts.retries)
        except (TypeError, ValueError):
            parser.error(u'invalid retry count specified')
    try:
        opts.playliststart = int(opts.playliststart)
        if opts.playliststart <= 0:
            raise ValueError(u'Playlist start must be positive')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist start number specified')
    try:
        opts.playlistend = int(opts.playlistend)
        # -1 is the option's default and is exempt from the range check
        if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
            raise ValueError(u'Playlist end must be greater than playlist start')
    except (TypeError, ValueError):
        parser.error(u'invalid playlist end number specified')
    if opts.extractaudio:
        if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
            parser.error(u'invalid audio format specified')

    # File downloader
    fd = FileDownloader({
        'usenetrc': opts.usenetrc,
        'username': opts.username,
        'password': opts.password,
        # Any of the --get-* options implies quiet mode and no download
        'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
        'forceurl': opts.geturl,
        'forcetitle': opts.gettitle,
        'forcethumbnail': opts.getthumbnail,
        'forcedescription': opts.getdescription,
        'forcefilename': opts.getfilename,
        'forceformat': opts.getformat,
        'simulate': opts.simulate,
        'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
        'format': opts.format,
        'format_limit': opts.format_limit,
        'listformats': opts.listformats,
        # The first truthy alternative wins: an explicit template, otherwise
        # a default template chosen from the format/title/autonumber options.
        'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
            or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
            or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
            or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
            or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
            or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
            or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
            or u'%(id)s.%(ext)s'),
        'ignoreerrors': opts.ignoreerrors,
        'ratelimit': opts.ratelimit,
        'nooverwrites': opts.nooverwrites,
        'retries': opts.retries,
        'continuedl': opts.continue_dl,
        'noprogress': opts.noprogress,
        'playliststart': opts.playliststart,
        'playlistend': opts.playlistend,
        'logtostderr': opts.outtmpl == '-',
        'consoletitle': opts.consoletitle,
        'nopart': opts.nopart,
        'updatetime': opts.updatetime,
        'writedescription': opts.writedescription,
        'writeinfojson': opts.writeinfojson,
        'writesubtitles': opts.writesubtitles,
        'subtitleslang': opts.subtitleslang,
        'matchtitle': opts.matchtitle,
        'rejecttitle': opts.rejecttitle,
        'max_downloads': opts.max_downloads,
        'prefer_free_formats': opts.prefer_free_formats,
        'verbose': opts.verbose,
        })

    if opts.verbose:
        fd.to_screen(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

    for extractor in extractors:
        fd.add_info_extractor(extractor)

    # PostProcessors
    if opts.extractaudio:
        fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))

    # Update version
    if opts.update_self:
        updateSelf(fd, sys.argv[0])

    # Maybe do nothing
    if len(all_urls) < 1:
        if not opts.update_self:
            parser.error(u'you must provide at least one URL')
        else:
            sys.exit()

    try:
        retcode = fd.download(all_urls)
    except MaxDownloadsReached:
        fd.to_screen(u'--max-download limit reached, aborting.')
        retcode = 101

    # Dump cookie jar if requested
    if opts.cookiefile is not None:
        try:
            jar.save()
        except (IOError, OSError):
            sys.exit(u'ERROR: unable to save cookie jar')

    sys.exit(retcode)
533
def main():
    """Program entry point: run _real_main() and turn known errors into exits.

    DownloadError exits with status 1; SameFileError and KeyboardInterrupt
    exit with an error message passed to sys.exit().
    """
    try:
        _real_main()
    except DownloadError:
        exit_status = 1
    except SameFileError:
        exit_status = u'ERROR: fixed output name but more than one file to download'
    except KeyboardInterrupt:
        exit_status = u'\nERROR: Interrupted by user'
    else:
        # _real_main() came back without raising: nothing to report
        return
    sys.exit(exit_status)