]>
Raphaël G. Git Repositories - youtubedl/blob - youtube-dl
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # License: Public domain code
23 # parse_qs was moved from the cgi module to the urlparse module recently.
25 from urlparse
import parse_qs
27 from cgi
import parse_qs
30 'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2',
31 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
32 'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
33 'Accept-Language': 'en-us,en;q=0.5',
36 simple_title_chars
= string
.ascii_letters
.decode('ascii') + string
.digits
.decode('ascii')
38 def preferredencoding():
39 """Get preferred encoding.
41 Returns the best encoding scheme for the system, based on
42 locale.getpreferredencoding() and some further tweaks.
44 def yield_preferredencoding():
46 pref
= locale
.getpreferredencoding()
52 return yield_preferredencoding().next()
54 class DownloadError(Exception):
55 """Download Error exception.
57 This exception may be thrown by FileDownloader objects if they are not
58 configured to continue on errors. They will contain the appropriate
class SameFileError(Exception):
	"""Same File exception.

	This exception will be thrown by FileDownloader objects if they detect
	multiple files would have to be downloaded to the same file on disk.
	"""
	pass
class PostProcessingError(Exception):
	"""Post Processing exception.

	This exception may be raised by PostProcessor's .run() method to
	indicate an error in the postprocessing task.
	"""
	pass
class UnavailableFormatError(Exception):
	"""Unavailable Format exception.

	This exception will be thrown when a video is requested
	in a format that is not available for that video.
	"""
	pass
87 class ContentTooShortError(Exception):
88 """Content Too Short exception.
90 This exception may be raised by FileDownloader objects when a file they
91 download is too small for what the server announced first, indicating
92 the connection was probably interrupted.
def __init__(self, downloaded, expected):
	"""Record the byte counts describing the size mismatch.

	downloaded -- number of bytes actually received
	expected   -- number of bytes the server announced
	"""
	self.expected = expected
	self.downloaded = downloaded
102 class FileDownloader(object):
103 """File Downloader class.
105 File downloader objects are the ones responsible of downloading the
106 actual video file and writing it to disk if the user has requested
107 it, among some other tasks. In most cases there should be one per
108 program. As, given a video URL, the downloader doesn't know how to
109 extract all the needed information, task that InfoExtractors do, it
110 has to pass the URL to one of them.
112 For this, file downloader objects have a method that allows
113 InfoExtractors to be registered in a given order. When it is passed
114 a URL, the file downloader handles it to the first InfoExtractor it
115 finds that reports being able to handle it. The InfoExtractor extracts
116 all the information about the video or videos the URL refers to, and
117 asks the FileDownloader to process the video information, possibly
118 downloading the video.
120 File downloaders accept a lot of parameters. In order not to saturate
121 the object constructor with arguments, it receives a dictionary of
122 options instead. These options are available through the params
123 attribute for the InfoExtractors to use. The FileDownloader also
124 registers itself as the downloader in charge for the InfoExtractors
125 that are added to it, so this is a "mutual registration".
129 username: Username for authentication purposes.
130 password: Password for authentication purposes.
131 usenetrc: Use netrc for authentication instead.
132 quiet: Do not print messages to stdout.
133 forceurl: Force printing final URL.
134 forcetitle: Force printing title.
135 simulate: Do not download the video files.
136 format: Video format code.
137 outtmpl: Template for output names.
138 ignoreerrors: Do not stop on download errors.
139 ratelimit: Download speed limit, in bytes/sec.
140 nooverwrites: Prevent overwriting files.
141 continuedl: Try to continue downloads if possible.
147 _download_retcode
= None
149 def __init__(self
, params
):
150 """Create a FileDownloader object with the given options."""
153 self
._download
_retcode
= 0
157 def pmkdir(filename
):
158 """Create directory components in filename. Similar to Unix "mkdir -p"."""
159 components
= filename
.split(os
.sep
)
160 aggregate
= [os
.sep
.join(components
[0:x
]) for x
in xrange(1, len(components
))]
161 aggregate
= ['%s%s' % (x
, os
.sep
) for x
in aggregate
] # Finish names with separator
162 for dir in aggregate
:
163 if not os
.path
.exists(dir):
167 def format_bytes(bytes):
170 if type(bytes) is str:
175 exponent
= long(math
.log(bytes, 1024.0))
176 suffix
= 'bkMGTPEZY'[exponent
]
177 converted
= float(bytes) / float(1024**exponent
)
178 return '%.2f%s' % (converted
, suffix
)
181 def calc_percent(byte_counter
, data_len
):
184 return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0))
187 def calc_eta(start
, now
, total
, current
):
191 if current
== 0 or dif
< 0.001: # One millisecond
193 rate
= float(current
) / dif
194 eta
= long((float(total
) - float(current
)) / rate
)
195 (eta_mins
, eta_secs
) = divmod(eta
, 60)
198 return '%02d:%02d' % (eta_mins
, eta_secs
)
201 def calc_speed(start
, now
, bytes):
203 if bytes == 0 or dif
< 0.001: # One millisecond
204 return '%10s' % '---b/s'
205 return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
))
208 def best_block_size(elapsed_time
, bytes):
209 new_min
= max(bytes / 2.0, 1.0)
210 new_max
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
211 if elapsed_time
< 0.001:
213 rate
= bytes / elapsed_time
221 def parse_bytes(bytestr
):
222 """Parse a string indicating a byte quantity into a long integer."""
223 matchobj
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
)
226 number
= float(matchobj
.group(1))
227 multiplier
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower())
228 return long(round(number
* multiplier
))
232 """Verify a URL is valid and data could be downloaded. Return real data URL."""
233 request
= urllib2
.Request(url
, None, std_headers
)
234 data
= urllib2
.urlopen(request
)
240 def add_info_extractor(self
, ie
):
241 """Add an InfoExtractor object to the end of the list."""
243 ie
.set_downloader(self
)
245 def add_post_processor(self
, pp
):
246 """Add a PostProcessor object to the end of the chain."""
248 pp
.set_downloader(self
)
def to_stdout(self, message, skip_eol=False):
	"""Print message to stdout unless the 'quiet' option is set."""
	if self.params.get('quiet', False):
		return
	# Choose the terminator by indexing with the boolean flag.
	terminator = [u'\n', u''][skip_eol]
	print (u'%s%s' % (message, terminator)).encode(preferredencoding()),
def to_stderr(self, message):
	"""Write the message, encoded for the locale, to standard error."""
	encoded = message.encode(preferredencoding())
	print >>sys.stderr, encoded
260 def fixed_template(self
):
261 """Checks if the output template is fixed."""
262 return (re
.search(ur
'(?u)%\(.+?\)s', self
.params
['outtmpl']) is None)
def trouble(self, message=None):
	"""Determine action to take when a download problem appears.

	Depending on if the downloader has been configured to ignore
	download errors or not, this method may throw an exception or
	not when errors are found, after printing the message.
	"""
	if message is not None:
		self.to_stderr(message)
	if not self.params.get('ignoreerrors', False):
		raise DownloadError(message)
	# Remember that at least one download failed so the final
	# return code reflects it.
	self._download_retcode = 1
277 def slow_down(self
, start_time
, byte_counter
):
278 """Sleep if the download speed is over the rate limit."""
279 rate_limit
= self
.params
.get('ratelimit', None)
280 if rate_limit
is None or byte_counter
== 0:
283 elapsed
= now
- start_time
286 speed
= float(byte_counter
) / elapsed
287 if speed
> rate_limit
:
288 time
.sleep((byte_counter
- rate_limit
* (now
- start_time
)) / rate_limit
)
def report_destination(self, filename):
	"""Announce the file the download will be written to."""
	msg = u'[download] Destination: %s' % filename
	self.to_stdout(msg)
def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
	"""Print an in-place progress line (carriage return, no newline)."""
	line = u'\r[download] %s of %s at %s ETA %s' % (percent_str, data_len_str, speed_str, eta_str)
	self.to_stdout(line, skip_eol=True)
def report_resuming_byte(self, resume_len):
	"""Report attempt to resume at given byte."""
	self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
def report_file_already_downloaded(self, file_name):
	"""Announce that the target file is already fully present on disk."""
	notice = u'[download] %s has already been downloaded' % file_name
	self.to_stdout(notice)
def report_unable_to_resume(self):
	"""Announce that a partial-content resume was not possible."""
	notice = u'[download] Unable to resume'
	self.to_stdout(notice)
311 def report_finish(self
):
312 """Report download finished."""
315 def process_info(self
, info_dict
):
316 """Process a single dictionary returned by an InfoExtractor."""
317 # Do nothing else if in simulate mode
318 if self
.params
.get('simulate', False):
319 # Verify URL if it's an HTTP one
320 if info_dict
['url'].startswith('http'):
322 info_dict
['url'] = self
.verify_url(info_dict
['url'].encode('utf-8')).decode('utf-8')
323 except (OSError, IOError, urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
324 raise UnavailableFormatError
327 if self
.params
.get('forcetitle', False):
328 print info_dict
['title'].encode(preferredencoding())
329 if self
.params
.get('forceurl', False):
330 print info_dict
['url'].encode(preferredencoding())
335 template_dict
= dict(info_dict
)
336 template_dict
['epoch'] = unicode(long(time
.time()))
337 filename
= self
.params
['outtmpl'] % template_dict
338 except (ValueError, KeyError), err
:
339 self
.trouble('ERROR: invalid output template or system charset: %s' % str(err
))
340 if self
.params
.get('nooverwrites', False) and os
.path
.exists(filename
):
341 self
.to_stderr(u
'WARNING: file exists: %s; skipping' % filename
)
345 self
.pmkdir(filename
)
346 except (OSError, IOError), err
:
347 self
.trouble('ERROR: unable to create directories: %s' % str(err
))
351 success
= self
._do
_download
(filename
, info_dict
['url'].encode('utf-8'))
352 except (OSError, IOError), err
:
353 raise UnavailableFormatError
354 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
355 self
.trouble('ERROR: unable to download video data: %s' % str(err
))
357 except (ContentTooShortError
, ), err
:
358 self
.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
))
363 self
.post_process(filename
, info_dict
)
364 except (PostProcessingError
), err
:
365 self
.trouble('ERROR: postprocessing: %s' % str(err
))
368 def download(self
, url_list
):
369 """Download a given list of URLs."""
370 if len(url_list
) > 1 and self
.fixed_template():
371 raise SameFileError(self
.params
['outtmpl'])
374 suitable_found
= False
376 # Go to next InfoExtractor if not suitable
377 if not ie
.suitable(url
):
380 # Suitable InfoExtractor found
381 suitable_found
= True
383 # Extract information from URL and process it
386 # Suitable InfoExtractor had been found; go to next URL
389 if not suitable_found
:
390 self
.trouble('ERROR: no suitable InfoExtractor: %s' % url
)
392 return self
._download
_retcode
394 def post_process(self
, filename
, ie_info
):
395 """Run the postprocessing chain on the given file."""
397 info
['filepath'] = filename
403 def _download_with_rtmpdump(self
, filename
, url
):
404 self
.report_destination(filename
)
406 # Check for rtmpdump first
408 subprocess
.call(['rtmpdump', '-h'], stdout
=(file(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
)
409 except (OSError, IOError):
410 self
.trouble(u
'ERROR: RTMP download detected but "rtmpdump" could not be run')
413 # Download using rtmpdump. rtmpdump returns exit code 2 when
414 # the connection was interrumpted and resuming appears to be
415 # possible. This is part of rtmpdump's normal usage, AFAIK.
416 basic_args
= ['rtmpdump', '-q', '-r', url
, '-o', filename
]
417 retval
= subprocess
.call(basic_args
+ [[], ['-e', '-k', '1']][self
.params
.get('continuedl', False)])
418 while retval
== 2 or retval
== 1:
419 self
.to_stdout(u
'\r[rtmpdump] %s bytes' % os
.path
.getsize(filename
), skip_eol
=True)
420 time
.sleep(2.0) # This seems to be needed
421 retval
= subprocess
.call(basic_args
+ ['-e'] + [[], ['-k', '1']][retval
== 1])
423 self
.to_stdout(u
'\r[rtmpdump] %s bytes' % os
.path
.getsize(filename
))
426 self
.trouble('ERROR: rtmpdump exited with code %d' % retval
)
429 def _do_download(self
, filename
, url
):
430 # Attempt to download using rtmpdump
431 if url
.startswith('rtmp'):
432 return self
._download
_with
_rtmpdump
(filename
, url
)
436 basic_request
= urllib2
.Request(url
, None, std_headers
)
437 request
= urllib2
.Request(url
, None, std_headers
)
439 # Establish possible resume length
440 if os
.path
.isfile(filename
):
441 resume_len
= os
.path
.getsize(filename
)
445 # Request parameters in case of being able to resume
446 if self
.params
.get('continuedl', False) and resume_len
!= 0:
447 self
.report_resuming_byte(resume_len
)
448 request
.add_header('Range','bytes=%d-' % resume_len
)
451 # Establish connection
453 data
= urllib2
.urlopen(request
)
454 except (urllib2
.HTTPError
, ), err
:
455 if err
.code
!= 416: # 416 is 'Requested range not satisfiable'
458 data
= urllib2
.urlopen(basic_request
)
459 content_length
= data
.info()['Content-Length']
461 if content_length
is not None and long(content_length
) == resume_len
:
462 # Because the file had already been fully downloaded
463 self
.report_file_already_downloaded(filename
)
466 # Because the server didn't let us
467 self
.report_unable_to_resume()
470 data_len
= data
.info().get('Content-length', None)
471 data_len_str
= self
.format_bytes(data_len
)
478 data_block
= data
.read(block_size
)
480 data_block_len
= len(data_block
)
481 if data_block_len
== 0:
483 byte_counter
+= data_block_len
485 # Open file just in time
488 stream
= open(filename
, open_mode
)
489 self
.report_destination(filename
)
490 except (OSError, IOError), err
:
491 self
.trouble('ERROR: unable to open for writing: %s' % str(err
))
493 stream
.write(data_block
)
494 block_size
= self
.best_block_size(after
- before
, data_block_len
)
497 percent_str
= self
.calc_percent(byte_counter
, data_len
)
498 eta_str
= self
.calc_eta(start
, time
.time(), data_len
, byte_counter
)
499 speed_str
= self
.calc_speed(start
, time
.time(), byte_counter
)
500 self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
)
503 self
.slow_down(start
, byte_counter
)
506 if data_len
is not None and str(byte_counter
) != data_len
:
507 raise ContentTooShortError(byte_counter
, long(data_len
))
510 class InfoExtractor(object):
511 """Information Extractor class.
513 Information extractors are the classes that, given a URL, extract
514 information from the video (or videos) the URL refers to. This
515 information includes the real video URL, the video title and simplified
516 title, author and others. The information is stored in a dictionary
517 which is then passed to the FileDownloader. The FileDownloader
518 processes this information possibly downloading the video to the file
519 system, among other possible outcomes. The dictionaries must include
520 the following fields:
522 id: Video identifier.
523 url: Final video URL.
524 uploader: Nickname of the video uploader.
525 title: Literal title.
526 stitle: Simplified title.
527 ext: Video filename extension.
529 Subclasses of this one should re-define the _real_initialize() and
530 _real_extract() methods, as well as the suitable() static method.
531 Probably, they should also be instantiated and added to the main
538 def __init__(self
, downloader
=None):
539 """Constructor. Receives an optional downloader."""
541 self
.set_downloader(downloader
)
545 """Receives a URL and returns True if suitable for this IE."""
548 def initialize(self
):
549 """Initializes an instance (authentication, etc)."""
551 self
._real
_initialize
()
554 def extract(self
, url
):
555 """Extracts URL information and returns it in list of dicts."""
557 return self
._real
_extract
(url
)
def set_downloader(self, downloader):
	"""Sets the downloader for this IE."""
	# Back-reference used by the report_* helpers and by extract() when
	# handing results to FileDownloader.process_info ("mutual registration").
	self._downloader = downloader
563 def _real_initialize(self
):
564 """Real initialization process. Redefine in subclasses."""
567 def _real_extract(self
, url
):
568 """Real extraction process. Redefine in subclasses."""
571 class YoutubeIE(InfoExtractor
):
572 """Information extractor for youtube.com."""
574 _VALID_URL
= r
'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?\?(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
575 _LANG_URL
= r
'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
576 _LOGIN_URL
= 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
577 _AGE_URL
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
578 _NETRC_MACHINE
= 'youtube'
579 _available_formats
= ['37', '22', '35', '18', '5', '17', '13', None] # listed in order of priority for -b flag
580 _video_extensions
= {
590 return (re
.match(YoutubeIE
._VALID
_URL
, url
) is not None)
593 def htmlentity_transform(matchobj
):
594 """Transforms an HTML entity to a Unicode character."""
595 entity
= matchobj
.group(1)
597 # Known non-numeric HTML entity
598 if entity
in htmlentitydefs
.name2codepoint
:
599 return unichr(htmlentitydefs
.name2codepoint
[entity
])
602 mobj
= re
.match(ur
'(?u)#(x?\d+)', entity
)
604 numstr
= mobj
.group(1)
605 if numstr
.startswith(u
'x'):
607 numstr
= u
'0%s' % numstr
610 return unichr(long(numstr
, base
))
612 # Unknown entity in name, return its literal representation
613 return (u
'&%s;' % entity
)
def report_lang(self):
	"""Announce that the language-preference request is being made."""
	msg = u'[youtube] Setting language'
	self._downloader.to_stdout(msg)
def report_login(self):
	"""Announce the login attempt."""
	msg = u'[youtube] Logging in'
	self._downloader.to_stdout(msg)
def report_age_confirmation(self):
	"""Announce the age-confirmation step."""
	msg = u'[youtube] Confirming age'
	self._downloader.to_stdout(msg)
def report_video_info_webpage_download(self, video_id):
	"""Announce the download of the video-info page for video_id."""
	msg = u'[youtube] %s: Downloading video info webpage' % video_id
	self._downloader.to_stdout(msg)
def report_information_extraction(self, video_id):
	"""Announce that metadata extraction for video_id has started."""
	msg = u'[youtube] %s: Extracting video information' % video_id
	self._downloader.to_stdout(msg)
def report_unavailable_format(self, video_id, format):
	"""Report that the requested format is not available for the video."""
	self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
def report_rtmp_download(self):
	"""Indicate the download will use the RTMP protocol."""
	notice = u'[youtube] RTMP download detected'
	self._downloader.to_stdout(notice)
643 def _real_initialize(self
):
644 if self
._downloader
is None:
649 downloader_params
= self
._downloader
.params
651 # Attempt to use provided username and password or .netrc data
652 if downloader_params
.get('username', None) is not None:
653 username
= downloader_params
['username']
654 password
= downloader_params
['password']
655 elif downloader_params
.get('usenetrc', False):
657 info
= netrc
.netrc().authenticators(self
._NETRC
_MACHINE
)
662 raise netrc
.NetrcParseError('No authenticators for %s' % self
._NETRC
_MACHINE
)
663 except (IOError, netrc
.NetrcParseError
), err
:
664 self
._downloader
.to_stderr(u
'WARNING: parsing .netrc: %s' % str(err
))
668 request
= urllib2
.Request(self
._LANG
_URL
, None, std_headers
)
671 urllib2
.urlopen(request
).read()
672 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
673 self
._downloader
.to_stderr(u
'WARNING: unable to set language: %s' % str(err
))
676 # No authentication to be performed
682 'current_form': 'loginForm',
684 'action_login': 'Log In',
685 'username': username
,
686 'password': password
,
688 request
= urllib2
.Request(self
._LOGIN
_URL
, urllib
.urlencode(login_form
), std_headers
)
691 login_results
= urllib2
.urlopen(request
).read()
692 if re
.search(r
'(?i)<form[^>]* name="loginForm"', login_results
) is not None:
693 self
._downloader
.to_stderr(u
'WARNING: unable to log in: bad username or password')
695 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
696 self
._downloader
.to_stderr(u
'WARNING: unable to log in: %s' % str(err
))
702 'action_confirm': 'Confirm',
704 request
= urllib2
.Request(self
._AGE
_URL
, urllib
.urlencode(age_form
), std_headers
)
706 self
.report_age_confirmation()
707 age_results
= urllib2
.urlopen(request
).read()
708 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
709 self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
))
712 def _real_extract(self
, url
):
713 # Extract video id from URL
714 mobj
= re
.match(self
._VALID
_URL
, url
)
716 self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
)
718 video_id
= mobj
.group(2)
720 # Downloader parameters
724 if self
._downloader
is not None:
725 params
= self
._downloader
.params
726 format_param
= params
.get('format', None)
727 if format_param
== '0':
728 format_param
= self
._available
_formats
[quality_index
]
733 video_extension
= self
._video
_extensions
.get(format_param
, 'flv')
736 video_info_url
= 'http://www.youtube.com/get_video_info?&video_id=%s&el=detailpage&ps=default&eurl=&gl=US&hl=en' % video_id
737 request
= urllib2
.Request(video_info_url
, None, std_headers
)
739 self
.report_video_info_webpage_download(video_id
)
740 video_info_webpage
= urllib2
.urlopen(request
).read()
741 video_info
= parse_qs(video_info_webpage
)
742 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
743 self
._downloader
.trouble(u
'ERROR: unable to download video info webpage: %s' % str(err
))
745 self
.report_information_extraction(video_id
)
748 if 'token' not in video_info
:
749 # Attempt to see if YouTube has issued an error message
750 if 'reason' not in video_info
:
751 self
._downloader
.trouble(u
'ERROR: unable to extract "t" parameter for unknown reason')
752 stream
= open('reportme-ydl-%s.dat' % time
.time(), 'wb')
753 stream
.write(video_info_webpage
)
756 reason
= urllib
.unquote_plus(video_info
['reason'][0])
757 self
._downloader
.trouble(u
'ERROR: YouTube said: %s' % reason
.decode('utf-8'))
759 token
= urllib
.unquote_plus(video_info
['token'][0])
760 video_real_url
= 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id
, token
)
761 if format_param
is not None:
762 video_real_url
= '%s&fmt=%s' % (video_real_url
, format_param
)
764 # Check possible RTMP download
765 if 'conn' in video_info
and video_info
['conn'][0].startswith('rtmp'):
766 self
.report_rtmp_download()
767 video_real_url
= video_info
['conn'][0]
770 if 'author' not in video_info
:
771 self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname')
773 video_uploader
= urllib
.unquote_plus(video_info
['author'][0])
776 if 'title' not in video_info
:
777 self
._downloader
.trouble(u
'ERROR: unable to extract video title')
779 video_title
= urllib
.unquote_plus(video_info
['title'][0])
780 video_title
= video_title
.decode('utf-8')
781 video_title
= re
.sub(ur
'(?u)&(.+?);', self
.htmlentity_transform
, video_title
)
782 video_title
= video_title
.replace(os
.sep
, u
'%')
785 simple_title
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
)
786 simple_title
= simple_title
.strip(ur
'_')
789 # Process video information
790 self
._downloader
.process_info({
791 'id': video_id
.decode('utf-8'),
792 'url': video_real_url
.decode('utf-8'),
793 'uploader': video_uploader
.decode('utf-8'),
794 'title': video_title
,
795 'stitle': simple_title
,
796 'ext': video_extension
.decode('utf-8'),
801 except UnavailableFormatError
, err
:
803 if quality_index
== len(self
._available
_formats
) - 1:
804 # I don't ever expect this to happen
805 self
._downloader
.trouble(u
'ERROR: no known formats available for video')
808 self
.report_unavailable_format(video_id
, format_param
)
810 format_param
= self
._available
_formats
[quality_index
]
813 self
._downloader
.trouble('ERROR: format not available for video')
817 class MetacafeIE(InfoExtractor
):
818 """Information Extractor for metacafe.com."""
820 _VALID_URL
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
821 _DISCLAIMER
= 'http://www.metacafe.com/family_filter/'
822 _FILTER_POST
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
def __init__(self, youtube_ie, downloader=None):
	"""Constructor. Receives an optional downloader.

	youtube_ie is the YoutubeIE instance that extraction is delegated to
	when a Metacafe video id has the 'yt-' prefix.
	"""
	InfoExtractor.__init__(self, downloader)
	self._youtube_ie = youtube_ie
831 return (re
.match(MetacafeIE
._VALID
_URL
, url
) is not None)
def report_disclaimer(self):
	"""Announce retrieval of the family-filter disclaimer page."""
	msg = u'[metacafe] Retrieving disclaimer'
	self._downloader.to_stdout(msg)
def report_age_confirmation(self):
	"""Announce the age-confirmation step."""
	msg = u'[metacafe] Confirming age'
	self._downloader.to_stdout(msg)
def report_download_webpage(self, video_id):
	"""Announce the download of the watch page for video_id."""
	msg = u'[metacafe] %s: Downloading webpage' % video_id
	self._downloader.to_stdout(msg)
def report_extraction(self, video_id):
	"""Announce that information extraction for video_id has started."""
	msg = u'[metacafe] %s: Extracting information' % video_id
	self._downloader.to_stdout(msg)
849 def _real_initialize(self
):
850 # Retrieve disclaimer
851 request
= urllib2
.Request(self
._DISCLAIMER
, None, std_headers
)
853 self
.report_disclaimer()
854 disclaimer
= urllib2
.urlopen(request
).read()
855 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
856 self
._downloader
.trouble(u
'ERROR: unable to retrieve disclaimer: %s' % str(err
))
862 'submit': "Continue - I'm over 18",
864 request
= urllib2
.Request(self
._FILTER
_POST
, urllib
.urlencode(disclaimer_form
), std_headers
)
866 self
.report_age_confirmation()
867 disclaimer
= urllib2
.urlopen(request
).read()
868 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
869 self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
))
872 def _real_extract(self
, url
):
873 # Extract id and simplified title from URL
874 mobj
= re
.match(self
._VALID
_URL
, url
)
876 self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
)
879 video_id
= mobj
.group(1)
881 # Check if video comes from YouTube
882 mobj2
= re
.match(r
'^yt-(.*)$', video_id
)
883 if mobj2
is not None:
884 self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % mobj2
.group(1))
887 simple_title
= mobj
.group(2).decode('utf-8')
888 video_extension
= 'flv'
890 # Retrieve video webpage to extract further information
891 request
= urllib2
.Request('http://www.metacafe.com/watch/%s/' % video_id
)
893 self
.report_download_webpage(video_id
)
894 webpage
= urllib2
.urlopen(request
).read()
895 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
896 self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
))
899 # Extract URL, uploader and title from webpage
900 self
.report_extraction(video_id
)
901 mobj
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
)
903 self
._downloader
.trouble(u
'ERROR: unable to extract media URL')
905 mediaURL
= urllib
.unquote(mobj
.group(1))
907 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
909 # self._downloader.trouble(u'ERROR: unable to extract gdaKey')
911 #gdaKey = mobj.group(1)
913 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
917 mobj
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
)
919 self
._downloader
.trouble(u
'ERROR: unable to extract title')
921 video_title
= mobj
.group(1).decode('utf-8')
923 mobj
= re
.search(r
'(?ms)By:\s*<a .*?>(.+?)<', webpage
)
925 self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname')
927 video_uploader
= mobj
.group(1)
930 # Process video information
931 self
._downloader
.process_info({
932 'id': video_id
.decode('utf-8'),
933 'url': video_url
.decode('utf-8'),
934 'uploader': video_uploader
.decode('utf-8'),
935 'title': video_title
,
936 'stitle': simple_title
,
937 'ext': video_extension
.decode('utf-8'),
939 except UnavailableFormatError
:
940 self
._downloader
.trouble(u
'ERROR: format not available for video')
943 class GoogleIE(InfoExtractor
):
944 """Information extractor for video.google.com."""
946 _VALID_URL
= r
'(?:http://)?video\.google\.com/videoplay\?docid=([^\&]+).*'
def __init__(self, downloader=None):
	"""Constructor. Receives an optional downloader."""
	InfoExtractor.__init__(self, downloader)
953 return (re
.match(GoogleIE
._VALID
_URL
, url
) is not None)
def report_download_webpage(self, video_id):
	"""Announce the download of the videoplay page for video_id."""
	msg = u'[video.google] %s: Downloading webpage' % video_id
	self._downloader.to_stdout(msg)
def report_extraction(self, video_id):
	"""Announce that information extraction for video_id has started."""
	msg = u'[video.google] %s: Extracting information' % video_id
	self._downloader.to_stdout(msg)
963 def _real_initialize(self
):
966 def _real_extract(self
, url
):
967 # Extract id from URL
968 mobj
= re
.match(self
._VALID
_URL
, url
)
970 self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
)
973 video_id
= mobj
.group(1)
975 video_extension
= 'mp4'
977 # Retrieve video webpage to extract further information
978 request
= urllib2
.Request('http://video.google.com/videoplay?docid=%s' % video_id
)
980 self
.report_download_webpage(video_id
)
981 webpage
= urllib2
.urlopen(request
).read()
982 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
983 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
))
986 # Extract URL, uploader, and title from webpage
987 self
.report_extraction(video_id
)
988 mobj
= re
.search(r
"download_url:'(.*)'", webpage
)
990 self
._downloader
.trouble(u
'ERROR: unable to extract media URL')
992 mediaURL
= urllib
.unquote(mobj
.group(1))
993 mediaURL
= mediaURL
.replace('\\x3d', '\x3d')
994 mediaURL
= mediaURL
.replace('\\x26', '\x26')
998 mobj
= re
.search(r
'<title>(.*)</title>', webpage
)
1000 self
._downloader
.trouble(u
'ERROR: unable to extract title')
1002 video_title
= mobj
.group(1).decode('utf-8')
1004 # Google Video doesn't show uploader nicknames?
1005 video_uploader
= 'uploader'
1008 # Process video information
1009 self
._downloader
.process_info({
1010 'id': video_id
.decode('utf-8'),
1011 'url': video_url
.decode('utf-8'),
1012 'uploader': video_uploader
.decode('utf-8'),
1013 'title': video_title
.decode('utf-8'),
1014 'stitle': video_title
.decode('utf-8'),
1015 'ext': video_extension
.decode('utf-8'),
1017 except UnavailableFormatError
:
1018 self
._downloader
.trouble(u
'ERROR: format not available for video')
1021 class PhotobucketIE(InfoExtractor
):
1022 """Information extractor for photobucket.com."""
1024 _VALID_URL
= r
'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
def __init__(self, downloader=None):
	"""Constructor. Receives an optional downloader."""
	InfoExtractor.__init__(self, downloader)
1031 return (re
.match(PhotobucketIE
._VALID
_URL
, url
) is not None)
def report_download_webpage(self, video_id):
	"""Announce the download of the media page for video_id."""
	msg = u'[photobucket] %s: Downloading webpage' % video_id
	self._downloader.to_stdout(msg)
def report_extraction(self, video_id):
	"""Announce that information extraction for video_id has started."""
	msg = u'[photobucket] %s: Extracting information' % video_id
	self._downloader.to_stdout(msg)
1041 def _real_initialize(self
):
1044 def _real_extract(self
, url
):
1045 # Extract id from URL
1046 mobj
= re
.match(self
._VALID
_URL
, url
)
1048 self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
)
1051 video_id
= mobj
.group(1)
1053 video_extension
= 'flv'
1055 # Retrieve video webpage to extract further information
1056 request
= urllib2
.Request(url
)
1058 self
.report_download_webpage(video_id
)
1059 webpage
= urllib2
.urlopen(request
).read()
1060 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
1061 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
))
1064 # Extract URL, uploader, and title from webpage
1065 self
.report_extraction(video_id
)
1066 mobj
= re
.search(r
'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage
)
1068 self
._downloader
.trouble(u
'ERROR: unable to extract media URL')
1070 mediaURL
= urllib
.unquote(mobj
.group(1))
1072 video_url
= mediaURL
1074 mobj
= re
.search(r
'<title>(.*) video by (.*) - Photobucket</title>', webpage
)
1076 self
._downloader
.trouble(u
'ERROR: unable to extract title')
1078 video_title
= mobj
.group(1).decode('utf-8')
1080 video_uploader
= mobj
.group(2).decode('utf-8')
1083 # Process video information
1084 self
._downloader
.process_info({
1085 'id': video_id
.decode('utf-8'),
1086 'url': video_url
.decode('utf-8'),
1087 'uploader': video_uploader
.decode('utf-8'),
1088 'title': video_title
.decode('utf-8'),
1089 'stitle': video_title
.decode('utf-8'),
1090 'ext': video_extension
.decode('utf-8'),
1092 except UnavailableFormatError
:
1093 self
._downloader
.trouble(u
'ERROR: format not available for video')
1096 class YoutubeSearchIE(InfoExtractor
):
1097 """Information Extractor for YouTube search queries."""
1098 _VALID_QUERY
= r
'ytsearch(\d+|all)?:[\s\S]+'
1099 _TEMPLATE_URL
= 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1100 _VIDEO_INDICATOR
= r
'href="/watch\?v=.+?"'
1101 _MORE_PAGES_INDICATOR
= r
'(?m)>\s*Next\s*</a>'
1103 _max_youtube_results
= 1000
1105 def __init__(self
, youtube_ie
, downloader
=None):
1106 InfoExtractor
.__init
__(self
, downloader
)
1107 self
._youtube
_ie
= youtube_ie
1111 return (re
.match(YoutubeSearchIE
._VALID
_QUERY
, url
) is not None)
1113 def report_download_page(self
, query
, pagenum
):
1114 """Report attempt to download playlist page with given number."""
1115 self
._downloader
.to_stdout(u
'[youtube] query "%s": Downloading page %s' % (query
, pagenum
))
1117 def _real_initialize(self
):
1118 self
._youtube
_ie
.initialize()
1120 def _real_extract(self
, query
):
1121 mobj
= re
.match(self
._VALID
_QUERY
, query
)
1123 self
._downloader
.trouble(u
'ERROR: invalid search query "%s"' % query
)
1126 prefix
, query
= query
.split(':')
1129 self
._download
_n
_results
(query
, 1)
1131 elif prefix
== 'all':
1132 self
._download
_n
_results
(query
, self
._max
_youtube
_results
)
1138 self
._downloader
.trouble(u
'ERROR: invalid download number %s for query "%s"' % (n
, query
))
1140 elif n
> self
._max
_youtube
_results
:
1141 self
._downloader
.to_stderr(u
'WARNING: ytsearch returns max %i results (you requested %i)' % (self
._max
_youtube
_results
, n
))
1142 n
= self
._max
_youtube
_results
1143 self
._download
_n
_results
(query
, n
)
1145 except ValueError: # parsing prefix as integer fails
1146 self
._download
_n
_results
(query
, 1)
1149 def _download_n_results(self
, query
, n
):
1150 """Downloads a specified number of results for a query"""
1153 already_seen
= set()
1157 self
.report_download_page(query
, pagenum
)
1158 result_url
= self
._TEMPLATE
_URL
% (urllib
.quote_plus(query
), pagenum
)
1159 request
= urllib2
.Request(result_url
, None, std_headers
)
1161 page
= urllib2
.urlopen(request
).read()
1162 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
1163 self
._downloader
.trouble(u
'ERROR: unable to download webpage: %s' % str(err
))
1166 # Extract video identifiers
1167 for mobj
in re
.finditer(self
._VIDEO
_INDICATOR
, page
):
1168 video_id
= page
[mobj
.span()[0]:mobj
.span()[1]].split('=')[2][:-1]
1169 if video_id
not in already_seen
:
1170 video_ids
.append(video_id
)
1171 already_seen
.add(video_id
)
1172 if len(video_ids
) == n
:
1173 # Specified n videos reached
1174 for id in video_ids
:
1175 self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % id)
1178 if re
.search(self
._MORE
_PAGES
_INDICATOR
, page
) is None:
1179 for id in video_ids
:
1180 self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % id)
1183 pagenum
= pagenum
+ 1
1185 class YoutubePlaylistIE(InfoExtractor
):
1186 """Information Extractor for YouTube playlists."""
1188 _VALID_URL
= r
'(?:http://)?(?:\w+\.)?youtube.com/(?:view_play_list|my_playlists)\?.*?p=([^&]+).*'
1189 _TEMPLATE_URL
= 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1190 _VIDEO_INDICATOR
= r
'/watch\?v=(.+?)&'
1191 _MORE_PAGES_INDICATOR
= r
'/view_play_list?p=%s&page=%s'
1194 def __init__(self
, youtube_ie
, downloader
=None):
1195 InfoExtractor
.__init
__(self
, downloader
)
1196 self
._youtube
_ie
= youtube_ie
1200 return (re
.match(YoutubePlaylistIE
._VALID
_URL
, url
) is not None)
1202 def report_download_page(self
, playlist_id
, pagenum
):
1203 """Report attempt to download playlist page with given number."""
1204 self
._downloader
.to_stdout(u
'[youtube] PL %s: Downloading page #%s' % (playlist_id
, pagenum
))
1206 def _real_initialize(self
):
1207 self
._youtube
_ie
.initialize()
1209 def _real_extract(self
, url
):
1210 # Extract playlist id
1211 mobj
= re
.match(self
._VALID
_URL
, url
)
1213 self
._downloader
.trouble(u
'ERROR: invalid url: %s' % url
)
1216 # Download playlist pages
1217 playlist_id
= mobj
.group(1)
1222 self
.report_download_page(playlist_id
, pagenum
)
1223 request
= urllib2
.Request(self
._TEMPLATE
_URL
% (playlist_id
, pagenum
), None, std_headers
)
1225 page
= urllib2
.urlopen(request
).read()
1226 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
1227 self
._downloader
.trouble(u
'ERROR: unable to download webpage: %s' % str(err
))
1230 # Extract video identifiers
1232 for mobj
in re
.finditer(self
._VIDEO
_INDICATOR
, page
):
1233 if mobj
.group(1) not in ids_in_page
:
1234 ids_in_page
.append(mobj
.group(1))
1235 video_ids
.extend(ids_in_page
)
1237 if (self
._MORE
_PAGES
_INDICATOR
% (playlist_id
.upper(), pagenum
+ 1)) not in page
:
1239 pagenum
= pagenum
+ 1
1241 for id in video_ids
:
1242 self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % id)
1245 class YoutubeUserIE(InfoExtractor
):
1246 """Information Extractor for YouTube users."""
1248 _VALID_URL
= r
'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1249 _TEMPLATE_URL
= 'http://gdata.youtube.com/feeds/api/users/%s'
1250 _VIDEO_INDICATOR
= r
'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1253 def __init__(self
, youtube_ie
, downloader
=None):
1254 InfoExtractor
.__init
__(self
, downloader
)
1255 self
._youtube
_ie
= youtube_ie
1259 return (re
.match(YoutubeUserIE
._VALID
_URL
, url
) is not None)
1261 def report_download_page(self
, username
):
1262 """Report attempt to download user page."""
1263 self
._downloader
.to_stdout(u
'[youtube] user %s: Downloading page ' % (username
))
1265 def _real_initialize(self
):
1266 self
._youtube
_ie
.initialize()
1268 def _real_extract(self
, url
):
1270 mobj
= re
.match(self
._VALID
_URL
, url
)
1272 self
._downloader
.trouble(u
'ERROR: invalid url: %s' % url
)
1275 # Download user page
1276 username
= mobj
.group(1)
1280 self
.report_download_page(username
)
1281 request
= urllib2
.Request(self
._TEMPLATE
_URL
% (username
), None, std_headers
)
1283 page
= urllib2
.urlopen(request
).read()
1284 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
:
1285 self
._downloader
.trouble(u
'ERROR: unable to download webpage: %s' % str(err
))
1288 # Extract video identifiers
1291 for mobj
in re
.finditer(self
._VIDEO
_INDICATOR
, page
):
1292 if mobj
.group(1) not in ids_in_page
:
1293 ids_in_page
.append(mobj
.group(1))
1294 video_ids
.extend(ids_in_page
)
1296 for id in video_ids
:
1297 self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % id)
class PostProcessor(object):
    """Post Processor class.

    PostProcessor objects can be added to downloaders with their
    add_post_processor() method. When the downloader has finished a
    successful download, it will take its internal chain of PostProcessors
    and start calling the run() method on each one of them, first with
    an initial argument and then with the returned value of the previous
    PostProcessor.

    The chain will be stopped if one of them ever returns None or the end
    of the chain is reached.

    PostProcessor objects follow a "mutual registration" process similar
    to InfoExtractor objects.
    """

    _downloader = None  # set via __init__ or set_downloader()

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Sets the downloader for this PP."""
        self._downloader = downloader

    def run(self, information):
        """Run the PostProcessor.

        The "information" argument is a dictionary like the ones
        composed by InfoExtractors. The only difference is that this
        one has an extra field called "filepath" that points to the
        downloaded file.

        When this method returns None, the postprocessing chain is
        stopped. However, this method may return an information
        dictionary that will be passed to the next postprocessing
        object in the chain. It can be the one it received after
        changing some fields.

        In addition, this method may raise a PostProcessingError
        exception that will be taken into account by the downloader
        it was called from.
        """
        return information # by default, do nothing
### MAIN PROGRAM ###
if __name__ == '__main__':
    try:
        # Modules needed only when running the main program
        import getpass
        import optparse

        # Function to update the program file with the latest version from bitbucket.org
        def update_self(downloader, filename):
            # Note: downloader only used for options
            if not os.access(filename, os.W_OK):
                sys.exit('ERROR: no write permissions on %s' % filename)

            downloader.to_stdout('Updating to latest stable version...')
            latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
            latest_version = urllib.urlopen(latest_url).read().strip()
            prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
            newcontent = urllib.urlopen(prog_url).read()
            stream = open(filename, 'w')
            stream.write(newcontent)
            stream.close()
            downloader.to_stdout('Updated to version %s' % latest_version)

        # General configuration
        urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
        urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
        socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

        # Parse command line
        parser = optparse.OptionParser(
            usage='Usage: %prog [options] url...',
            version='2010.01.19',
            conflict_handler='resolve',
        )

        parser.add_option('-h', '--help',
                action='help', help='print this help text and exit')
        parser.add_option('-v', '--version',
                action='version', help='print program version and exit')
        parser.add_option('-U', '--update',
                action='store_true', dest='update_self', help='update this program to latest stable version')
        parser.add_option('-i', '--ignore-errors',
                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
        parser.add_option('-r', '--rate-limit',
                dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')

        authentication = optparse.OptionGroup(parser, 'Authentication Options')
        authentication.add_option('-u', '--username',
                dest='username', metavar='UN', help='account username')
        authentication.add_option('-p', '--password',
                dest='password', metavar='PW', help='account password')
        authentication.add_option('-n', '--netrc',
                action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
        parser.add_option_group(authentication)

        video_format = optparse.OptionGroup(parser, 'Video Format Options')
        video_format.add_option('-f', '--format',
                action='store', dest='format', metavar='FMT', help='video format code')
        video_format.add_option('-b', '--best-quality',
                action='store_const', dest='format', help='download the best quality video possible', const='0')
        video_format.add_option('-m', '--mobile-version',
                action='store_const', dest='format', help='alias for -f 17', const='17')
        video_format.add_option('-d', '--high-def',
                action='store_const', dest='format', help='alias for -f 22', const='22')
        parser.add_option_group(video_format)

        verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
        verbosity.add_option('-q', '--quiet',
                action='store_true', dest='quiet', help='activates quiet mode', default=False)
        verbosity.add_option('-s', '--simulate',
                action='store_true', dest='simulate', help='do not download video', default=False)
        verbosity.add_option('-g', '--get-url',
                action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
        verbosity.add_option('-e', '--get-title',
                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
        parser.add_option_group(verbosity)

        filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
        filesystem.add_option('-t', '--title',
                action='store_true', dest='usetitle', help='use title in file name', default=False)
        filesystem.add_option('-l', '--literal',
                action='store_true', dest='useliteral', help='use literal title in file name', default=False)
        filesystem.add_option('-o', '--output',
                dest='outtmpl', metavar='TPL', help='output filename template')
        filesystem.add_option('-a', '--batch-file',
                dest='batchfile', metavar='F', help='file containing URLs to download')
        filesystem.add_option('-w', '--no-overwrites',
                action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
        filesystem.add_option('-c', '--continue',
                action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
        parser.add_option_group(filesystem)

        (opts, args) = parser.parse_args()

        # Batch file verification
        batchurls = []
        if opts.batchfile is not None:
            try:
                batchurls = open(opts.batchfile, 'r').readlines()
                batchurls = [x.strip() for x in batchurls]
                batchurls = [x for x in batchurls if len(x) > 0]
            except IOError:
                sys.exit(u'ERROR: batch file could not be read')
        all_urls = batchurls + args

        # Conflicting, missing and erroneous options
        if opts.usenetrc and (opts.username is not None or opts.password is not None):
            parser.error(u'using .netrc conflicts with giving username/password')
        if opts.password is not None and opts.username is None:
            parser.error(u'account username missing')
        if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
            parser.error(u'using output template conflicts with using title or literal title')
        if opts.usetitle and opts.useliteral:
            parser.error(u'using title conflicts with using literal title')
        if opts.username is not None and opts.password is None:
            opts.password = getpass.getpass(u'Type account password and press return:')
        if opts.ratelimit is not None:
            numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
            if numeric_limit is None:
                parser.error(u'invalid rate limit specified')
            opts.ratelimit = numeric_limit

        # Information extractors
        youtube_ie = YoutubeIE()
        metacafe_ie = MetacafeIE(youtube_ie)
        youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
        youtube_user_ie = YoutubeUserIE(youtube_ie)
        youtube_search_ie = YoutubeSearchIE(youtube_ie)
        google_ie = GoogleIE()
        photobucket_ie = PhotobucketIE()

        # File downloader
        fd = FileDownloader({
            'usenetrc': opts.usenetrc,
            'username': opts.username,
            'password': opts.password,
            'quiet': (opts.quiet or opts.geturl or opts.gettitle),
            'forceurl': opts.geturl,
            'forcetitle': opts.gettitle,
            'simulate': (opts.simulate or opts.geturl or opts.gettitle),
            'format': opts.format,
            # Template precedence: explicit -o, then -t, then -l, then id-only.
            'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
                or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                or u'%(id)s.%(ext)s'),
            'ignoreerrors': opts.ignoreerrors,
            'ratelimit': opts.ratelimit,
            'nooverwrites': opts.nooverwrites,
            'continuedl': opts.continue_dl,
            })
        # More specific extractors first so generic ones don't shadow them.
        fd.add_info_extractor(youtube_search_ie)
        fd.add_info_extractor(youtube_pl_ie)
        fd.add_info_extractor(youtube_user_ie)
        fd.add_info_extractor(metacafe_ie)
        fd.add_info_extractor(youtube_ie)
        fd.add_info_extractor(google_ie)
        fd.add_info_extractor(photobucket_ie)

        # Update version
        if opts.update_self:
            update_self(fd, sys.argv[0])

        # Maybe do nothing
        if len(all_urls) < 1:
            if not opts.update_self:
                parser.error(u'you must provide at least one URL')
            else:
                sys.exit()
        retcode = fd.download(all_urls)
        sys.exit(retcode)

    except DownloadError:
        sys.exit(1)
    except SameFileError:
        sys.exit(u'ERROR: fixed output name but more than one file to download')
    except KeyboardInterrupt:
        sys.exit(u'\nERROR: Interrupted by user')