Raphaël G. Git Repositories - youtubedl/blob - youtube-dl
Imported Upstream version 2010.12.09
[youtubedl] / youtube-dl
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 # Author: Ricardo Garcia Gonzalez
4 # Author: Danny Colligan
5 # Author: Benjamin Johnson
6 # Author: Vasyl' Vavrychuk
7 # License: Public domain code
8 import cookielib
9 import datetime
10 import htmlentitydefs
11 import httplib
12 import locale
13 import math
14 import netrc
15 import os
16 import os.path
17 import re
18 import socket
19 import string
20 import subprocess
21 import sys
22 import time
23 import urllib
24 import urllib2
25
26 # parse_qs was moved from the cgi module to the urlparse module recently.
27 try:
28 from urlparse import parse_qs
29 except ImportError:
30 from cgi import parse_qs
31
# Default HTTP headers handed to the urllib2.Request objects built in this
# file; they imitate a desktop Firefox browser.
std_headers = {
    'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
}

# ASCII letters and digits, as a unicode string. Any run of characters outside
# this set is replaced by '_' when a simplified title is built.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
40
def preferredencoding():
    """Get preferred encoding.

    Returns the encoding reported by locale.getpreferredencoding() when
    that encoding can actually be used to encode text, and 'UTF-8'
    otherwise (unknown codec, broken locale settings, ...).
    """
    try:
        pref = locale.getpreferredencoding()
        # Make sure the reported codec exists and is able to encode text
        u'TEST'.encode(pref)
    except Exception:
        # Do not swallow KeyboardInterrupt/SystemExit, only real failures
        pref = 'UTF-8'
    return pref
56
def htmlentity_transform(matchobj):
    """Transforms an HTML entity to a Unicode character.

    This function receives a match object and is intended to be used with
    the re.sub() function. Group 1 of the match must hold the entity text
    without the leading '&' and the trailing ';'.

    Named entities and decimal (#233) or hexadecimal (#xE9) character
    references are converted to the character they stand for. Anything
    else is returned in its literal '&...;' form.
    """
    entity = matchobj.group(1)

    # Known non-numeric HTML entity
    if entity in htmlentitydefs.name2codepoint:
        return unichr(htmlentitydefs.name2codepoint[entity])

    # Numeric character reference. The whole entity must match, and the
    # hexadecimal form accepts the digits a-f as well.
    mobj = re.match(u'#(?:[xX]([0-9a-fA-F]+)|([0-9]+))$', entity)
    if mobj is not None:
        (hexstr, decstr) = mobj.groups()
        try:
            if hexstr is not None:
                return unichr(long(hexstr, 16))
            return unichr(long(decstr, 10))
        except (ValueError, OverflowError):
            # Code point outside the range unichr() supports: keep it literal
            pass

    # Unknown entity in name, return its literal representation
    return (u'&%s;' % entity)
82
def sanitize_title(utitle):
    """Return the unicode title utitle made usable as part of a filename.

    HTML entities are decoded first; afterwards every path separator is
    replaced by a percent sign.
    """
    decoded = re.sub(u'(?u)&(.+?);', htmlentity_transform, utitle)
    separator = unicode(os.sep)
    return decoded.replace(separator, u'%')
87
def sanitize_open(filename, open_mode):
    """Open filename, retrying once with a tweaked name if that fails.

    The name u'-' stands for standard output (switched to binary mode on
    win32). If the first attempt fails, the characters that win32 forbids
    in file names are replaced by '#' and the open is tried again; a
    failure of this second attempt propagates to the caller, like the
    standard open() function would.

    It returns the tuple (stream, definitive_file_name).
    """
    try:
        if filename != u'-':
            return (open(filename, open_mode), filename)
        if sys.platform == 'win32':
            import msvcrt
            msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout, filename)
    except (IOError, OSError):
        # In case of error, try to remove win32 forbidden chars
        fallback_name = re.sub(u'[/<>:"\\|\\?\\*]', u'#', filename)

        # An exception here should be caught in the caller
        return (open(fallback_name, open_mode), fallback_name)
113
class DownloadError(Exception):
    """Error raised when a download problem is found.

    FileDownloader objects raise it, carrying the error message, unless
    they have been configured to continue on errors.
    """
122
class SameFileError(Exception):
    """Error raised when several downloads would share one file.

    FileDownloader objects raise it when they detect that multiple files
    would have to be downloaded to the same file on disk.
    """
130
class PostProcessingError(Exception):
    """Error raised by a postprocessing task.

    The .run() method of a PostProcessor may raise it to indicate that
    the postprocessing failed.
    """
138
class UnavailableVideoError(Exception):
    """Error raised when a video format is unavailable.

    It is raised when a video is requested in a format that is not
    available for that video.
    """
146
class ContentTooShortError(Exception):
    """Content Too Short exception.

    This exception may be raised by FileDownloader objects when a file they
    download is too small for what the server announced first, indicating
    the connection was probably interrupted.

    The byte counts are available as the downloaded and expected
    attributes and, in that order, as the exception arguments.
    """
    # Both in bytes
    downloaded = None
    expected = None

    def __init__(self, downloaded, expected):
        # Initialise the base class too, so that args, str() and repr()
        # carry the two values instead of being empty.
        Exception.__init__(self, downloaded, expected)
        self.downloaded = downloaded
        self.expected = expected
161
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader does not know how to
    extract all the needed information (that is the task of the
    InfoExtractors), so it has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:         Username for authentication purposes.
    password:         Password for authentication purposes.
    usenetrc:         Use netrc for authentication instead.
    quiet:            Do not print messages to stdout.
    forceurl:         Force printing final URL.
    forcetitle:       Force printing title.
    forcethumbnail:   Force printing thumbnail URL.
    forcedescription: Force printing description.
    simulate:         Do not download the video files.
    format:           Video format code.
    format_limit:     Highest quality format to try.
    outtmpl:          Template for output names.
    ignoreerrors:     Do not stop on download errors.
    ratelimit:        Download speed limit, in bytes/sec.
    nooverwrites:     Prevent overwriting files.
    retries:          Number of times to retry for HTTP error 5xx
    continuedl:       Try to continue downloads if possible.
    noprogress:       Do not print the progress bar.
    playliststart:    Playlist item to start at.
    playlistend:      Playlist item to end at.
    logtostderr:      Log messages to stderr instead of stdout.
    """

    # Class-level defaults; __init__ replaces every one of them per instance.
    params = None               # Options dictionary described above
    _ies = []                   # Registered InfoExtractors, in order
    _pps = []                   # Registered PostProcessors, in order
    _download_retcode = None    # Value returned by download()
    _num_downloads = None       # Ordinal used for the 'autonumber' template field
    _screen_file = None         # Stream that to_screen() writes to
218
219 def __init__(self, params):
220 """Create a FileDownloader object with the given options."""
221 self._ies = []
222 self._pps = []
223 self._download_retcode = 0
224 self._num_downloads = 0
225 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
226 self.params = params
227
@staticmethod
def pmkdir(filename):
    """Create directory components in filename. Similar to Unix "mkdir -p".

    Only the directory part of filename is created; the last path
    component is taken to be the file itself. Directories that already
    exist are left untouched. Raises OSError if a directory cannot be
    created.
    """
    directory = os.path.dirname(filename)
    if not directory or os.path.isdir(directory):
        return
    try:
        os.makedirs(directory)
    except OSError:
        # Somebody else may have created it in the meantime; only a
        # directory that is still missing is a real error.
        if not os.path.isdir(directory):
            raise
237
@staticmethod
def temp_name(filename):
    """Returns a temporary filename for the given filename."""
    # Standard output has no temporary counterpart
    if filename == u'-':
        return filename
    # Something that exists but is not a regular file is written in place
    if os.path.exists(filename) and not os.path.isfile(filename):
        return filename
    return filename + u'.part'
244
@staticmethod
def format_bytes(bytes):
    """Format a byte quantity as a short string such as '1.50M'.

    bytes may be a number, a numeric string or None. None gives 'N/A'.
    Other values are scaled down by powers of 1024 and printed with two
    decimals followed by one of the suffixes b, k, M, G, T, P, E, Z, Y.
    """
    if bytes is None:
        return 'N/A'
    suffixes = 'bkMGTPEZY'
    scaled = float(bytes)
    exponent = 0
    # Repeated division is exact for powers of two and, unlike a
    # logarithm, can neither produce a negative exponent for values
    # below one byte nor run past the last suffix for huge values.
    while scaled >= 1024.0 and exponent < len(suffixes) - 1:
        scaled /= 1024.0
        exponent += 1
    return '%.2f%s' % (scaled, suffixes[exponent])
258
@staticmethod
def calc_percent(byte_counter, data_len):
    """Return the downloaded percentage as a 6 character string.

    byte_counter is the number of bytes received so far and data_len the
    total size (a number or numeric string). When the total is unknown
    (None) or zero, the placeholder '---.-%' is returned.
    """
    if data_len is None:
        return '---.-%'
    total = float(data_len)
    if total == 0.0:
        # A zero total would otherwise raise ZeroDivisionError
        return '---.-%'
    return '%6s' % ('%3.1f%%' % (float(byte_counter) / total * 100.0))
264
@staticmethod
def calc_eta(start, now, total, current):
    """Return the estimated remaining time as 'MM:SS'.

    start and now are timestamps in seconds, total the full size and
    current the amount received so far. '--:--' is returned when no
    estimate can be made or when it would exceed 99 minutes.
    """
    unknown = '--:--'
    if total is None:
        return unknown
    elapsed = now - start
    if current == 0 or elapsed < 0.001: # One millisecond
        return unknown
    speed = float(current) / elapsed
    remaining = long((float(total) - float(current)) / speed)
    (minutes, seconds) = divmod(remaining, 60)
    if minutes > 99:
        return unknown
    return '%02d:%02d' % (minutes, seconds)
278
@staticmethod
def calc_speed(start, now, bytes):
    """Return the average download speed as a 10 character string.

    start and now are timestamps in seconds and bytes the amount received
    between them. '---b/s' is used while no speed can be computed.
    """
    elapsed = now - start
    if bytes == 0 or elapsed < 0.001: # One millisecond
        speed_text = '---b/s'
    else:
        speed_text = '%s/s' % FileDownloader.format_bytes(float(bytes) / elapsed)
    return '%10s' % speed_text
285
@staticmethod
def best_block_size(elapsed_time, bytes):
    """Return the size to use for the next read.

    bytes is the size of the block just read and elapsed_time the seconds
    it took. The measured rate is used, kept between half and double the
    previous size, with the upper bound capped at 4 MB.
    """
    lower = max(bytes / 2.0, 1.0)
    upper = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
    if elapsed_time < 0.001:
        chosen = upper
    else:
        rate = bytes / elapsed_time
        if rate > upper:
            chosen = upper
        elif rate < lower:
            chosen = lower
        else:
            chosen = rate
    return long(chosen)
298
@staticmethod
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into a long integer.

    The string is a decimal number optionally followed by one of the
    letters k, M, G, T, P, E, Z, Y (in any case), each standing for a
    further factor of 1024. None is returned for any other string.
    """
    match = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
    if match is None:
        return None
    (amount, unit) = match.groups()
    # An absent unit is the empty string, whose index is 0 (plain bytes)
    exponent = 'bkmgtpezy'.index(unit.lower())
    return long(round(float(amount) * 1024.0 ** exponent))
308
def add_info_extractor(self, ie):
    """Register the InfoExtractor ie after the ones already added.

    This downloader is also set as the downloader of ie.
    """
    extractors = self._ies
    extractors.append(ie)
    ie.set_downloader(self)
313
def add_post_processor(self, pp):
    """Register the PostProcessor pp at the end of the chain.

    This downloader is also set as the downloader of pp.
    """
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
318
def to_screen(self, message, skip_eol=False, ignore_encoding_errors=False):
    """Print message to the screen stream if not in quiet mode.

    The screen stream is stdout, or stderr when 'logtostderr' was given.
    A newline is appended unless skip_eol is true. If the message cannot
    be encoded with the preferred encoding, UnicodeEncodeError is raised
    unless ignore_encoding_errors is true, in which case the message is
    silently dropped.
    """
    if self.params.get('quiet', False):
        return
    if skip_eol:
        terminator = u''
    else:
        terminator = u'\n'
    try:
        output = (u'%s%s' % (message, terminator)).encode(preferredencoding())
    except UnicodeEncodeError:
        if not ignore_encoding_errors:
            raise
        return
    # write() is used instead of "print >>stream, text,": the print
    # statement would emit a stray space before the next message
    # whenever the previous one did not end in a newline.
    self._screen_file.write(output)
    self._screen_file.flush()
329
def to_stderr(self, message):
    """Print message to stderr, encoded with the preferred encoding."""
    encoded = message.encode(preferredencoding())
    print >>sys.stderr, encoded
333
def fixed_template(self):
    """Checks if the output template is fixed.

    A template is fixed when it has no '%(name)s' field, which means that
    every download would get the same file name.
    """
    template = self.params['outtmpl']
    field = re.search(u'(?u)%\\(.+?\\)s', template)
    return field is None
337
def trouble(self, message=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr first. Then, unless the
    downloader has been configured to ignore download errors, a
    DownloadError carrying the message is raised. Otherwise the return
    code of the downloader is set to 1 and execution continues.
    """
    if message is not None:
        self.to_stderr(message)
    ignore_errors = self.params.get('ignoreerrors', False)
    if not ignore_errors:
        raise DownloadError(message)
    self._download_retcode = 1
350
def slow_down(self, start_time, byte_counter):
    """Sleep if the download speed is over the rate limit.

    start_time is the timestamp at which the download started and
    byte_counter the number of bytes received since then. Nothing happens
    when no 'ratelimit' option is set.
    """
    rate_limit = self.params.get('ratelimit', None)
    if rate_limit is None or byte_counter == 0:
        return
    now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    if float(byte_counter) / elapsed > rate_limit:
        # Sleep just long enough for the average rate to drop to the limit
        time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
363
def try_rename(self, old_filename, new_filename):
    """Rename old_filename to new_filename, reporting failures via trouble().

    Nothing is done when both names are equal.
    """
    if old_filename == new_filename:
        return
    try:
        os.rename(old_filename, new_filename)
    except (IOError, OSError):
        self.trouble(u'ERROR: unable to rename file')
371
def report_destination(self, filename):
    """Report destination filename (encoding errors are ignored)."""
    message = u'[download] Destination: %s' % filename
    self.to_screen(message, ignore_encoding_errors=True)
375
def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
    """Report download progress, unless the 'noprogress' option is set.

    The line starts with a carriage return and has no end of line, so
    that successive reports overwrite each other on a terminal.
    """
    if self.params.get('noprogress', False):
        return
    fields = (percent_str, data_len_str, speed_str, eta_str)
    self.to_screen(u'\r[download] %s of %s at %s ETA %s' % fields, skip_eol=True)
382
def report_resuming_byte(self, resume_len):
    """Report attempt to resume the download at byte resume_len."""
    message = u'[download] Resuming download at byte %s' % resume_len
    self.to_screen(message)
386
def report_retry(self, count, retries):
    """Report retry number count, out of retries, after an HTTP error 5xx."""
    attempt = (count, retries)
    self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % attempt)
390
def report_file_already_downloaded(self, file_name):
    """Report file has already been fully downloaded.

    If the file name cannot be encoded for printing, a generic message
    without the name is printed instead.
    """
    named_message = u'[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(named_message)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
397
def report_unable_to_resume(self):
    """Report that the download could not be resumed."""
    message = u'[download] Unable to resume'
    self.to_screen(message)
401
def report_finish(self):
    """Report download finished.

    Without a progress bar a full message is printed; with one, only the
    end of line that terminates the last progress report.
    """
    if self.params.get('noprogress', False):
        message = u'[download] Download completed'
    else:
        message = u''
    self.to_screen(message)
408
def increment_downloads(self):
    """Advance the ordinal that assigns a number to each file."""
    self._num_downloads = self._num_downloads + 1
412
413 def process_info(self, info_dict):
414 """Process a single dictionary returned by an InfoExtractor."""
415 # Do nothing else if in simulate mode
416 if self.params.get('simulate', False):
417 # Forced printings
418 if self.params.get('forcetitle', False):
419 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
420 if self.params.get('forceurl', False):
421 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
422 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
423 print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')
424 if self.params.get('forcedescription', False) and 'description' in info_dict:
425 print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')
426
427 return
428
429 try:
430 template_dict = dict(info_dict)
431 template_dict['epoch'] = unicode(long(time.time()))
432 template_dict['autonumber'] = unicode('%05d' % self._num_downloads)
433 filename = self.params['outtmpl'] % template_dict
434 except (ValueError, KeyError), err:
435 self.trouble(u'ERROR: invalid system charset or erroneous output template')
436 return
437 if self.params.get('nooverwrites', False) and os.path.exists(filename):
438 self.to_stderr(u'WARNING: file exists and will be skipped')
439 return
440
441 try:
442 self.pmkdir(filename)
443 except (OSError, IOError), err:
444 self.trouble(u'ERROR: unable to create directories: %s' % str(err))
445 return
446
447 try:
448 success = self._do_download(filename, info_dict['url'].encode('utf-8'), info_dict.get('player_url', None))
449 except (OSError, IOError), err:
450 raise UnavailableVideoError
451 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
452 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
453 return
454 except (ContentTooShortError, ), err:
455 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
456 return
457
458 if success:
459 try:
460 self.post_process(filename, info_dict)
461 except (PostProcessingError), err:
462 self.trouble(u'ERROR: postprocessing: %s' % str(err))
463 return
464
def download(self, url_list):
    """Download a given list of URLs.

    Every URL is handed to the first registered InfoExtractor that finds
    it suitable. SameFileError is raised if several URLs are given while
    the output template is fixed. Returns the return code of the
    downloader.
    """
    if len(url_list) > 1 and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        for ie in self._ies:
            if ie.suitable(url):
                # Extract information from URL and process it
                ie.extract(url)
                break
        else:
            # The loop ended without finding a suitable InfoExtractor
            self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

    return self._download_retcode
490
def post_process(self, filename, ie_info):
    """Run the postprocessing chain on the given file.

    Each PostProcessor receives the information returned by the previous
    one; the first receives a copy of ie_info with the file name added
    under 'filepath'. The chain stops when a PostProcessor returns None.
    """
    current = dict(ie_info)
    current['filepath'] = filename
    for processor in self._pps:
        current = processor.run(current)
        if current is None:
            return
499
def _download_with_rtmpdump(self, filename, url, player_url):
    """Download an RTMP stream to filename with the external rtmpdump tool.

    The data goes to a temporary file that is renamed to filename once
    rtmpdump finishes successfully. player_url, when not None, is passed
    to rtmpdump as the SWF player URL. Returns True on success and False
    otherwise, after reporting the problem through trouble().
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    # Check for rtmpdump first
    try:
        devnull = open(os.path.devnull, 'w')
        try:
            subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        finally:
            # Do not leak the handle used to silence the probe
            devnull.close()
    except (OSError, IOError):
        self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
        return False

    # Download using rtmpdump. rtmpdump returns exit code 2 when
    # the connection was interrupted and resuming appears to be
    # possible. This is part of rtmpdump's normal usage, AFAIK.
    basic_args = ['rtmpdump', '-q']
    if player_url is not None:
        basic_args += ['-W', player_url]
    basic_args += ['-r', url, '-o', tmpfilename]

    first_args = list(basic_args)
    if self.params.get('continuedl', False):
        first_args += ['-e', '-k', '1']
    retval = subprocess.call(first_args)

    while retval == 2 or retval == 1:
        prevsize = os.path.getsize(tmpfilename)
        self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
        time.sleep(5.0) # This seems to be needed
        resume_args = basic_args + ['-e']
        if retval == 1:
            resume_args += ['-k', '1']
        retval = subprocess.call(resume_args)
        cursize = os.path.getsize(tmpfilename)
        # Still failing and the file did not grow: give up
        if prevsize == cursize and retval == 1:
            break

    if retval == 0:
        self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
        self.try_rename(tmpfilename, filename)
        return True
    else:
        self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
        return False
531
532 def _do_download(self, filename, url, player_url):
533 # Check file already present
534 if self.params.get('continuedl', False) and os.path.isfile(filename):
535 self.report_file_already_downloaded(filename)
536 return True
537
538 # Attempt to download using rtmpdump
539 if url.startswith('rtmp'):
540 return self._download_with_rtmpdump(filename, url, player_url)
541
542 tmpfilename = self.temp_name(filename)
543 stream = None
544 open_mode = 'wb'
545 basic_request = urllib2.Request(url, None, std_headers)
546 request = urllib2.Request(url, None, std_headers)
547
548 # Establish possible resume length
549 if os.path.isfile(tmpfilename):
550 resume_len = os.path.getsize(tmpfilename)
551 else:
552 resume_len = 0
553
554 # Request parameters in case of being able to resume
555 if self.params.get('continuedl', False) and resume_len != 0:
556 self.report_resuming_byte(resume_len)
557 request.add_header('Range','bytes=%d-' % resume_len)
558 open_mode = 'ab'
559
560 count = 0
561 retries = self.params.get('retries', 0)
562 while count <= retries:
563 # Establish connection
564 try:
565 data = urllib2.urlopen(request)
566 break
567 except (urllib2.HTTPError, ), err:
568 if (err.code < 500 or err.code >= 600) and err.code != 416:
569 # Unexpected HTTP error
570 raise
571 elif err.code == 416:
572 # Unable to resume (requested range not satisfiable)
573 try:
574 # Open the connection again without the range header
575 data = urllib2.urlopen(basic_request)
576 content_length = data.info()['Content-Length']
577 except (urllib2.HTTPError, ), err:
578 if err.code < 500 or err.code >= 600:
579 raise
580 else:
581 # Examine the reported length
582 if (content_length is not None and
583 (resume_len - 100 < long(content_length) < resume_len + 100)):
584 # The file had already been fully downloaded.
585 # Explanation to the above condition: in issue #175 it was revealed that
586 # YouTube sometimes adds or removes a few bytes from the end of the file,
587 # changing the file size slightly and causing problems for some users. So
588 # I decided to implement a suggested change and consider the file
589 # completely downloaded if the file size differs less than 100 bytes from
590 # the one in the hard drive.
591 self.report_file_already_downloaded(filename)
592 self.try_rename(tmpfilename, filename)
593 return True
594 else:
595 # The length does not match, we start the download over
596 self.report_unable_to_resume()
597 open_mode = 'wb'
598 break
599 # Retry
600 count += 1
601 if count <= retries:
602 self.report_retry(count, retries)
603
604 if count > retries:
605 self.trouble(u'ERROR: giving up after %s retries' % retries)
606 return False
607
608 data_len = data.info().get('Content-length', None)
609 data_len_str = self.format_bytes(data_len)
610 byte_counter = 0
611 block_size = 1024
612 start = time.time()
613 while True:
614 # Download and write
615 before = time.time()
616 data_block = data.read(block_size)
617 after = time.time()
618 data_block_len = len(data_block)
619 if data_block_len == 0:
620 break
621 byte_counter += data_block_len
622
623 # Open file just in time
624 if stream is None:
625 try:
626 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
627 self.report_destination(filename)
628 except (OSError, IOError), err:
629 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
630 return False
631 try:
632 stream.write(data_block)
633 except (IOError, OSError), err:
634 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
635 return False
636 block_size = self.best_block_size(after - before, data_block_len)
637
638 # Progress message
639 percent_str = self.calc_percent(byte_counter, data_len)
640 eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
641 speed_str = self.calc_speed(start, time.time(), byte_counter)
642 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
643
644 # Apply rate limit
645 self.slow_down(start, byte_counter)
646
647 stream.close()
648 self.report_finish()
649 if data_len is not None and str(byte_counter) != data_len:
650 raise ContentTooShortError(byte_counter, long(data_len))
651 self.try_rename(tmpfilename, filename)
652 return True
653
class InfoExtractor(object):
    """Information Extractor class.

    Information extractors are the classes that, given a URL, extract
    information from the video (or videos) the URL refers to. This
    information includes the real video URL, the video title and simplified
    title, author and others. The information is stored in a dictionary
    which is then passed to the FileDownloader. The FileDownloader
    processes this information possibly downloading the video to the file
    system, among other possible outcomes. The dictionaries must include
    the following fields:

    id:         Video identifier.
    url:        Final video URL.
    uploader:   Nickname of the video uploader.
    title:      Literal title.
    stitle:     Simplified title.
    ext:        Video filename extension.
    format:     Video format.
    player_url: SWF Player URL (may be None).

    The following fields are optional. Their primary purpose is to allow
    youtube-dl to serve as the backend for a video search function, such
    as the one in youtube2mp3. They are only used when their respective
    forced printing functions are called:

    thumbnail:   Full URL to a video thumbnail image.
    description: One-line video description.

    Subclasses of this one should re-define the _real_initialize() and
    _real_extract() methods, as well as the suitable() static method.
    Probably, they should also be instantiated and added to the main
    downloader.
    """

    _ready = False
    _downloader = None

    def __init__(self, downloader=None):
        """Constructor. Receives an optional downloader."""
        self._ready = False
        self.set_downloader(downloader)

    @staticmethod
    def suitable(url):
        """Tell whether this IE can handle url. The base class never can."""
        return False

    def initialize(self):
        """Run the real initialization (authentication, etc) only once."""
        if self._ready:
            return
        self._real_initialize()
        self._ready = True

    def extract(self, url):
        """Initialize if needed, then return the result of the real extraction."""
        self.initialize()
        return self._real_extract(url)

    def set_downloader(self, downloader):
        """Sets the downloader for this IE."""
        self._downloader = downloader

    def _real_initialize(self):
        """Real initialization process. Redefine in subclasses."""
        pass

    def _real_extract(self, url):
        """Real extraction process. Redefine in subclasses."""
        pass
724
class YoutubeIE(InfoExtractor):
    """Information extractor for youtube.com."""

    # Accepts full YouTube URLs as well as bare video ids; the video id is
    # captured by group 2, group 1 being the optional URL prefix.
    _VALID_URL = r'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$'
    # Page requested to set the language before anything else
    _LANG_URL = r'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
    # Address the login form is posted to
    _LOGIN_URL = 'https://www.youtube.com/signup?next=/&gl=US&hl=en'
    # Address the age confirmation form is posted to
    _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
    # Machine name looked up in the .netrc file when 'usenetrc' is set
    _NETRC_MACHINE = 'youtube'
    # Listed in order of quality
    _available_formats = ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13']
    # Filename extension for a format code; not every format above is listed
    _video_extensions = {
        '13': '3gp',
        '17': 'mp4',
        '18': 'mp4',
        '22': 'mp4',
        '37': 'mp4',
        '38': 'video', # You actually don't know if this will be MOV, AVI or whatever
        '43': 'webm',
        '45': 'webm',
    }
745
@staticmethod
def suitable(url):
    """Tell whether url matches the pattern of valid YouTube URLs."""
    matched = re.match(YoutubeIE._VALID_URL, url)
    return matched is not None
749
def report_lang(self):
    """Report attempt to set language."""
    message = u'[youtube] Setting language'
    self._downloader.to_screen(message)
753
def report_login(self):
    """Report attempt to log in."""
    message = u'[youtube] Logging in'
    self._downloader.to_screen(message)
757
def report_age_confirmation(self):
    """Report attempt to confirm age."""
    message = u'[youtube] Confirming age'
    self._downloader.to_screen(message)
761
def report_video_webpage_download(self, video_id):
    """Report attempt to download the webpage of video video_id."""
    message = u'[youtube] %s: Downloading video webpage' % video_id
    self._downloader.to_screen(message)
765
def report_video_info_webpage_download(self, video_id):
    """Report attempt to download the info webpage of video video_id."""
    message = u'[youtube] %s: Downloading video info webpage' % video_id
    self._downloader.to_screen(message)
769
def report_information_extraction(self, video_id):
    """Report attempt to extract the information of video video_id."""
    message = u'[youtube] %s: Extracting video information' % video_id
    self._downloader.to_screen(message)
773
def report_unavailable_format(self, video_id, format):
    """Report that the given format is not available for video video_id."""
    message = u'[youtube] %s: Format %s not available' % (video_id, format)
    self._downloader.to_screen(message)
777
def report_rtmp_download(self):
    """Indicate the download will use the RTMP protocol."""
    message = u'[youtube] RTMP download detected'
    self._downloader.to_screen(message)
781
782 def _real_initialize(self):
783 if self._downloader is None:
784 return
785
786 username = None
787 password = None
788 downloader_params = self._downloader.params
789
790 # Attempt to use provided username and password or .netrc data
791 if downloader_params.get('username', None) is not None:
792 username = downloader_params['username']
793 password = downloader_params['password']
794 elif downloader_params.get('usenetrc', False):
795 try:
796 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
797 if info is not None:
798 username = info[0]
799 password = info[2]
800 else:
801 raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
802 except (IOError, netrc.NetrcParseError), err:
803 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
804 return
805
806 # Set language
807 request = urllib2.Request(self._LANG_URL, None, std_headers)
808 try:
809 self.report_lang()
810 urllib2.urlopen(request).read()
811 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
812 self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
813 return
814
815 # No authentication to be performed
816 if username is None:
817 return
818
819 # Log in
820 login_form = {
821 'current_form': 'loginForm',
822 'next': '/',
823 'action_login': 'Log In',
824 'username': username,
825 'password': password,
826 }
827 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
828 try:
829 self.report_login()
830 login_results = urllib2.urlopen(request).read()
831 if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
832 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
833 return
834 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
835 self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
836 return
837
838 # Confirm age
839 age_form = {
840 'next_url': '/',
841 'action_confirm': 'Confirm',
842 }
843 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
844 try:
845 self.report_age_confirmation()
846 age_results = urllib2.urlopen(request).read()
847 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
848 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
849 return
850
851 def _real_extract(self, url):
852 # Extract video id from URL
853 mobj = re.match(self._VALID_URL, url)
854 if mobj is None:
855 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
856 return
857 video_id = mobj.group(2)
858
859 # Get video webpage
860 self.report_video_webpage_download(video_id)
861 request = urllib2.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&amp;has_verified=1' % video_id, None, std_headers)
862 try:
863 video_webpage = urllib2.urlopen(request).read()
864 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
865 self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
866 return
867
868 # Attempt to extract SWF player URL
869 mobj = re.search(r'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage)
870 if mobj is not None:
871 player_url = re.sub(r'\\(.)', r'\1', mobj.group(1))
872 else:
873 player_url = None
874
875 # Get video info
876 self.report_video_info_webpage_download(video_id)
877 for el_type in ['&el=embedded', '&el=detailpage', '&el=vevo', '']:
878 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en'
879 % (video_id, el_type))
880 request = urllib2.Request(video_info_url, None, std_headers)
881 try:
882 video_info_webpage = urllib2.urlopen(request).read()
883 video_info = parse_qs(video_info_webpage)
884 if 'token' in video_info:
885 break
886 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
887 self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
888 return
889 if 'token' not in video_info:
890 if 'reason' in video_info:
891 self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0].decode('utf-8'))
892 else:
893 self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason')
894 return
895
896 # Start extracting information
897 self.report_information_extraction(video_id)
898
899 # uploader
900 if 'author' not in video_info:
901 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
902 return
903 video_uploader = urllib.unquote_plus(video_info['author'][0])
904
905 # title
906 if 'title' not in video_info:
907 self._downloader.trouble(u'ERROR: unable to extract video title')
908 return
909 video_title = urllib.unquote_plus(video_info['title'][0])
910 video_title = video_title.decode('utf-8')
911 video_title = sanitize_title(video_title)
912
913 # simplified title
914 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
915 simple_title = simple_title.strip(ur'_')
916
917 # thumbnail image
918 if 'thumbnail_url' not in video_info:
919 self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
920 video_thumbnail = ''
921 else: # don't panic if we can't find it
922 video_thumbnail = urllib.unquote_plus(video_info['thumbnail_url'][0])
923
924 # upload date
925 upload_date = u'NA'
926 mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
927 if mobj is not None:
928 upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
929 format_expressions = ['%d %B %Y', '%B %d %Y']
930 for expression in format_expressions:
931 try:
932 upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
933 except:
934 pass
935
936 # description
937 video_description = 'No description available.'
938 if self._downloader.params.get('forcedescription', False):
939 mobj = re.search(r'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage)
940 if mobj is not None:
941 video_description = mobj.group(1)
942
943 # token
944 video_token = urllib.unquote_plus(video_info['token'][0])
945
946 # Decide which formats to download
947 req_format = self._downloader.params.get('format', None)
948 get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token)
949
950 if 'fmt_url_map' in video_info:
951 url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(','))
952 format_limit = self._downloader.params.get('format_limit', None)
953 if format_limit is not None and format_limit in self._available_formats:
954 format_list = self._available_formats[self._available_formats.index(format_limit):]
955 else:
956 format_list = self._available_formats
957 existing_formats = [x for x in format_list if x in url_map]
958 if len(existing_formats) == 0:
959 self._downloader.trouble(u'ERROR: no known formats available for video')
960 return
961 if req_format is None:
962 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
963 elif req_format == '-1':
964 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
965 else:
966 if req_format in url_map:
967 video_url_list = [(req_format, url_map[req_format])] # Specific format
968 else:
969 video_url_list = [(req_format, get_video_template % req_format)] # Specific format
970
971 elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
972 self.report_rtmp_download()
973 video_url_list = [(None, video_info['conn'][0])]
974
975 else:
976 self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info')
977 return
978
979 for format_param, video_real_url in video_url_list:
980 # At this point we have a new video
981 self._downloader.increment_downloads()
982
983 # Extension
984 video_extension = self._video_extensions.get(format_param, 'flv')
985
986 # Find the video URL in fmt_url_map or conn paramters
987 try:
988 # Process video information
989 self._downloader.process_info({
990 'id': video_id.decode('utf-8'),
991 'url': video_real_url.decode('utf-8'),
992 'uploader': video_uploader.decode('utf-8'),
993 'upload_date': upload_date,
994 'title': video_title,
995 'stitle': simple_title,
996 'ext': video_extension.decode('utf-8'),
997 'format': (format_param is None and u'NA' or format_param.decode('utf-8')),
998 'thumbnail': video_thumbnail.decode('utf-8'),
999 'description': video_description.decode('utf-8'),
1000 'player_url': player_url,
1001 })
1002 except UnavailableVideoError, err:
1003 self._downloader.trouble(u'ERROR: unable to download video (format may not be available)')
1004
1005
1006 class MetacafeIE(InfoExtractor):
1007 """Information Extractor for metacafe.com."""
1008
1009 _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
1010 _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
1011 _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
1012 _youtube_ie = None
1013
1014 def __init__(self, youtube_ie, downloader=None):
1015 InfoExtractor.__init__(self, downloader)
1016 self._youtube_ie = youtube_ie
1017
1018 @staticmethod
1019 def suitable(url):
1020 return (re.match(MetacafeIE._VALID_URL, url) is not None)
1021
1022 def report_disclaimer(self):
1023 """Report disclaimer retrieval."""
1024 self._downloader.to_screen(u'[metacafe] Retrieving disclaimer')
1025
1026 def report_age_confirmation(self):
1027 """Report attempt to confirm age."""
1028 self._downloader.to_screen(u'[metacafe] Confirming age')
1029
1030 def report_download_webpage(self, video_id):
1031 """Report webpage download."""
1032 self._downloader.to_screen(u'[metacafe] %s: Downloading webpage' % video_id)
1033
1034 def report_extraction(self, video_id):
1035 """Report information extraction."""
1036 self._downloader.to_screen(u'[metacafe] %s: Extracting information' % video_id)
1037
1038 def _real_initialize(self):
1039 # Retrieve disclaimer
1040 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
1041 try:
1042 self.report_disclaimer()
1043 disclaimer = urllib2.urlopen(request).read()
1044 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1045 self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
1046 return
1047
1048 # Confirm age
1049 disclaimer_form = {
1050 'filters': '0',
1051 'submit': "Continue - I'm over 18",
1052 }
1053 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
1054 try:
1055 self.report_age_confirmation()
1056 disclaimer = urllib2.urlopen(request).read()
1057 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1058 self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
1059 return
1060
1061 def _real_extract(self, url):
1062 # Extract id and simplified title from URL
1063 mobj = re.match(self._VALID_URL, url)
1064 if mobj is None:
1065 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1066 return
1067
1068 video_id = mobj.group(1)
1069
1070 # Check if video comes from YouTube
1071 mobj2 = re.match(r'^yt-(.*)$', video_id)
1072 if mobj2 is not None:
1073 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
1074 return
1075
1076 # At this point we have a new video
1077 self._downloader.increment_downloads()
1078
1079 simple_title = mobj.group(2).decode('utf-8')
1080
1081 # Retrieve video webpage to extract further information
1082 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
1083 try:
1084 self.report_download_webpage(video_id)
1085 webpage = urllib2.urlopen(request).read()
1086 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1087 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1088 return
1089
1090 # Extract URL, uploader and title from webpage
1091 self.report_extraction(video_id)
1092 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
1093 if mobj is not None:
1094 mediaURL = urllib.unquote(mobj.group(1))
1095 video_extension = mediaURL[-3:]
1096
1097 # Extract gdaKey if available
1098 mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
1099 if mobj is None:
1100 video_url = mediaURL
1101 else:
1102 gdaKey = mobj.group(1)
1103 video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
1104 else:
1105 mobj = re.search(r' name="flashvars" value="(.*?)"', webpage)
1106 if mobj is None:
1107 self._downloader.trouble(u'ERROR: unable to extract media URL')
1108 return
1109 vardict = parse_qs(mobj.group(1))
1110 if 'mediaData' not in vardict:
1111 self._downloader.trouble(u'ERROR: unable to extract media URL')
1112 return
1113 mobj = re.search(r'"mediaURL":"(http.*?)","key":"(.*?)"', vardict['mediaData'][0])
1114 if mobj is None:
1115 self._downloader.trouble(u'ERROR: unable to extract media URL')
1116 return
1117 mediaURL = mobj.group(1).replace('\\/', '/')
1118 video_extension = mediaURL[-3:]
1119 video_url = '%s?__gda__=%s' % (mediaURL, mobj.group(2))
1120
1121 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
1122 if mobj is None:
1123 self._downloader.trouble(u'ERROR: unable to extract title')
1124 return
1125 video_title = mobj.group(1).decode('utf-8')
1126 video_title = sanitize_title(video_title)
1127
1128 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
1129 if mobj is None:
1130 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1131 return
1132 video_uploader = mobj.group(1)
1133
1134 try:
1135 # Process video information
1136 self._downloader.process_info({
1137 'id': video_id.decode('utf-8'),
1138 'url': video_url.decode('utf-8'),
1139 'uploader': video_uploader.decode('utf-8'),
1140 'upload_date': u'NA',
1141 'title': video_title,
1142 'stitle': simple_title,
1143 'ext': video_extension.decode('utf-8'),
1144 'format': u'NA',
1145 'player_url': None,
1146 })
1147 except UnavailableVideoError:
1148 self._downloader.trouble(u'ERROR: unable to download video')
1149
1150
1151 class DailymotionIE(InfoExtractor):
1152 """Information Extractor for Dailymotion"""
1153
1154 _VALID_URL = r'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)'
1155
1156 def __init__(self, downloader=None):
1157 InfoExtractor.__init__(self, downloader)
1158
1159 @staticmethod
1160 def suitable(url):
1161 return (re.match(DailymotionIE._VALID_URL, url) is not None)
1162
1163 def report_download_webpage(self, video_id):
1164 """Report webpage download."""
1165 self._downloader.to_screen(u'[dailymotion] %s: Downloading webpage' % video_id)
1166
1167 def report_extraction(self, video_id):
1168 """Report information extraction."""
1169 self._downloader.to_screen(u'[dailymotion] %s: Extracting information' % video_id)
1170
1171 def _real_initialize(self):
1172 return
1173
1174 def _real_extract(self, url):
1175 # Extract id and simplified title from URL
1176 mobj = re.match(self._VALID_URL, url)
1177 if mobj is None:
1178 self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
1179 return
1180
1181 # At this point we have a new video
1182 self._downloader.increment_downloads()
1183 video_id = mobj.group(1)
1184
1185 simple_title = mobj.group(2).decode('utf-8')
1186 video_extension = 'flv'
1187
1188 # Retrieve video webpage to extract further information
1189 request = urllib2.Request(url)
1190 try:
1191 self.report_download_webpage(video_id)
1192 webpage = urllib2.urlopen(request).read()
1193 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1194 self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
1195 return
1196
1197 # Extract URL, uploader and title from webpage
1198 self.report_extraction(video_id)
1199 mobj = re.search(r'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage)
1200 if mobj is None:
1201 self._downloader.trouble(u'ERROR: unable to extract media URL')
1202 return
1203 mediaURL = urllib.unquote(mobj.group(1))
1204
1205 # if needed add http://www.dailymotion.com/ if relative URL
1206
1207 video_url = mediaURL
1208
1209 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>'
1210 mobj = re.search(r'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage)
1211 if mobj is None:
1212 self._downloader.trouble(u'ERROR: unable to extract title')
1213 return
1214 video_title = mobj.group(1).decode('utf-8')
1215 video_title = sanitize_title(video_title)
1216
1217 mobj = re.search(r'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage)
1218 if mobj is None:
1219 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
1220 return
1221 video_uploader = mobj.group(1)
1222
1223 try:
1224 # Process video information
1225 self._downloader.process_info({
1226 'id': video_id.decode('utf-8'),
1227 'url': video_url.decode('utf-8'),
1228 'uploader': video_uploader.decode('utf-8'),
1229 'upload_date': u'NA',
1230 'title': video_title,
1231 'stitle': simple_title,
1232 'ext': video_extension.decode('utf-8'),
1233 'format': u'NA',
1234 'player_url': None,
1235 })
1236 except UnavailableVideoError:
1237 self._downloader.trouble(u'ERROR: unable to download video')
1238
1239 class GoogleIE(InfoExtractor):
1240 """Information extractor for video.google.com."""
1241
1242 _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1243
1244 def __init__(self, downloader=None):
1245 InfoExtractor.__init__(self, downloader)
1246
1247 @staticmethod
1248 def suitable(url):
1249 return (re.match(GoogleIE._VALID_URL, url) is not None)
1250
1251 def report_download_webpage(self, video_id):
1252 """Report webpage download."""
1253 self._downloader.to_screen(u'[video.google] %s: Downloading webpage' % video_id)
1254
1255 def report_extraction(self, video_id):
1256 """Report information extraction."""
1257 self._downloader.to_screen(u'[video.google] %s: Extracting information' % video_id)
1258
1259 def _real_initialize(self):
1260 return
1261
1262 def _real_extract(self, url):
1263 # Extract id from URL
1264 mobj = re.match(self._VALID_URL, url)
1265 if mobj is None:
1266 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1267 return
1268
1269 # At this point we have a new video
1270 self._downloader.increment_downloads()
1271 video_id = mobj.group(1)
1272
1273 video_extension = 'mp4'
1274
1275 # Retrieve video webpage to extract further information
1276 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1277 try:
1278 self.report_download_webpage(video_id)
1279 webpage = urllib2.urlopen(request).read()
1280 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1281 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1282 return
1283
1284 # Extract URL, uploader, and title from webpage
1285 self.report_extraction(video_id)
1286 mobj = re.search(r"download_url:'([^']+)'", webpage)
1287 if mobj is None:
1288 video_extension = 'flv'
1289 mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1290 if mobj is None:
1291 self._downloader.trouble(u'ERROR: unable to extract media URL')
1292 return
1293 mediaURL = urllib.unquote(mobj.group(1))
1294 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1295 mediaURL = mediaURL.replace('\\x26', '\x26')
1296
1297 video_url = mediaURL
1298
1299 mobj = re.search(r'<title>(.*)</title>', webpage)
1300 if mobj is None:
1301 self._downloader.trouble(u'ERROR: unable to extract title')
1302 return
1303 video_title = mobj.group(1).decode('utf-8')
1304 video_title = sanitize_title(video_title)
1305 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1306
1307 # Extract video description
1308 mobj = re.search(r'<span id=short-desc-content>([^<]*)</span>', webpage)
1309 if mobj is None:
1310 self._downloader.trouble(u'ERROR: unable to extract video description')
1311 return
1312 video_description = mobj.group(1).decode('utf-8')
1313 if not video_description:
1314 video_description = 'No description available.'
1315
1316 # Extract video thumbnail
1317 if self._downloader.params.get('forcethumbnail', False):
1318 request = urllib2.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id)))
1319 try:
1320 webpage = urllib2.urlopen(request).read()
1321 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1322 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1323 return
1324 mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage)
1325 if mobj is None:
1326 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1327 return
1328 video_thumbnail = mobj.group(1)
1329 else: # we need something to pass to process_info
1330 video_thumbnail = ''
1331
1332
1333 try:
1334 # Process video information
1335 self._downloader.process_info({
1336 'id': video_id.decode('utf-8'),
1337 'url': video_url.decode('utf-8'),
1338 'uploader': u'NA',
1339 'upload_date': u'NA',
1340 'title': video_title,
1341 'stitle': simple_title,
1342 'ext': video_extension.decode('utf-8'),
1343 'format': u'NA',
1344 'player_url': None,
1345 })
1346 except UnavailableVideoError:
1347 self._downloader.trouble(u'ERROR: unable to download video')
1348
1349
1350 class PhotobucketIE(InfoExtractor):
1351 """Information extractor for photobucket.com."""
1352
1353 _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1354
1355 def __init__(self, downloader=None):
1356 InfoExtractor.__init__(self, downloader)
1357
1358 @staticmethod
1359 def suitable(url):
1360 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1361
1362 def report_download_webpage(self, video_id):
1363 """Report webpage download."""
1364 self._downloader.to_screen(u'[photobucket] %s: Downloading webpage' % video_id)
1365
1366 def report_extraction(self, video_id):
1367 """Report information extraction."""
1368 self._downloader.to_screen(u'[photobucket] %s: Extracting information' % video_id)
1369
1370 def _real_initialize(self):
1371 return
1372
1373 def _real_extract(self, url):
1374 # Extract id from URL
1375 mobj = re.match(self._VALID_URL, url)
1376 if mobj is None:
1377 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1378 return
1379
1380 # At this point we have a new video
1381 self._downloader.increment_downloads()
1382 video_id = mobj.group(1)
1383
1384 video_extension = 'flv'
1385
1386 # Retrieve video webpage to extract further information
1387 request = urllib2.Request(url)
1388 try:
1389 self.report_download_webpage(video_id)
1390 webpage = urllib2.urlopen(request).read()
1391 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1392 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1393 return
1394
1395 # Extract URL, uploader, and title from webpage
1396 self.report_extraction(video_id)
1397 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1398 if mobj is None:
1399 self._downloader.trouble(u'ERROR: unable to extract media URL')
1400 return
1401 mediaURL = urllib.unquote(mobj.group(1))
1402
1403 video_url = mediaURL
1404
1405 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1406 if mobj is None:
1407 self._downloader.trouble(u'ERROR: unable to extract title')
1408 return
1409 video_title = mobj.group(1).decode('utf-8')
1410 video_title = sanitize_title(video_title)
1411 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1412
1413 video_uploader = mobj.group(2).decode('utf-8')
1414
1415 try:
1416 # Process video information
1417 self._downloader.process_info({
1418 'id': video_id.decode('utf-8'),
1419 'url': video_url.decode('utf-8'),
1420 'uploader': video_uploader,
1421 'upload_date': u'NA',
1422 'title': video_title,
1423 'stitle': simple_title,
1424 'ext': video_extension.decode('utf-8'),
1425 'format': u'NA',
1426 'player_url': None,
1427 })
1428 except UnavailableVideoError:
1429 self._downloader.trouble(u'ERROR: unable to download video')
1430
1431
1432 class YahooIE(InfoExtractor):
1433 """Information extractor for video.yahoo.com."""
1434
1435 # _VALID_URL matches all Yahoo! Video URLs
1436 # _VPAGE_URL matches only the extractable '/watch/' URLs
1437 _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1438 _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1439
1440 def __init__(self, downloader=None):
1441 InfoExtractor.__init__(self, downloader)
1442
1443 @staticmethod
1444 def suitable(url):
1445 return (re.match(YahooIE._VALID_URL, url) is not None)
1446
1447 def report_download_webpage(self, video_id):
1448 """Report webpage download."""
1449 self._downloader.to_screen(u'[video.yahoo] %s: Downloading webpage' % video_id)
1450
1451 def report_extraction(self, video_id):
1452 """Report information extraction."""
1453 self._downloader.to_screen(u'[video.yahoo] %s: Extracting information' % video_id)
1454
1455 def _real_initialize(self):
1456 return
1457
1458 def _real_extract(self, url, new_video=True):
1459 # Extract ID from URL
1460 mobj = re.match(self._VALID_URL, url)
1461 if mobj is None:
1462 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1463 return
1464
1465 # At this point we have a new video
1466 self._downloader.increment_downloads()
1467 video_id = mobj.group(2)
1468 video_extension = 'flv'
1469
1470 # Rewrite valid but non-extractable URLs as
1471 # extractable English language /watch/ URLs
1472 if re.match(self._VPAGE_URL, url) is None:
1473 request = urllib2.Request(url)
1474 try:
1475 webpage = urllib2.urlopen(request).read()
1476 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1477 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1478 return
1479
1480 mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1481 if mobj is None:
1482 self._downloader.trouble(u'ERROR: Unable to extract id field')
1483 return
1484 yahoo_id = mobj.group(1)
1485
1486 mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1487 if mobj is None:
1488 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1489 return
1490 yahoo_vid = mobj.group(1)
1491
1492 url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1493 return self._real_extract(url, new_video=False)
1494
1495 # Retrieve video webpage to extract further information
1496 request = urllib2.Request(url)
1497 try:
1498 self.report_download_webpage(video_id)
1499 webpage = urllib2.urlopen(request).read()
1500 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1501 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1502 return
1503
1504 # Extract uploader and title from webpage
1505 self.report_extraction(video_id)
1506 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1507 if mobj is None:
1508 self._downloader.trouble(u'ERROR: unable to extract video title')
1509 return
1510 video_title = mobj.group(1).decode('utf-8')
1511 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1512
1513 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1514 if mobj is None:
1515 self._downloader.trouble(u'ERROR: unable to extract video uploader')
1516 return
1517 video_uploader = mobj.group(1).decode('utf-8')
1518
1519 # Extract video thumbnail
1520 mobj = re.search(r'<link rel="image_src" href="(.*)" />', webpage)
1521 if mobj is None:
1522 self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
1523 return
1524 video_thumbnail = mobj.group(1).decode('utf-8')
1525
1526 # Extract video description
1527 mobj = re.search(r'<meta name="description" content="(.*)" />', webpage)
1528 if mobj is None:
1529 self._downloader.trouble(u'ERROR: unable to extract video description')
1530 return
1531 video_description = mobj.group(1).decode('utf-8')
1532 if not video_description: video_description = 'No description available.'
1533
1534 # Extract video height and width
1535 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1536 if mobj is None:
1537 self._downloader.trouble(u'ERROR: unable to extract video height')
1538 return
1539 yv_video_height = mobj.group(1)
1540
1541 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1542 if mobj is None:
1543 self._downloader.trouble(u'ERROR: unable to extract video width')
1544 return
1545 yv_video_width = mobj.group(1)
1546
1547 # Retrieve video playlist to extract media URL
1548 # I'm not completely sure what all these options are, but we
1549 # seem to need most of them, otherwise the server sends a 401.
1550 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR' # not sure what this represents
1551 yv_bitrate = '700' # according to Wikipedia this is hard-coded
1552 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1553 '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1554 '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1555 try:
1556 self.report_download_webpage(video_id)
1557 webpage = urllib2.urlopen(request).read()
1558 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1559 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1560 return
1561
1562 # Extract media URL from playlist XML
1563 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1564 if mobj is None:
1565 self._downloader.trouble(u'ERROR: Unable to extract media URL')
1566 return
1567 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1568 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1569
1570 try:
1571 # Process video information
1572 self._downloader.process_info({
1573 'id': video_id.decode('utf-8'),
1574 'url': video_url,
1575 'uploader': video_uploader,
1576 'upload_date': u'NA',
1577 'title': video_title,
1578 'stitle': simple_title,
1579 'ext': video_extension.decode('utf-8'),
1580 'thumbnail': video_thumbnail.decode('utf-8'),
1581 'description': video_description,
1582 'thumbnail': video_thumbnail,
1583 'description': video_description,
1584 'player_url': None,
1585 })
1586 except UnavailableVideoError:
1587 self._downloader.trouble(u'ERROR: unable to download video')
1588
1589
1590 class GenericIE(InfoExtractor):
1591 """Generic last-resort information extractor."""
1592
1593 def __init__(self, downloader=None):
1594 InfoExtractor.__init__(self, downloader)
1595
1596 @staticmethod
1597 def suitable(url):
1598 return True
1599
1600 def report_download_webpage(self, video_id):
1601 """Report webpage download."""
1602 self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
1603 self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
1604
1605 def report_extraction(self, video_id):
1606 """Report information extraction."""
1607 self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
1608
1609 def _real_initialize(self):
1610 return
1611
1612 def _real_extract(self, url):
1613 # At this point we have a new video
1614 self._downloader.increment_downloads()
1615
1616 video_id = url.split('/')[-1]
1617 request = urllib2.Request(url)
1618 try:
1619 self.report_download_webpage(video_id)
1620 webpage = urllib2.urlopen(request).read()
1621 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1622 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1623 return
1624 except ValueError, err:
1625 # since this is the last-resort InfoExtractor, if
1626 # this error is thrown, it'll be thrown here
1627 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1628 return
1629
1630 self.report_extraction(video_id)
1631 # Start with something easy: JW Player in SWFObject
1632 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1633 if mobj is None:
1634 # Broaden the search a little bit
1635 mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1636 if mobj is None:
1637 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1638 return
1639
1640 # It's possible that one of the regexes
1641 # matched, but returned an empty group:
1642 if mobj.group(1) is None:
1643 self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1644 return
1645
1646 video_url = urllib.unquote(mobj.group(1))
1647 video_id = os.path.basename(video_url)
1648
1649 # here's a fun little line of code for you:
1650 video_extension = os.path.splitext(video_id)[1][1:]
1651 video_id = os.path.splitext(video_id)[0]
1652
1653 # it's tempting to parse this further, but you would
1654 # have to take into account all the variations like
1655 # Video Title - Site Name
1656 # Site Name | Video Title
1657 # Video Title - Tagline | Site Name
1658 # and so on and so forth; it's just not practical
1659 mobj = re.search(r'<title>(.*)</title>', webpage)
1660 if mobj is None:
1661 self._downloader.trouble(u'ERROR: unable to extract title')
1662 return
1663 video_title = mobj.group(1).decode('utf-8')
1664 video_title = sanitize_title(video_title)
1665 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1666
1667 # video uploader is domain name
1668 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1669 if mobj is None:
1670 self._downloader.trouble(u'ERROR: unable to extract title')
1671 return
1672 video_uploader = mobj.group(1).decode('utf-8')
1673
1674 try:
1675 # Process video information
1676 self._downloader.process_info({
1677 'id': video_id.decode('utf-8'),
1678 'url': video_url.decode('utf-8'),
1679 'uploader': video_uploader,
1680 'upload_date': u'NA',
1681 'title': video_title,
1682 'stitle': simple_title,
1683 'ext': video_extension.decode('utf-8'),
1684 'format': u'NA',
1685 'player_url': None,
1686 })
1687 except UnavailableVideoError, err:
1688 self._downloader.trouble(u'ERROR: unable to download video')
1689
1690
1691 class YoutubeSearchIE(InfoExtractor):
1692 """Information Extractor for YouTube search queries."""
1693 _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1694 _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1695 _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1696 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1697 _youtube_ie = None
1698 _max_youtube_results = 1000
1699
1700 def __init__(self, youtube_ie, downloader=None):
1701 InfoExtractor.__init__(self, downloader)
1702 self._youtube_ie = youtube_ie
1703
1704 @staticmethod
1705 def suitable(url):
1706 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1707
1708 def report_download_page(self, query, pagenum):
1709 """Report attempt to download playlist page with given number."""
1710 query = query.decode(preferredencoding())
1711 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1712
1713 def _real_initialize(self):
1714 self._youtube_ie.initialize()
1715
1716 def _real_extract(self, query):
1717 mobj = re.match(self._VALID_QUERY, query)
1718 if mobj is None:
1719 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1720 return
1721
1722 prefix, query = query.split(':')
1723 prefix = prefix[8:]
1724 query = query.encode('utf-8')
1725 if prefix == '':
1726 self._download_n_results(query, 1)
1727 return
1728 elif prefix == 'all':
1729 self._download_n_results(query, self._max_youtube_results)
1730 return
1731 else:
1732 try:
1733 n = long(prefix)
1734 if n <= 0:
1735 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1736 return
1737 elif n > self._max_youtube_results:
1738 self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n))
1739 n = self._max_youtube_results
1740 self._download_n_results(query, n)
1741 return
1742 except ValueError: # parsing prefix as integer fails
1743 self._download_n_results(query, 1)
1744 return
1745
1746 def _download_n_results(self, query, n):
1747 """Downloads a specified number of results for a query"""
1748
1749 video_ids = []
1750 already_seen = set()
1751 pagenum = 1
1752
1753 while True:
1754 self.report_download_page(query, pagenum)
1755 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1756 request = urllib2.Request(result_url, None, std_headers)
1757 try:
1758 page = urllib2.urlopen(request).read()
1759 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1760 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1761 return
1762
1763 # Extract video identifiers
1764 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1765 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1766 if video_id not in already_seen:
1767 video_ids.append(video_id)
1768 already_seen.add(video_id)
1769 if len(video_ids) == n:
1770 # Specified n videos reached
1771 for id in video_ids:
1772 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1773 return
1774
1775 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1776 for id in video_ids:
1777 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1778 return
1779
1780 pagenum = pagenum + 1
1781
1782 class GoogleSearchIE(InfoExtractor):
1783 """Information Extractor for Google Video search queries."""
1784 _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
1785 _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
1786 _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
1787 _MORE_PAGES_INDICATOR = r'<span>Next</span>'
1788 _google_ie = None
1789 _max_google_results = 1000
1790
1791 def __init__(self, google_ie, downloader=None):
1792 InfoExtractor.__init__(self, downloader)
1793 self._google_ie = google_ie
1794
1795 @staticmethod
1796 def suitable(url):
1797 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)
1798
1799 def report_download_page(self, query, pagenum):
1800 """Report attempt to download playlist page with given number."""
1801 query = query.decode(preferredencoding())
1802 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))
1803
1804 def _real_initialize(self):
1805 self._google_ie.initialize()
1806
1807 def _real_extract(self, query):
1808 mobj = re.match(self._VALID_QUERY, query)
1809 if mobj is None:
1810 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1811 return
1812
1813 prefix, query = query.split(':')
1814 prefix = prefix[8:]
1815 query = query.encode('utf-8')
1816 if prefix == '':
1817 self._download_n_results(query, 1)
1818 return
1819 elif prefix == 'all':
1820 self._download_n_results(query, self._max_google_results)
1821 return
1822 else:
1823 try:
1824 n = long(prefix)
1825 if n <= 0:
1826 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1827 return
1828 elif n > self._max_google_results:
1829 self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n))
1830 n = self._max_google_results
1831 self._download_n_results(query, n)
1832 return
1833 except ValueError: # parsing prefix as integer fails
1834 self._download_n_results(query, 1)
1835 return
1836
1837 def _download_n_results(self, query, n):
1838 """Downloads a specified number of results for a query"""
1839
1840 video_ids = []
1841 already_seen = set()
1842 pagenum = 1
1843
1844 while True:
1845 self.report_download_page(query, pagenum)
1846 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1847 request = urllib2.Request(result_url, None, std_headers)
1848 try:
1849 page = urllib2.urlopen(request).read()
1850 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1851 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1852 return
1853
1854 # Extract video identifiers
1855 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1856 video_id = mobj.group(1)
1857 if video_id not in already_seen:
1858 video_ids.append(video_id)
1859 already_seen.add(video_id)
1860 if len(video_ids) == n:
1861 # Specified n videos reached
1862 for id in video_ids:
1863 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1864 return
1865
1866 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1867 for id in video_ids:
1868 self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id)
1869 return
1870
1871 pagenum = pagenum + 1
1872
1873 class YahooSearchIE(InfoExtractor):
1874 """Information Extractor for Yahoo! Video search queries."""
1875 _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
1876 _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
1877 _VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
1878 _MORE_PAGES_INDICATOR = r'\s*Next'
1879 _yahoo_ie = None
1880 _max_yahoo_results = 1000
1881
1882 def __init__(self, yahoo_ie, downloader=None):
1883 InfoExtractor.__init__(self, downloader)
1884 self._yahoo_ie = yahoo_ie
1885
1886 @staticmethod
1887 def suitable(url):
1888 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)
1889
1890 def report_download_page(self, query, pagenum):
1891 """Report attempt to download playlist page with given number."""
1892 query = query.decode(preferredencoding())
1893 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))
1894
1895 def _real_initialize(self):
1896 self._yahoo_ie.initialize()
1897
1898 def _real_extract(self, query):
1899 mobj = re.match(self._VALID_QUERY, query)
1900 if mobj is None:
1901 self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1902 return
1903
1904 prefix, query = query.split(':')
1905 prefix = prefix[8:]
1906 query = query.encode('utf-8')
1907 if prefix == '':
1908 self._download_n_results(query, 1)
1909 return
1910 elif prefix == 'all':
1911 self._download_n_results(query, self._max_yahoo_results)
1912 return
1913 else:
1914 try:
1915 n = long(prefix)
1916 if n <= 0:
1917 self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1918 return
1919 elif n > self._max_yahoo_results:
1920 self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n))
1921 n = self._max_yahoo_results
1922 self._download_n_results(query, n)
1923 return
1924 except ValueError: # parsing prefix as integer fails
1925 self._download_n_results(query, 1)
1926 return
1927
1928 def _download_n_results(self, query, n):
1929 """Downloads a specified number of results for a query"""
1930
1931 video_ids = []
1932 already_seen = set()
1933 pagenum = 1
1934
1935 while True:
1936 self.report_download_page(query, pagenum)
1937 result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1938 request = urllib2.Request(result_url, None, std_headers)
1939 try:
1940 page = urllib2.urlopen(request).read()
1941 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1942 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1943 return
1944
1945 # Extract video identifiers
1946 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1947 video_id = mobj.group(1)
1948 if video_id not in already_seen:
1949 video_ids.append(video_id)
1950 already_seen.add(video_id)
1951 if len(video_ids) == n:
1952 # Specified n videos reached
1953 for id in video_ids:
1954 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1955 return
1956
1957 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1958 for id in video_ids:
1959 self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id)
1960 return
1961
1962 pagenum = pagenum + 1
1963
1964 class YoutubePlaylistIE(InfoExtractor):
1965 """Information Extractor for YouTube playlists."""
1966
1967 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1968 _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1969 _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1970 _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1971 _youtube_ie = None
1972
1973 def __init__(self, youtube_ie, downloader=None):
1974 InfoExtractor.__init__(self, downloader)
1975 self._youtube_ie = youtube_ie
1976
1977 @staticmethod
1978 def suitable(url):
1979 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1980
1981 def report_download_page(self, playlist_id, pagenum):
1982 """Report attempt to download playlist page with given number."""
1983 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1984
1985 def _real_initialize(self):
1986 self._youtube_ie.initialize()
1987
1988 def _real_extract(self, url):
1989 # Extract playlist id
1990 mobj = re.match(self._VALID_URL, url)
1991 if mobj is None:
1992 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1993 return
1994
1995 # Download playlist pages
1996 playlist_id = mobj.group(1)
1997 video_ids = []
1998 pagenum = 1
1999
2000 while True:
2001 self.report_download_page(playlist_id, pagenum)
2002 request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
2003 try:
2004 page = urllib2.urlopen(request).read()
2005 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2006 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2007 return
2008
2009 # Extract video identifiers
2010 ids_in_page = []
2011 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2012 if mobj.group(1) not in ids_in_page:
2013 ids_in_page.append(mobj.group(1))
2014 video_ids.extend(ids_in_page)
2015
2016 if re.search(self._MORE_PAGES_INDICATOR, page) is None:
2017 break
2018 pagenum = pagenum + 1
2019
2020 playliststart = self._downloader.params.get('playliststart', 1) - 1
2021 playlistend = self._downloader.params.get('playlistend', -1)
2022 video_ids = video_ids[playliststart:playlistend]
2023
2024 for id in video_ids:
2025 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2026 return
2027
2028 class YoutubeUserIE(InfoExtractor):
2029 """Information Extractor for YouTube users."""
2030
2031 _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
2032 _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
2033 _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
2034 _youtube_ie = None
2035
2036 def __init__(self, youtube_ie, downloader=None):
2037 InfoExtractor.__init__(self, downloader)
2038 self._youtube_ie = youtube_ie
2039
2040 @staticmethod
2041 def suitable(url):
2042 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
2043
2044 def report_download_page(self, username):
2045 """Report attempt to download user page."""
2046 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username))
2047
2048 def _real_initialize(self):
2049 self._youtube_ie.initialize()
2050
2051 def _real_extract(self, url):
2052 # Extract username
2053 mobj = re.match(self._VALID_URL, url)
2054 if mobj is None:
2055 self._downloader.trouble(u'ERROR: invalid url: %s' % url)
2056 return
2057
2058 # Download user page
2059 username = mobj.group(1)
2060 video_ids = []
2061 pagenum = 1
2062
2063 self.report_download_page(username)
2064 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
2065 try:
2066 page = urllib2.urlopen(request).read()
2067 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2068 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
2069 return
2070
2071 # Extract video identifiers
2072 ids_in_page = []
2073
2074 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
2075 if mobj.group(1) not in ids_in_page:
2076 ids_in_page.append(mobj.group(1))
2077 video_ids.extend(ids_in_page)
2078
2079 playliststart = self._downloader.params.get('playliststart', 1) - 1
2080 playlistend = self._downloader.params.get('playlistend', -1)
2081 video_ids = video_ids[playliststart:playlistend]
2082
2083 for id in video_ids:
2084 self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
2085 return
2086
2087 class DepositFilesIE(InfoExtractor):
2088 """Information extractor for depositfiles.com"""
2089
2090 _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)'
2091
2092 def __init__(self, downloader=None):
2093 InfoExtractor.__init__(self, downloader)
2094
2095 @staticmethod
2096 def suitable(url):
2097 return (re.match(DepositFilesIE._VALID_URL, url) is not None)
2098
2099 def report_download_webpage(self, file_id):
2100 """Report webpage download."""
2101 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id)
2102
2103 def report_extraction(self, file_id):
2104 """Report information extraction."""
2105 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id)
2106
2107 def _real_initialize(self):
2108 return
2109
2110 def _real_extract(self, url):
2111 # At this point we have a new file
2112 self._downloader.increment_downloads()
2113
2114 file_id = url.split('/')[-1]
2115 # Rebuild url in english locale
2116 url = 'http://depositfiles.com/en/files/' + file_id
2117
2118 # Retrieve file webpage with 'Free download' button pressed
2119 free_download_indication = { 'gateway_result' : '1' }
2120 request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers)
2121 try:
2122 self.report_download_webpage(file_id)
2123 webpage = urllib2.urlopen(request).read()
2124 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
2125 self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err))
2126 return
2127
2128 # Search for the real file URL
2129 mobj = re.search(r'<form action="(http://fileshare.+?)"', webpage)
2130 if (mobj is None) or (mobj.group(1) is None):
2131 # Try to figure out reason of the error.
2132 mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL)
2133 if (mobj is not None) and (mobj.group(1) is not None):
2134 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip()
2135 self._downloader.trouble(u'ERROR: %s' % restriction_message)
2136 else:
2137 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url)
2138 return
2139
2140 file_url = mobj.group(1)
2141 file_extension = os.path.splitext(file_url)[1][1:]
2142
2143 # Search for file title
2144 mobj = re.search(r'<b title="(.*?)">', webpage)
2145 if mobj is None:
2146 self._downloader.trouble(u'ERROR: unable to extract title')
2147 return
2148 file_title = mobj.group(1).decode('utf-8')
2149
2150 try:
2151 # Process file information
2152 self._downloader.process_info({
2153 'id': file_id.decode('utf-8'),
2154 'url': file_url.decode('utf-8'),
2155 'uploader': u'NA',
2156 'upload_date': u'NA',
2157 'title': file_title,
2158 'stitle': file_title,
2159 'ext': file_extension.decode('utf-8'),
2160 'format': u'NA',
2161 'player_url': None,
2162 })
2163 except UnavailableVideoError, err:
2164 self._downloader.trouble(u'ERROR: unable to download file')
2165
class PostProcessor(object):
    """Base class for post processors.

    A downloader keeps a chain of PostProcessor objects, registered
    through its add_post_processor() method. After a successful download
    it calls run() on each of them in turn: the first one receives the
    initial information and every following one receives whatever the
    previous one returned.

    The chain ends when the last PostProcessor has run or as soon as one
    of them returns None.

    Like InfoExtractor objects, PostProcessor objects take part in a
    "mutual registration" process with their downloader.
    """

    _downloader = None

    def __init__(self, downloader=None):
        self._downloader = downloader

    def set_downloader(self, downloader):
        """Attach this PP to the given downloader."""
        self._downloader = downloader

    def run(self, information):
        """Process one downloaded file.

        "information" is a dictionary like the ones built by
        InfoExtractors, with one additional field, "filepath", pointing
        to the downloaded file.

        Returning None stops the postprocessing chain. Otherwise the
        returned dictionary (possibly the received one with some fields
        changed) is passed on to the next object in the chain.

        A PostProcessingError may be raised from here; the calling
        downloader takes it into account.
        """
        # The base implementation passes the information through untouched
        return information
2211
### MAIN PROGRAM ###
if __name__ == '__main__':
    try:
        # Modules needed only when running the main program
        import getpass
        import optparse

        # Function to update the program file with the latest stable version
        def update_self(downloader, filename):
            """Overwrite the program file with the latest published version.

            The downloader is only used to print messages. The program
            exits if the file is not writable.
            """
            if not os.access (filename, os.W_OK):
                sys.exit('ERROR: no write permissions on %s' % filename)

            downloader.to_screen('Updating to latest stable version...')
            # NOTE(review): the new program text is fetched over plain HTTP
            # and written without any integrity check.
            latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION'
            latest_version = urllib.urlopen(latest_url).read().strip()
            prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
            newcontent = urllib.urlopen(prog_url).read()
            # Binary mode keeps the downloaded bytes untouched on every
            # platform; the handle is closed even if the write fails.
            stream = open(filename, 'wb')
            try:
                stream.write(newcontent)
            finally:
                stream.close()
            downloader.to_screen('Updated to version %s' % latest_version)

        # Parse command line
        parser = optparse.OptionParser(
            usage='Usage: %prog [options] url...',
            version='2010.12.09',
            conflict_handler='resolve',
        )

        parser.add_option('-h', '--help',
                action='help', help='print this help text and exit')
        parser.add_option('-v', '--version',
                action='version', help='print program version and exit')
        parser.add_option('-U', '--update',
                action='store_true', dest='update_self', help='update this program to latest stable version')
        parser.add_option('-i', '--ignore-errors',
                action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
        parser.add_option('-r', '--rate-limit',
                dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
        parser.add_option('-R', '--retries',
                dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
        parser.add_option('--playlist-start',
                dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
        parser.add_option('--playlist-end',
                dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)

        authentication = optparse.OptionGroup(parser, 'Authentication Options')
        authentication.add_option('-u', '--username',
                dest='username', metavar='USERNAME', help='account username')
        authentication.add_option('-p', '--password',
                dest='password', metavar='PASSWORD', help='account password')
        authentication.add_option('-n', '--netrc',
                action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
        parser.add_option_group(authentication)

        video_format = optparse.OptionGroup(parser, 'Video Format Options')
        video_format.add_option('-f', '--format',
                action='store', dest='format', metavar='FORMAT', help='video format code')
        video_format.add_option('-m', '--mobile-version',
                action='store_const', dest='format', help='alias for -f 17', const='17')
        video_format.add_option('--all-formats',
                action='store_const', dest='format', help='download all available video formats', const='-1')
        video_format.add_option('--max-quality',
                action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')
        video_format.add_option('-b', '--best-quality',
                action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)')
        parser.add_option_group(video_format)

        verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
        verbosity.add_option('-q', '--quiet',
                action='store_true', dest='quiet', help='activates quiet mode', default=False)
        verbosity.add_option('-s', '--simulate',
                action='store_true', dest='simulate', help='do not download video', default=False)
        verbosity.add_option('-g', '--get-url',
                action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
        verbosity.add_option('-e', '--get-title',
                action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
        verbosity.add_option('--get-thumbnail',
                action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
        verbosity.add_option('--get-description',
                action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
        verbosity.add_option('--no-progress',
                action='store_true', dest='noprogress', help='do not print progress bar', default=False)
        parser.add_option_group(verbosity)

        filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
        filesystem.add_option('-t', '--title',
                action='store_true', dest='usetitle', help='use title in file name', default=False)
        filesystem.add_option('-l', '--literal',
                action='store_true', dest='useliteral', help='use literal title in file name', default=False)
        filesystem.add_option('-A', '--auto-number',
                action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False)
        filesystem.add_option('-o', '--output',
                dest='outtmpl', metavar='TEMPLATE', help='output filename template')
        filesystem.add_option('-a', '--batch-file',
                dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
        filesystem.add_option('-w', '--no-overwrites',
                action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
        filesystem.add_option('-c', '--continue',
                action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
        filesystem.add_option('--cookies',
                dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
        parser.add_option_group(filesystem)

        (opts, args) = parser.parse_args()

        # Open appropriate CookieJar
        if opts.cookiefile is None:
            jar = cookielib.CookieJar()
        else:
            try:
                jar = cookielib.MozillaCookieJar(opts.cookiefile)
                if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
                    jar.load()
            except (IOError, OSError):
                sys.exit(u'ERROR: unable to open cookie file')

        # General configuration
        # A single opener carries both the proxy and the cookie handling;
        # installing two openers in a row would only keep the last one.
        cookie_processor = urllib2.HTTPCookieProcessor(jar)
        urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor))
        socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

        # Batch file verification
        batchurls = []
        if opts.batchfile is not None:
            try:
                if opts.batchfile == '-':
                    batchfd = sys.stdin
                else:
                    batchfd = open(opts.batchfile, 'r')
                try:
                    batchurls = batchfd.readlines()
                finally:
                    # Do not leak the handle (stdin is left open)
                    if batchfd is not sys.stdin:
                        batchfd.close()
                batchurls = [x.strip() for x in batchurls]
                # Skip blank lines and lines starting with '#', '/' or ';'
                batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
            except IOError:
                sys.exit(u'ERROR: batch file could not be read')
        all_urls = batchurls + args

        # Conflicting, missing and erroneous options
        if opts.bestquality:
            print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n'
        if opts.usenetrc and (opts.username is not None or opts.password is not None):
            parser.error(u'using .netrc conflicts with giving username/password')
        if opts.password is not None and opts.username is None:
            parser.error(u'account username missing')
        if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
            parser.error(u'using output template conflicts with using title, literal title or auto number')
        if opts.usetitle and opts.useliteral:
            parser.error(u'using title conflicts with using literal title')
        if opts.username is not None and opts.password is None:
            opts.password = getpass.getpass(u'Type account password and press return:')
        if opts.ratelimit is not None:
            numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
            if numeric_limit is None:
                parser.error(u'invalid rate limit specified')
            opts.ratelimit = numeric_limit
        if opts.retries is not None:
            try:
                opts.retries = long(opts.retries)
            except (TypeError, ValueError):
                parser.error(u'invalid retry count specified')
        try:
            opts.playliststart = long(opts.playliststart)
            if opts.playliststart <= 0:
                raise ValueError
        except (TypeError, ValueError):
            parser.error(u'invalid playlist start number specified')
        try:
            # -1 stands for "up to the last video"
            opts.playlistend = long(opts.playlistend)
            if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
                raise ValueError
        except (TypeError, ValueError):
            parser.error(u'invalid playlist end number specified')

        # Information extractors
        youtube_ie = YoutubeIE()
        metacafe_ie = MetacafeIE(youtube_ie)
        dailymotion_ie = DailymotionIE()
        youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
        youtube_user_ie = YoutubeUserIE(youtube_ie)
        youtube_search_ie = YoutubeSearchIE(youtube_ie)
        google_ie = GoogleIE()
        google_search_ie = GoogleSearchIE(google_ie)
        photobucket_ie = PhotobucketIE()
        yahoo_ie = YahooIE()
        yahoo_search_ie = YahooSearchIE(yahoo_ie)
        deposit_files_ie = DepositFilesIE()
        generic_ie = GenericIE()

        # File downloader
        fd = FileDownloader({
            'usenetrc': opts.usenetrc,
            'username': opts.username,
            'password': opts.password,
            'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
            'forceurl': opts.geturl,
            'forcetitle': opts.gettitle,
            'forcethumbnail': opts.getthumbnail,
            'forcedescription': opts.getdescription,
            'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
            'format': opts.format,
            'format_limit': opts.format_limit,
            # The first alternative that applies gives the output template
            'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
                or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
                or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
                or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
                or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
                or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
                or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
                or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
                or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
                or u'%(id)s.%(ext)s'),
            'ignoreerrors': opts.ignoreerrors,
            'ratelimit': opts.ratelimit,
            'nooverwrites': opts.nooverwrites,
            'retries': opts.retries,
            'continuedl': opts.continue_dl,
            'noprogress': opts.noprogress,
            'playliststart': opts.playliststart,
            'playlistend': opts.playlistend,
            'logtostderr': opts.outtmpl == '-',
            })
        fd.add_info_extractor(youtube_search_ie)
        fd.add_info_extractor(youtube_pl_ie)
        fd.add_info_extractor(youtube_user_ie)
        fd.add_info_extractor(metacafe_ie)
        fd.add_info_extractor(dailymotion_ie)
        fd.add_info_extractor(youtube_ie)
        fd.add_info_extractor(google_ie)
        fd.add_info_extractor(google_search_ie)
        fd.add_info_extractor(photobucket_ie)
        fd.add_info_extractor(yahoo_ie)
        fd.add_info_extractor(yahoo_search_ie)
        fd.add_info_extractor(deposit_files_ie)

        # This must come last since it's the
        # fallback if none of the others work
        fd.add_info_extractor(generic_ie)

        # Update version
        if opts.update_self:
            update_self(fd, sys.argv[0])

        # Maybe do nothing
        if len(all_urls) < 1:
            if not opts.update_self:
                parser.error(u'you must provide at least one URL')
            else:
                sys.exit()
        retcode = fd.download(all_urls)

        # Dump cookie jar if requested
        if opts.cookiefile is not None:
            try:
                jar.save()
            except (IOError, OSError):
                sys.exit(u'ERROR: unable to save cookie jar')

        sys.exit(retcode)

    except DownloadError:
        sys.exit(1)
    except SameFileError:
        sys.exit(u'ERROR: fixed output name but more than one file to download')
    except KeyboardInterrupt:
        sys.exit(u'\nERROR: Interrupted by user')