Raphaël G. Git Repositories - youtubedl/blob - youtube-dl

   1 #!/usr/bin/env python
   2 # -*- coding: utf-8 -*-
   3 # Author: Ricardo Garcia Gonzalez
   4 # Author: Danny Colligan
   5 # Author: Benjamin Johnson
   6 # License: Public domain code
   7 import htmlentitydefs
   8 import httplib
   9 import locale
  10 import math
  11 import netrc
  12 import os
  13 import os.path
  14 import re
  15 import socket
  16 import string
  17 import subprocess
  18 import sys
  19 import time
  20 import urllib
  21 import urllib2
  22
# parse_qs was moved from the cgi module to the urlparse module recently.
# Try the new location first and fall back to the old one.
try:
        from urlparse import parse_qs
except ImportError:
        from cgi import parse_qs

# Default HTTP request headers; handed to urllib2.Request wherever a
# request is built with std_headers further down in this file.
std_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
        'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
        'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language': 'en-us,en;q=0.5',
}

# Unicode string made of the ASCII letters followed by the ASCII digits.
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  37
  38 def preferredencoding():
  39         """Get preferred encoding.
  40
  41         Returns the best encoding scheme for the system, based on
  42         locale.getpreferredencoding() and some further tweaks.
  43         """
  44         def yield_preferredencoding():
  45                 try:
  46                         pref = locale.getpreferredencoding()
  47                         u'TEST'.encode(pref)
  48                 except:
  49                         pref = 'UTF-8'
  50                 while True:
  51                         yield pref
  52         return yield_preferredencoding().next()
  53
  54 def htmlentity_transform(matchobj):
  55         """Transforms an HTML entity to a Unicode character.
  56
  57         This function receives a match object and is intended to be used with
  58         the re.sub() function.
  59         """
  60         entity = matchobj.group(1)
  61
  62         # Known non-numeric HTML entity
  63         if entity in htmlentitydefs.name2codepoint:
  64                 return unichr(htmlentitydefs.name2codepoint[entity])
  65
  66         # Unicode character
  67         mobj = re.match(ur'(?u)#(x?\d+)', entity)
  68         if mobj is not None:
  69                 numstr = mobj.group(1)
  70                 if numstr.startswith(u'x'):
  71                         base = 16
  72                         numstr = u'0%s' % numstr
  73                 else:
  74                         base = 10
  75                 return unichr(long(numstr, base))
  76
  77         # Unknown entity in name, return its literal representation
  78         return (u'&%s;' % entity)
  79
  80 def sanitize_title(utitle):
  81         """Sanitizes a video title so it could be used as part of a filename."""
  82         utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
  83         return utitle.replace(unicode(os.sep), u'%')
  84
  85 def sanitize_open(filename, open_mode):
  86         """Try to open the given filename, and slightly tweak it if this fails.
  87
  88         Attempts to open the given filename. If this fails, it tries to change
  89         the filename slightly, step by step, until it's either able to open it
  90         or it fails and raises a final exception, like the standard open()
  91         function.
  92
  93         It returns the tuple (stream, definitive_file_name).
  94         """
  95         try:
  96                 if filename == u'-':
  97                         return (sys.stdout, filename)
  98                 stream = open(filename, open_mode)
  99                 return (stream, filename)
 100         except (IOError, OSError), err:
 101                 # In case of error, try to remove win32 forbidden chars
 102                 filename = re.sub(ur'[<>:"\|\?\*]', u'#', filename)
 103
 104                 # An exception here should be caught in the caller
 105                 stream = open(filename, open_mode)
 106                 return (stream, filename)
 107
 108
 109 class DownloadError(Exception):
 110         """Download Error exception.
 111
 112         This exception may be thrown by FileDownloader objects if they are not
 113         configured to continue on errors. They will contain the appropriate
 114         error message.
 115         """
 116         pass
 117
 118 class SameFileError(Exception):
 119         """Same File exception.
 120
 121         This exception will be thrown by FileDownloader objects if they detect
 122         multiple files would have to be downloaded to the same file on disk.
 123         """
 124         pass
 125
 126 class PostProcessingError(Exception):
 127         """Post Processing exception.
 128
 129         This exception may be raised by PostProcessor's .run() method to
 130         indicate an error in the postprocessing task.
 131         """
 132         pass
 133
 134 class UnavailableFormatError(Exception):
 135         """Unavailable Format exception.
 136
 137         This exception will be thrown when a video is requested
 138         in a format that is not available for that video.
 139         """
 140         pass
 141
 142 class ContentTooShortError(Exception):
 143         """Content Too Short exception.
 144
 145         This exception may be raised by FileDownloader objects when a file they
 146         download is too small for what the server announced first, indicating
 147         the connection was probably interrupted.
 148         """
 149         # Both in bytes
 150         downloaded = None
 151         expected = None
 152
 153         def __init__(self, downloaded, expected):
 154                 self.downloaded = downloaded
 155                 self.expected = expected
 156
 157 class FileDownloader(object):
 158         """File Downloader class.
 159
 160         File downloader objects are the ones responsible of downloading the
 161         actual video file and writing it to disk if the user has requested
 162         it, among some other tasks. In most cases there should be one per
 163         program. As, given a video URL, the downloader doesn't know how to
 164         extract all the needed information, task that InfoExtractors do, it
 165         has to pass the URL to one of them.
 166
 167         For this, file downloader objects have a method that allows
 168         InfoExtractors to be registered in a given order. When it is passed
 169         a URL, the file downloader handles it to the first InfoExtractor it
 170         finds that reports being able to handle it. The InfoExtractor extracts
 171         all the information about the video or videos the URL refers to, and
 172         asks the FileDownloader to process the video information, possibly
 173         downloading the video.
 174
 175         File downloaders accept a lot of parameters. In order not to saturate
 176         the object constructor with arguments, it receives a dictionary of
 177         options instead. These options are available through the params
 178         attribute for the InfoExtractors to use. The FileDownloader also
 179         registers itself as the downloader in charge for the InfoExtractors
 180         that are added to it, so this is a "mutual registration".
 181
 182         Available options:
 183
 184         username:       Username for authentication purposes.
 185         password:       Password for authentication purposes.
 186         usenetrc:       Use netrc for authentication instead.
 187         quiet:          Do not print messages to stdout.
 188         forceurl:       Force printing final URL.
 189         forcetitle:     Force printing title.
 190         simulate:       Do not download the video files.
 191         format:         Video format code.
 192         outtmpl:        Template for output names.
 193         ignoreerrors:   Do not stop on download errors.
 194         ratelimit:      Download speed limit, in bytes/sec.
 195         nooverwrites:   Prevent overwriting files.
 196         continuedl:     Try to continue downloads if possible.
 197         noprogress:     Do not print the progress bar.
 198         """
 199
 200         params = None
 201         _ies = []
 202         _pps = []
 203         _download_retcode = None
 204         _num_downloads = None
 205
 206         def __init__(self, params):
 207                 """Create a FileDownloader object with the given options."""
 208                 self._ies = []
 209                 self._pps = []
 210                 self._download_retcode = 0
 211                 self._num_downloads = 0
 212                 self.params = params
 213
 214         @staticmethod
 215         def pmkdir(filename):
 216                 """Create directory components in filename. Similar to Unix "mkdir -p"."""
 217                 components = filename.split(os.sep)
 218                 aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
 219                 aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
 220                 for dir in aggregate:
 221                         if not os.path.exists(dir):
 222                                 os.mkdir(dir)
 223
 224         @staticmethod
 225         def format_bytes(bytes):
 226                 if bytes is None:
 227                         return 'N/A'
 228                 if type(bytes) is str:
 229                         bytes = float(bytes)
 230                 if bytes == 0.0:
 231                         exponent = 0
 232                 else:
 233                         exponent = long(math.log(bytes, 1024.0))
 234                 suffix = 'bkMGTPEZY'[exponent]
 235                 converted = float(bytes) / float(1024**exponent)
 236                 return '%.2f%s' % (converted, suffix)
 237
 238         @staticmethod
 239         def calc_percent(byte_counter, data_len):
 240                 if data_len is None:
 241                         return '---.-%'
 242                 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
 243
 244         @staticmethod
 245         def calc_eta(start, now, total, current):
 246                 if total is None:
 247                         return '--:--'
 248                 dif = now - start
 249                 if current == 0 or dif < 0.001: # One millisecond
 250                         return '--:--'
 251                 rate = float(current) / dif
 252                 eta = long((float(total) - float(current)) / rate)
 253                 (eta_mins, eta_secs) = divmod(eta, 60)
 254                 if eta_mins > 99:
 255                         return '--:--'
 256                 return '%02d:%02d' % (eta_mins, eta_secs)
 257
 258         @staticmethod
 259         def calc_speed(start, now, bytes):
 260                 dif = now - start
 261                 if bytes == 0 or dif < 0.001: # One millisecond
 262                         return '%10s' % '---b/s'
 263                 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
 264
 265         @staticmethod
 266         def best_block_size(elapsed_time, bytes):
 267                 new_min = max(bytes / 2.0, 1.0)
 268                 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
 269                 if elapsed_time < 0.001:
 270                         return long(new_max)
 271                 rate = bytes / elapsed_time
 272                 if rate > new_max:
 273                         return long(new_max)
 274                 if rate < new_min:
 275                         return long(new_min)
 276                 return long(rate)
 277
 278         @staticmethod
 279         def parse_bytes(bytestr):
 280                 """Parse a string indicating a byte quantity into a long integer."""
 281                 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
 282                 if matchobj is None:
 283                         return None
 284                 number = float(matchobj.group(1))
 285                 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
 286                 return long(round(number * multiplier))
 287
 288         @staticmethod
 289         def verify_url(url):
 290                 """Verify a URL is valid and data could be downloaded. Return real data URL."""
 291                 request = urllib2.Request(url, None, std_headers)
 292                 data = urllib2.urlopen(request)
 293                 data.read(1)
 294                 url = data.geturl()
 295                 data.close()
 296                 return url
 297
 298         def add_info_extractor(self, ie):
 299                 """Add an InfoExtractor object to the end of the list."""
 300                 self._ies.append(ie)
 301                 ie.set_downloader(self)
 302
 303         def add_post_processor(self, pp):
 304                 """Add a PostProcessor object to the end of the chain."""
 305                 self._pps.append(pp)
 306                 pp.set_downloader(self)
 307
 308         def to_stdout(self, message, skip_eol=False, ignore_encoding_errors=False):
 309                 """Print message to stdout if not in quiet mode."""
 310                 try:
 311                         if not self.params.get('quiet', False):
 312                                 print (u'%s%s' % (message, [u'\n', u''][skip_eol])).encode(preferredencoding()),
 313                         sys.stdout.flush()
 314                 except (UnicodeEncodeError), err:
 315                         if not ignore_encoding_errors:
 316                                 raise
 317
 318         def to_stderr(self, message):
 319                 """Print message to stderr."""
 320                 print >>sys.stderr, message.encode(preferredencoding())
 321
 322         def fixed_template(self):
 323                 """Checks if the output template is fixed."""
 324                 return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 325
 326         def trouble(self, message=None):
 327                 """Determine action to take when a download problem appears.
 328
 329                 Depending on if the downloader has been configured to ignore
 330                 download errors or not, this method may throw an exception or
 331                 not when errors are found, after printing the message.
 332                 """
 333                 if message is not None:
 334                         self.to_stderr(message)
 335                 if not self.params.get('ignoreerrors', False):
 336                         raise DownloadError(message)
 337                 self._download_retcode = 1
 338
 339         def slow_down(self, start_time, byte_counter):
 340                 """Sleep if the download speed is over the rate limit."""
 341                 rate_limit = self.params.get('ratelimit', None)
 342                 if rate_limit is None or byte_counter == 0:
 343                         return
 344                 now = time.time()
 345                 elapsed = now - start_time
 346                 if elapsed <= 0.0:
 347                         return
 348                 speed = float(byte_counter) / elapsed
 349                 if speed > rate_limit:
 350                         time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 351
 352         def report_destination(self, filename):
 353                 """Report destination filename."""
 354                 self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
 355
 356         def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
 357                 """Report download progress."""
 358                 if self.params.get('noprogress', False):
 359                         return
 360                 self.to_stdout(u'\r[download] %s of %s at %s ETA %s' %
 361                                 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
 362
 363         def report_resuming_byte(self, resume_len):
 364                 """Report attemtp to resume at given byte."""
 365                 self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
 366
 367         def report_file_already_downloaded(self, file_name):
 368                 """Report file has already been fully downloaded."""
 369                 try:
 370                         self.to_stdout(u'[download] %s has already been downloaded' % file_name)
 371                 except (UnicodeEncodeError), err:
 372                         self.to_stdout(u'[download] The file has already been downloaded')
 373
 374         def report_unable_to_resume(self):
 375                 """Report it was impossible to resume download."""
 376                 self.to_stdout(u'[download] Unable to resume')
 377
 378         def report_finish(self):
 379                 """Report download finished."""
 380                 if self.params.get('noprogress', False):
 381                         self.to_stdout(u'[download] Download completed')
 382                 else:
 383                         self.to_stdout(u'')
 384
 385         def process_info(self, info_dict):
 386                 """Process a single dictionary returned by an InfoExtractor."""
 387                 # Do nothing else if in simulate mode
 388                 if self.params.get('simulate', False):
 389                         # Verify URL if it's an HTTP one
 390                         if info_dict['url'].startswith('http'):
 391                                 try:
 392                                         self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8')
 393                                 except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err:
 394                                         raise UnavailableFormatError
 395
 396                         # Forced printings
 397                         if self.params.get('forcetitle', False):
 398                                 print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')
 399                         if self.params.get('forceurl', False):
 400                                 print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')
 401
 402                         return
 403
 404                 try:
 405                         template_dict = dict(info_dict)
 406                         template_dict['epoch'] = unicode(long(time.time()))
 407                         template_dict['ord'] = unicode('%05d' % self._num_downloads)
 408                         filename = self.params['outtmpl'] % template_dict
 409                 except (ValueError, KeyError), err:
 410                         self.trouble('ERROR: invalid output template or system charset: %s' % str(err))
 411                 if self.params.get('nooverwrites', False) and os.path.exists(filename):
 412                         self.to_stderr(u'WARNING: file exists: %s; skipping' % filename)
 413                         return
 414
 415                 try:
 416                         self.pmkdir(filename)
 417                 except (OSError, IOError), err:
 418                         self.trouble('ERROR: unable to create directories: %s' % str(err))
 419                         return
 420
 421                 try:
 422                         success = self._do_download(filename, info_dict['url'].encode('utf-8'))
 423                 except (OSError, IOError), err:
 424                         raise UnavailableFormatError
 425                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 426                         self.trouble('ERROR: unable to download video data: %s' % str(err))
 427                         return
 428                 except (ContentTooShortError, ), err:
 429                         self.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
 430                         return
 431
 432                 if success:
 433                         try:
 434                                 self.post_process(filename, info_dict)
 435                         except (PostProcessingError), err:
 436                                 self.trouble('ERROR: postprocessing: %s' % str(err))
 437                                 return
 438
 439         def download(self, url_list):
 440                 """Download a given list of URLs."""
 441                 if len(url_list) > 1 and self.fixed_template():
 442                         raise SameFileError(self.params['outtmpl'])
 443
 444                 for url in url_list:
 445                         suitable_found = False
 446                         for ie in self._ies:
 447                                 # Go to next InfoExtractor if not suitable
 448                                 if not ie.suitable(url):
 449                                         continue
 450
 451                                 # Suitable InfoExtractor found
 452                                 suitable_found = True
 453
 454                                 # Extract information from URL and process it
 455                                 ie.extract(url)
 456
 457                                 # Suitable InfoExtractor had been found; go to next URL
 458                                 break
 459
 460                         if not suitable_found:
 461                                 self.trouble('ERROR: no suitable InfoExtractor: %s' % url)
 462
 463                 return self._download_retcode
 464
 465         def post_process(self, filename, ie_info):
 466                 """Run the postprocessing chain on the given file."""
 467                 info = dict(ie_info)
 468                 info['filepath'] = filename
 469                 for pp in self._pps:
 470                         info = pp.run(info)
 471                         if info is None:
 472                                 break
 473
 474         def _download_with_rtmpdump(self, filename, url):
 475                 self.report_destination(filename)
 476
 477                 # Check for rtmpdump first
 478                 try:
 479                         subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
 480                 except (OSError, IOError):
 481                         self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
 482                         return False
 483
 484                 # Download using rtmpdump. rtmpdump returns exit code 2 when
 485                 # the connection was interrumpted and resuming appears to be
 486                 # possible. This is part of rtmpdump's normal usage, AFAIK.
 487                 basic_args = ['rtmpdump', '-q', '-r', url, '-o', filename]
 488                 retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
 489                 while retval == 2 or retval == 1:
 490                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename), skip_eol=True)
 491                         time.sleep(2.0) # This seems to be needed
 492                         retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
 493                 if retval == 0:
 494                         self.to_stdout(u'\r[rtmpdump] %s bytes' % os.path.getsize(filename))
 495                         return True
 496                 else:
 497                         self.trouble('\nERROR: rtmpdump exited with code %d' % retval)
 498                         return False
 499
 500         def _do_download(self, filename, url):
 501                 # Attempt to download using rtmpdump
 502                 if url.startswith('rtmp'):
 503                         return self._download_with_rtmpdump(filename, url)
 504
 505                 stream = None
 506                 open_mode = 'wb'
 507                 basic_request = urllib2.Request(url, None, std_headers)
 508                 request = urllib2.Request(url, None, std_headers)
 509
 510                 # Establish possible resume length
 511                 if os.path.isfile(filename):
 512                         resume_len = os.path.getsize(filename)
 513                 else:
 514                         resume_len = 0
 515
 516                 # Request parameters in case of being able to resume
 517                 if self.params.get('continuedl', False) and resume_len != 0:
 518                         self.report_resuming_byte(resume_len)
 519                         request.add_header('Range','bytes=%d-' % resume_len)
 520                         open_mode = 'ab'
 521
 522                 # Establish connection
 523                 try:
 524                         data = urllib2.urlopen(request)
 525                 except (urllib2.HTTPError, ), err:
 526                         if err.code != 416: #  416 is 'Requested range not satisfiable'
 527                                 raise
 528                         # Unable to resume
 529                         data = urllib2.urlopen(basic_request)
 530                         content_length = data.info()['Content-Length']
 531
 532                         if content_length is not None and long(content_length) == resume_len:
 533                                 # Because the file had already been fully downloaded
 534                                 self.report_file_already_downloaded(filename)
 535                                 return True
 536                         else:
 537                                 # Because the server didn't let us
 538                                 self.report_unable_to_resume()
 539                                 open_mode = 'wb'
 540
 541                 data_len = data.info().get('Content-length', None)
 542                 data_len_str = self.format_bytes(data_len)
 543                 byte_counter = 0
 544                 block_size = 1024
 545                 start = time.time()
 546                 while True:
 547                         # Download and write
 548                         before = time.time()
 549                         data_block = data.read(block_size)
 550                         after = time.time()
 551                         data_block_len = len(data_block)
 552                         if data_block_len == 0:
 553                                 break
 554                         byte_counter += data_block_len
 555
 556                         # Open file just in time
 557                         if stream is None:
 558                                 try:
 559                                         (stream, filename) = sanitize_open(filename, open_mode)
 560                                         self.report_destination(filename)
 561                                         self._num_downloads += 1
 562                                 except (OSError, IOError), err:
 563                                         self.trouble('ERROR: unable to open for writing: %s' % str(err))
 564                                         return False
 565                         stream.write(data_block)
 566                         block_size = self.best_block_size(after - before, data_block_len)
 567
 568                         # Progress message
 569                         percent_str = self.calc_percent(byte_counter, data_len)
 570                         eta_str = self.calc_eta(start, time.time(), data_len, byte_counter)
 571                         speed_str = self.calc_speed(start, time.time(), byte_counter)
 572                         self.report_progress(percent_str, data_len_str, speed_str, eta_str)
 573
 574                         # Apply rate limit
 575                         self.slow_down(start, byte_counter)
 576
 577                 self.report_finish()
 578                 if data_len is not None and str(byte_counter) != data_len:
 579                         raise ContentTooShortError(byte_counter, long(data_len))
 580                 return True
 581
 582 class InfoExtractor(object):
 583         """Information Extractor class.
 584
 585         Information extractors are the classes that, given a URL, extract
 586         information from the video (or videos) the URL refers to. This
 587         information includes the real video URL, the video title and simplified
 588         title, author and others. The information is stored in a dictionary
 589         which is then passed to the FileDownloader. The FileDownloader
 590         processes this information possibly downloading the video to the file
 591         system, among other possible outcomes. The dictionaries must include
 592         the following fields:
 593
 594         id:             Video identifier.
 595         url:            Final video URL.
 596         uploader:       Nickname of the video uploader.
 597         title:          Literal title.
 598         stitle:         Simplified title.
 599         ext:            Video filename extension.
 600         format:         Video format.
 601
 602         Subclasses of this one should re-define the _real_initialize() and
 603         _real_extract() methods, as well as the suitable() static method.
 604         Probably, they should also be instantiated and added to the main
 605         downloader.
 606         """
 607
 608         _ready = False
 609         _downloader = None
 610
 611         def __init__(self, downloader=None):
 612                 """Constructor. Receives an optional downloader."""
 613                 self._ready = False
 614                 self.set_downloader(downloader)
 615
 616         @staticmethod
 617         def suitable(url):
 618                 """Receives a URL and returns True if suitable for this IE."""
 619                 return False
 620
 621         def initialize(self):
 622                 """Initializes an instance (authentication, etc)."""
 623                 if not self._ready:
 624                         self._real_initialize()
 625                         self._ready = True
 626
 627         def extract(self, url):
 628                 """Extracts URL information and returns it in list of dicts."""
 629                 self.initialize()
 630                 return self._real_extract(url)
 631
 632         def set_downloader(self, downloader):
 633                 """Sets the downloader for this IE."""
 634                 self._downloader = downloader
 635
 636         def _real_initialize(self):
 637                 """Real initialization process. Redefine in subclasses."""
 638                 pass
 639
 640         def _real_extract(self, url):
 641                 """Real extraction process. Redefine in subclasses."""
 642                 pass
 643
 644 class YoutubeIE(InfoExtractor):
 645         """Information extractor for youtube.com."""
 646
 647         _VALID_URL = r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$'
 648         _LANG_URL = r'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1'
 649         _LOGIN_URL = 'http://www.youtube.com/signup?next=/&gl=US&hl=en'
 650         _AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
 651         _NETRC_MACHINE = 'youtube'
 652         _available_formats = ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag
 653         _video_extensions = {
 654                 '13': '3gp',
 655                 '17': 'mp4',
 656                 '18': 'mp4',
 657                 '22': 'mp4',
 658                 '37': 'mp4',
 659         }
 660
 661         @staticmethod
 662         def suitable(url):
 663                 return (re.match(YoutubeIE._VALID_URL, url) is not None)
 664
 665         def report_lang(self):
 666                 """Report attempt to set language."""
 667                 self._downloader.to_stdout(u'[youtube] Setting language')
 668
 669         def report_login(self):
 670                 """Report attempt to log in."""
 671                 self._downloader.to_stdout(u'[youtube] Logging in')
 672
 673         def report_age_confirmation(self):
 674                 """Report attempt to confirm age."""
 675                 self._downloader.to_stdout(u'[youtube] Confirming age')
 676
 677         def report_video_info_webpage_download(self, video_id):
 678                 """Report attempt to download video info webpage."""
 679                 self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
 680
 681         def report_information_extraction(self, video_id):
 682                 """Report attempt to extract video information."""
 683                 self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
 684
 685         def report_unavailable_format(self, video_id, format):
 686                 """Report extracted video URL."""
 687                 self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
 688
 689         def report_rtmp_download(self):
 690                 """Indicate the download will use the RTMP protocol."""
 691                 self._downloader.to_stdout(u'[youtube] RTMP download detected')
 692
 693         def _real_initialize(self):
 694                 if self._downloader is None:
 695                         return
 696
 697                 username = None
 698                 password = None
 699                 downloader_params = self._downloader.params
 700
 701                 # Attempt to use provided username and password or .netrc data
 702                 if downloader_params.get('username', None) is not None:
 703                         username = downloader_params['username']
 704                         password = downloader_params['password']
 705                 elif downloader_params.get('usenetrc', False):
 706                         try:
 707                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE)
 708                                 if info is not None:
 709                                         username = info[0]
 710                                         password = info[2]
 711                                 else:
 712                                         raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
 713                         except (IOError, netrc.NetrcParseError), err:
 714                                 self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
 715                                 return
 716
 717                 # Set language
 718                 request = urllib2.Request(self._LANG_URL, None, std_headers)
 719                 try:
 720                         self.report_lang()
 721                         urllib2.urlopen(request).read()
 722                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 723                         self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err))
 724                         return
 725
 726                 # No authentication to be performed
 727                 if username is None:
 728                         return
 729
 730                 # Log in
 731                 login_form = {
 732                                 'current_form': 'loginForm',
 733                                 'next':         '/',
 734                                 'action_login': 'Log In',
 735                                 'username':     username,
 736                                 'password':     password,
 737                                 }
 738                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form), std_headers)
 739                 try:
 740                         self.report_login()
 741                         login_results = urllib2.urlopen(request).read()
 742                         if re.search(r'(?i)<form[^>]* name="loginForm"', login_results) is not None:
 743                                 self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password')
 744                                 return
 745                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 746                         self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
 747                         return
 748
 749                 # Confirm age
 750                 age_form = {
 751                                 'next_url':             '/',
 752                                 'action_confirm':       'Confirm',
 753                                 }
 754                 request = urllib2.Request(self._AGE_URL, urllib.urlencode(age_form), std_headers)
 755                 try:
 756                         self.report_age_confirmation()
 757                         age_results = urllib2.urlopen(request).read()
 758                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 759                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 760                         return
 761
 762         def _real_extract(self, url):
 763                 # Extract video id from URL
 764                 mobj = re.match(self._VALID_URL, url)
 765                 if mobj is None:
 766                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 767                         return
 768                 video_id = mobj.group(2)
 769
 770                 # Downloader parameters
 771                 best_quality = False
 772                 all_formats = False
 773                 format_param = None
 774                 quality_index = 0
 775                 if self._downloader is not None:
 776                         params = self._downloader.params
 777                         format_param = params.get('format', None)
 778                         if format_param == '0':
 779                                 format_param = self._available_formats[quality_index]
 780                                 best_quality = True
 781                         elif format_param == '-1':
 782                                 format_param = self._available_formats[quality_index]
 783                                 all_formats = True
 784
 785                 while True:
 786                         # Extension
 787                         video_extension = self._video_extensions.get(format_param, 'flv')
 788
 789                         # Get video info
 790                         self.report_video_info_webpage_download(video_id)
 791                         for el_type in ['embedded', 'detailpage', 'vevo']:
 792                                 video_info_url = ('http://www.youtube.com/get_video_info?&video_id=%s&el=%s&ps=default&eurl=&gl=US&hl=en'
 793                                                    % (video_id, el_type))
 794                                 request = urllib2.Request(video_info_url, None, std_headers)
 795                                 try:
 796                                         video_info_webpage = urllib2.urlopen(request).read()
 797                                         video_info = parse_qs(video_info_webpage)
 798                                         if 'token' in video_info:
 799                                                 break
 800                                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 801                                         self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))
 802                                         return
 803                         self.report_information_extraction(video_id)
 804
 805                         # "t" param
 806                         if 'token' not in video_info:
 807                                 # Attempt to see if YouTube has issued an error message
 808                                 if 'reason' not in video_info:
 809                                         self._downloader.trouble(u'ERROR: unable to extract "t" parameter for unknown reason')
 810                                         stream = open('reportme-ydl-%s.dat' % time.time(), 'wb')
 811                                         stream.write(video_info_webpage)
 812                                         stream.close()
 813                                 else:
 814                                         reason = urllib.unquote_plus(video_info['reason'][0])
 815                                         self._downloader.trouble(u'ERROR: YouTube said: %s' % reason.decode('utf-8'))
 816                                 return
 817                         token = urllib.unquote_plus(video_info['token'][0])
 818                         video_real_url = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id, token)
 819                         if format_param is not None:
 820                                 video_real_url = '%s&fmt=%s' % (video_real_url, format_param)
 821
 822                         # Check possible RTMP download
 823                         if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
 824                                 self.report_rtmp_download()
 825                                 video_real_url = video_info['conn'][0]
 826
 827                         # uploader
 828                         if 'author' not in video_info:
 829                                 self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 830                                 return
 831                         video_uploader = urllib.unquote_plus(video_info['author'][0])
 832
 833                         # title
 834                         if 'title' not in video_info:
 835                                 self._downloader.trouble(u'ERROR: unable to extract video title')
 836                                 return
 837                         video_title = urllib.unquote_plus(video_info['title'][0])
 838                         video_title = video_title.decode('utf-8')
 839                         video_title = sanitize_title(video_title)
 840
 841                         # simplified title
 842                         simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
 843                         simple_title = simple_title.strip(ur'_')
 844
 845                         try:
 846                                 # Process video information
 847                                 self._downloader.process_info({
 848                                         'id':           video_id.decode('utf-8'),
 849                                         'url':          video_real_url.decode('utf-8'),
 850                                         'uploader':     video_uploader.decode('utf-8'),
 851                                         'title':        video_title,
 852                                         'stitle':       simple_title,
 853                                         'ext':          video_extension.decode('utf-8'),
 854                                         'format':       (format_param is None and u'NA' or format_param.decode('utf-8')),
 855                                 })
 856
 857                                 if all_formats:
 858                                         if quality_index == len(self._available_formats) - 1:
 859                                                 # None left to get
 860                                                 return
 861                                         else:
 862                                                 quality_index += 1
 863                                                 format_param = self._available_formats[quality_index]
 864                                                 if format_param == None:
 865                                                         return
 866                                                 continue
 867
 868                                 return
 869
 870                         except UnavailableFormatError, err:
 871                                 if best_quality or all_formats:
 872                                         if quality_index == len(self._available_formats) - 1:
 873                                                 # I don't ever expect this to happen
 874                                                 if not all_formats:
 875                                                         self._downloader.trouble(u'ERROR: no known formats available for video')
 876                                                 return
 877                                         else:
 878                                                 self.report_unavailable_format(video_id, format_param)
 879                                                 quality_index += 1
 880                                                 format_param = self._available_formats[quality_index]
 881                                                 if format_param == None:
 882                                                         return
 883                                                 continue
 884                                 else:
 885                                         self._downloader.trouble('ERROR: format not available for video')
 886                                         return
 887
 888
 889 class MetacafeIE(InfoExtractor):
 890         """Information Extractor for metacafe.com."""
 891
 892         _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
 893         _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
 894         _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
 895         _youtube_ie = None
 896
 897         def __init__(self, youtube_ie, downloader=None):
 898                 InfoExtractor.__init__(self, downloader)
 899                 self._youtube_ie = youtube_ie
 900
 901         @staticmethod
 902         def suitable(url):
 903                 return (re.match(MetacafeIE._VALID_URL, url) is not None)
 904
 905         def report_disclaimer(self):
 906                 """Report disclaimer retrieval."""
 907                 self._downloader.to_stdout(u'[metacafe] Retrieving disclaimer')
 908
 909         def report_age_confirmation(self):
 910                 """Report attempt to confirm age."""
 911                 self._downloader.to_stdout(u'[metacafe] Confirming age')
 912
 913         def report_download_webpage(self, video_id):
 914                 """Report webpage download."""
 915                 self._downloader.to_stdout(u'[metacafe] %s: Downloading webpage' % video_id)
 916
 917         def report_extraction(self, video_id):
 918                 """Report information extraction."""
 919                 self._downloader.to_stdout(u'[metacafe] %s: Extracting information' % video_id)
 920
 921         def _real_initialize(self):
 922                 # Retrieve disclaimer
 923                 request = urllib2.Request(self._DISCLAIMER, None, std_headers)
 924                 try:
 925                         self.report_disclaimer()
 926                         disclaimer = urllib2.urlopen(request).read()
 927                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 928                         self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err))
 929                         return
 930
 931                 # Confirm age
 932                 disclaimer_form = {
 933                         'filters': '0',
 934                         'submit': "Continue - I'm over 18",
 935                         }
 936                 request = urllib2.Request(self._FILTER_POST, urllib.urlencode(disclaimer_form), std_headers)
 937                 try:
 938                         self.report_age_confirmation()
 939                         disclaimer = urllib2.urlopen(request).read()
 940                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 941                         self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err))
 942                         return
 943
 944         def _real_extract(self, url):
 945                 # Extract id and simplified title from URL
 946                 mobj = re.match(self._VALID_URL, url)
 947                 if mobj is None:
 948                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
 949                         return
 950
 951                 video_id = mobj.group(1)
 952
 953                 # Check if video comes from YouTube
 954                 mobj2 = re.match(r'^yt-(.*)$', video_id)
 955                 if mobj2 is not None:
 956                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % mobj2.group(1))
 957                         return
 958
 959                 simple_title = mobj.group(2).decode('utf-8')
 960                 video_extension = 'flv'
 961
 962                 # Retrieve video webpage to extract further information
 963                 request = urllib2.Request('http://www.metacafe.com/watch/%s/' % video_id)
 964                 try:
 965                         self.report_download_webpage(video_id)
 966                         webpage = urllib2.urlopen(request).read()
 967                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
 968                         self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err))
 969                         return
 970
 971                 # Extract URL, uploader and title from webpage
 972                 self.report_extraction(video_id)
 973                 mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage)
 974                 if mobj is None:
 975                         self._downloader.trouble(u'ERROR: unable to extract media URL')
 976                         return
 977                 mediaURL = urllib.unquote(mobj.group(1))
 978
 979                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage)
 980                 #if mobj is None:
 981                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey')
 982                 #       return
 983                 #gdaKey = mobj.group(1)
 984                 #
 985                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey)
 986
 987                 video_url = mediaURL
 988
 989                 mobj = re.search(r'(?im)<title>(.*) - Video</title>', webpage)
 990                 if mobj is None:
 991                         self._downloader.trouble(u'ERROR: unable to extract title')
 992                         return
 993                 video_title = mobj.group(1).decode('utf-8')
 994                 video_title = sanitize_title(video_title)
 995
 996                 mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage)
 997                 if mobj is None:
 998                         self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
 999                         return
1000                 video_uploader = mobj.group(1)
1001
1002                 try:
1003                         # Process video information
1004                         self._downloader.process_info({
1005                                 'id':           video_id.decode('utf-8'),
1006                                 'url':          video_url.decode('utf-8'),
1007                                 'uploader':     video_uploader.decode('utf-8'),
1008                                 'title':        video_title,
1009                                 'stitle':       simple_title,
1010                                 'ext':          video_extension.decode('utf-8'),
1011                                 'format':       u'NA',
1012                         })
1013                 except UnavailableFormatError:
1014                         self._downloader.trouble(u'ERROR: format not available for video')
1015
1016
1017 class GoogleIE(InfoExtractor):
1018         """Information extractor for video.google.com."""
1019
1020         _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
1021
1022         def __init__(self, downloader=None):
1023                 InfoExtractor.__init__(self, downloader)
1024
1025         @staticmethod
1026         def suitable(url):
1027                 return (re.match(GoogleIE._VALID_URL, url) is not None)
1028
1029         def report_download_webpage(self, video_id):
1030                 """Report webpage download."""
1031                 self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id)
1032
1033         def report_extraction(self, video_id):
1034                 """Report information extraction."""
1035                 self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id)
1036
1037         def _real_initialize(self):
1038                 return
1039
1040         def _real_extract(self, url):
1041                 # Extract id from URL
1042                 mobj = re.match(self._VALID_URL, url)
1043                 if mobj is None:
1044                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1045                         return
1046
1047                 video_id = mobj.group(1)
1048
1049                 video_extension = 'mp4'
1050
1051                 # Retrieve video webpage to extract further information
1052                 request = urllib2.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id)
1053                 try:
1054                         self.report_download_webpage(video_id)
1055                         webpage = urllib2.urlopen(request).read()
1056                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1057                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1058                         return
1059
1060                 # Extract URL, uploader, and title from webpage
1061                 self.report_extraction(video_id)
1062                 mobj = re.search(r"download_url:'([^']+)'", webpage)
1063                 if mobj is None:
1064                         video_extension = 'flv'
1065                         mobj = re.search(r"(?i)videoUrl\\x3d(.+?)\\x26", webpage)
1066                 if mobj is None:
1067                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1068                         return
1069                 mediaURL = urllib.unquote(mobj.group(1))
1070                 mediaURL = mediaURL.replace('\\x3d', '\x3d')
1071                 mediaURL = mediaURL.replace('\\x26', '\x26')
1072
1073                 video_url = mediaURL
1074
1075                 mobj = re.search(r'<title>(.*)</title>', webpage)
1076                 if mobj is None:
1077                         self._downloader.trouble(u'ERROR: unable to extract title')
1078                         return
1079                 video_title = mobj.group(1).decode('utf-8')
1080                 video_title = sanitize_title(video_title)
1081                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1082
1083                 try:
1084                         # Process video information
1085                         self._downloader.process_info({
1086                                 'id':           video_id.decode('utf-8'),
1087                                 'url':          video_url.decode('utf-8'),
1088                                 'uploader':     u'NA',
1089                                 'title':        video_title,
1090                                 'stitle':       simple_title,
1091                                 'ext':          video_extension.decode('utf-8'),
1092                                 'format':       u'NA',
1093                         })
1094                 except UnavailableFormatError:
1095                         self._downloader.trouble(u'ERROR: format not available for video')
1096
1097
1098 class PhotobucketIE(InfoExtractor):
1099         """Information extractor for photobucket.com."""
1100
1101         _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
1102
1103         def __init__(self, downloader=None):
1104                 InfoExtractor.__init__(self, downloader)
1105
1106         @staticmethod
1107         def suitable(url):
1108                 return (re.match(PhotobucketIE._VALID_URL, url) is not None)
1109
1110         def report_download_webpage(self, video_id):
1111                 """Report webpage download."""
1112                 self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id)
1113
1114         def report_extraction(self, video_id):
1115                 """Report information extraction."""
1116                 self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id)
1117
1118         def _real_initialize(self):
1119                 return
1120
1121         def _real_extract(self, url):
1122                 # Extract id from URL
1123                 mobj = re.match(self._VALID_URL, url)
1124                 if mobj is None:
1125                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1126                         return
1127
1128                 video_id = mobj.group(1)
1129
1130                 video_extension = 'flv'
1131
1132                 # Retrieve video webpage to extract further information
1133                 request = urllib2.Request(url)
1134                 try:
1135                         self.report_download_webpage(video_id)
1136                         webpage = urllib2.urlopen(request).read()
1137                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1138                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1139                         return
1140
1141                 # Extract URL, uploader, and title from webpage
1142                 self.report_extraction(video_id)
1143                 mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage)
1144                 if mobj is None:
1145                         self._downloader.trouble(u'ERROR: unable to extract media URL')
1146                         return
1147                 mediaURL = urllib.unquote(mobj.group(1))
1148
1149                 video_url = mediaURL
1150
1151                 mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage)
1152                 if mobj is None:
1153                         self._downloader.trouble(u'ERROR: unable to extract title')
1154                         return
1155                 video_title = mobj.group(1).decode('utf-8')
1156                 video_title = sanitize_title(video_title)
1157                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1158
1159                 video_uploader = mobj.group(2).decode('utf-8')
1160
1161                 try:
1162                         # Process video information
1163                         self._downloader.process_info({
1164                                 'id':           video_id.decode('utf-8'),
1165                                 'url':          video_url.decode('utf-8'),
1166                                 'uploader':     video_uploader,
1167                                 'title':        video_title,
1168                                 'stitle':       simple_title,
1169                                 'ext':          video_extension.decode('utf-8'),
1170                                 'format':       u'NA',
1171                         })
1172                 except UnavailableFormatError:
1173                         self._downloader.trouble(u'ERROR: format not available for video')
1174
1175
1176 class YahooIE(InfoExtractor):
1177         """Information extractor for video.yahoo.com."""
1178
1179         # _VALID_URL matches all Yahoo! Video URLs
1180         # _VPAGE_URL matches only the extractable '/watch/' URLs
1181         _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
1182         _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
1183
1184         def __init__(self, downloader=None):
1185                 InfoExtractor.__init__(self, downloader)
1186
1187         @staticmethod
1188         def suitable(url):
1189                 return (re.match(YahooIE._VALID_URL, url) is not None)
1190
1191         def report_download_webpage(self, video_id):
1192                 """Report webpage download."""
1193                 self._downloader.to_stdout(u'[video.yahoo] %s: Downloading webpage' % video_id)
1194
1195         def report_extraction(self, video_id):
1196                 """Report information extraction."""
1197                 self._downloader.to_stdout(u'[video.yahoo] %s: Extracting information' % video_id)
1198
1199         def _real_initialize(self):
1200                 return
1201
1202         def _real_extract(self, url):
1203                 # Extract ID from URL
1204                 mobj = re.match(self._VALID_URL, url)
1205                 if mobj is None:
1206                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1207                         return
1208
1209                 video_id = mobj.group(2)
1210                 video_extension = 'flv'
1211
1212                 # Rewrite valid but non-extractable URLs as
1213                 # extractable English language /watch/ URLs
1214                 if re.match(self._VPAGE_URL, url) is None:
1215                         request = urllib2.Request(url)
1216                         try:
1217                                 webpage = urllib2.urlopen(request).read()
1218                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1219                                 self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1220                                 return
1221
1222                         mobj = re.search(r'\("id", "([0-9]+)"\);', webpage)
1223                         if mobj is None:
1224                                 self._downloader.trouble(u'ERROR: Unable to extract id field')
1225                                 return
1226                         yahoo_id = mobj.group(1)
1227
1228                         mobj = re.search(r'\("vid", "([0-9]+)"\);', webpage)
1229                         if mobj is None:
1230                                 self._downloader.trouble(u'ERROR: Unable to extract vid field')
1231                                 return
1232                         yahoo_vid = mobj.group(1)
1233
1234                         url = 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid, yahoo_id)
1235                         return self._real_extract(url)
1236
1237                 # Retrieve video webpage to extract further information
1238                 request = urllib2.Request(url)
1239                 try:
1240                         self.report_download_webpage(video_id)
1241                         webpage = urllib2.urlopen(request).read()
1242                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1243                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1244                         return
1245
1246                 # Extract uploader and title from webpage
1247                 self.report_extraction(video_id)
1248                 mobj = re.search(r'<meta name="title" content="(.*)" />', webpage)
1249                 if mobj is None:
1250                         self._downloader.trouble(u'ERROR: unable to extract video title')
1251                         return
1252                 video_title = mobj.group(1).decode('utf-8')
1253                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1254
1255                 mobj = re.search(r'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage)
1256                 if mobj is None:
1257                         self._downloader.trouble(u'ERROR: unable to extract video uploader')
1258                         return
1259                 video_uploader = mobj.group(1).decode('utf-8')
1260
1261                 # Extract video height and width
1262                 mobj = re.search(r'<meta name="video_height" content="([0-9]+)" />', webpage)
1263                 if mobj is None:
1264                         self._downloader.trouble(u'ERROR: unable to extract video height')
1265                         return
1266                 yv_video_height = mobj.group(1)
1267
1268                 mobj = re.search(r'<meta name="video_width" content="([0-9]+)" />', webpage)
1269                 if mobj is None:
1270                         self._downloader.trouble(u'ERROR: unable to extract video width')
1271                         return
1272                 yv_video_width = mobj.group(1)
1273
1274                 # Retrieve video playlist to extract media URL
1275                 # I'm not completely sure what all these options are, but we
1276                 # seem to need most of them, otherwise the server sends a 401.
1277                 yv_lg = 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents
1278                 yv_bitrate = '700'  # according to Wikipedia this is hard-coded
1279                 request = urllib2.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id +
1280                                           '&tech=flash&mode=playlist&lg=' + yv_lg + '&bitrate=' + yv_bitrate + '&vidH=' + yv_video_height +
1281                                           '&vidW=' + yv_video_width + '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797')
1282                 try:
1283                         self.report_download_webpage(video_id)
1284                         webpage = urllib2.urlopen(request).read()
1285                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1286                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1287                         return
1288
1289                 # Extract media URL from playlist XML
1290                 mobj = re.search(r'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage)
1291                 if mobj is None:
1292                         self._downloader.trouble(u'ERROR: Unable to extract media URL')
1293                         return
1294                 video_url = urllib.unquote(mobj.group(1) + mobj.group(2)).decode('utf-8')
1295                 video_url = re.sub(r'(?u)&(.+?);', htmlentity_transform, video_url)
1296
1297                 try:
1298                         # Process video information
1299                         self._downloader.process_info({
1300                                 'id':           video_id.decode('utf-8'),
1301                                 'url':          video_url,
1302                                 'uploader':     video_uploader,
1303                                 'title':        video_title,
1304                                 'stitle':       simple_title,
1305                                 'ext':          video_extension.decode('utf-8'),
1306                         })
1307                 except UnavailableFormatError:
1308                         self._downloader.trouble(u'ERROR: format not available for video')
1309
1310
1311 class GenericIE(InfoExtractor):
1312         """Generic last-resort information extractor."""
1313
1314         def __init__(self, downloader=None):
1315                 InfoExtractor.__init__(self, downloader)
1316
1317         @staticmethod
1318         def suitable(url):
1319                 return True
1320
1321         def report_download_webpage(self, video_id):
1322                 """Report webpage download."""
1323                 self._downloader.to_stdout(u'WARNING: Falling back on generic information extractor.')
1324                 self._downloader.to_stdout(u'[generic] %s: Downloading webpage' % video_id)
1325
1326         def report_extraction(self, video_id):
1327                 """Report information extraction."""
1328                 self._downloader.to_stdout(u'[generic] %s: Extracting information' % video_id)
1329
1330         def _real_initialize(self):
1331                 return
1332
1333         def _real_extract(self, url):
1334                 video_id = url.split('/')[-1]
1335                 request = urllib2.Request(url)
1336                 try:
1337                         self.report_download_webpage(video_id)
1338                         webpage = urllib2.urlopen(request).read()
1339                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1340                         self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err))
1341                         return
1342                 except ValueError, err:
1343                         # since this is the last-resort InfoExtractor, if
1344                         # this error is thrown, it'll be thrown here
1345                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1346                         return
1347
1348                 # Start with something easy: JW Player in SWFObject
1349                 mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
1350                 if mobj is None:
1351                         # Broaden the search a little bit
1352                         mobj = re.search(r'[^A-Za-z0-9]?(?:file|source)=(http[^\'"&]*)', webpage)
1353                 if mobj is None:
1354                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1355                         return
1356
1357                 # It's possible that one of the regexes
1358                 # matched, but returned an empty group:
1359                 if mobj.group(1) is None:
1360                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url)
1361                         return
1362
1363                 video_url = urllib.unquote(mobj.group(1))
1364                 video_id  = os.path.basename(video_url)
1365
1366                 # here's a fun little line of code for you:
1367                 video_extension = os.path.splitext(video_id)[1][1:]
1368                 video_id        = os.path.splitext(video_id)[0]
1369
1370                 # it's tempting to parse this further, but you would
1371                 # have to take into account all the variations like
1372                 #   Video Title - Site Name
1373                 #   Site Name | Video Title
1374                 #   Video Title - Tagline | Site Name
1375                 # and so on and so forth; it's just not practical
1376                 mobj = re.search(r'<title>(.*)</title>', webpage)
1377                 if mobj is None:
1378                         self._downloader.trouble(u'ERROR: unable to extract title')
1379                         return
1380                 video_title = mobj.group(1).decode('utf-8')
1381                 video_title = sanitize_title(video_title)
1382                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
1383
1384                 # video uploader is domain name
1385                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url)
1386                 if mobj is None:
1387                         self._downloader.trouble(u'ERROR: unable to extract title')
1388                         return
1389                 video_uploader = mobj.group(1).decode('utf-8')
1390
1391                 try:
1392                         # Process video information
1393                         self._downloader.process_info({
1394                                 'id':           video_id.decode('utf-8'),
1395                                 'url':          video_url.decode('utf-8'),
1396                                 'uploader':     video_uploader,
1397                                 'title':        video_title,
1398                                 'stitle':       simple_title,
1399                                 'ext':          video_extension.decode('utf-8'),
1400                                 'format':       u'NA',
1401                         })
1402                 except UnavailableFormatError:
1403                         self._downloader.trouble(u'ERROR: format not available for video')
1404
1405
1406 class YoutubeSearchIE(InfoExtractor):
1407         """Information Extractor for YouTube search queries."""
1408         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
1409         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
1410         _VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
1411         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1412         _youtube_ie = None
1413         _max_youtube_results = 1000
1414
1415         def __init__(self, youtube_ie, downloader=None):
1416                 InfoExtractor.__init__(self, downloader)
1417                 self._youtube_ie = youtube_ie
1418
1419         @staticmethod
1420         def suitable(url):
1421                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)
1422
1423         def report_download_page(self, query, pagenum):
1424                 """Report attempt to download playlist page with given number."""
1425                 query = query.decode(preferredencoding())
1426                 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))
1427
1428         def _real_initialize(self):
1429                 self._youtube_ie.initialize()
1430
1431         def _real_extract(self, query):
1432                 mobj = re.match(self._VALID_QUERY, query)
1433                 if mobj is None:
1434                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
1435                         return
1436
1437                 prefix, query = query.split(':')
1438                 prefix = prefix[8:]
1439                 query  = query.encode('utf-8')
1440                 if prefix == '':
1441                         self._download_n_results(query, 1)
1442                         return
1443                 elif prefix == 'all':
1444                         self._download_n_results(query, self._max_youtube_results)
1445                         return
1446                 else:
1447                         try:
1448                                 n = long(prefix)
1449                                 if n <= 0:
1450                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query))
1451                                         return
1452                                 elif n > self._max_youtube_results:
1453                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n))
1454                                         n = self._max_youtube_results
1455                                 self._download_n_results(query, n)
1456                                 return
1457                         except ValueError: # parsing prefix as integer fails
1458                                 self._download_n_results(query, 1)
1459                                 return
1460
1461         def _download_n_results(self, query, n):
1462                 """Downloads a specified number of results for a query"""
1463
1464                 video_ids = []
1465                 already_seen = set()
1466                 pagenum = 1
1467
1468                 while True:
1469                         self.report_download_page(query, pagenum)
1470                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
1471                         request = urllib2.Request(result_url, None, std_headers)
1472                         try:
1473                                 page = urllib2.urlopen(request).read()
1474                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1475                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1476                                 return
1477
1478                         # Extract video identifiers
1479                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1480                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
1481                                 if video_id not in already_seen:
1482                                         video_ids.append(video_id)
1483                                         already_seen.add(video_id)
1484                                         if len(video_ids) == n:
1485                                                 # Specified n videos reached
1486                                                 for id in video_ids:
1487                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1488                                                 return
1489
1490                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1491                                 for id in video_ids:
1492                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1493                                 return
1494
1495                         pagenum = pagenum + 1
1496
1497 class YoutubePlaylistIE(InfoExtractor):
1498         """Information Extractor for YouTube playlists."""
1499
1500         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
1501         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
1502         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
1503         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
1504         _youtube_ie = None
1505
1506         def __init__(self, youtube_ie, downloader=None):
1507                 InfoExtractor.__init__(self, downloader)
1508                 self._youtube_ie = youtube_ie
1509
1510         @staticmethod
1511         def suitable(url):
1512                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)
1513
1514         def report_download_page(self, playlist_id, pagenum):
1515                 """Report attempt to download playlist page with given number."""
1516                 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))
1517
1518         def _real_initialize(self):
1519                 self._youtube_ie.initialize()
1520
1521         def _real_extract(self, url):
1522                 # Extract playlist id
1523                 mobj = re.match(self._VALID_URL, url)
1524                 if mobj is None:
1525                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1526                         return
1527
1528                 # Download playlist pages
1529                 playlist_id = mobj.group(1)
1530                 video_ids = []
1531                 pagenum = 1
1532
1533                 while True:
1534                         self.report_download_page(playlist_id, pagenum)
1535                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
1536                         try:
1537                                 page = urllib2.urlopen(request).read()
1538                         except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1539                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1540                                 return
1541
1542                         # Extract video identifiers
1543                         ids_in_page = []
1544                         for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1545                                 if mobj.group(1) not in ids_in_page:
1546                                         ids_in_page.append(mobj.group(1))
1547                         video_ids.extend(ids_in_page)
1548
1549                         if re.search(self._MORE_PAGES_INDICATOR, page) is None:
1550                                 break
1551                         pagenum = pagenum + 1
1552
1553                 for id in video_ids:
1554                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1555                 return
1556
1557 class YoutubeUserIE(InfoExtractor):
1558         """Information Extractor for YouTube users."""
1559
1560         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)'
1561         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
1562         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
1563         _youtube_ie = None
1564
1565         def __init__(self, youtube_ie, downloader=None):
1566                 InfoExtractor.__init__(self, downloader)
1567                 self._youtube_ie = youtube_ie
1568
1569         @staticmethod
1570         def suitable(url):
1571                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None)
1572
1573         def report_download_page(self, username):
1574                 """Report attempt to download user page."""
1575                 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))
1576
1577         def _real_initialize(self):
1578                 self._youtube_ie.initialize()
1579
1580         def _real_extract(self, url):
1581                 # Extract username
1582                 mobj = re.match(self._VALID_URL, url)
1583                 if mobj is None:
1584                         self._downloader.trouble(u'ERROR: invalid url: %s' % url)
1585                         return
1586
1587                 # Download user page
1588                 username = mobj.group(1)
1589                 video_ids = []
1590                 pagenum = 1
1591
1592                 self.report_download_page(username)
1593                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
1594                 try:
1595                         page = urllib2.urlopen(request).read()
1596                 except (urllib2.URLError, httplib.HTTPException, socket.error), err:
1597                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
1598                         return
1599
1600                 # Extract video identifiers
1601                 ids_in_page = []
1602
1603                 for mobj in re.finditer(self._VIDEO_INDICATOR, page):
1604                         if mobj.group(1) not in ids_in_page:
1605                                 ids_in_page.append(mobj.group(1))
1606                 video_ids.extend(ids_in_page)
1607
1608                 for id in video_ids:
1609                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id)
1610                 return
1611
class PostProcessor(object):
        """Base class for the steps run after a successful download.

        A downloader keeps a chain of PostProcessor objects, registered
        through its add_post_processor() method. Once a download has
        finished successfully, the downloader calls run() on each object of
        the chain in turn: the first one receives an initial argument and
        every following one receives whatever the previous run() returned.

        The chain ends when a run() call returns None or when there are no
        more objects left in it.

        Like InfoExtractor objects, PostProcessor objects and their
        downloader register with each other ("mutual registration").
        """

        # Downloader this object is attached to, or None
        _downloader = None

        def __init__(self, downloader=None):
                self._downloader = downloader

        def run(self, information):
                """Process the result of a download.

                "information" is a dictionary like the ones built by the
                InfoExtractors, plus an extra "filepath" field pointing to
                the downloaded file.

                Return None to stop the postprocessing chain, or an
                information dictionary (possibly the received one with some
                fields changed) to be given to the next object in the chain.

                This method may also raise a PostProcessingError exception,
                which the calling downloader takes into account.
                """
                # The base class passes the dictionary through untouched
                return information

        def set_downloader(self, downloader):
                """Attach this PP to the given downloader."""
                self._downloader = downloader
1657
1658 ### MAIN PROGRAM ###
1659 if __name__ == '__main__':
1660         try:
1661                 # Modules needed only when running the main program
1662                 import getpass
1663                 import optparse
1664
1665                 # Function to update the program file with the latest version from bitbucket.org
1666                 def update_self(downloader, filename):
1667                         # Note: downloader only used for options
1668                         if not os.access (filename, os.W_OK):
1669                                 sys.exit('ERROR: no write permissions on %s' % filename)
1670
1671                         downloader.to_stdout('Updating to latest stable version...')
1672                         latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION'
1673                         latest_version = urllib.urlopen(latest_url).read().strip()
1674                         prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version
1675                         newcontent = urllib.urlopen(prog_url).read()
1676                         stream = open(filename, 'w')
1677                         stream.write(newcontent)
1678                         stream.close()
1679                         downloader.to_stdout('Updated to version %s' % latest_version)
1680
1681                 # General configuration
1682                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler()))
1683                 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
1684                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
1685
1686                 # Parse command line
1687                 parser = optparse.OptionParser(
1688                         usage='Usage: %prog [options] url...',
1689                         version='2010.04.04',
1690                         conflict_handler='resolve',
1691                 )
1692
1693                 parser.add_option('-h', '--help',
1694                                 action='help', help='print this help text and exit')
1695                 parser.add_option('-v', '--version',
1696                                 action='version', help='print program version and exit')
1697                 parser.add_option('-U', '--update',
1698                                 action='store_true', dest='update_self', help='update this program to latest stable version')
1699                 parser.add_option('-i', '--ignore-errors',
1700                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
1701                 parser.add_option('-r', '--rate-limit',
1702                                 dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)')
1703
1704                 authentication = optparse.OptionGroup(parser, 'Authentication Options')
1705                 authentication.add_option('-u', '--username',
1706                                 dest='username', metavar='UN', help='account username')
1707                 authentication.add_option('-p', '--password',
1708                                 dest='password', metavar='PW', help='account password')
1709                 authentication.add_option('-n', '--netrc',
1710                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
1711                 parser.add_option_group(authentication)
1712
1713                 video_format = optparse.OptionGroup(parser, 'Video Format Options')
1714                 video_format.add_option('-f', '--format',
1715                                 action='store', dest='format', metavar='FMT', help='video format code')
1716                 video_format.add_option('-b', '--best-quality',
1717                                 action='store_const', dest='format', help='download the best quality video possible', const='0')
1718                 video_format.add_option('-m', '--mobile-version',
1719                                 action='store_const', dest='format', help='alias for -f 17', const='17')
1720                 video_format.add_option('-d', '--high-def',
1721                                 action='store_const', dest='format', help='alias for -f 22', const='22')
1722                 video_format.add_option('--all-formats',
1723                                 action='store_const', dest='format', help='download all available video formats', const='-1')
1724                 parser.add_option_group(video_format)
1725
1726                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
1727                 verbosity.add_option('-q', '--quiet',
1728                                 action='store_true', dest='quiet', help='activates quiet mode', default=False)
1729                 verbosity.add_option('-s', '--simulate',
1730                                 action='store_true', dest='simulate', help='do not download video', default=False)
1731                 verbosity.add_option('-g', '--get-url',
1732                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
1733                 verbosity.add_option('-e', '--get-title',
1734                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
1735                 verbosity.add_option('--no-progress',
1736                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False)
1737                 parser.add_option_group(verbosity)
1738
1739                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
1740                 filesystem.add_option('-t', '--title',
1741                                 action='store_true', dest='usetitle', help='use title in file name', default=False)
1742                 filesystem.add_option('-l', '--literal',
1743                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False)
1744                 filesystem.add_option('-o', '--output',
1745                                 dest='outtmpl', metavar='TPL', help='output filename template')
1746                 filesystem.add_option('-a', '--batch-file',
1747                                 dest='batchfile', metavar='F', help='file containing URLs to download')
1748                 filesystem.add_option('-w', '--no-overwrites',
1749                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
1750                 filesystem.add_option('-c', '--continue',
1751                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
1752                 parser.add_option_group(filesystem)
1753
1754                 (opts, args) = parser.parse_args()
1755
1756                 # Batch file verification
1757                 batchurls = []
1758                 if opts.batchfile is not None:
1759                         try:
1760                                 batchurls = open(opts.batchfile, 'r').readlines()
1761                                 batchurls = [x.strip() for x in batchurls]
1762                                 batchurls = [x for x in batchurls if len(x) > 0]
1763                         except IOError:
1764                                 sys.exit(u'ERROR: batch file could not be read')
1765                 all_urls = batchurls + args
1766
1767                 # Conflicting, missing and erroneous options
1768                 if opts.usenetrc and (opts.username is not None or opts.password is not None):
1769                         parser.error(u'using .netrc conflicts with giving username/password')
1770                 if opts.password is not None and opts.username is None:
1771                         parser.error(u'account username missing')
1772                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
1773                         parser.error(u'using output template conflicts with using title or literal title')
1774                 if opts.usetitle and opts.useliteral:
1775                         parser.error(u'using title conflicts with using literal title')
1776                 if opts.username is not None and opts.password is None:
1777                         opts.password = getpass.getpass(u'Type account password and press return:')
1778                 if opts.ratelimit is not None:
1779                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
1780                         if numeric_limit is None:
1781                                 parser.error(u'invalid rate limit specified')
1782                         opts.ratelimit = numeric_limit
1783
1784                 # Information extractors
1785                 youtube_ie = YoutubeIE()
1786                 metacafe_ie = MetacafeIE(youtube_ie)
1787                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
1788                 youtube_user_ie = YoutubeUserIE(youtube_ie)
1789                 youtube_search_ie = YoutubeSearchIE(youtube_ie)
1790                 google_ie = GoogleIE()
1791                 photobucket_ie = PhotobucketIE()
1792                 yahoo_ie = YahooIE()
1793                 generic_ie = GenericIE()
1794
1795                 # File downloader
1796                 fd = FileDownloader({
1797                         'usenetrc': opts.usenetrc,
1798                         'username': opts.username,
1799                         'password': opts.password,
1800                         'quiet': (opts.quiet or opts.geturl or opts.gettitle),
1801                         'forceurl': opts.geturl,
1802                         'forcetitle': opts.gettitle,
1803                         'simulate': (opts.simulate or opts.geturl or opts.gettitle),
1804                         'format': opts.format,
1805                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
1806                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
1807                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
1808                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
1809                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
1810                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
1811                                 or u'%(id)s.%(ext)s'),
1812                         'ignoreerrors': opts.ignoreerrors,
1813                         'ratelimit': opts.ratelimit,
1814                         'nooverwrites': opts.nooverwrites,
1815                         'continuedl': opts.continue_dl,
1816                         'noprogress': opts.noprogress,
1817                         })
1818                 fd.add_info_extractor(youtube_search_ie)
1819                 fd.add_info_extractor(youtube_pl_ie)
1820                 fd.add_info_extractor(youtube_user_ie)
1821                 fd.add_info_extractor(metacafe_ie)
1822                 fd.add_info_extractor(youtube_ie)
1823                 fd.add_info_extractor(google_ie)
1824                 fd.add_info_extractor(photobucket_ie)
1825                 fd.add_info_extractor(yahoo_ie)
1826
1827                 # This must come last since it's the
1828                 # fallback if none of the others work
1829                 fd.add_info_extractor(generic_ie)
1830
1831                 # Update version
1832                 if opts.update_self:
1833                         update_self(fd, sys.argv[0])
1834
1835                 # Maybe do nothing
1836                 if len(all_urls) < 1:
1837                         if not opts.update_self:
1838                                 parser.error(u'you must provide at least one URL')
1839                         else:
1840                                 sys.exit()
1841                 retcode = fd.download(all_urls)
1842                 sys.exit(retcode)
1843
1844         except DownloadError:
1845                 sys.exit(1)
1846         except SameFileError:
1847                 sys.exit(u'ERROR: fixed output name but more than one file to download')
1848         except KeyboardInterrupt:
1849                 sys.exit(u'\nERROR: Interrupted by user')