]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube-dl
01a61ba12015d03e90659aa107430eab2ecf9acd
   2 # -*- coding: utf-8 -*- 
   3 # Author: Ricardo Garcia Gonzalez 
   4 # Author: Danny Colligan 
   5 # Author: Benjamin Johnson 
   6 # License: Public domain code 
  23 # parse_qs was moved from the cgi module to the urlparse module recently. 
  25         from urlparse 
import parse_qs
 
  27         from cgi 
import parse_qs
 
  30         'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6', 
  31         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  32         'Accept': 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5', 
  33         'Accept-Language': 'en-us,en;q=0.5', 
  36 simple_title_chars 
= string
.ascii_letters
.decode('ascii') + string
.digits
.decode('ascii') 
  38 def preferredencoding(): 
  39         """Get preferred encoding. 
  41         Returns the best encoding scheme for the system, based on 
  42         locale.getpreferredencoding() and some further tweaks. 
  44         def yield_preferredencoding(): 
  46                         pref 
= locale
.getpreferredencoding() 
  52         return yield_preferredencoding().next() 
  54 def htmlentity_transform(matchobj
): 
  55         """Transforms an HTML entity to a Unicode character. 
  57         This function receives a match object and is intended to be used with 
  58         the re.sub() function. 
  60         entity 
= matchobj
.group(1) 
  62         # Known non-numeric HTML entity 
  63         if entity 
in htmlentitydefs
.name2codepoint
: 
  64                 return unichr(htmlentitydefs
.name2codepoint
[entity
]) 
  67         mobj 
= re
.match(ur
'(?u)#(x?\d+)', entity
) 
  69                 numstr 
= mobj
.group(1) 
  70                 if numstr
.startswith(u
'x'): 
  72                         numstr 
= u
'0%s' % numstr
 
  75                 return unichr(long(numstr
, base
)) 
  77         # Unknown entity in name, return its literal representation 
  78         return (u
'&%s;' % entity
) 
def sanitize_title(utitle):
	"""Sanitizes a video title so it could be used as part of a filename.

	HTML entities in the unicode title are decoded to their literal
	characters, and any occurrence of the OS path separator is replaced
	with u'%' so the resulting name cannot escape into other directories.
	"""
	utitle = re.sub(ur'(?u)&(.+?);', htmlentity_transform, utitle)
	return utitle.replace(unicode(os.sep), u'%')
  85 def sanitize_open(filename
, open_mode
): 
  86         """Try to open the given filename, and slightly tweak it if this fails. 
  88         Attempts to open the given filename. If this fails, it tries to change 
  89         the filename slightly, step by step, until it's either able to open it 
  90         or it fails and raises a final exception, like the standard open() 
  93         It returns the tuple (stream, definitive_file_name). 
  97                         return (sys
.stdout
, filename
) 
  98                 stream 
= open(filename
, open_mode
) 
  99                 return (stream
, filename
) 
 100         except (IOError, OSError), err
: 
 101                 # In case of error, try to remove win32 forbidden chars 
 102                 filename 
= re
.sub(ur
'[<>:"\|\?\*]', u
'#', filename
) 
 104                 # An exception here should be caught in the caller 
 105                 stream 
= open(filename
, open_mode
) 
 106                 return (stream
, filename
) 
 109 class DownloadError(Exception): 
 110         """Download Error exception. 
 112         This exception may be thrown by FileDownloader objects if they are not 
 113         configured to continue on errors. They will contain the appropriate 
 118 class SameFileError(Exception): 
 119         """Same File exception. 
 121         This exception will be thrown by FileDownloader objects if they detect 
 122         multiple files would have to be downloaded to the same file on disk. 
 126 class PostProcessingError(Exception): 
 127         """Post Processing exception. 
 129         This exception may be raised by PostProcessor's .run() method to 
 130         indicate an error in the postprocessing task. 
 134 class UnavailableFormatError(Exception): 
 135         """Unavailable Format exception. 
 137         This exception will be thrown when a video is requested 
 138         in a format that is not available for that video. 
 142 class ContentTooShortError(Exception): 
 143         """Content Too Short exception. 
 145         This exception may be raised by FileDownloader objects when a file they 
 146         download is too small for what the server announced first, indicating 
 147         the connection was probably interrupted. 
	def __init__(self, downloaded, expected):
		# Both values are byte counts; they are kept as attributes so the
		# handler can report how much data was actually received versus
		# what the server's Content-Length announced.
		self.downloaded = downloaded
		self.expected = expected
 157 class FileDownloader(object): 
 158         """File Downloader class. 
 160         File downloader objects are the ones responsible of downloading the 
 161         actual video file and writing it to disk if the user has requested 
 162         it, among some other tasks. In most cases there should be one per 
 163         program. As, given a video URL, the downloader doesn't know how to 
 164         extract all the needed information, task that InfoExtractors do, it 
 165         has to pass the URL to one of them. 
 167         For this, file downloader objects have a method that allows 
 168         InfoExtractors to be registered in a given order. When it is passed 
 169         a URL, the file downloader handles it to the first InfoExtractor it 
 170         finds that reports being able to handle it. The InfoExtractor extracts 
 171         all the information about the video or videos the URL refers to, and 
 172         asks the FileDownloader to process the video information, possibly 
 173         downloading the video. 
 175         File downloaders accept a lot of parameters. In order not to saturate 
 176         the object constructor with arguments, it receives a dictionary of 
 177         options instead. These options are available through the params 
 178         attribute for the InfoExtractors to use. The FileDownloader also 
 179         registers itself as the downloader in charge for the InfoExtractors 
 180         that are added to it, so this is a "mutual registration". 
 184         username:       Username for authentication purposes. 
 185         password:       Password for authentication purposes. 
 186         usenetrc:       Use netrc for authentication instead. 
 187         quiet:          Do not print messages to stdout. 
 188         forceurl:       Force printing final URL. 
 189         forcetitle:     Force printing title. 
 190         simulate:       Do not download the video files. 
 191         format:         Video format code. 
 192         outtmpl:        Template for output names. 
 193         ignoreerrors:   Do not stop on download errors. 
 194         ratelimit:      Download speed limit, in bytes/sec. 
 195         nooverwrites:   Prevent overwriting files. 
 196         retries:        Number of times to retry for HTTP error 503 
 197         continuedl:     Try to continue downloads if possible. 
 198         noprogress:     Do not print the progress bar. 
 204         _download_retcode 
= None 
 205         _num_downloads 
= None 
 207         def __init__(self
, params
): 
 208                 """Create a FileDownloader object with the given options.""" 
 211                 self
._download
_retcode 
= 0 
 212                 self
._num
_downloads 
= 0 
 216         def pmkdir(filename
): 
 217                 """Create directory components in filename. Similar to Unix "mkdir -p".""" 
 218                 components 
= filename
.split(os
.sep
) 
 219                 aggregate 
= [os
.sep
.join(components
[0:x
]) for x 
in xrange(1, len(components
))] 
 220                 aggregate 
= ['%s%s' % (x
, os
.sep
) for x 
in aggregate
] # Finish names with separator 
 221                 for dir in aggregate
: 
 222                         if not os
.path
.exists(dir): 
 226         def format_bytes(bytes): 
 229                 if type(bytes) is str: 
 234                         exponent 
= long(math
.log(bytes, 1024.0)) 
 235                 suffix 
= 'bkMGTPEZY'[exponent
] 
 236                 converted 
= float(bytes) / float(1024**exponent
) 
 237                 return '%.2f%s' % (converted
, suffix
) 
 240         def calc_percent(byte_counter
, data_len
): 
 243                 return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0)) 
 246         def calc_eta(start
, now
, total
, current
): 
 250                 if current 
== 0 or dif 
< 0.001: # One millisecond 
 252                 rate 
= float(current
) / dif
 
 253                 eta 
= long((float(total
) - float(current
)) / rate
) 
 254                 (eta_mins
, eta_secs
) = divmod(eta
, 60) 
 257                 return '%02d:%02d' % (eta_mins
, eta_secs
) 
 260         def calc_speed(start
, now
, bytes): 
 262                 if bytes == 0 or dif 
< 0.001: # One millisecond 
 263                         return '%10s' % '---b/s' 
 264                 return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
)) 
 267         def best_block_size(elapsed_time
, bytes): 
 268                 new_min 
= max(bytes / 2.0, 1.0) 
 269                 new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
 270                 if elapsed_time 
< 0.001: 
 272                 rate 
= bytes / elapsed_time
 
 280         def parse_bytes(bytestr
): 
 281                 """Parse a string indicating a byte quantity into a long integer.""" 
 282                 matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 285                 number 
= float(matchobj
.group(1)) 
 286                 multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 287                 return long(round(number 
* multiplier
)) 
 291                 """Verify a URL is valid and data could be downloaded. Return real data URL.""" 
 292                 request 
= urllib2
.Request(url
, None, std_headers
) 
 293                 data 
= urllib2
.urlopen(request
) 
 299         def add_info_extractor(self
, ie
): 
 300                 """Add an InfoExtractor object to the end of the list.""" 
 302                 ie
.set_downloader(self
) 
 304         def add_post_processor(self
, pp
): 
 305                 """Add a PostProcessor object to the end of the chain.""" 
 307                 pp
.set_downloader(self
) 
 309         def to_stdout(self
, message
, skip_eol
=False, ignore_encoding_errors
=False): 
 310                 """Print message to stdout if not in quiet mode.""" 
 312                         if not self
.params
.get('quiet', False): 
 313                                 print (u
'%s%s' % (message
, [u
'\n', u
''][skip_eol
])).encode(preferredencoding()), 
 315                 except (UnicodeEncodeError), err
: 
 316                         if not ignore_encoding_errors
: 
	def to_stderr(self, message):
		"""Print message to stderr.

		The message is encoded with the system's preferred encoding so
		non-ASCII characters do not raise UnicodeEncodeError on write.
		Unlike to_stdout, this is never suppressed by the 'quiet' option.
		"""
		print >>sys.stderr, message.encode(preferredencoding())
	def fixed_template(self):
		"""Checks if the output template is fixed.

		A template is "fixed" when it contains no %(...)s substitution
		fields, meaning every download would be written to the same
		file name (used to reject multi-URL runs with a single target).
		"""
		return (re.search(ur'(?u)%\(.+?\)s', self.params['outtmpl']) is None)
 327         def trouble(self
, message
=None): 
 328                 """Determine action to take when a download problem appears. 
 330                 Depending on if the downloader has been configured to ignore 
 331                 download errors or not, this method may throw an exception or 
 332                 not when errors are found, after printing the message. 
 334                 if message 
is not None: 
 335                         self
.to_stderr(message
) 
 336                 if not self
.params
.get('ignoreerrors', False): 
 337                         raise DownloadError(message
) 
 338                 self
._download
_retcode 
= 1 
 340         def slow_down(self
, start_time
, byte_counter
): 
 341                 """Sleep if the download speed is over the rate limit.""" 
 342                 rate_limit 
= self
.params
.get('ratelimit', None) 
 343                 if rate_limit 
is None or byte_counter 
== 0: 
 346                 elapsed 
= now 
- start_time
 
 349                 speed 
= float(byte_counter
) / elapsed
 
 350                 if speed 
> rate_limit
: 
 351                         time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
	def report_destination(self, filename):
		"""Report destination filename."""
		# Encoding errors are ignored: this message is purely informational.
		self.to_stdout(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
 357         def report_progress(self
, percent_str
, data_len_str
, speed_str
, eta_str
): 
 358                 """Report download progress.""" 
 359                 if self
.params
.get('noprogress', False): 
 361                 self
.to_stdout(u
'\r[download] %s of %s at %s ETA %s' % 
 362                                 (percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume download at the given byte offset."""
		self.to_stdout(u'[download] Resuming download at byte %s' % resume_len)
	def report_retry(self, count, retries):
		"""Report a retry attempt in case of HTTP error 503."""
		self.to_stdout(u'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' % (count, retries))
 372         def report_file_already_downloaded(self
, file_name
): 
 373                 """Report file has already been fully downloaded.""" 
 375                         self
.to_stdout(u
'[download] %s has already been downloaded' % file_name
) 
 376                 except (UnicodeEncodeError), err
: 
 377                         self
.to_stdout(u
'[download] The file has already been downloaded') 
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		self.to_stdout(u'[download] Unable to resume')
 383         def report_finish(self
): 
 384                 """Report download finished.""" 
 385                 if self
.params
.get('noprogress', False): 
 386                         self
.to_stdout(u
'[download] Download completed') 
 390         def process_info(self
, info_dict
): 
 391                 """Process a single dictionary returned by an InfoExtractor.""" 
 392                 # Do nothing else if in simulate mode 
 393                 if self
.params
.get('simulate', False): 
 394                         # Verify URL if it's an HTTP one 
 395                         if info_dict
['url'].startswith('http'): 
 397                                         self
.verify_url(info_dict
['url'].encode('utf-8')).decode('utf-8') 
 398                                 except (OSError, IOError, urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 399                                         raise UnavailableFormatError
 
 402                         if self
.params
.get('forcetitle', False): 
 403                                 print info_dict
['title'].encode(preferredencoding(), 'xmlcharrefreplace') 
 404                         if self
.params
.get('forceurl', False): 
 405                                 print info_dict
['url'].encode(preferredencoding(), 'xmlcharrefreplace') 
 406                         if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
: 
 407                                 print info_dict
['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') 
 408                         if self
.params
.get('forcedescription', False) and 'description' in info_dict
: 
 409                                 print info_dict
['description'].encode(preferredencoding(), 'xmlcharrefreplace') 
 414                         template_dict 
= dict(info_dict
) 
 415                         template_dict
['epoch'] = unicode(long(time
.time())) 
 416                         template_dict
['ord'] = unicode('%05d' % self
._num
_downloads
) 
 417                         filename 
= self
.params
['outtmpl'] % template_dict
 
 418                 except (ValueError, KeyError), err
: 
 419                         self
.trouble('ERROR: invalid output template or system charset: %s' % str(err
)) 
 420                 if self
.params
.get('nooverwrites', False) and os
.path
.exists(filename
): 
 421                         self
.to_stderr(u
'WARNING: file exists: %s; skipping' % filename
) 
 425                         self
.pmkdir(filename
) 
 426                 except (OSError, IOError), err
: 
 427                         self
.trouble('ERROR: unable to create directories: %s' % str(err
)) 
 431                         success 
= self
._do
_download
(filename
, info_dict
['url'].encode('utf-8'), info_dict
.get('player_url', None)) 
 432                 except (OSError, IOError), err
: 
 433                         raise UnavailableFormatError
 
 434                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 435                         self
.trouble('ERROR: unable to download video data: %s' % str(err
)) 
 437                 except (ContentTooShortError
, ), err
: 
 438                         self
.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
)) 
 443                                 self
.post_process(filename
, info_dict
) 
 444                         except (PostProcessingError
), err
: 
 445                                 self
.trouble('ERROR: postprocessing: %s' % str(err
)) 
 448         def download(self
, url_list
): 
 449                 """Download a given list of URLs.""" 
 450                 if len(url_list
) > 1 and self
.fixed_template(): 
 451                         raise SameFileError(self
.params
['outtmpl']) 
 454                         suitable_found 
= False 
 456                                 # Go to next InfoExtractor if not suitable 
 457                                 if not ie
.suitable(url
): 
 460                                 # Suitable InfoExtractor found 
 461                                 suitable_found 
= True 
 463                                 # Extract information from URL and process it 
 466                                 # Suitable InfoExtractor had been found; go to next URL 
 469                         if not suitable_found
: 
 470                                 self
.trouble('ERROR: no suitable InfoExtractor: %s' % url
) 
 472                 return self
._download
_retcode
 
 474         def post_process(self
, filename
, ie_info
): 
 475                 """Run the postprocessing chain on the given file.""" 
 477                 info
['filepath'] = filename
 
 483         def _download_with_rtmpdump(self
, filename
, url
, player_url
): 
 484                 self
.report_destination(filename
) 
 486                 # Check for rtmpdump first 
 488                         subprocess
.call(['rtmpdump', '-h'], stdout
=(file(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 489                 except (OSError, IOError): 
 490                         self
.trouble(u
'ERROR: RTMP download detected but "rtmpdump" could not be run') 
 493                 # Download using rtmpdump. rtmpdump returns exit code 2 when 
 494                 # the connection was interrumpted and resuming appears to be 
 495                 # possible. This is part of rtmpdump's normal usage, AFAIK. 
 496                 basic_args 
= ['rtmpdump', '-q'] + [[], ['-W', player_url
]][player_url 
is not None] + ['-r', url
, '-o', filename
] 
 497                 retval 
= subprocess
.call(basic_args 
+ [[], ['-e', '-k', '1']][self
.params
.get('continuedl', False)]) 
 498                 while retval 
== 2 or retval 
== 1: 
 499                         prevsize 
= os
.path
.getsize(filename
) 
 500                         self
.to_stdout(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 501                         time
.sleep(5.0) # This seems to be needed 
 502                         retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 503                         cursize 
= os
.path
.getsize(filename
) 
 504                         if prevsize 
== cursize 
and retval 
== 1: 
 507                         self
.to_stdout(u
'\r[rtmpdump] %s bytes' % os
.path
.getsize(filename
)) 
 510                         self
.trouble('\nERROR: rtmpdump exited with code %d' % retval
) 
 513         def _do_download(self
, filename
, url
, player_url
): 
 514                 # Attempt to download using rtmpdump 
 515                 if url
.startswith('rtmp'): 
 516                         return self
._download
_with
_rtmpdump
(filename
, url
, player_url
) 
 520                 basic_request 
= urllib2
.Request(url
, None, std_headers
) 
 521                 request 
= urllib2
.Request(url
, None, std_headers
) 
 523                 # Establish possible resume length 
 524                 if os
.path
.isfile(filename
): 
 525                         resume_len 
= os
.path
.getsize(filename
) 
 529                 # Request parameters in case of being able to resume 
 530                 if self
.params
.get('continuedl', False) and resume_len 
!= 0: 
 531                         self
.report_resuming_byte(resume_len
) 
 532                         request
.add_header('Range','bytes=%d-' % resume_len
) 
 536                 retries 
= self
.params
.get('retries', 0) 
 538                         # Establish connection 
 540                                 data 
= urllib2
.urlopen(request
) 
 542                         except (urllib2
.HTTPError
, ), err
: 
 544                                         # Retry in case of HTTP error 503 
 547                                                 self
.report_retry(count
, retries
) 
 549                                 if err
.code 
!= 416: #  416 is 'Requested range not satisfiable' 
 552                                 data 
= urllib2
.urlopen(basic_request
) 
 553                                 content_length 
= data
.info()['Content-Length'] 
 555                                 if content_length 
is not None and long(content_length
) == resume_len
: 
 556                                         # Because the file had already been fully downloaded 
 557                                         self
.report_file_already_downloaded(filename
) 
 558                                         self
._num
_downloads 
+= 1 
 561                                         # Because the server didn't let us 
 562                                         self
.report_unable_to_resume() 
 565                 data_len 
= data
.info().get('Content-length', None) 
 566                 data_len_str 
= self
.format_bytes(data_len
) 
 573                         data_block 
= data
.read(block_size
) 
 575                         data_block_len 
= len(data_block
) 
 576                         if data_block_len 
== 0: 
 578                         byte_counter 
+= data_block_len
 
 580                         # Open file just in time 
 583                                         (stream
, filename
) = sanitize_open(filename
, open_mode
) 
 584                                         self
.report_destination(filename
) 
 585                                         self
._num
_downloads 
+= 1 
 586                                 except (OSError, IOError), err
: 
 587                                         self
.trouble('ERROR: unable to open for writing: %s' % str(err
)) 
 590                                 stream
.write(data_block
) 
 591                         except (IOError, OSError), err
: 
 592                                 self
.trouble('\nERROR: unable to write data: %s' % str(err
)) 
 593                         block_size 
= self
.best_block_size(after 
- before
, data_block_len
) 
 596                         percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
 597                         eta_str 
= self
.calc_eta(start
, time
.time(), data_len
, byte_counter
) 
 598                         speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter
) 
 599                         self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
 602                         self
.slow_down(start
, byte_counter
) 
 605                 if data_len 
is not None and str(byte_counter
) != data_len
: 
 606                         raise ContentTooShortError(byte_counter
, long(data_len
)) 
 609 class InfoExtractor(object): 
 610         """Information Extractor class. 
 612         Information extractors are the classes that, given a URL, extract 
 613         information from the video (or videos) the URL refers to. This 
 614         information includes the real video URL, the video title and simplified 
 615         title, author and others. The information is stored in a dictionary 
 616         which is then passed to the FileDownloader. The FileDownloader 
 617         processes this information possibly downloading the video to the file 
 618         system, among other possible outcomes. The dictionaries must include 
 619         the following fields: 
 621         id:             Video identifier. 
 622         url:            Final video URL. 
 623         uploader:       Nickname of the video uploader. 
 624         title:          Literal title. 
 625         stitle:         Simplified title. 
 626         ext:            Video filename extension. 
 627         format:         Video format. 
 628         player_url:     SWF Player URL (may be None). 
 630         The following fields are optional. Their primary purpose is to allow 
 631         youtube-dl to serve as the backend for a video search function, such 
 632         as the one in youtube2mp3.  They are only used when their respective 
 633         forced printing functions are called: 
 635         thumbnail:      Full URL to a video thumbnail image. 
 636         description:    One-line video description. 
 638         Subclasses of this one should re-define the _real_initialize() and 
 639         _real_extract() methods, as well as the suitable() static method. 
 640         Probably, they should also be instantiated and added to the main 
 647         def __init__(self
, downloader
=None): 
 648                 """Constructor. Receives an optional downloader.""" 
 650                 self
.set_downloader(downloader
) 
 654                 """Receives a URL and returns True if suitable for this IE.""" 
 657         def initialize(self
): 
 658                 """Initializes an instance (authentication, etc).""" 
 660                         self
._real
_initialize
() 
 663         def extract(self
, url
): 
 664                 """Extracts URL information and returns it in list of dicts.""" 
 666                 return self
._real
_extract
(url
) 
	def set_downloader(self, downloader):
		"""Sets the downloader for this IE.

		Stored on the instance so report_* helpers and extraction code
		can reach the FileDownloader for output and parameter access.
		"""
		self._downloader = downloader
 672         def _real_initialize(self
): 
 673                 """Real initialization process. Redefine in subclasses.""" 
 676         def _real_extract(self
, url
): 
 677                 """Real extraction process. Redefine in subclasses.""" 
 680 class YoutubeIE(InfoExtractor
): 
 681         """Information extractor for youtube.com.""" 
 683         _VALID_URL 
= r
'^((?:http://)?(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:\.php)?)?[\?#](?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' 
 684         _LANG_URL 
= r
'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
 685         _LOGIN_URL 
= 'http://www.youtube.com/signup?next=/&gl=US&hl=en' 
 686         _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
 687         _NETRC_MACHINE 
= 'youtube' 
 688         _available_formats 
= ['37', '22', '35', '18', '34', '5', '17', '13', None] # listed in order of priority for -b flag 
 689         _video_extensions 
= { 
 701                 return (re
.match(YoutubeIE
._VALID
_URL
, url
) is not None) 
	def report_lang(self):
		"""Report attempt to set language (forces English YouTube pages)."""
		self._downloader.to_stdout(u'[youtube] Setting language')
	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_stdout(u'[youtube] Logging in')
	def report_age_confirmation(self):
		"""Report attempt to confirm age (for age-restricted videos)."""
		self._downloader.to_stdout(u'[youtube] Confirming age')
	def report_video_webpage_download(self, video_id):
		"""Report attempt to download video webpage."""
		self._downloader.to_stdout(u'[youtube] %s: Downloading video webpage' % video_id)
	def report_video_info_webpage_download(self, video_id):
		"""Report attempt to download video info webpage."""
		self._downloader.to_stdout(u'[youtube] %s: Downloading video info webpage' % video_id)
	def report_information_extraction(self, video_id):
		"""Report attempt to extract video information."""
		self._downloader.to_stdout(u'[youtube] %s: Extracting video information' % video_id)
	def report_unavailable_format(self, video_id, format):
		"""Report that the requested video format is not available."""
		self._downloader.to_stdout(u'[youtube] %s: Format %s not available' % (video_id, format))
	def report_rtmp_download(self):
		"""Indicate the download will use the RTMP protocol (via rtmpdump)."""
		self._downloader.to_stdout(u'[youtube] RTMP download detected')
 735         def _real_initialize(self
): 
 736                 if self
._downloader 
is None: 
 741                 downloader_params 
= self
._downloader
.params
 
 743                 # Attempt to use provided username and password or .netrc data 
 744                 if downloader_params
.get('username', None) is not None: 
 745                         username 
= downloader_params
['username'] 
 746                         password 
= downloader_params
['password'] 
 747                 elif downloader_params
.get('usenetrc', False): 
 749                                 info 
= netrc
.netrc().authenticators(self
._NETRC
_MACHINE
) 
 754                                         raise netrc
.NetrcParseError('No authenticators for %s' % self
._NETRC
_MACHINE
) 
 755                         except (IOError, netrc
.NetrcParseError
), err
: 
 756                                 self
._downloader
.to_stderr(u
'WARNING: parsing .netrc: %s' % str(err
)) 
 760                 request 
= urllib2
.Request(self
._LANG
_URL
, None, std_headers
) 
 763                         urllib2
.urlopen(request
).read() 
 764                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 765                         self
._downloader
.to_stderr(u
'WARNING: unable to set language: %s' % str(err
)) 
 768                 # No authentication to be performed 
 774                                 'current_form': 'loginForm', 
 776                                 'action_login': 'Log In', 
 777                                 'username':     username
, 
 778                                 'password':     password
, 
 780                 request 
= urllib2
.Request(self
._LOGIN
_URL
, urllib
.urlencode(login_form
), std_headers
) 
 783                         login_results 
= urllib2
.urlopen(request
).read() 
 784                         if re
.search(r
'(?i)<form[^>]* name="loginForm"', login_results
) is not None: 
 785                                 self
._downloader
.to_stderr(u
'WARNING: unable to log in: bad username or password') 
 787                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 788                         self
._downloader
.to_stderr(u
'WARNING: unable to log in: %s' % str(err
)) 
 794                                 'action_confirm':       'Confirm', 
 796                 request 
= urllib2
.Request(self
._AGE
_URL
, urllib
.urlencode(age_form
), std_headers
) 
 798                         self
.report_age_confirmation() 
 799                         age_results 
= urllib2
.urlopen(request
).read() 
 800                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 801                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
 804         def _real_extract(self
, url
): 
 805                 # Extract video id from URL 
 806                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
 808                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
 810                 video_id 
= mobj
.group(2) 
 812                 # Downloader parameters 
 817                 if self
._downloader 
is not None: 
 818                         params 
= self
._downloader
.params
 
 819                         format_param 
= params
.get('format', None) 
 820                         if format_param 
== '0': 
 821                                 format_param 
= self
._available
_formats
[quality_index
] 
 823                         elif format_param 
== '-1': 
 824                                 format_param 
= self
._available
_formats
[quality_index
] 
 829                         video_extension 
= self
._video
_extensions
.get(format_param
, 'flv') 
 832                         self
.report_video_webpage_download(video_id
) 
 833                         request 
= urllib2
.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
, None, std_headers
) 
 835                                 video_webpage 
= urllib2
.urlopen(request
).read() 
 836                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 837                                 self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
 840                         # Attempt to extract SWF player URL 
 841                         mobj 
= re
.search(r
'swfConfig.*"(http://.*?watch-.*?\.swf)"', video_webpage
) 
 843                                 player_url 
= mobj
.group(1) 
 848                         self
.report_video_info_webpage_download(video_id
) 
 849                         for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
 850                                 video_info_url 
= ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
 851                                                    % (video_id
, el_type
)) 
 852                                 request 
= urllib2
.Request(video_info_url
, None, std_headers
) 
 854                                         video_info_webpage 
= urllib2
.urlopen(request
).read() 
 855                                         video_info 
= parse_qs(video_info_webpage
) 
 856                                         if 'token' in video_info
: 
 858                                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 859                                         self
._downloader
.trouble(u
'ERROR: unable to download video info webpage: %s' % str(err
)) 
 861                         self
.report_information_extraction(video_id
) 
 864                         if 'token' not in video_info
: 
 865                                 # Attempt to see if YouTube has issued an error message 
 866                                 if 'reason' not in video_info
: 
 867                                         self
._downloader
.trouble(u
'ERROR: unable to extract "t" parameter for unknown reason') 
 868                                         stream 
= open('reportme-ydl-%s.dat' % time
.time(), 'wb') 
 869                                         stream
.write(video_info_webpage
) 
 872                                         reason 
= urllib
.unquote_plus(video_info
['reason'][0]) 
 873                                         self
._downloader
.trouble(u
'ERROR: YouTube said: %s' % reason
.decode('utf-8')) 
 875                         token 
= urllib
.unquote_plus(video_info
['token'][0]) 
 876                         video_real_url 
= 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id
, token
) 
 877                         if format_param 
is not None: 
 878                                 video_real_url 
= '%s&fmt=%s' % (video_real_url
, format_param
) 
 880                         # Check possible RTMP download 
 881                         if 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
 882                                 self
.report_rtmp_download() 
 883                                 video_real_url 
= video_info
['conn'][0] 
 886                         if 'author' not in video_info
: 
 887                                 self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
 889                         video_uploader 
= urllib
.unquote_plus(video_info
['author'][0]) 
 892                         if 'title' not in video_info
: 
 893                                 self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
 895                         video_title 
= urllib
.unquote_plus(video_info
['title'][0]) 
 896                         video_title 
= video_title
.decode('utf-8') 
 897                         video_title 
= sanitize_title(video_title
) 
 900                         simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
 901                         simple_title 
= simple_title
.strip(ur
'_') 
 904                         if 'thumbnail_url' not in video_info
: 
 905                                 self
._downloader
.trouble(u
'WARNING: unable to extract video thumbnail') 
 907                         else:   # don't panic if we can't find it 
 908                                 video_thumbnail 
= urllib
.unquote_plus(video_info
['thumbnail_url'][0]) 
 911                         video_description 
= 'No description available.' 
 912                         if self
._downloader
.params
.get('forcedescription', False): 
 913                                 mobj 
= re
.search(r
'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage
) 
 915                                         video_description 
= mobj
.group(1) 
 918                                 # Process video information 
 919                                 self
._downloader
.process_info({ 
 920                                         'id':           video_id
.decode('utf-8'), 
 921                                         'url':          video_real_url
.decode('utf-8'), 
 922                                         'uploader':     video_uploader
.decode('utf-8'), 
 923                                         'title':        video_title
, 
 924                                         'stitle':       simple_title
, 
 925                                         'ext':          video_extension
.decode('utf-8'), 
 926                                         'format':       (format_param 
is None and u
'NA' or format_param
.decode('utf-8')), 
 927                                         'thumbnail':    video_thumbnail
.decode('utf-8'), 
 928                                         'description':  video_description
.decode('utf-8'), 
 929                                         'player_url':   player_url
, 
 934                                         if quality_index 
== len(self
._available
_formats
): 
 938                                                 format_param 
= self
._available
_formats
[quality_index
] 
 942                         except UnavailableFormatError
, err
: 
 943                                 if best_quality 
or all_formats
: 
 945                                         if quality_index 
== len(self
._available
_formats
): 
 946                                                 # I don't ever expect this to happen 
 948                                                         self
._downloader
.trouble(u
'ERROR: no known formats available for video') 
 951                                                 self
.report_unavailable_format(video_id
, format_param
) 
 952                                                 format_param 
= self
._available
_formats
[quality_index
] 
 955                                         self
._downloader
.trouble('ERROR: format not available for video') 
 959 class MetacafeIE(InfoExtractor
): 
 960         """Information Extractor for metacafe.com.""" 
        # Matches metacafe watch URLs; group(1) is the video id, group(2) the
        # URL slug later used as the simplified title.
        _VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
        # Family-filter disclaimer page, fetched once during initialization.
        _DISCLAIMER = 'http://www.metacafe.com/family_filter/'
        # Endpoint that receives the age-confirmation form POST.
        _FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
 967         def __init__(self
, youtube_ie
, downloader
=None): 
 968                 InfoExtractor
.__init
__(self
, downloader
) 
 969                 self
._youtube
_ie 
= youtube_ie
 
 973                 return (re
.match(MetacafeIE
._VALID
_URL
, url
) is not None) 
 975         def report_disclaimer(self
): 
 976                 """Report disclaimer retrieval.""" 
 977                 self
._downloader
.to_stdout(u
'[metacafe] Retrieving disclaimer') 
 979         def report_age_confirmation(self
): 
 980                 """Report attempt to confirm age.""" 
 981                 self
._downloader
.to_stdout(u
'[metacafe] Confirming age') 
 983         def report_download_webpage(self
, video_id
): 
 984                 """Report webpage download.""" 
 985                 self
._downloader
.to_stdout(u
'[metacafe] %s: Downloading webpage' % video_id
) 
 987         def report_extraction(self
, video_id
): 
 988                 """Report information extraction.""" 
 989                 self
._downloader
.to_stdout(u
'[metacafe] %s: Extracting information' % video_id
) 
 991         def _real_initialize(self
): 
 992                 # Retrieve disclaimer 
 993                 request 
= urllib2
.Request(self
._DISCLAIMER
, None, std_headers
) 
 995                         self
.report_disclaimer() 
 996                         disclaimer 
= urllib2
.urlopen(request
).read() 
 997                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 998                         self
._downloader
.trouble(u
'ERROR: unable to retrieve disclaimer: %s' % str(err
)) 
1004                         'submit': "Continue - I'm over 18", 
1006                 request 
= urllib2
.Request(self
._FILTER
_POST
, urllib
.urlencode(disclaimer_form
), std_headers
) 
1008                         self
.report_age_confirmation() 
1009                         disclaimer 
= urllib2
.urlopen(request
).read() 
1010                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1011                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1014         def _real_extract(self
, url
): 
1015                 # Extract id and simplified title from URL 
1016                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1018                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1021                 video_id 
= mobj
.group(1) 
1023                 # Check if video comes from YouTube 
1024                 mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
1025                 if mobj2 
is not None: 
1026                         self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % mobj2
.group(1)) 
1029                 simple_title 
= mobj
.group(2).decode('utf-8') 
1030                 video_extension 
= 'flv' 
1032                 # Retrieve video webpage to extract further information 
1033                 request 
= urllib2
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
1035                         self
.report_download_webpage(video_id
) 
1036                         webpage 
= urllib2
.urlopen(request
).read() 
1037                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1038                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1041                 # Extract URL, uploader and title from webpage 
1042                 self
.report_extraction(video_id
) 
1043                 mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
1045                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1047                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1049                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) 
1051                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey') 
1053                 #gdaKey = mobj.group(1) 
1055                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) 
1057                 video_url 
= mediaURL
 
1059                 mobj 
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
) 
1061                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1063                 video_title 
= mobj
.group(1).decode('utf-8') 
1064                 video_title 
= sanitize_title(video_title
) 
1066                 mobj 
= re
.search(r
'(?ms)By:\s*<a .*?>(.+?)<', webpage
) 
1068                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1070                 video_uploader 
= mobj
.group(1) 
1073                         # Process video information 
1074                         self
._downloader
.process_info({ 
1075                                 'id':           video_id
.decode('utf-8'), 
1076                                 'url':          video_url
.decode('utf-8'), 
1077                                 'uploader':     video_uploader
.decode('utf-8'), 
1078                                 'title':        video_title
, 
1079                                 'stitle':       simple_title
, 
1080                                 'ext':          video_extension
.decode('utf-8'), 
1084                 except UnavailableFormatError
: 
1085                         self
._downloader
.trouble(u
'ERROR: format not available for video') 
1088 class GoogleIE(InfoExtractor
): 
1089         """Information extractor for video.google.com.""" 
        # Matches video.google.* playback URLs across the national TLD variants;
        # group(1) is the numeric docid used as the video id.
        _VALID_URL = r'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*'
        def __init__(self, downloader=None):
                # All shared state handling lives in the InfoExtractor base class.
                InfoExtractor.__init__(self, downloader)
1098                 return (re
.match(GoogleIE
._VALID
_URL
, url
) is not None) 
1100         def report_download_webpage(self
, video_id
): 
1101                 """Report webpage download.""" 
1102                 self
._downloader
.to_stdout(u
'[video.google] %s: Downloading webpage' % video_id
) 
1104         def report_extraction(self
, video_id
): 
1105                 """Report information extraction.""" 
1106                 self
._downloader
.to_stdout(u
'[video.google] %s: Extracting information' % video_id
) 
1108         def _real_initialize(self
): 
1111         def _real_extract(self
, url
): 
1112                 # Extract id from URL 
1113                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1115                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1118                 video_id 
= mobj
.group(1) 
1120                 video_extension 
= 'mp4' 
1122                 # Retrieve video webpage to extract further information 
1123                 request 
= urllib2
.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id
) 
1125                         self
.report_download_webpage(video_id
) 
1126                         webpage 
= urllib2
.urlopen(request
).read() 
1127                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1128                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1131                 # Extract URL, uploader, and title from webpage 
1132                 self
.report_extraction(video_id
) 
1133                 mobj 
= re
.search(r
"download_url:'([^']+)'", webpage
) 
1135                         video_extension 
= 'flv' 
1136                         mobj 
= re
.search(r
"(?i)videoUrl\\x3d(.+?)\\x26", webpage
) 
1138                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1140                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1141                 mediaURL 
= mediaURL
.replace('\\x3d', '\x3d') 
1142                 mediaURL 
= mediaURL
.replace('\\x26', '\x26') 
1144                 video_url 
= mediaURL
 
1146                 mobj 
= re
.search(r
'<title>(.*)</title>', webpage
) 
1148                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1150                 video_title 
= mobj
.group(1).decode('utf-8') 
1151                 video_title 
= sanitize_title(video_title
) 
1152                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1154                 # Extract video description 
1155                 mobj 
= re
.search(r
'<span id=short-desc-content>([^<]*)</span>', webpage
) 
1157                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1159                 video_description 
= mobj
.group(1).decode('utf-8') 
1160                 if not video_description
: 
1161                         video_description 
= 'No description available.' 
1163                 # Extract video thumbnail 
1164                 if self
._downloader
.params
.get('forcethumbnail', False): 
1165                         request 
= urllib2
.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id
))) 
1167                                 webpage 
= urllib2
.urlopen(request
).read() 
1168                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1169                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1171                         mobj 
= re
.search(r
'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage
) 
1173                                 self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1175                         video_thumbnail 
= mobj
.group(1) 
1176                 else:   # we need something to pass to process_info 
1177                         video_thumbnail 
= '' 
1181                         # Process video information 
1182                         self
._downloader
.process_info({ 
1183                                 'id':           video_id
.decode('utf-8'), 
1184                                 'url':          video_url
.decode('utf-8'), 
1186                                 'title':        video_title
, 
1187                                 'stitle':       simple_title
, 
1188                                 'ext':          video_extension
.decode('utf-8'), 
1192                 except UnavailableFormatError
: 
1193                         self
._downloader
.trouble(u
'ERROR: format not available for video') 
1196 class PhotobucketIE(InfoExtractor
): 
1197         """Information extractor for photobucket.com.""" 
        # Matches photobucket flash-video URLs; group(1) is the .flv media
        # name used as the video id.
        _VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)'
        def __init__(self, downloader=None):
                # All shared state handling lives in the InfoExtractor base class.
                InfoExtractor.__init__(self, downloader)
1206                 return (re
.match(PhotobucketIE
._VALID
_URL
, url
) is not None) 
1208         def report_download_webpage(self
, video_id
): 
1209                 """Report webpage download.""" 
1210                 self
._downloader
.to_stdout(u
'[photobucket] %s: Downloading webpage' % video_id
) 
1212         def report_extraction(self
, video_id
): 
1213                 """Report information extraction.""" 
1214                 self
._downloader
.to_stdout(u
'[photobucket] %s: Extracting information' % video_id
) 
1216         def _real_initialize(self
): 
1219         def _real_extract(self
, url
): 
1220                 # Extract id from URL 
1221                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1223                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1226                 video_id 
= mobj
.group(1) 
1228                 video_extension 
= 'flv' 
1230                 # Retrieve video webpage to extract further information 
1231                 request 
= urllib2
.Request(url
) 
1233                         self
.report_download_webpage(video_id
) 
1234                         webpage 
= urllib2
.urlopen(request
).read() 
1235                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1236                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1239                 # Extract URL, uploader, and title from webpage 
1240                 self
.report_extraction(video_id
) 
1241                 mobj 
= re
.search(r
'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage
) 
1243                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1245                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1247                 video_url 
= mediaURL
 
1249                 mobj 
= re
.search(r
'<title>(.*) video by (.*) - Photobucket</title>', webpage
) 
1251                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1253                 video_title 
= mobj
.group(1).decode('utf-8') 
1254                 video_title 
= sanitize_title(video_title
) 
1255                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1257                 video_uploader 
= mobj
.group(2).decode('utf-8') 
1260                         # Process video information 
1261                         self
._downloader
.process_info({ 
1262                                 'id':           video_id
.decode('utf-8'), 
1263                                 'url':          video_url
.decode('utf-8'), 
1264                                 'uploader':     video_uploader
, 
1265                                 'title':        video_title
, 
1266                                 'stitle':       simple_title
, 
1267                                 'ext':          video_extension
.decode('utf-8'), 
1271                 except UnavailableFormatError
: 
1272                         self
._downloader
.trouble(u
'ERROR: format not available for video') 
1275 class YahooIE(InfoExtractor
): 
1276         """Information extractor for video.yahoo.com.""" 
        # _VALID_URL matches all Yahoo! Video URLs
        # _VPAGE_URL matches only the extractable '/watch/' URLs
        # group(2) of _VALID_URL is the video id used for reporting and lookups.
        _VALID_URL = r'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?'
        _VPAGE_URL = r'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?'
        def __init__(self, downloader=None):
                # All shared state handling lives in the InfoExtractor base class.
                InfoExtractor.__init__(self, downloader)
1288                 return (re
.match(YahooIE
._VALID
_URL
, url
) is not None) 
1290         def report_download_webpage(self
, video_id
): 
1291                 """Report webpage download.""" 
1292                 self
._downloader
.to_stdout(u
'[video.yahoo] %s: Downloading webpage' % video_id
) 
1294         def report_extraction(self
, video_id
): 
1295                 """Report information extraction.""" 
1296                 self
._downloader
.to_stdout(u
'[video.yahoo] %s: Extracting information' % video_id
) 
1298         def _real_initialize(self
): 
1301         def _real_extract(self
, url
): 
1302                 # Extract ID from URL 
1303                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1305                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1308                 video_id 
= mobj
.group(2) 
1309                 video_extension 
= 'flv' 
1311                 # Rewrite valid but non-extractable URLs as 
1312                 # extractable English language /watch/ URLs 
1313                 if re
.match(self
._VPAGE
_URL
, url
) is None: 
1314                         request 
= urllib2
.Request(url
) 
1316                                 webpage 
= urllib2
.urlopen(request
).read() 
1317                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1318                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1321                         mobj 
= re
.search(r
'\("id", "([0-9]+)"\);', webpage
) 
1323                                 self
._downloader
.trouble(u
'ERROR: Unable to extract id field') 
1325                         yahoo_id 
= mobj
.group(1) 
1327                         mobj 
= re
.search(r
'\("vid", "([0-9]+)"\);', webpage
) 
1329                                 self
._downloader
.trouble(u
'ERROR: Unable to extract vid field') 
1331                         yahoo_vid 
= mobj
.group(1) 
1333                         url 
= 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid
, yahoo_id
) 
1334                         return self
._real
_extract
(url
) 
1336                 # Retrieve video webpage to extract further information 
1337                 request 
= urllib2
.Request(url
) 
1339                         self
.report_download_webpage(video_id
) 
1340                         webpage 
= urllib2
.urlopen(request
).read() 
1341                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1342                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1345                 # Extract uploader and title from webpage 
1346                 self
.report_extraction(video_id
) 
1347                 mobj 
= re
.search(r
'<meta name="title" content="(.*)" />', webpage
) 
1349                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1351                 video_title 
= mobj
.group(1).decode('utf-8') 
1352                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1354                 mobj 
= re
.search(r
'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage
) 
1356                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
1358                 video_uploader 
= mobj
.group(1).decode('utf-8') 
1360                 # Extract video thumbnail 
1361                 mobj 
= re
.search(r
'<link rel="image_src" href="(.*)" />', webpage
) 
1363                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1365                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
1367                 # Extract video description 
1368                 mobj 
= re
.search(r
'<meta name="description" content="(.*)" />', webpage
) 
1370                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1372                 video_description 
= mobj
.group(1).decode('utf-8') 
1373                 if not video_description
: video_description 
= 'No description available.' 
1375                 # Extract video height and width 
1376                 mobj 
= re
.search(r
'<meta name="video_height" content="([0-9]+)" />', webpage
) 
1378                         self
._downloader
.trouble(u
'ERROR: unable to extract video height') 
1380                 yv_video_height 
= mobj
.group(1) 
1382                 mobj 
= re
.search(r
'<meta name="video_width" content="([0-9]+)" />', webpage
) 
1384                         self
._downloader
.trouble(u
'ERROR: unable to extract video width') 
1386                 yv_video_width 
= mobj
.group(1) 
1388                 # Retrieve video playlist to extract media URL 
1389                 # I'm not completely sure what all these options are, but we 
1390                 # seem to need most of them, otherwise the server sends a 401. 
1391                 yv_lg 
= 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents 
1392                 yv_bitrate 
= '700'  # according to Wikipedia this is hard-coded 
1393                 request 
= urllib2
.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id 
+ 
1394                                           '&tech=flash&mode=playlist&lg=' + yv_lg 
+ '&bitrate=' + yv_bitrate 
+ '&vidH=' + yv_video_height 
+ 
1395                                           '&vidW=' + yv_video_width 
+ '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') 
1397                         self
.report_download_webpage(video_id
) 
1398                         webpage 
= urllib2
.urlopen(request
).read() 
1399                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1400                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1403                 # Extract media URL from playlist XML 
1404                 mobj 
= re
.search(r
'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage
) 
1406                         self
._downloader
.trouble(u
'ERROR: Unable to extract media URL') 
1408                 video_url 
= urllib
.unquote(mobj
.group(1) + mobj
.group(2)).decode('utf-8') 
1409                 video_url 
= re
.sub(r
'(?u)&(.+?);', htmlentity_transform
, video_url
) 
1412                         # Process video information 
1413                         self
._downloader
.process_info({ 
1414                                 'id':           video_id
.decode('utf-8'), 
1416                                 'uploader':     video_uploader
, 
1417                                 'title':        video_title
, 
1418                                 'stitle':       simple_title
, 
1419                                 'ext':          video_extension
.decode('utf-8'), 
1420                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
1421                                 'description':  video_description
, 
1422                                 'thumbnail':    video_thumbnail
, 
1423                                 'description':  video_description
, 
1426                 except UnavailableFormatError
: 
1427                         self
._downloader
.trouble(u
'ERROR: format not available for video') 
1430 class GenericIE(InfoExtractor
): 
1431         """Generic last-resort information extractor.""" 
        def __init__(self, downloader=None):
                # All shared state handling lives in the InfoExtractor base class.
                InfoExtractor.__init__(self, downloader)
1440         def report_download_webpage(self
, video_id
): 
1441                 """Report webpage download.""" 
1442                 self
._downloader
.to_stdout(u
'WARNING: Falling back on generic information extractor.') 
1443                 self
._downloader
.to_stdout(u
'[generic] %s: Downloading webpage' % video_id
) 
1445         def report_extraction(self
, video_id
): 
1446                 """Report information extraction.""" 
1447                 self
._downloader
.to_stdout(u
'[generic] %s: Extracting information' % video_id
) 
1449         def _real_initialize(self
): 
1452         def _real_extract(self
, url
): 
1453                 video_id 
= url
.split('/')[-1] 
1454                 request 
= urllib2
.Request(url
) 
1456                         self
.report_download_webpage(video_id
) 
1457                         webpage 
= urllib2
.urlopen(request
).read() 
1458                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1459                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1461                 except ValueError, err
: 
1462                         # since this is the last-resort InfoExtractor, if 
1463                         # this error is thrown, it'll be thrown here 
1464                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1467                 # Start with something easy: JW Player in SWFObject 
1468                 mobj 
= re
.search(r
'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) 
1470                         # Broaden the search a little bit 
1471                         mobj = re.search(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage) 
1473                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
1476                 # It's possible that one of the regexes 
1477                 # matched, but returned an empty group: 
1478                 if mobj.group(1) is None: 
1479                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
1482                 video_url = urllib.unquote(mobj.group(1)) 
1483                 video_id  = os.path.basename(video_url) 
1485                 # here's a fun little line of code for you: 
1486                 video_extension = os.path.splitext(video_id)[1][1:] 
1487                 video_id        = os.path.splitext(video_id)[0] 
1489                 # it's tempting to parse this further, but you would 
1490                 # have to take into account all the variations like 
1491                 #   Video Title - Site Name 
1492                 #   Site Name | Video Title 
1493                 #   Video Title - Tagline | Site Name 
1494                 # and so on and so forth; it's just not practical 
1495                 mobj = re.search(r'<title>(.*)</title>', webpage) 
1497                         self._downloader.trouble(u'ERROR: unable to extract title') 
1499                 video_title = mobj.group(1).decode('utf-8') 
1500                 video_title = sanitize_title(video_title) 
1501                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) 
1503                 # video uploader is domain name 
1504                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) 
1506                         self._downloader.trouble(u'ERROR: unable to extract title') 
1508                 video_uploader = mobj.group(1).decode('utf-8') 
1511                         # Process video information 
1512                         self._downloader.process_info({ 
1513                                 'id':           video_id.decode('utf-8'), 
1514                                 'url':          video_url.decode('utf-8'), 
1515                                 'uploader':     video_uploader, 
1516                                 'title':        video_title, 
1517                                 'stitle':       simple_title, 
1518                                 'ext':          video_extension.decode('utf-8'), 
1522                 except UnavailableFormatError: 
1523                         self._downloader.trouble(u'ERROR: format not available for video') 
class YoutubeSearchIE(InfoExtractor):
	"""Information Extractor for YouTube search queries.

	Accepts pseudo-URLs of the form "ytsearch<N>:terms" (N empty means 1,
	"all" means the maximum), scrapes result pages and hands every found
	video id to the wrapped YoutubeIE.
	"""

	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+'
	_TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en'
	_VIDEO_INDICATOR = r'href="/watch\?v=.+?"'
	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
	_youtube_ie = None
	_max_youtube_results = 1000

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	def suitable(self, url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None)

	def report_download_page(self, query, pagenum):
		"""Report attempt to download playlist page with given number."""
		query = query.decode(preferredencoding())
		self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum))

	def _real_initialize(self):
		self._youtube_ie.initialize()

	def _real_extract(self, query):
		mobj = re.match(self._VALID_QUERY, query)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
			return

		prefix, query = query.split(':')
		prefix = prefix[8:]  # drop the leading "ytsearch"
		query = query.encode('utf-8')
		if prefix == '':
			self._download_n_results(query, 1)
			return
		elif prefix == 'all':
			self._download_n_results(query, self._max_youtube_results)
			return
		else:
			try:
				count = long(prefix)
				if count <= 0:
					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (count, query))
					return
				elif count > self._max_youtube_results:
					self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, count))
					count = self._max_youtube_results
				self._download_n_results(query, count)
				return
			except ValueError: # parsing prefix as integer fails
				self._download_n_results(query, 1)
				return

	def _download_n_results(self, query, n):
		"""Downloads a specified number of results for a query"""

		video_ids = []
		already_seen = set()
		pagenum = 1

		while True:
			self.report_download_page(query, pagenum)
			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
			request = urllib2.Request(result_url, None, std_headers)
			try:
				page = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
				return

			# Extract video identifiers: the match looks like
			# href="/watch?v=ID", so the id is between the second '='
			# and the closing quote.
			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
				video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1]
				if video_id not in already_seen:
					video_ids.append(video_id)
					already_seen.add(video_id)
					if len(video_ids) == n:
						# Specified n videos reached
						for video_id in video_ids:
							self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
						return

			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
				# No more result pages; flush what we collected.
				for video_id in video_ids:
					self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
				return

			pagenum += 1
class GoogleSearchIE(InfoExtractor):
	"""Information Extractor for Google Video search queries.

	Accepts pseudo-URLs of the form "gvsearch<N>:terms" (N empty means 1,
	"all" means the maximum) and delegates each found docid to the
	wrapped GoogleIE.
	"""

	_VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+'
	_TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en'
	_VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&'
	_MORE_PAGES_INDICATOR = r'<span>Next</span>'
	_google_ie = None
	_max_google_results = 1000

	def __init__(self, google_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._google_ie = google_ie

	def suitable(self, url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None)

	def report_download_page(self, query, pagenum):
		"""Report attempt to download playlist page with given number."""
		query = query.decode(preferredencoding())
		self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum))

	def _real_initialize(self):
		self._google_ie.initialize()

	def _real_extract(self, query):
		mobj = re.match(self._VALID_QUERY, query)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
			return

		prefix, query = query.split(':')
		prefix = prefix[8:]  # drop the leading "gvsearch"
		query = query.encode('utf-8')
		if prefix == '':
			self._download_n_results(query, 1)
			return
		elif prefix == 'all':
			self._download_n_results(query, self._max_google_results)
			return
		else:
			try:
				count = long(prefix)
				if count <= 0:
					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (count, query))
					return
				elif count > self._max_google_results:
					self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, count))
					count = self._max_google_results
				self._download_n_results(query, count)
				return
			except ValueError: # parsing prefix as integer fails
				self._download_n_results(query, 1)
				return

	def _download_n_results(self, query, n):
		"""Downloads a specified number of results for a query"""

		video_ids = []
		already_seen = set()
		pagenum = 1

		while True:
			self.report_download_page(query, pagenum)
			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
			request = urllib2.Request(result_url, None, std_headers)
			try:
				page = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
				return

			# Extract video identifiers
			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
				video_id = mobj.group(1)
				if video_id not in already_seen:
					video_ids.append(video_id)
					already_seen.add(video_id)
					if len(video_ids) == n:
						# Specified n videos reached
						for video_id in video_ids:
							self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % video_id)
						return

			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
				# No more result pages; flush what we collected.
				for video_id in video_ids:
					self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % video_id)
				return

			pagenum += 1
class YahooSearchIE(InfoExtractor):
	"""Information Extractor for Yahoo! Video search queries.

	Accepts pseudo-URLs of the form "yvsearch<N>:terms" (N empty means 1,
	"all" means the maximum) and delegates each found video path to the
	wrapped YahooIE.
	"""

	_VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+'
	_TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s'
	_VIDEO_INDICATOR = r'href="http://video\.yahoo\.com/watch/([0-9]+/[0-9]+)"'
	_MORE_PAGES_INDICATOR = r'\s*Next'
	_yahoo_ie = None
	_max_yahoo_results = 1000

	def __init__(self, yahoo_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._yahoo_ie = yahoo_ie

	def suitable(self, url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(YahooSearchIE._VALID_QUERY, url) is not None)

	def report_download_page(self, query, pagenum):
		"""Report attempt to download playlist page with given number."""
		query = query.decode(preferredencoding())
		self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum))

	def _real_initialize(self):
		self._yahoo_ie.initialize()

	def _real_extract(self, query):
		mobj = re.match(self._VALID_QUERY, query)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid search query "%s"' % query)
			return

		prefix, query = query.split(':')
		prefix = prefix[8:]  # drop the leading "yvsearch"
		query = query.encode('utf-8')
		if prefix == '':
			self._download_n_results(query, 1)
			return
		elif prefix == 'all':
			self._download_n_results(query, self._max_yahoo_results)
			return
		else:
			try:
				count = long(prefix)
				if count <= 0:
					self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (count, query))
					return
				elif count > self._max_yahoo_results:
					self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, count))
					count = self._max_yahoo_results
				self._download_n_results(query, count)
				return
			except ValueError: # parsing prefix as integer fails
				self._download_n_results(query, 1)
				return

	def _download_n_results(self, query, n):
		"""Downloads a specified number of results for a query"""

		video_ids = []
		already_seen = set()
		pagenum = 1

		while True:
			self.report_download_page(query, pagenum)
			result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum)
			request = urllib2.Request(result_url, None, std_headers)
			try:
				page = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
				return

			# Extract video identifiers
			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
				video_id = mobj.group(1)
				if video_id not in already_seen:
					video_ids.append(video_id)
					already_seen.add(video_id)
					if len(video_ids) == n:
						# Specified n videos reached
						for video_id in video_ids:
							self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % video_id)
						return

			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
				# No more result pages; flush what we collected.
				for video_id in video_ids:
					self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % video_id)
				return

			pagenum += 1
class YoutubePlaylistIE(InfoExtractor):
	"""Information Extractor for YouTube playlists.

	Walks every page of a playlist (or a user's playlist view), collects
	the video ids in order, and delegates each one to the wrapped
	YoutubeIE.
	"""

	# BUG FIX: the domain dot was unescaped ("youtube.com"), so the
	# pattern also matched e.g. "youtubeXcom"; now escaped.
	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube\.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*'
	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	def suitable(self, url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None)

	def report_download_page(self, playlist_id, pagenum):
		"""Report attempt to download playlist page with given number."""
		self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum))

	def _real_initialize(self):
		self._youtube_ie.initialize()

	def _real_extract(self, url):
		# Extract playlist id
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid url: %s' % url)
			return

		# Download playlist pages
		playlist_id = mobj.group(1)
		video_ids = []
		pagenum = 1

		while True:
			self.report_download_page(playlist_id, pagenum)
			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers)
			try:
				page = urllib2.urlopen(request).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
				return

			# Extract video identifiers, preserving page order and
			# dropping duplicates within a page.
			ids_in_page = []
			for mobj in re.finditer(self._VIDEO_INDICATOR, page):
				if mobj.group(1) not in ids_in_page:
					ids_in_page.append(mobj.group(1))
			video_ids.extend(ids_in_page)

			if re.search(self._MORE_PAGES_INDICATOR, page) is None:
				break
			pagenum = pagenum + 1

		for video_id in video_ids:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
		return
class YoutubeUserIE(InfoExtractor):
	"""Information Extractor for YouTube users.

	Downloads a user's GData feed, collects the video ids found in it,
	and delegates each one to the wrapped YoutubeIE.
	"""

	# BUG FIX: the domain dot was unescaped ("youtube.com"), so the
	# pattern also matched e.g. "youtubeXcom"; now escaped.
	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube\.com/user/(.*)'
	_TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s'
	_VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this.
	_youtube_ie = None

	def __init__(self, youtube_ie, downloader=None):
		InfoExtractor.__init__(self, downloader)
		self._youtube_ie = youtube_ie

	def suitable(self, url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(YoutubeUserIE._VALID_URL, url) is not None)

	def report_download_page(self, username):
		"""Report attempt to download user page."""
		self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username))

	def _real_initialize(self):
		self._youtube_ie.initialize()

	def _real_extract(self, url):
		# Extract username
		mobj = re.match(self._VALID_URL, url)
		if mobj is None:
			self._downloader.trouble(u'ERROR: invalid url: %s' % url)
			return

		# Download user page
		username = mobj.group(1)
		video_ids = []

		self.report_download_page(username)
		request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers)
		try:
			page = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error) as err:
			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err))
			return

		# Extract video identifiers, preserving feed order and
		# dropping duplicates.
		ids_in_page = []
		for mobj in re.finditer(self._VIDEO_INDICATOR, page):
			if mobj.group(1) not in ids_in_page:
				ids_in_page.append(mobj.group(1))
		video_ids.extend(ids_in_page)

		for video_id in video_ids:
			self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id)
class PostProcessor(object):
	"""Base class for post-processing steps.

	PostProcessor objects can be added to downloaders with their
	add_post_processor() method. When the downloader has finished a
	successful download, it walks its chain of PostProcessors, calling
	run() on each one: first with an initial argument, then with the
	value returned by the previous PostProcessor in the chain.

	The chain stops as soon as one of them returns None, or when the end
	of the chain is reached.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	"""

	# Downloader this PP is registered with (set via constructor or
	# set_downloader()).
	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		composed by InfoExtractors, extended with a "filepath" field
		pointing to the downloaded file.

		Returning None stops the postprocessing chain; returning a
		dictionary (possibly the received one, with some fields
		changed) passes it on to the next PostProcessor.

		This method may also raise a PostProcessingError exception,
		which the downloader takes into account.
		"""
		return information # by default, do nothing
1960 ### MAIN PROGRAM ### 
1961 if __name__ == '__main__': 
1963                 # Modules needed only when running the main program 
1967                 # Function to update the program file with the latest version from bitbucket.org 
1968                 def update_self(downloader, filename): 
1969                         # Note: downloader only used for options 
1970                         if not os.access (filename, os.W_OK): 
1971                                 sys.exit('ERROR: no write permissions on %s' % filename) 
1973                         downloader.to_stdout('Updating to latest stable version...') 
1974                         latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' 
1975                         latest_version = urllib.urlopen(latest_url).read().strip() 
1976                         prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version 
1977                         newcontent = urllib.urlopen(prog_url).read() 
1978                         stream = open(filename, 'w') 
1979                         stream.write(newcontent) 
1981                         downloader.to_stdout('Updated to version %s' % latest_version) 
1983                 # General configuration 
1984                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) 
1985                 urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor())) 
1986                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) 
1988                 # Parse command line 
1989                 parser = optparse.OptionParser( 
1990                         usage='Usage: %prog [options] url...', 
1991                         version='2010.06.06', 
1992                         conflict_handler='resolve', 
1995                 parser.add_option('-h', '--help', 
1996                                 action='help', help='print this help text and exit') 
1997                 parser.add_option('-v', '--version', 
1998                                 action='version', help='print program version and exit') 
1999                 parser.add_option('-U', '--update', 
2000                                 action='store_true', dest='update_self', help='update this program to latest stable version') 
2001                 parser.add_option('-i', '--ignore-errors', 
2002                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) 
2003                 parser.add_option('-r', '--rate-limit', 
2004                                 dest='ratelimit', metavar='L', help='download rate limit (e.g. 50k or 44.6m)') 
2005                 parser.add_option('-R', '--retries', 
2006                                 dest='retries', metavar='T', help='number of retries (default is 10)', default=10) 
2008                 authentication = optparse.OptionGroup(parser, 'Authentication Options') 
2009                 authentication.add_option('-u', '--username', 
2010                                 dest='username', metavar='UN', help='account username') 
2011                 authentication.add_option('-p', '--password', 
2012                                 dest='password', metavar='PW', help='account password') 
2013                 authentication.add_option('-n', '--netrc', 
2014                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) 
2015                 parser.add_option_group(authentication) 
2017                 video_format = optparse.OptionGroup(parser, 'Video Format Options') 
2018                 video_format.add_option('-f', '--format', 
2019                                 action='store', dest='format', metavar='FMT', help='video format code') 
2020                 video_format.add_option('-b', '--best-quality', 
2021                                 action='store_const', dest='format', help='download the best quality video possible', const='0') 
2022                 video_format.add_option('-m', '--mobile-version', 
2023                                 action='store_const', dest='format', help='alias for -f 17', const='17') 
2024                 video_format.add_option('-d', '--high-def', 
2025                                 action='store_const', dest='format', help='alias for -f 22', const='22') 
2026                 video_format.add_option('--all-formats', 
2027                                 action='store_const', dest='format', help='download all available video formats', const='-1') 
2028                 parser.add_option_group(video_format) 
2030                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') 
2031                 verbosity.add_option('-q', '--quiet', 
2032                                 action='store_true', dest='quiet', help='activates quiet mode', default=False) 
2033                 verbosity.add_option('-s', '--simulate', 
2034                                 action='store_true', dest='simulate', help='do not download video', default=False) 
2035                 verbosity.add_option('-g', '--get-url', 
2036                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) 
2037                 verbosity.add_option('-e', '--get-title', 
2038                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) 
2039                 verbosity.add_option('--get-thumbnail', 
2040                                 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False) 
2041                 verbosity.add_option('--get-description', 
2042                                 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False) 
2043                 verbosity.add_option('--no-progress', 
2044                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False) 
2045                 parser.add_option_group(verbosity) 
2047                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options') 
2048                 filesystem.add_option('-t', '--title', 
2049                                 action='store_true', dest='usetitle', help='use title in file name', default=False) 
2050                 filesystem.add_option('-l', '--literal', 
2051                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False) 
2052                 filesystem.add_option('-o', '--output', 
2053                                 dest='outtmpl', metavar='TPL', help='output filename template') 
2054                 filesystem.add_option('-a', '--batch-file', 
2055                                 dest='batchfile', metavar='F', help='file containing URLs to download (\'-\' for stdin)') 
2056                 filesystem.add_option('-w', '--no-overwrites', 
2057                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) 
2058                 filesystem.add_option('-c', '--continue', 
2059                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) 
2060                 parser.add_option_group(filesystem) 
2062                 (opts, args) = parser.parse_args() 
2064                 # Batch file verification 
2066                 if opts.batchfile is not None: 
2068                                 if opts.batchfile == '-': 
2071                                         batchfd = open(opts.batchfile, 'r') 
2072                                 batchurls = batchfd.readlines() 
2073                                 batchurls = [x.strip() for x in batchurls] 
2074                                 batchurls = [x for x in batchurls if len(x) > 0] 
2076                                 sys.exit(u'ERROR: batch file could not be read') 
2077                 all_urls = batchurls + args 
2079                 # Conflicting, missing and erroneous options 
2080                 if opts.usenetrc and (opts.username is not None or opts.password is not None): 
2081                         parser.error(u'using .netrc conflicts with giving username/password') 
2082                 if opts.password is not None and opts.username is None: 
2083                         parser.error(u'account username missing') 
2084                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle): 
2085                         parser.error(u'using output template conflicts with using title or literal title') 
2086                 if opts.usetitle and opts.useliteral: 
2087                         parser.error(u'using title conflicts with using literal title') 
2088                 if opts.username is not None and opts.password is None: 
2089                         opts.password = getpass.getpass(u'Type account password and press return:') 
2090                 if opts.ratelimit is not None: 
2091                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) 
2092                         if numeric_limit is None: 
2093                                 parser.error(u'invalid rate limit specified') 
2094                         opts.ratelimit = numeric_limit 
2095                 if opts.retries is not None: 
2097                                 opts.retries = long(opts.retries) 
2098                         except (TypeError, ValueError), err: 
2099                                 parser.error(u'invalid retry count specified') 
2101                 # Information extractors 
2102                 youtube_ie = YoutubeIE() 
2103                 metacafe_ie = MetacafeIE(youtube_ie) 
2104                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie) 
2105                 youtube_user_ie = YoutubeUserIE(youtube_ie) 
2106                 youtube_search_ie = YoutubeSearchIE(youtube_ie) 
2107                 google_ie = GoogleIE() 
2108                 google_search_ie = GoogleSearchIE(google_ie) 
2109                 photobucket_ie = PhotobucketIE() 
2110                 yahoo_ie = YahooIE() 
2111                 yahoo_search_ie = YahooSearchIE(yahoo_ie) 
2112                 generic_ie = GenericIE() 
2115                 fd = FileDownloader({ 
2116                         'usenetrc': opts.usenetrc, 
2117                         'username': opts.username, 
2118                         'password': opts.password, 
2119                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 
2120                         'forceurl': opts.geturl, 
2121                         'forcetitle': opts.gettitle, 
2122                         'forcethumbnail': opts.getthumbnail, 
2123                         'forcedescription': opts.getdescription, 
2124                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 
2125                         'format': opts.format, 
2126                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) 
2127                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') 
2128                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') 
2129                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') 
2130                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') 
2131                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') 
2132                                 or u'%(id)s.%(ext)s'), 
2133                         'ignoreerrors': opts.ignoreerrors, 
2134                         'ratelimit': opts.ratelimit, 
2135                         'nooverwrites': opts.nooverwrites, 
2136                         'retries': opts.retries, 
2137                         'continuedl': opts.continue_dl, 
2138                         'noprogress': opts.noprogress, 
2140                 fd.add_info_extractor(youtube_search_ie) 
2141                 fd.add_info_extractor(youtube_pl_ie) 
2142                 fd.add_info_extractor(youtube_user_ie) 
2143                 fd.add_info_extractor(metacafe_ie) 
2144                 fd.add_info_extractor(youtube_ie) 
2145                 fd.add_info_extractor(google_ie) 
2146                 fd.add_info_extractor(google_search_ie) 
2147                 fd.add_info_extractor(photobucket_ie) 
2148                 fd.add_info_extractor(yahoo_ie) 
2149                 fd.add_info_extractor(yahoo_search_ie) 
2151                 # This must come last since it's the 
2152                 # fallback if none of the others work 
2153                 fd.add_info_extractor(generic_ie) 
2156                 if opts.update_self: 
2157                         update_self(fd, sys.argv[0]) 
2160                 if len(all_urls) < 1: 
2161                         if not opts.update_self: 
2162                                 parser.error(u'you must provide at least one URL') 
2165                 retcode = fd.download(all_urls) 
2168         except DownloadError: 
2170         except SameFileError: 
2171                 sys.exit(u'ERROR: fixed output name but more than one file to download') 
2172         except KeyboardInterrupt: 
2173                 sys.exit(u'\nERROR: Interrupted by user')