]>
Raphaël G. Git Repositories - youtube-dl/blob - youtube-dl
   2 # -*- coding: utf-8 -*- 
   3 # Author: Ricardo Garcia Gonzalez 
   4 # Author: Danny Colligan 
   5 # Author: Benjamin Johnson 
   6 # License: Public domain code 
  23 # parse_qs was moved from the cgi module to the urlparse module recently. 
  25         from urlparse 
import parse_qs
 
  27         from cgi 
import parse_qs
 
  30         'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.6) Gecko/20100627 Firefox/3.6.6', 
  31         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  32         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  33         'Accept-Language': 'en-us,en;q=0.5', 
  36 simple_title_chars 
= string
.ascii_letters
.decode('ascii') + string
.digits
.decode('ascii') 
  38 def preferredencoding(): 
  39         """Get preferred encoding. 
  41         Returns the best encoding scheme for the system, based on 
  42         locale.getpreferredencoding() and some further tweaks. 
  44         def yield_preferredencoding(): 
  46                         pref 
= locale
.getpreferredencoding() 
  52         return yield_preferredencoding().next() 
  54 def htmlentity_transform(matchobj
): 
  55         """Transforms an HTML entity to a Unicode character. 
  57         This function receives a match object and is intended to be used with 
  58         the re.sub() function. 
  60         entity 
= matchobj
.group(1) 
  62         # Known non-numeric HTML entity 
  63         if entity 
in htmlentitydefs
.name2codepoint
: 
  64                 return unichr(htmlentitydefs
.name2codepoint
[entity
]) 
  67         mobj 
= re
.match(ur
'(?u)#(x?\d+)', entity
) 
  69                 numstr 
= mobj
.group(1) 
  70                 if numstr
.startswith(u
'x'): 
  72                         numstr 
= u
'0%s' % numstr
 
  75                 return unichr(long(numstr
, base
)) 
  77         # Unknown entity in name, return its literal representation 
  78         return (u
'&%s;' % entity
) 
  80 def sanitize_title(utitle
): 
  81         """Sanitizes a video title so it could be used as part of a filename.""" 
  82         utitle 
= re
.sub(ur
'(?u)&(.+?);', htmlentity_transform
, utitle
) 
  83         return utitle
.replace(unicode(os
.sep
), u
'%') 
  85 def sanitize_open(filename
, open_mode
): 
  86         """Try to open the given filename, and slightly tweak it if this fails. 
  88         Attempts to open the given filename. If this fails, it tries to change 
  89         the filename slightly, step by step, until it's either able to open it 
  90         or it fails and raises a final exception, like the standard open() 
  93         It returns the tuple (stream, definitive_file_name). 
  97                         return (sys
.stdout
, filename
) 
  98                 stream 
= open(filename
, open_mode
) 
  99                 return (stream
, filename
) 
 100         except (IOError, OSError), err
: 
 101                 # In case of error, try to remove win32 forbidden chars 
 102                 filename 
= re
.sub(ur
'[/<>:"\|\?\*]', u
'#', filename
) 
 104                 # An exception here should be caught in the caller 
 105                 stream 
= open(filename
, open_mode
) 
 106                 return (stream
, filename
) 
 109 class DownloadError(Exception): 
 110         """Download Error exception. 
 112         This exception may be thrown by FileDownloader objects if they are not 
 113         configured to continue on errors. They will contain the appropriate 
 118 class SameFileError(Exception): 
 119         """Same File exception. 
 121         This exception will be thrown by FileDownloader objects if they detect 
 122         multiple files would have to be downloaded to the same file on disk. 
 126 class PostProcessingError(Exception): 
 127         """Post Processing exception. 
 129         This exception may be raised by PostProcessor's .run() method to 
 130         indicate an error in the postprocessing task. 
 134 class UnavailableFormatError(Exception): 
 135         """Unavailable Format exception. 
 137         This exception will be thrown when a video is requested 
 138         in a format that is not available for that video. 
 142 class ContentTooShortError(Exception): 
 143         """Content Too Short exception. 
 145         This exception may be raised by FileDownloader objects when a file they 
 146         download is too small for what the server announced first, indicating 
 147         the connection was probably interrupted. 
 153         def __init__(self
, downloaded
, expected
): 
 154                 self
.downloaded 
= downloaded
 
 155                 self
.expected 
= expected
 
 157 class FileDownloader(object): 
 158         """File Downloader class. 
 160         File downloader objects are the ones responsible of downloading the 
 161         actual video file and writing it to disk if the user has requested 
 162         it, among some other tasks. In most cases there should be one per 
 163         program. As, given a video URL, the downloader doesn't know how to 
 164         extract all the needed information, task that InfoExtractors do, it 
 165         has to pass the URL to one of them. 
 167         For this, file downloader objects have a method that allows 
 168         InfoExtractors to be registered in a given order. When it is passed 
 169         a URL, the file downloader handles it to the first InfoExtractor it 
 170         finds that reports being able to handle it. The InfoExtractor extracts 
 171         all the information about the video or videos the URL refers to, and 
 172         asks the FileDownloader to process the video information, possibly 
 173         downloading the video. 
 175         File downloaders accept a lot of parameters. In order not to saturate 
 176         the object constructor with arguments, it receives a dictionary of 
 177         options instead. These options are available through the params 
 178         attribute for the InfoExtractors to use. The FileDownloader also 
 179         registers itself as the downloader in charge for the InfoExtractors 
 180         that are added to it, so this is a "mutual registration". 
 184         username:       Username for authentication purposes. 
 185         password:       Password for authentication purposes. 
 186         usenetrc:       Use netrc for authentication instead. 
 187         quiet:          Do not print messages to stdout. 
 188         forceurl:       Force printing final URL. 
 189         forcetitle:     Force printing title. 
 190         simulate:       Do not download the video files. 
 191         format:         Video format code. 
 192         format_limit:   Highest quality format to try. 
 193         outtmpl:        Template for output names. 
 194         ignoreerrors:   Do not stop on download errors. 
 195         ratelimit:      Download speed limit, in bytes/sec. 
 196         nooverwrites:   Prevent overwriting files. 
 197         retries:        Number of times to retry for HTTP error 503 
 198         continuedl:     Try to continue downloads if possible. 
 199         noprogress:     Do not print the progress bar. 
 205         _download_retcode 
= None 
 206         _num_downloads 
= None 
 208         def __init__(self
, params
): 
 209                 """Create a FileDownloader object with the given options.""" 
 212                 self
._download
_retcode 
= 0 
 213                 self
._num
_downloads 
= 0 
 217         def pmkdir(filename
): 
 218                 """Create directory components in filename. Similar to Unix "mkdir -p".""" 
 219                 components 
= filename
.split(os
.sep
) 
 220                 aggregate 
= [os
.sep
.join(components
[0:x
]) for x 
in xrange(1, len(components
))] 
 221                 aggregate 
= ['%s%s' % (x
, os
.sep
) for x 
in aggregate
] # Finish names with separator 
 222                 for dir in aggregate
: 
 223                         if not os
.path
.exists(dir): 
 227         def format_bytes(bytes): 
 230                 if type(bytes) is str: 
 235                         exponent 
= long(math
.log(bytes, 1024.0)) 
 236                 suffix 
= 'bkMGTPEZY'[exponent
] 
 237                 converted 
= float(bytes) / float(1024**exponent
) 
 238                 return '%.2f%s' % (converted
, suffix
) 
 241         def calc_percent(byte_counter
, data_len
): 
 244                 return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0)) 
 247         def calc_eta(start
, now
, total
, current
): 
 251                 if current 
== 0 or dif 
< 0.001: # One millisecond 
 253                 rate 
= float(current
) / dif
 
 254                 eta 
= long((float(total
) - float(current
)) / rate
) 
 255                 (eta_mins
, eta_secs
) = divmod(eta
, 60) 
 258                 return '%02d:%02d' % (eta_mins
, eta_secs
) 
 261         def calc_speed(start
, now
, bytes): 
 263                 if bytes == 0 or dif 
< 0.001: # One millisecond 
 264                         return '%10s' % '---b/s' 
 265                 return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
)) 
 268         def best_block_size(elapsed_time
, bytes): 
 269                 new_min 
= max(bytes / 2.0, 1.0) 
 270                 new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
 271                 if elapsed_time 
< 0.001: 
 273                 rate 
= bytes / elapsed_time
 
 281         def parse_bytes(bytestr
): 
 282                 """Parse a string indicating a byte quantity into a long integer.""" 
 283                 matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 286                 number 
= float(matchobj
.group(1)) 
 287                 multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 288                 return long(round(number 
* multiplier
)) 
 292                 """Verify a URL is valid and data could be downloaded. Return real data URL.""" 
 293                 request 
= urllib2
.Request(url
, None, std_headers
) 
 294                 data 
= urllib2
.urlopen(request
) 
 300         def add_info_extractor(self
, ie
): 
 301                 """Add an InfoExtractor object to the end of the list.""" 
 303                 ie
.set_downloader(self
) 
 305         def add_post_processor(self
, pp
): 
 306                 """Add a PostProcessor object to the end of the chain.""" 
 308                 pp
.set_downloader(self
) 
 310         def to_stdout(self
, message
, skip_eol
=False, ignore_encoding_errors
=False): 
 311                 """Print message to stdout if not in quiet mode.""" 
 313                         if not self
.params
.get('quiet', False): 
 314                                 print (u
'%s%s' % (message
, [u
'\n', u
''][skip_eol
])).encode(preferredencoding()), 
 316                 except (UnicodeEncodeError), err
: 
 317                         if not ignore_encoding_errors
: 
 320         def to_stderr(self
, message
): 
 321                 """Print message to stderr.""" 
 322                 print >>sys
.stderr
, message
.encode(preferredencoding()) 
 324         def fixed_template(self
): 
 325                 """Checks if the output template is fixed.""" 
 326                 return (re
.search(ur
'(?u)%\(.+?\)s', self
.params
['outtmpl']) is None) 
 328         def trouble(self
, message
=None): 
 329                 """Determine action to take when a download problem appears. 
 331                 Depending on if the downloader has been configured to ignore 
 332                 download errors or not, this method may throw an exception or 
 333                 not when errors are found, after printing the message. 
 335                 if message 
is not None: 
 336                         self
.to_stderr(message
) 
 337                 if not self
.params
.get('ignoreerrors', False): 
 338                         raise DownloadError(message
) 
 339                 self
._download
_retcode 
= 1 
 341         def slow_down(self
, start_time
, byte_counter
): 
 342                 """Sleep if the download speed is over the rate limit.""" 
 343                 rate_limit 
= self
.params
.get('ratelimit', None) 
 344                 if rate_limit 
is None or byte_counter 
== 0: 
 347                 elapsed 
= now 
- start_time
 
 350                 speed 
= float(byte_counter
) / elapsed
 
 351                 if speed 
> rate_limit
: 
 352                         time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 354         def report_destination(self
, filename
): 
 355                 """Report destination filename.""" 
 356                 self
.to_stdout(u
'[download] Destination: %s' % filename
, ignore_encoding_errors
=True) 
 358         def report_progress(self
, percent_str
, data_len_str
, speed_str
, eta_str
): 
 359                 """Report download progress.""" 
 360                 if self
.params
.get('noprogress', False): 
 362                 self
.to_stdout(u
'\r[download] %s of %s at %s ETA %s' % 
 363                                 (percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 365         def report_resuming_byte(self
, resume_len
): 
 366                 """Report attemtp to resume at given byte.""" 
 367                 self
.to_stdout(u
'[download] Resuming download at byte %s' % resume_len
) 
 369         def report_retry(self
, count
, retries
): 
 370                 """Report retry in case of HTTP error 503""" 
 371                 self
.to_stdout(u
'[download] Got HTTP error 503. Retrying (attempt %d of %d)...' % (count
, retries
)) 
 373         def report_file_already_downloaded(self
, file_name
): 
 374                 """Report file has already been fully downloaded.""" 
 376                         self
.to_stdout(u
'[download] %s has already been downloaded' % file_name
) 
 377                 except (UnicodeEncodeError), err
: 
 378                         self
.to_stdout(u
'[download] The file has already been downloaded') 
 380         def report_unable_to_resume(self
): 
 381                 """Report it was impossible to resume download.""" 
 382                 self
.to_stdout(u
'[download] Unable to resume') 
 384         def report_finish(self
): 
 385                 """Report download finished.""" 
 386                 if self
.params
.get('noprogress', False): 
 387                         self
.to_stdout(u
'[download] Download completed') 
 391         def increment_downloads(self
): 
 392                 """Increment the ordinal that assigns a number to each file.""" 
 393                 self
._num
_downloads 
+= 1 
 395         def process_info(self
, info_dict
): 
 396                 """Process a single dictionary returned by an InfoExtractor.""" 
 397                 # Do nothing else if in simulate mode 
 398                 if self
.params
.get('simulate', False): 
 399                         # Verify URL if it's an HTTP one 
 400                         if info_dict
['url'].startswith('http'): 
 402                                         self
.verify_url(info_dict
['url'].encode('utf-8')).decode('utf-8') 
 403                                 except (OSError, IOError, urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 404                                         raise UnavailableFormatError
 
 407                         if self
.params
.get('forcetitle', False): 
 408                                 print info_dict
['title'].encode(preferredencoding(), 'xmlcharrefreplace') 
 409                         if self
.params
.get('forceurl', False): 
 410                                 print info_dict
['url'].encode(preferredencoding(), 'xmlcharrefreplace') 
 411                         if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
: 
 412                                 print info_dict
['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') 
 413                         if self
.params
.get('forcedescription', False) and 'description' in info_dict
: 
 414                                 print info_dict
['description'].encode(preferredencoding(), 'xmlcharrefreplace') 
 419                         template_dict 
= dict(info_dict
) 
 420                         template_dict
['epoch'] = unicode(long(time
.time())) 
 421                         template_dict
['ord'] = unicode('%05d' % self
._num
_downloads
) 
 422                         filename 
= self
.params
['outtmpl'] % template_dict
 
 423                 except (ValueError, KeyError), err
: 
 424                         self
.trouble('ERROR: invalid output template or system charset: %s' % str(err
)) 
 425                 if self
.params
.get('nooverwrites', False) and os
.path
.exists(filename
): 
 426                         self
.to_stderr(u
'WARNING: file exists: %s; skipping' % filename
) 
 430                         self
.pmkdir(filename
) 
 431                 except (OSError, IOError), err
: 
 432                         self
.trouble('ERROR: unable to create directories: %s' % str(err
)) 
 436                         success 
= self
._do
_download
(filename
, info_dict
['url'].encode('utf-8'), info_dict
.get('player_url', None)) 
 437                 except (OSError, IOError), err
: 
 438                         raise UnavailableFormatError
 
 439                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 440                         self
.trouble('ERROR: unable to download video data: %s' % str(err
)) 
 442                 except (ContentTooShortError
, ), err
: 
 443                         self
.trouble('ERROR: content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
)) 
 448                                 self
.post_process(filename
, info_dict
) 
 449                         except (PostProcessingError
), err
: 
 450                                 self
.trouble('ERROR: postprocessing: %s' % str(err
)) 
 453         def download(self
, url_list
): 
 454                 """Download a given list of URLs.""" 
 455                 if len(url_list
) > 1 and self
.fixed_template(): 
 456                         raise SameFileError(self
.params
['outtmpl']) 
 459                         suitable_found 
= False 
 461                                 # Go to next InfoExtractor if not suitable 
 462                                 if not ie
.suitable(url
): 
 465                                 # Suitable InfoExtractor found 
 466                                 suitable_found 
= True 
 468                                 # Extract information from URL and process it 
 471                                 # Suitable InfoExtractor had been found; go to next URL 
 474                         if not suitable_found
: 
 475                                 self
.trouble('ERROR: no suitable InfoExtractor: %s' % url
) 
 477                 return self
._download
_retcode
 
 479         def post_process(self
, filename
, ie_info
): 
 480                 """Run the postprocessing chain on the given file.""" 
 482                 info
['filepath'] = filename
 
 488         def _download_with_rtmpdump(self
, filename
, url
, player_url
): 
 489                 self
.report_destination(filename
) 
 491                 # Check for rtmpdump first 
 493                         subprocess
.call(['rtmpdump', '-h'], stdout
=(file(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 494                 except (OSError, IOError): 
 495                         self
.trouble(u
'ERROR: RTMP download detected but "rtmpdump" could not be run') 
 498                 # Download using rtmpdump. rtmpdump returns exit code 2 when 
 499                 # the connection was interrumpted and resuming appears to be 
 500                 # possible. This is part of rtmpdump's normal usage, AFAIK. 
 501                 basic_args 
= ['rtmpdump', '-q'] + [[], ['-W', player_url
]][player_url 
is not None] + ['-r', url
, '-o', filename
] 
 502                 retval 
= subprocess
.call(basic_args 
+ [[], ['-e', '-k', '1']][self
.params
.get('continuedl', False)]) 
 503                 while retval 
== 2 or retval 
== 1: 
 504                         prevsize 
= os
.path
.getsize(filename
) 
 505                         self
.to_stdout(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 506                         time
.sleep(5.0) # This seems to be needed 
 507                         retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 508                         cursize 
= os
.path
.getsize(filename
) 
 509                         if prevsize 
== cursize 
and retval 
== 1: 
 512                         self
.to_stdout(u
'\r[rtmpdump] %s bytes' % os
.path
.getsize(filename
)) 
 515                         self
.trouble('\nERROR: rtmpdump exited with code %d' % retval
) 
 518         def _do_download(self
, filename
, url
, player_url
): 
 519                 # Attempt to download using rtmpdump 
 520                 if url
.startswith('rtmp'): 
 521                         return self
._download
_with
_rtmpdump
(filename
, url
, player_url
) 
 525                 basic_request 
= urllib2
.Request(url
, None, std_headers
) 
 526                 request 
= urllib2
.Request(url
, None, std_headers
) 
 528                 # Establish possible resume length 
 529                 if os
.path
.isfile(filename
): 
 530                         resume_len 
= os
.path
.getsize(filename
) 
 534                 # Request parameters in case of being able to resume 
 535                 if self
.params
.get('continuedl', False) and resume_len 
!= 0: 
 536                         self
.report_resuming_byte(resume_len
) 
 537                         request
.add_header('Range','bytes=%d-' % resume_len
) 
 541                 retries 
= self
.params
.get('retries', 0) 
 543                         # Establish connection 
 545                                 data 
= urllib2
.urlopen(request
) 
 547                         except (urllib2
.HTTPError
, ), err
: 
 549                                         # Retry in case of HTTP error 503 
 552                                                 self
.report_retry(count
, retries
) 
 554                                 if err
.code 
!= 416: #  416 is 'Requested range not satisfiable' 
 557                                 data 
= urllib2
.urlopen(basic_request
) 
 558                                 content_length 
= data
.info()['Content-Length'] 
 560                                 if content_length 
is not None and long(content_length
) == resume_len
: 
 561                                         # Because the file had already been fully downloaded 
 562                                         self
.report_file_already_downloaded(filename
) 
 565                                         # Because the server didn't let us 
 566                                         self
.report_unable_to_resume() 
 569                 data_len 
= data
.info().get('Content-length', None) 
 570                 data_len_str 
= self
.format_bytes(data_len
) 
 577                         data_block 
= data
.read(block_size
) 
 579                         data_block_len 
= len(data_block
) 
 580                         if data_block_len 
== 0: 
 582                         byte_counter 
+= data_block_len
 
 584                         # Open file just in time 
 587                                         (stream
, filename
) = sanitize_open(filename
, open_mode
) 
 588                                         self
.report_destination(filename
) 
 589                                 except (OSError, IOError), err
: 
 590                                         self
.trouble('ERROR: unable to open for writing: %s' % str(err
)) 
 593                                 stream
.write(data_block
) 
 594                         except (IOError, OSError), err
: 
 595                                 self
.trouble('\nERROR: unable to write data: %s' % str(err
)) 
 596                         block_size 
= self
.best_block_size(after 
- before
, data_block_len
) 
 599                         percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
 600                         eta_str 
= self
.calc_eta(start
, time
.time(), data_len
, byte_counter
) 
 601                         speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter
) 
 602                         self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
 605                         self
.slow_down(start
, byte_counter
) 
 608                 if data_len 
is not None and str(byte_counter
) != data_len
: 
 609                         raise ContentTooShortError(byte_counter
, long(data_len
)) 
 612 class InfoExtractor(object): 
 613         """Information Extractor class. 
 615         Information extractors are the classes that, given a URL, extract 
 616         information from the video (or videos) the URL refers to. This 
 617         information includes the real video URL, the video title and simplified 
 618         title, author and others. The information is stored in a dictionary 
 619         which is then passed to the FileDownloader. The FileDownloader 
 620         processes this information possibly downloading the video to the file 
 621         system, among other possible outcomes. The dictionaries must include 
 622         the following fields: 
 624         id:             Video identifier. 
 625         url:            Final video URL. 
 626         uploader:       Nickname of the video uploader. 
 627         title:          Literal title. 
 628         stitle:         Simplified title. 
 629         ext:            Video filename extension. 
 630         format:         Video format. 
 631         player_url:     SWF Player URL (may be None). 
 633         The following fields are optional. Their primary purpose is to allow 
 634         youtube-dl to serve as the backend for a video search function, such 
 635         as the one in youtube2mp3.  They are only used when their respective 
 636         forced printing functions are called: 
 638         thumbnail:      Full URL to a video thumbnail image. 
 639         description:    One-line video description. 
 641         Subclasses of this one should re-define the _real_initialize() and 
 642         _real_extract() methods, as well as the suitable() static method. 
 643         Probably, they should also be instantiated and added to the main 
 650         def __init__(self
, downloader
=None): 
 651                 """Constructor. Receives an optional downloader.""" 
 653                 self
.set_downloader(downloader
) 
 657                 """Receives a URL and returns True if suitable for this IE.""" 
 660         def initialize(self
): 
 661                 """Initializes an instance (authentication, etc).""" 
 663                         self
._real
_initialize
() 
 666         def extract(self
, url
): 
 667                 """Extracts URL information and returns it in list of dicts.""" 
 669                 return self
._real
_extract
(url
) 
 671         def set_downloader(self
, downloader
): 
 672                 """Sets the downloader for this IE.""" 
 673                 self
._downloader 
= downloader
 
 675         def _real_initialize(self
): 
 676                 """Real initialization process. Redefine in subclasses.""" 
 679         def _real_extract(self
, url
): 
 680                 """Real extraction process. Redefine in subclasses.""" 
 683 class YoutubeIE(InfoExtractor
): 
 684         """Information extractor for youtube.com.""" 
 686         _VALID_URL 
= r
'^((?:http://)?(?:youtu\.be/|(?:\w+\.)?youtube\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?[\?#](?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' 
 687         _LANG_URL 
= r
'http://uk.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
 688         _LOGIN_URL 
= 'http://www.youtube.com/signup?next=/&gl=US&hl=en' 
 689         _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
 690         _NETRC_MACHINE 
= 'youtube' 
 691         # Listed in order of priority for the -b option 
 692         _available_formats 
= ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13', None] 
 693         _video_extensions 
= { 
 699                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever 
 706                 return (re
.match(YoutubeIE
._VALID
_URL
, url
) is not None) 
 708         def report_lang(self
): 
 709                 """Report attempt to set language.""" 
 710                 self
._downloader
.to_stdout(u
'[youtube] Setting language') 
 712         def report_login(self
): 
 713                 """Report attempt to log in.""" 
 714                 self
._downloader
.to_stdout(u
'[youtube] Logging in') 
 716         def report_age_confirmation(self
): 
 717                 """Report attempt to confirm age.""" 
 718                 self
._downloader
.to_stdout(u
'[youtube] Confirming age') 
 720         def report_video_webpage_download(self
, video_id
): 
 721                 """Report attempt to download video webpage.""" 
 722                 self
._downloader
.to_stdout(u
'[youtube] %s: Downloading video webpage' % video_id
) 
 724         def report_video_info_webpage_download(self
, video_id
): 
 725                 """Report attempt to download video info webpage.""" 
 726                 self
._downloader
.to_stdout(u
'[youtube] %s: Downloading video info webpage' % video_id
) 
 728         def report_information_extraction(self
, video_id
): 
 729                 """Report attempt to extract video information.""" 
 730                 self
._downloader
.to_stdout(u
'[youtube] %s: Extracting video information' % video_id
) 
 732         def report_unavailable_format(self
, video_id
, format
): 
 733                 """Report extracted video URL.""" 
 734                 self
._downloader
.to_stdout(u
'[youtube] %s: Format %s not available' % (video_id
, format
)) 
 736         def report_rtmp_download(self
): 
 737                 """Indicate the download will use the RTMP protocol.""" 
 738                 self
._downloader
.to_stdout(u
'[youtube] RTMP download detected') 
 740         def _real_initialize(self
): 
 741                 if self
._downloader 
is None: 
 746                 downloader_params 
= self
._downloader
.params
 
 748                 # Attempt to use provided username and password or .netrc data 
 749                 if downloader_params
.get('username', None) is not None: 
 750                         username 
= downloader_params
['username'] 
 751                         password 
= downloader_params
['password'] 
 752                 elif downloader_params
.get('usenetrc', False): 
 754                                 info 
= netrc
.netrc().authenticators(self
._NETRC
_MACHINE
) 
 759                                         raise netrc
.NetrcParseError('No authenticators for %s' % self
._NETRC
_MACHINE
) 
 760                         except (IOError, netrc
.NetrcParseError
), err
: 
 761                                 self
._downloader
.to_stderr(u
'WARNING: parsing .netrc: %s' % str(err
)) 
 765                 request 
= urllib2
.Request(self
._LANG
_URL
, None, std_headers
) 
 768                         urllib2
.urlopen(request
).read() 
 769                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 770                         self
._downloader
.to_stderr(u
'WARNING: unable to set language: %s' % str(err
)) 
 773                 # No authentication to be performed 
 779                                 'current_form': 'loginForm', 
 781                                 'action_login': 'Log In', 
 782                                 'username':     username
, 
 783                                 'password':     password
, 
 785                 request 
= urllib2
.Request(self
._LOGIN
_URL
, urllib
.urlencode(login_form
), std_headers
) 
 788                         login_results 
= urllib2
.urlopen(request
).read() 
 789                         if re
.search(r
'(?i)<form[^>]* name="loginForm"', login_results
) is not None: 
 790                                 self
._downloader
.to_stderr(u
'WARNING: unable to log in: bad username or password') 
 792                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 793                         self
._downloader
.to_stderr(u
'WARNING: unable to log in: %s' % str(err
)) 
 799                                 'action_confirm':       'Confirm', 
 801                 request 
= urllib2
.Request(self
._AGE
_URL
, urllib
.urlencode(age_form
), std_headers
) 
 803                         self
.report_age_confirmation() 
 804                         age_results 
= urllib2
.urlopen(request
).read() 
 805                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 806                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
 809         def _real_extract(self
, url
): 
 810                 # Extract video id from URL 
 811                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
 813                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
 816                 # At this point we have a new video 
 817                 if self
._downloader 
is not None: 
 818                         self
._downloader
.increment_downloads() 
 819                 video_id 
= mobj
.group(2) 
 821                 # Downloader parameters 
 826                 if self
._downloader 
is not None: 
 827                         params 
= self
._downloader
.params
 
 828                         format_param 
= params
.get('format', None) 
 829                         if format_param 
== '0': 
 830                                 format_limit 
= params
.get('format_limit', None) 
 831                                 if format_limit 
is not None: 
 833                                                 # Start at a different format if the user has limited the maximum quality 
 834                                                 quality_index 
= self
._available
_formats
.index(format_limit
) 
 837                                 format_param 
= self
._available
_formats
[quality_index
] 
 839                         elif format_param 
== '-1': 
 840                                 format_param 
= self
._available
_formats
[quality_index
] 
 845                         video_extension 
= self
._video
_extensions
.get(format_param
, 'flv') 
 848                         self
.report_video_webpage_download(video_id
) 
 849                         request 
= urllib2
.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en' % video_id
, None, std_headers
) 
 851                                 video_webpage 
= urllib2
.urlopen(request
).read() 
 852                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 853                                 self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
 856                         # Attempt to extract SWF player URL 
 857                         mobj 
= re
.search(r
'swfConfig.*"(http://.*?watch.*?-.*?\.swf)"', video_webpage
) 
 859                                 player_url 
= mobj
.group(1) 
 864                         self
.report_video_info_webpage_download(video_id
) 
 865                         for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
 866                                 video_info_url 
= ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
 867                                                    % (video_id
, el_type
)) 
 868                                 request 
= urllib2
.Request(video_info_url
, None, std_headers
) 
 870                                         video_info_webpage 
= urllib2
.urlopen(request
).read() 
 871                                         video_info 
= parse_qs(video_info_webpage
) 
 872                                         if 'token' in video_info
: 
 874                                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 875                                         self
._downloader
.trouble(u
'ERROR: unable to download video info webpage: %s' % str(err
)) 
 877                         self
.report_information_extraction(video_id
) 
 880                         if 'token' not in video_info
: 
 881                                 # Attempt to see if YouTube has issued an error message 
 882                                 if 'reason' not in video_info
: 
 883                                         self
._downloader
.trouble(u
'ERROR: unable to extract "t" parameter for unknown reason') 
 884                                         stream 
= open('reportme-ydl-%s.dat' % time
.time(), 'wb') 
 885                                         stream
.write(video_info_webpage
) 
 888                                         reason 
= urllib
.unquote_plus(video_info
['reason'][0]) 
 889                                         self
._downloader
.trouble(u
'ERROR: YouTube said: %s' % reason
.decode('utf-8')) 
 891                         token 
= urllib
.unquote_plus(video_info
['token'][0]) 
 892                         video_real_url 
= 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=detailpage&ps=default&gl=US&hl=en' % (video_id
, token
) 
 893                         if format_param 
is not None: 
 894                                 video_real_url 
= '%s&fmt=%s' % (video_real_url
, format_param
) 
 896                         # Check possible RTMP download 
 897                         if 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
 898                                 self
.report_rtmp_download() 
 899                                 video_real_url 
= video_info
['conn'][0] 
 902                         if 'author' not in video_info
: 
 903                                 self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
 905                         video_uploader 
= urllib
.unquote_plus(video_info
['author'][0]) 
 908                         if 'title' not in video_info
: 
 909                                 self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
 911                         video_title 
= urllib
.unquote_plus(video_info
['title'][0]) 
 912                         video_title 
= video_title
.decode('utf-8') 
 913                         video_title 
= sanitize_title(video_title
) 
 916                         simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
 917                         simple_title 
= simple_title
.strip(ur
'_') 
 920                         if 'thumbnail_url' not in video_info
: 
 921                                 self
._downloader
.trouble(u
'WARNING: unable to extract video thumbnail') 
 923                         else:   # don't panic if we can't find it 
 924                                 video_thumbnail 
= urllib
.unquote_plus(video_info
['thumbnail_url'][0]) 
 927                         video_description 
= 'No description available.' 
 928                         if self
._downloader
.params
.get('forcedescription', False): 
 929                                 mobj 
= re
.search(r
'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage
) 
 931                                         video_description 
= mobj
.group(1) 
 934                                 # Process video information 
 935                                 self
._downloader
.process_info({ 
 936                                         'id':           video_id
.decode('utf-8'), 
 937                                         'url':          video_real_url
.decode('utf-8'), 
 938                                         'uploader':     video_uploader
.decode('utf-8'), 
 939                                         'title':        video_title
, 
 940                                         'stitle':       simple_title
, 
 941                                         'ext':          video_extension
.decode('utf-8'), 
 942                                         'format':       (format_param 
is None and u
'NA' or format_param
.decode('utf-8')), 
 943                                         'thumbnail':    video_thumbnail
.decode('utf-8'), 
 944                                         'description':  video_description
.decode('utf-8'), 
 945                                         'player_url':   player_url
, 
 950                                         if quality_index 
== len(self
._available
_formats
): 
 954                                                 format_param 
= self
._available
_formats
[quality_index
] 
 958                         except UnavailableFormatError
, err
: 
 959                                 if best_quality 
or all_formats
: 
 961                                         if quality_index 
== len(self
._available
_formats
): 
 962                                                 # I don't ever expect this to happen 
 964                                                         self
._downloader
.trouble(u
'ERROR: no known formats available for video') 
 967                                                 self
.report_unavailable_format(video_id
, format_param
) 
 968                                                 format_param 
= self
._available
_formats
[quality_index
] 
 971                                         self
._downloader
.trouble('ERROR: format not available for video') 
 975 class MetacafeIE(InfoExtractor
): 
 976         """Information Extractor for metacafe.com.""" 
 978         _VALID_URL 
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' 
 979         _DISCLAIMER 
= 'http://www.metacafe.com/family_filter/' 
 980         _FILTER_POST 
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' 
 983         def __init__(self
, youtube_ie
, downloader
=None): 
 984                 InfoExtractor
.__init
__(self
, downloader
) 
 985                 self
._youtube
_ie 
= youtube_ie
 
 989                 return (re
.match(MetacafeIE
._VALID
_URL
, url
) is not None) 
 991         def report_disclaimer(self
): 
 992                 """Report disclaimer retrieval.""" 
 993                 self
._downloader
.to_stdout(u
'[metacafe] Retrieving disclaimer') 
 995         def report_age_confirmation(self
): 
 996                 """Report attempt to confirm age.""" 
 997                 self
._downloader
.to_stdout(u
'[metacafe] Confirming age') 
 999         def report_download_webpage(self
, video_id
): 
1000                 """Report webpage download.""" 
1001                 self
._downloader
.to_stdout(u
'[metacafe] %s: Downloading webpage' % video_id
) 
1003         def report_extraction(self
, video_id
): 
1004                 """Report information extraction.""" 
1005                 self
._downloader
.to_stdout(u
'[metacafe] %s: Extracting information' % video_id
) 
1007         def _real_initialize(self
): 
1008                 # Retrieve disclaimer 
1009                 request 
= urllib2
.Request(self
._DISCLAIMER
, None, std_headers
) 
1011                         self
.report_disclaimer() 
1012                         disclaimer 
= urllib2
.urlopen(request
).read() 
1013                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1014                         self
._downloader
.trouble(u
'ERROR: unable to retrieve disclaimer: %s' % str(err
)) 
1020                         'submit': "Continue - I'm over 18", 
1022                 request 
= urllib2
.Request(self
._FILTER
_POST
, urllib
.urlencode(disclaimer_form
), std_headers
) 
1024                         self
.report_age_confirmation() 
1025                         disclaimer 
= urllib2
.urlopen(request
).read() 
1026                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1027                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1030         def _real_extract(self
, url
): 
1031                 # Extract id and simplified title from URL 
1032                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1034                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1037                 video_id 
= mobj
.group(1) 
1039                 # Check if video comes from YouTube 
1040                 mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
1041                 if mobj2 
is not None: 
1042                         self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % mobj2
.group(1)) 
1045                 # At this point we have a new video 
1046                 if self
._downloader 
is not None: 
1047                         self
._downloader
.increment_downloads() 
1049                 simple_title 
= mobj
.group(2).decode('utf-8') 
1050                 video_extension 
= 'flv' 
1052                 # Retrieve video webpage to extract further information 
1053                 request 
= urllib2
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
1055                         self
.report_download_webpage(video_id
) 
1056                         webpage 
= urllib2
.urlopen(request
).read() 
1057                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1058                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1061                 # Extract URL, uploader and title from webpage 
1062                 self
.report_extraction(video_id
) 
1063                 mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
1065                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1067                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1069                 #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) 
1071                 #       self._downloader.trouble(u'ERROR: unable to extract gdaKey') 
1073                 #gdaKey = mobj.group(1) 
1075                 #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) 
1077                 video_url 
= mediaURL
 
1079                 mobj 
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
) 
1081                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1083                 video_title 
= mobj
.group(1).decode('utf-8') 
1084                 video_title 
= sanitize_title(video_title
) 
1086                 mobj 
= re
.search(r
'(?ms)By:\s*<a .*?>(.+?)<', webpage
) 
1088                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1090                 video_uploader 
= mobj
.group(1) 
1093                         # Process video information 
1094                         self
._downloader
.process_info({ 
1095                                 'id':           video_id
.decode('utf-8'), 
1096                                 'url':          video_url
.decode('utf-8'), 
1097                                 'uploader':     video_uploader
.decode('utf-8'), 
1098                                 'title':        video_title
, 
1099                                 'stitle':       simple_title
, 
1100                                 'ext':          video_extension
.decode('utf-8'), 
1104                 except UnavailableFormatError
: 
1105                         self
._downloader
.trouble(u
'ERROR: format not available for video') 
1108 class DailymotionIE(InfoExtractor
): 
1109         """Information Extractor for Dailymotion""" 
1111         _VALID_URL 
= r
'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' 
1113         def __init__(self
, downloader
=None): 
1114                 InfoExtractor
.__init
__(self
, downloader
) 
1118                 return (re
.match(DailymotionIE
._VALID
_URL
, url
) is not None) 
1120         def report_download_webpage(self
, video_id
): 
1121                 """Report webpage download.""" 
1122                 self
._downloader
.to_stdout(u
'[dailymotion] %s: Downloading webpage' % video_id
) 
1124         def report_extraction(self
, video_id
): 
1125                 """Report information extraction.""" 
1126                 self
._downloader
.to_stdout(u
'[dailymotion] %s: Extracting information' % video_id
) 
1128         def _real_initialize(self
): 
1131         def _real_extract(self
, url
): 
1132                 # Extract id and simplified title from URL 
1133                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1135                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1138                 # At this point we have a new video 
1139                 if self
._downloader 
is not None: 
1140                         self
._downloader
.increment_downloads() 
1141                 video_id 
= mobj
.group(1) 
1143                 simple_title 
= mobj
.group(2).decode('utf-8') 
1144                 video_extension 
= 'flv' 
1146                 # Retrieve video webpage to extract further information 
1147                 request 
= urllib2
.Request(url
) 
1149                         self
.report_download_webpage(video_id
) 
1150                         webpage 
= urllib2
.urlopen(request
).read() 
1151                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1152                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1155                 # Extract URL, uploader and title from webpage 
1156                 self
.report_extraction(video_id
) 
1157                 mobj 
= re
.search(r
'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage
) 
1159                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1161                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1163                 # if needed add http://www.dailymotion.com/ if relative URL 
1165                 video_url 
= mediaURL
 
1167                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>' 
1168                 mobj 
= re
.search(r
'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage
) 
1170                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1172                 video_title 
= mobj
.group(1).decode('utf-8') 
1173                 video_title 
= sanitize_title(video_title
) 
1175                 mobj 
= re
.search(r
'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a></div>', webpage
) 
1177                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1179                 video_uploader 
= mobj
.group(1) 
1182                         # Process video information 
1183                         self
._downloader
.process_info({ 
1184                                 'id':           video_id
.decode('utf-8'), 
1185                                 'url':          video_url
.decode('utf-8'), 
1186                                 'uploader':     video_uploader
.decode('utf-8'), 
1187                                 'title':        video_title
, 
1188                                 'stitle':       simple_title
, 
1189                                 'ext':          video_extension
.decode('utf-8'), 
1193                 except UnavailableFormatError
: 
1194                         self
._downloader
.trouble(u
'ERROR: format not available for video') 
1196 class GoogleIE(InfoExtractor
): 
1197         """Information extractor for video.google.com.""" 
1199         _VALID_URL 
= r
'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' 
1201         def __init__(self
, downloader
=None): 
1202                 InfoExtractor
.__init
__(self
, downloader
) 
1206                 return (re
.match(GoogleIE
._VALID
_URL
, url
) is not None) 
1208         def report_download_webpage(self
, video_id
): 
1209                 """Report webpage download.""" 
1210                 self
._downloader
.to_stdout(u
'[video.google] %s: Downloading webpage' % video_id
) 
1212         def report_extraction(self
, video_id
): 
1213                 """Report information extraction.""" 
1214                 self
._downloader
.to_stdout(u
'[video.google] %s: Extracting information' % video_id
) 
1216         def _real_initialize(self
): 
1219         def _real_extract(self
, url
): 
1220                 # Extract id from URL 
1221                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1223                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1226                 # At this point we have a new video 
1227                 if self
._downloader 
is not None: 
1228                         self
._downloader
.increment_downloads() 
1229                 video_id 
= mobj
.group(1) 
1231                 video_extension 
= 'mp4' 
1233                 # Retrieve video webpage to extract further information 
1234                 request 
= urllib2
.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id
) 
1236                         self
.report_download_webpage(video_id
) 
1237                         webpage 
= urllib2
.urlopen(request
).read() 
1238                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1239                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1242                 # Extract URL, uploader, and title from webpage 
1243                 self
.report_extraction(video_id
) 
1244                 mobj 
= re
.search(r
"download_url:'([^']+)'", webpage
) 
1246                         video_extension 
= 'flv' 
1247                         mobj 
= re
.search(r
"(?i)videoUrl\\x3d(.+?)\\x26", webpage
) 
1249                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1251                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1252                 mediaURL 
= mediaURL
.replace('\\x3d', '\x3d') 
1253                 mediaURL 
= mediaURL
.replace('\\x26', '\x26') 
1255                 video_url 
= mediaURL
 
1257                 mobj 
= re
.search(r
'<title>(.*)</title>', webpage
) 
1259                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1261                 video_title 
= mobj
.group(1).decode('utf-8') 
1262                 video_title 
= sanitize_title(video_title
) 
1263                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1265                 # Extract video description 
1266                 mobj 
= re
.search(r
'<span id=short-desc-content>([^<]*)</span>', webpage
) 
1268                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1270                 video_description 
= mobj
.group(1).decode('utf-8') 
1271                 if not video_description
: 
1272                         video_description 
= 'No description available.' 
1274                 # Extract video thumbnail 
1275                 if self
._downloader
.params
.get('forcethumbnail', False): 
1276                         request 
= urllib2
.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id
))) 
1278                                 webpage 
= urllib2
.urlopen(request
).read() 
1279                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1280                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1282                         mobj 
= re
.search(r
'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage
) 
1284                                 self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1286                         video_thumbnail 
= mobj
.group(1) 
1287                 else:   # we need something to pass to process_info 
1288                         video_thumbnail 
= '' 
1292                         # Process video information 
1293                         self
._downloader
.process_info({ 
1294                                 'id':           video_id
.decode('utf-8'), 
1295                                 'url':          video_url
.decode('utf-8'), 
1297                                 'title':        video_title
, 
1298                                 'stitle':       simple_title
, 
1299                                 'ext':          video_extension
.decode('utf-8'), 
1303                 except UnavailableFormatError
: 
1304                         self
._downloader
.trouble(u
'ERROR: format not available for video') 
1307 class PhotobucketIE(InfoExtractor
): 
1308         """Information extractor for photobucket.com.""" 
1310         _VALID_URL 
= r
'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' 
1312         def __init__(self
, downloader
=None): 
1313                 InfoExtractor
.__init
__(self
, downloader
) 
1317                 return (re
.match(PhotobucketIE
._VALID
_URL
, url
) is not None) 
1319         def report_download_webpage(self
, video_id
): 
1320                 """Report webpage download.""" 
1321                 self
._downloader
.to_stdout(u
'[photobucket] %s: Downloading webpage' % video_id
) 
1323         def report_extraction(self
, video_id
): 
1324                 """Report information extraction.""" 
1325                 self
._downloader
.to_stdout(u
'[photobucket] %s: Extracting information' % video_id
) 
1327         def _real_initialize(self
): 
1330         def _real_extract(self
, url
): 
1331                 # Extract id from URL 
1332                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1334                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1337                 # At this point we have a new video 
1338                 if self
._downloader 
is not None: 
1339                         self
._downloader
.increment_downloads() 
1340                 video_id 
= mobj
.group(1) 
1342                 video_extension 
= 'flv' 
1344                 # Retrieve video webpage to extract further information 
1345                 request 
= urllib2
.Request(url
) 
1347                         self
.report_download_webpage(video_id
) 
1348                         webpage 
= urllib2
.urlopen(request
).read() 
1349                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1350                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1353                 # Extract URL, uploader, and title from webpage 
1354                 self
.report_extraction(video_id
) 
1355                 mobj 
= re
.search(r
'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage
) 
1357                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1359                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1361                 video_url 
= mediaURL
 
1363                 mobj 
= re
.search(r
'<title>(.*) video by (.*) - Photobucket</title>', webpage
) 
1365                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1367                 video_title 
= mobj
.group(1).decode('utf-8') 
1368                 video_title 
= sanitize_title(video_title
) 
1369                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1371                 video_uploader 
= mobj
.group(2).decode('utf-8') 
1374                         # Process video information 
1375                         self
._downloader
.process_info({ 
1376                                 'id':           video_id
.decode('utf-8'), 
1377                                 'url':          video_url
.decode('utf-8'), 
1378                                 'uploader':     video_uploader
, 
1379                                 'title':        video_title
, 
1380                                 'stitle':       simple_title
, 
1381                                 'ext':          video_extension
.decode('utf-8'), 
1385                 except UnavailableFormatError
: 
1386                         self
._downloader
.trouble(u
'ERROR: format not available for video') 
1389 class YahooIE(InfoExtractor
): 
1390         """Information extractor for video.yahoo.com.""" 
1392         # _VALID_URL matches all Yahoo! Video URLs 
1393         # _VPAGE_URL matches only the extractable '/watch/' URLs 
1394         _VALID_URL 
= r
'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' 
1395         _VPAGE_URL 
= r
'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
1397         def __init__(self
, downloader
=None): 
1398                 InfoExtractor
.__init
__(self
, downloader
) 
1402                 return (re
.match(YahooIE
._VALID
_URL
, url
) is not None) 
1404         def report_download_webpage(self
, video_id
): 
1405                 """Report webpage download.""" 
1406                 self
._downloader
.to_stdout(u
'[video.yahoo] %s: Downloading webpage' % video_id
) 
1408         def report_extraction(self
, video_id
): 
1409                 """Report information extraction.""" 
1410                 self
._downloader
.to_stdout(u
'[video.yahoo] %s: Extracting information' % video_id
) 
1412         def _real_initialize(self
): 
1415         def _real_extract(self
, url
, new_video
=True): 
1416                 # Extract ID from URL 
1417                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1419                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1422                 # At this point we have a new video 
1423                 if self
._downloader 
is not None and new_video
: 
1424                         self
._downloader
.increment_downloads() 
1425                 video_id 
= mobj
.group(2) 
1426                 video_extension 
= 'flv' 
1428                 # Rewrite valid but non-extractable URLs as 
1429                 # extractable English language /watch/ URLs 
1430                 if re
.match(self
._VPAGE
_URL
, url
) is None: 
1431                         request 
= urllib2
.Request(url
) 
1433                                 webpage 
= urllib2
.urlopen(request
).read() 
1434                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1435                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1438                         mobj 
= re
.search(r
'\("id", "([0-9]+)"\);', webpage
) 
1440                                 self
._downloader
.trouble(u
'ERROR: Unable to extract id field') 
1442                         yahoo_id 
= mobj
.group(1) 
1444                         mobj 
= re
.search(r
'\("vid", "([0-9]+)"\);', webpage
) 
1446                                 self
._downloader
.trouble(u
'ERROR: Unable to extract vid field') 
1448                         yahoo_vid 
= mobj
.group(1) 
1450                         url 
= 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid
, yahoo_id
) 
1451                         return self
._real
_extract
(url
, new_video
=False) 
1453                 # Retrieve video webpage to extract further information 
1454                 request 
= urllib2
.Request(url
) 
1456                         self
.report_download_webpage(video_id
) 
1457                         webpage 
= urllib2
.urlopen(request
).read() 
1458                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1459                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1462                 # Extract uploader and title from webpage 
1463                 self
.report_extraction(video_id
) 
1464                 mobj 
= re
.search(r
'<meta name="title" content="(.*)" />', webpage
) 
1466                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1468                 video_title 
= mobj
.group(1).decode('utf-8') 
1469                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1471                 mobj 
= re
.search(r
'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage
) 
1473                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
1475                 video_uploader 
= mobj
.group(1).decode('utf-8') 
1477                 # Extract video thumbnail 
1478                 mobj 
= re
.search(r
'<link rel="image_src" href="(.*)" />', webpage
) 
1480                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1482                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
1484                 # Extract video description 
1485                 mobj 
= re
.search(r
'<meta name="description" content="(.*)" />', webpage
) 
1487                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1489                 video_description 
= mobj
.group(1).decode('utf-8') 
1490                 if not video_description
: video_description 
= 'No description available.' 
1492                 # Extract video height and width 
1493                 mobj 
= re
.search(r
'<meta name="video_height" content="([0-9]+)" />', webpage
) 
1495                         self
._downloader
.trouble(u
'ERROR: unable to extract video height') 
1497                 yv_video_height 
= mobj
.group(1) 
1499                 mobj 
= re
.search(r
'<meta name="video_width" content="([0-9]+)" />', webpage
) 
1501                         self
._downloader
.trouble(u
'ERROR: unable to extract video width') 
1503                 yv_video_width 
= mobj
.group(1) 
1505                 # Retrieve video playlist to extract media URL 
1506                 # I'm not completely sure what all these options are, but we 
1507                 # seem to need most of them, otherwise the server sends a 401. 
1508                 yv_lg 
= 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents 
1509                 yv_bitrate 
= '700'  # according to Wikipedia this is hard-coded 
1510                 request 
= urllib2
.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id 
+ 
1511                                           '&tech=flash&mode=playlist&lg=' + yv_lg 
+ '&bitrate=' + yv_bitrate 
+ '&vidH=' + yv_video_height 
+ 
1512                                           '&vidW=' + yv_video_width 
+ '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') 
1514                         self
.report_download_webpage(video_id
) 
1515                         webpage 
= urllib2
.urlopen(request
).read() 
1516                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1517                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1520                 # Extract media URL from playlist XML 
1521                 mobj 
= re
.search(r
'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage
) 
1523                         self
._downloader
.trouble(u
'ERROR: Unable to extract media URL') 
1525                 video_url 
= urllib
.unquote(mobj
.group(1) + mobj
.group(2)).decode('utf-8') 
1526                 video_url 
= re
.sub(r
'(?u)&(.+?);', htmlentity_transform
, video_url
) 
1529                         # Process video information 
1530                         self
._downloader
.process_info({ 
1531                                 'id':           video_id
.decode('utf-8'), 
1533                                 'uploader':     video_uploader
, 
1534                                 'title':        video_title
, 
1535                                 'stitle':       simple_title
, 
1536                                 'ext':          video_extension
.decode('utf-8'), 
1537                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
1538                                 'description':  video_description
, 
1539                                 'thumbnail':    video_thumbnail
, 
1540                                 'description':  video_description
, 
1543                 except UnavailableFormatError
: 
1544                         self
._downloader
.trouble(u
'ERROR: format not available for video') 
1547 class GenericIE(InfoExtractor
): 
1548         """Generic last-resort information extractor.""" 
1550         def __init__(self
, downloader
=None): 
1551                 InfoExtractor
.__init
__(self
, downloader
) 
1557         def report_download_webpage(self
, video_id
): 
1558                 """Report webpage download.""" 
1559                 self
._downloader
.to_stdout(u
'WARNING: Falling back on generic information extractor.') 
1560                 self
._downloader
.to_stdout(u
'[generic] %s: Downloading webpage' % video_id
) 
1562         def report_extraction(self
, video_id
): 
1563                 """Report information extraction.""" 
1564                 self
._downloader
.to_stdout(u
'[generic] %s: Extracting information' % video_id
) 
1566         def _real_initialize(self
): 
1569         def _real_extract(self
, url
): 
1570                 # At this point we have a new video 
1571                 if self
._downloader 
is not None: 
1572                         self
._downloader
.increment_downloads() 
1574                 video_id 
= url
.split('/')[-1] 
1575                 request 
= urllib2
.Request(url
) 
1577                         self
.report_download_webpage(video_id
) 
1578                         webpage 
= urllib2
.urlopen(request
).read() 
1579                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1580                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1582                 except ValueError, err
: 
1583                         # since this is the last-resort InfoExtractor, if 
1584                         # this error is thrown, it'll be thrown here 
1585                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1588                 # Start with something easy: JW Player in SWFObject 
1589                 mobj 
= re
.search(r
'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) 
1591                         # Broaden the search a little bit 
1592                         mobj = re.search(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage) 
1594                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
1597                 # It's possible that one of the regexes 
1598                 # matched, but returned an empty group: 
1599                 if mobj.group(1) is None: 
1600                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
1603                 video_url = urllib.unquote(mobj.group(1)) 
1604                 video_id  = os.path.basename(video_url) 
1606                 # here's a fun little line of code for you: 
1607                 video_extension = os.path.splitext(video_id)[1][1:] 
1608                 video_id        = os.path.splitext(video_id)[0] 
1610                 # it's tempting to parse this further, but you would 
1611                 # have to take into account all the variations like 
1612                 #   Video Title - Site Name 
1613                 #   Site Name | Video Title 
1614                 #   Video Title - Tagline | Site Name 
1615                 # and so on and so forth; it's just not practical 
1616                 mobj = re.search(r'<title>(.*)</title>', webpage) 
1618                         self._downloader.trouble(u'ERROR: unable to extract title') 
1620                 video_title = mobj.group(1).decode('utf-8') 
1621                 video_title = sanitize_title(video_title) 
1622                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) 
1624                 # video uploader is domain name 
1625                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) 
1627                         self._downloader.trouble(u'ERROR: unable to extract title') 
1629                 video_uploader = mobj.group(1).decode('utf-8') 
1632                         # Process video information 
1633                         self._downloader.process_info({ 
1634                                 'id':           video_id.decode('utf-8'), 
1635                                 'url':          video_url.decode('utf-8'), 
1636                                 'uploader':     video_uploader, 
1637                                 'title':        video_title, 
1638                                 'stitle':       simple_title, 
1639                                 'ext':          video_extension.decode('utf-8'), 
1643                 except UnavailableFormatError: 
1644                         self._downloader.trouble(u'ERROR: format not available for video') 
1647 class YoutubeSearchIE(InfoExtractor): 
1648         """Information Extractor for YouTube search queries.""" 
1649         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' 
1650         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' 
1651         _VIDEO_INDICATOR = r'href="/watch
\?v
=.+?
"' 
1652         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
1654         _max_youtube_results = 1000 
1656         def __init__(self, youtube_ie, downloader=None): 
1657                 InfoExtractor.__init__(self, downloader) 
1658                 self._youtube_ie = youtube_ie 
1662                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) 
1664         def report_download_page(self, query, pagenum): 
1665                 """Report attempt to download playlist page with given number.""" 
1666                 query = query.decode(preferredencoding()) 
1667                 self._downloader.to_stdout(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) 
1669         def _real_initialize(self): 
1670                 self._youtube_ie.initialize() 
1672         def _real_extract(self, query): 
1673                 mobj = re.match(self._VALID_QUERY, query) 
1675                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
1678                 prefix, query = query.split(':') 
1680                 query  = query.encode('utf-8') 
1682                         self._download_n_results(query, 1) 
1684                 elif prefix == 'all': 
1685                         self._download_n_results(query, self._max_youtube_results) 
1691                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
1693                                 elif n > self._max_youtube_results: 
1694                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n)) 
1695                                         n = self._max_youtube_results 
1696                                 self._download_n_results(query, n) 
1698                         except ValueError: # parsing prefix as integer fails 
1699                                 self._download_n_results(query, 1) 
1702         def _download_n_results(self, query, n): 
1703                 """Downloads a specified number of results for a query""" 
1706                 already_seen = set() 
1710                         self.report_download_page(query, pagenum) 
1711                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
1712                         request = urllib2.Request(result_url, None, std_headers) 
1714                                 page = urllib2.urlopen(request).read() 
1715                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
1716                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
1719                         # Extract video identifiers 
1720                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
1721                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] 
1722                                 if video_id not in already_seen: 
1723                                         video_ids.append(video_id) 
1724                                         already_seen.add(video_id) 
1725                                         if len(video_ids) == n: 
1726                                                 # Specified n videos reached 
1727                                                 for id in video_ids: 
1728                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
1731                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
1732                                 for id in video_ids: 
1733                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
1736                         pagenum = pagenum + 1 
1738 class GoogleSearchIE(InfoExtractor): 
1739         """Information Extractor for Google Video search queries.""" 
1740         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' 
1741         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' 
1742         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' 
1743         _MORE_PAGES_INDICATOR = r'<span>Next</span>' 
1745         _max_google_results = 1000 
1747         def __init__(self, google_ie, downloader=None): 
1748                 InfoExtractor.__init__(self, downloader) 
1749                 self._google_ie = google_ie 
1753                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) 
1755         def report_download_page(self, query, pagenum): 
1756                 """Report attempt to download playlist page with given number.""" 
1757                 query = query.decode(preferredencoding()) 
1758                 self._downloader.to_stdout(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) 
1760         def _real_initialize(self): 
1761                 self._google_ie.initialize() 
1763         def _real_extract(self, query): 
1764                 mobj = re.match(self._VALID_QUERY, query) 
1766                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
1769                 prefix, query = query.split(':') 
1771                 query  = query.encode('utf-8') 
1773                         self._download_n_results(query, 1) 
1775                 elif prefix == 'all': 
1776                         self._download_n_results(query, self._max_google_results) 
1782                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
1784                                 elif n > self._max_google_results: 
1785                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n)) 
1786                                         n = self._max_google_results 
1787                                 self._download_n_results(query, n) 
1789                         except ValueError: # parsing prefix as integer fails 
1790                                 self._download_n_results(query, 1) 
1793         def _download_n_results(self, query, n): 
1794                 """Downloads a specified number of results for a query""" 
1797                 already_seen = set() 
1801                         self.report_download_page(query, pagenum) 
1802                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
1803                         request = urllib2.Request(result_url, None, std_headers) 
1805                                 page = urllib2.urlopen(request).read() 
1806                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
1807                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
1810                         # Extract video identifiers 
1811                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
1812                                 video_id = mobj.group(1) 
1813                                 if video_id not in already_seen: 
1814                                         video_ids.append(video_id) 
1815                                         already_seen.add(video_id) 
1816                                         if len(video_ids) == n: 
1817                                                 # Specified n videos reached 
1818                                                 for id in video_ids: 
1819                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
1822                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
1823                                 for id in video_ids: 
1824                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
1827                         pagenum = pagenum + 1 
1829 class YahooSearchIE(InfoExtractor): 
1830         """Information Extractor for Yahoo! Video search queries.""" 
1831         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' 
1832         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' 
1833         _VIDEO_INDICATOR = r'href="http
://video\
.yahoo\
.com
/watch
/([0-9]+/[0-9]+)"' 
1834         _MORE_PAGES_INDICATOR = r'\s*Next' 
1836         _max_yahoo_results = 1000 
1838         def __init__(self, yahoo_ie, downloader=None): 
1839                 InfoExtractor.__init__(self, downloader) 
1840                 self._yahoo_ie = yahoo_ie 
1844                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) 
1846         def report_download_page(self, query, pagenum): 
1847                 """Report attempt to download playlist page with given number.""" 
1848                 query = query.decode(preferredencoding()) 
1849                 self._downloader.to_stdout(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) 
1851         def _real_initialize(self): 
1852                 self._yahoo_ie.initialize() 
1854         def _real_extract(self, query): 
1855                 mobj = re.match(self._VALID_QUERY, query) 
1857                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
1860                 prefix, query = query.split(':') 
1862                 query  = query.encode('utf-8') 
1864                         self._download_n_results(query, 1) 
1866                 elif prefix == 'all': 
1867                         self._download_n_results(query, self._max_yahoo_results) 
1873                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
1875                                 elif n > self._max_yahoo_results: 
1876                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n)) 
1877                                         n = self._max_yahoo_results 
1878                                 self._download_n_results(query, n) 
1880                         except ValueError: # parsing prefix as integer fails 
1881                                 self._download_n_results(query, 1) 
1884         def _download_n_results(self, query, n): 
1885                 """Downloads a specified number of results for a query""" 
1888                 already_seen = set() 
1892                         self.report_download_page(query, pagenum) 
1893                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
1894                         request = urllib2.Request(result_url, None, std_headers) 
1896                                 page = urllib2.urlopen(request).read() 
1897                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
1898                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
1901                         # Extract video identifiers 
1902                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
1903                                 video_id = mobj.group(1) 
1904                                 if video_id not in already_seen: 
1905                                         video_ids.append(video_id) 
1906                                         already_seen.add(video_id) 
1907                                         if len(video_ids) == n: 
1908                                                 # Specified n videos reached 
1909                                                 for id in video_ids: 
1910                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
1913                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
1914                                 for id in video_ids: 
1915                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
1918                         pagenum = pagenum + 1 
1920 class YoutubePlaylistIE(InfoExtractor): 
1921         """Information Extractor for YouTube playlists.""" 
1923         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*' 
1924         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' 
1925         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
1926         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
1929         def __init__(self, youtube_ie, downloader=None): 
1930                 InfoExtractor.__init__(self, downloader) 
1931                 self._youtube_ie = youtube_ie 
1935                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) 
1937         def report_download_page(self, playlist_id, pagenum): 
1938                 """Report attempt to download playlist page with given number.""" 
1939                 self._downloader.to_stdout(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) 
1941         def _real_initialize(self): 
1942                 self._youtube_ie.initialize() 
1944         def _real_extract(self, url): 
1945                 # Extract playlist id 
1946                 mobj = re.match(self._VALID_URL, url) 
1948                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
1951                 # Download playlist pages 
1952                 playlist_id = mobj.group(1) 
1957                         self.report_download_page(playlist_id, pagenum) 
1958                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers) 
1960                                 page = urllib2.urlopen(request).read() 
1961                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
1962                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
1965                         # Extract video identifiers 
1967                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
1968                                 if mobj.group(1) not in ids_in_page: 
1969                                         ids_in_page.append(mobj.group(1)) 
1970                         video_ids.extend(ids_in_page) 
1972                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
1974                         pagenum = pagenum + 1 
1976                 for id in video_ids: 
1977                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
1980 class YoutubeUserIE(InfoExtractor): 
1981         """Information Extractor for YouTube users.""" 
1983         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' 
1984         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' 
1985         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. 
1988         def __init__(self, youtube_ie, downloader=None): 
1989                 InfoExtractor.__init__(self, downloader) 
1990                 self._youtube_ie = youtube_ie 
1994                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None) 
1996         def report_download_page(self, username): 
1997                 """Report attempt to download user page.""" 
1998                 self._downloader.to_stdout(u'[youtube] user %s: Downloading page ' % (username)) 
2000         def _real_initialize(self): 
2001                 self._youtube_ie.initialize() 
2003         def _real_extract(self, url): 
2005                 mobj = re.match(self._VALID_URL, url) 
2007                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2010                 # Download user page 
2011                 username = mobj.group(1) 
2015                 self.report_download_page(username) 
2016                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) 
2018                         page = urllib2.urlopen(request).read() 
2019                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2020                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2023                 # Extract video identifiers 
2026                 for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2027                         if mobj.group(1) not in ids_in_page: 
2028                                 ids_in_page.append(mobj.group(1)) 
2029                 video_ids.extend(ids_in_page) 
2031                 for id in video_ids: 
2032                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
class PostProcessor(object):
	"""Post Processor class.

	PostProcessor objects can be added to downloaders with their
	add_post_processor() method. When the downloader has finished a
	successful download, it will take its internal chain of PostProcessors
	and start calling the run() method on each one of them, first with
	an initial argument and then with the returned value of the previous
	PostProcessor.

	The chain will be stopped if one of them ever returns None or the end
	of the chain is reached.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	"""

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		composed by InfoExtractors. The only difference is that this
		one has an extra field called "filepath" that points to the
		downloaded file.

		When this method returns None, the postprocessing chain is
		stopped. However, this method may return an information
		dictionary that will be passed to the next postprocessing
		object in the chain. It can be the one it received after
		changing some fields.

		In addition, this method may raise a PostProcessingError
		exception that will be taken into account by the downloader
		it was called from.
		"""
		return information # by default, do nothing
2081 ### MAIN PROGRAM ### 
2082 if __name__ == '__main__': 
2084                 # Modules needed only when running the main program 
2088                 # Function to update the program file with the latest version from bitbucket.org 
2089                 def update_self(downloader, filename): 
2090                         # Note: downloader only used for options 
2091                         if not os.access (filename, os.W_OK): 
2092                                 sys.exit('ERROR: no write permissions on %s' % filename) 
2094                         downloader.to_stdout('Updating to latest stable version...') 
2095                         latest_url = 'http://bitbucket.org/rg3/youtube-dl/raw/tip/LATEST_VERSION' 
2096                         latest_version = urllib.urlopen(latest_url).read().strip() 
2097                         prog_url = 'http://bitbucket.org/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version 
2098                         newcontent = urllib.urlopen(prog_url).read() 
2099                         stream = open(filename, 'w') 
2100                         stream.write(newcontent) 
2102                         downloader.to_stdout('Updated to version %s' % latest_version) 
2104                 # General configuration 
2105                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) 
		# ---------------------------------------------------------------------
		# Command-line entry point (fragment).
		# NOTE(review): this chunk starts inside an enclosing try: block whose
		# header is above this view, and several lines were dropped when the
		# file was extracted from the web viewer; each gap is marked below.
		# ---------------------------------------------------------------------
		# Install a cookie-aware opener for all urllib2 requests, and a global
		# socket timeout so downloads cannot hang forever.
		urllib2.install_opener(urllib2.build_opener(urllib2.HTTPCookieProcessor()))
		socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)

		# Parse command line
		# conflict_handler='resolve' lets the custom -h/-v options registered
		# below replace optparse's built-in ones instead of raising an error.
		parser = optparse.OptionParser(
			usage='Usage: %prog [options] url...',
			version='2010.06.06',
			conflict_handler='resolve',
		# NOTE(review): the ')' closing OptionParser(...) is missing from this
		# chunk (extraction artifact).

		# General options (top level, not in a named group).
		parser.add_option('-h', '--help',
				action='help', help='print this help text and exit')
		parser.add_option('-v', '--version',
				action='version', help='print program version and exit')
		parser.add_option('-U', '--update',
				action='store_true', dest='update_self', help='update this program to latest stable version')
		parser.add_option('-i', '--ignore-errors',
				action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
		parser.add_option('-r', '--rate-limit',
				dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
		parser.add_option('-R', '--retries',
				dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)

		# Site login credentials (or .netrc); consistency is validated after
		# parsing, below.
		authentication = optparse.OptionGroup(parser, 'Authentication Options')
		authentication.add_option('-u', '--username',
				dest='username', metavar='USERNAME', help='account username')
		authentication.add_option('-p', '--password',
				dest='password', metavar='PASSWORD', help='account password')
		authentication.add_option('-n', '--netrc',
				action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)
		parser.add_option_group(authentication)

		# Video format selection: -b/-m/-d/--all-formats are aliases that
		# store a fixed constant into the same 'format' destination.
		video_format = optparse.OptionGroup(parser, 'Video Format Options')
		video_format.add_option('-f', '--format',
				action='store', dest='format', metavar='FORMAT', help='video format code')
		video_format.add_option('-b', '--best-quality',
				action='store_const', dest='format', help='download the best quality video possible', const='0')
		video_format.add_option('-m', '--mobile-version',
				action='store_const', dest='format', help='alias for -f 17', const='17')
		video_format.add_option('-d', '--high-def',
				action='store_const', dest='format', help='alias for -f 22', const='22')
		video_format.add_option('--all-formats',
				action='store_const', dest='format', help='download all available video formats', const='-1')
		video_format.add_option('--max-quality',
				action='store', dest='format_limit', metavar='FORMAT', help='highest quality format limit for -b')
		parser.add_option_group(video_format)

		# Output verbosity; any --get-* option implies quiet+simulate (see the
		# FileDownloader parameter dict below).
		verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')
		verbosity.add_option('-q', '--quiet',
				action='store_true', dest='quiet', help='activates quiet mode', default=False)
		verbosity.add_option('-s', '--simulate',
				action='store_true', dest='simulate', help='do not download video', default=False)
		verbosity.add_option('-g', '--get-url',
				action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
		verbosity.add_option('-e', '--get-title',
				action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
		verbosity.add_option('--get-thumbnail',
				action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False)
		verbosity.add_option('--get-description',
				action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False)
		verbosity.add_option('--no-progress',
				action='store_true', dest='noprogress', help='do not print progress bar', default=False)
		parser.add_option_group(verbosity)

		# Output file naming plus overwrite/resume behaviour.
		filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
		filesystem.add_option('-t', '--title',
				action='store_true', dest='usetitle', help='use title in file name', default=False)
		filesystem.add_option('-l', '--literal',
				action='store_true', dest='useliteral', help='use literal title in file name', default=False)
		filesystem.add_option('-o', '--output',
				dest='outtmpl', metavar='TEMPLATE', help='output filename template')
		filesystem.add_option('-a', '--batch-file',
				dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
		filesystem.add_option('-w', '--no-overwrites',
				action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
		filesystem.add_option('-c', '--continue',
				action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
		parser.add_option_group(filesystem)

		(opts, args) = parser.parse_args()

		# Batch file verification
		# Collect extra URLs from --batch-file, one per line, skipping blanks.
		if opts.batchfile is not None:
			# NOTE(review): missing from this chunk (extraction artifact): the
			# 'try:' header, the stdin branch body and its 'else:', and the
			# 'except IOError:' header that owns the sys.exit() below.
				if opts.batchfile == '-':
					batchfd = open(opts.batchfile, 'r')
				batchurls = batchfd.readlines()
				batchurls = [x.strip() for x in batchurls]
				batchurls = [x for x in batchurls if len(x) > 0]
				sys.exit(u'ERROR: batch file could not be read')
		all_urls = batchurls + args

		# Conflicting, missing and erroneous options
		if opts.usenetrc and (opts.username is not None or opts.password is not None):
			parser.error(u'using .netrc conflicts with giving username/password')
		if opts.password is not None and opts.username is None:
			parser.error(u'account username missing')
		if opts.outtmpl is not None and (opts.useliteral or opts.usetitle):
			parser.error(u'using output template conflicts with using title or literal title')
		if opts.usetitle and opts.useliteral:
			parser.error(u'using title conflicts with using literal title')
		# -u without -p: prompt interactively instead of erroring out.
		if opts.username is not None and opts.password is None:
			opts.password = getpass.getpass(u'Type account password and press return:')
		# Convert the human-readable limit (e.g. '50k', '44.6m') to a number;
		# parse_bytes returns None on bad input.
		if opts.ratelimit is not None:
			numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
			if numeric_limit is None:
				parser.error(u'invalid rate limit specified')
			opts.ratelimit = numeric_limit
		# Retries arrive as a string from the command line; coerce to integer.
		if opts.retries is not None:
			# NOTE(review): the 'try:' header for this except is missing from
			# this chunk.  'except X, err' is Python 2-only syntax.
				opts.retries = long(opts.retries)
			except (TypeError, ValueError), err:
				parser.error(u'invalid retry count specified')

		# Information extractors
		# NOTE(review): IEs constructed with another IE as argument presumably
		# delegate individual-video handling to it (playlist/user/search
		# resolving to plain videos) -- confirm in the class definitions.
		youtube_ie = YoutubeIE()
		metacafe_ie = MetacafeIE(youtube_ie)
		dailymotion_ie = DailymotionIE()
		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
		youtube_user_ie = YoutubeUserIE(youtube_ie)
		youtube_search_ie = YoutubeSearchIE(youtube_ie)
		google_ie = GoogleIE()
		google_search_ie = GoogleSearchIE(google_ie)
		photobucket_ie = PhotobucketIE()
		yahoo_ie = YahooIE()
		yahoo_search_ie = YahooSearchIE(yahoo_ie)
		generic_ie = GenericIE()

		# File downloader: one parameter dict assembled from the parsed options.
		fd = FileDownloader({
			'usenetrc': opts.usenetrc,
			'username': opts.username,
			'password': opts.password,
			# Any --get-* option implies both quiet and simulate.
			'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'forceurl': opts.geturl,
			'forcetitle': opts.gettitle,
			'forcethumbnail': opts.getthumbnail,
			'forcedescription': opts.getdescription,
			'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription),
			'format': opts.format,
			'format_limit': opts.format_limit,
			# First truthy branch of the and/or chain wins: an explicit -o
			# template (decoded to unicode), then --all-formats variants with
			# -t/-l, then plain -t/-l, then the bare id.ext fallback.
			'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
				or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
				or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
				or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
				or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
				or u'%(id)s.%(ext)s'),
			'ignoreerrors': opts.ignoreerrors,
			'ratelimit': opts.ratelimit,
			'nooverwrites': opts.nooverwrites,
			'retries': opts.retries,
			'continuedl': opts.continue_dl,
			'noprogress': opts.noprogress,
		# NOTE(review): the '})' closing the FileDownloader({...}) call is
		# missing from this chunk (extraction artifact).
		# Registration order: the search/playlist/user IEs go in before the
		# plain YouTube IE so their URLs get a chance to match first; ordering
		# appears deliberate (see the generic_ie note below).
		fd.add_info_extractor(youtube_search_ie)
		fd.add_info_extractor(youtube_pl_ie)
		fd.add_info_extractor(youtube_user_ie)
		fd.add_info_extractor(metacafe_ie)
		fd.add_info_extractor(dailymotion_ie)
		fd.add_info_extractor(youtube_ie)
		fd.add_info_extractor(google_ie)
		fd.add_info_extractor(google_search_ie)
		fd.add_info_extractor(photobucket_ie)
		fd.add_info_extractor(yahoo_ie)
		fd.add_info_extractor(yahoo_search_ie)

		# This must come last since it's the
		# fallback if none of the others work
		fd.add_info_extractor(generic_ie)

		# Self-update to the latest published version when -U was given.
		if opts.update_self:
			update_self(fd, sys.argv[0])

		# URLs are only mandatory when not self-updating.
		if len(all_urls) < 1:
			if not opts.update_self:
				parser.error(u'you must provide at least one URL')

		retcode = fd.download(all_urls)
		# NOTE(review): nothing visible uses retcode here -- a trailing
		# 'sys.exit(retcode)' was presumably dropped by extraction.

	except DownloadError:
		# NOTE(review): this handler's body was dropped by extraction (the next
		# visible line is already the SameFileError handler).
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')