]>
Raphaël G. Git Repositories - youtubedl/blob - youtube-dl
   2 # -*- coding: utf-8 -*- 
   3 # Author: Ricardo Garcia Gonzalez 
   4 # Author: Danny Colligan 
   5 # Author: Benjamin Johnson 
   6 # Author: Vasyl' Vavrychuk 
   7 # License: Public domain code 
  26 # parse_qs was moved from the cgi module to the urlparse module recently. 
  28         from urlparse 
import parse_qs
 
  30         from cgi 
import parse_qs
 
  33         'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.12) Gecko/20101028 Firefox/3.6.12', 
  34         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  35         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  36         'Accept-Language': 'en-us,en;q=0.5', 
  39 simple_title_chars 
= string
.ascii_letters
.decode('ascii') + string
.digits
.decode('ascii') 
  41 def preferredencoding(): 
  42         """Get preferred encoding. 
  44         Returns the best encoding scheme for the system, based on 
  45         locale.getpreferredencoding() and some further tweaks. 
  47         def yield_preferredencoding(): 
  49                         pref 
= locale
.getpreferredencoding() 
  55         return yield_preferredencoding().next() 
  57 def htmlentity_transform(matchobj
): 
  58         """Transforms an HTML entity to a Unicode character. 
  60         This function receives a match object and is intended to be used with 
  61         the re.sub() function. 
  63         entity 
= matchobj
.group(1) 
  65         # Known non-numeric HTML entity 
  66         if entity 
in htmlentitydefs
.name2codepoint
: 
  67                 return unichr(htmlentitydefs
.name2codepoint
[entity
]) 
  70         mobj 
= re
.match(ur
'(?u)#(x?\d+)', entity
) 
  72                 numstr 
= mobj
.group(1) 
  73                 if numstr
.startswith(u
'x'): 
  75                         numstr 
= u
'0%s' % numstr
 
  78                 return unichr(long(numstr
, base
)) 
  80         # Unknown entity in name, return its literal representation 
  81         return (u
'&%s;' % entity
) 
  83 def sanitize_title(utitle
): 
  84         """Sanitizes a video title so it could be used as part of a filename.""" 
  85         utitle 
= re
.sub(ur
'(?u)&(.+?);', htmlentity_transform
, utitle
) 
  86         return utitle
.replace(unicode(os
.sep
), u
'%') 
  88 def sanitize_open(filename
, open_mode
): 
  89         """Try to open the given filename, and slightly tweak it if this fails. 
  91         Attempts to open the given filename. If this fails, it tries to change 
  92         the filename slightly, step by step, until it's either able to open it 
  93         or it fails and raises a final exception, like the standard open() 
  96         It returns the tuple (stream, definitive_file_name). 
 100                         if sys
.platform 
== 'win32': 
 102                                 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
) 
 103                         return (sys
.stdout
, filename
) 
 104                 stream 
= open(filename
, open_mode
) 
 105                 return (stream
, filename
) 
 106         except (IOError, OSError), err
: 
 107                 # In case of error, try to remove win32 forbidden chars 
 108                 filename 
= re
.sub(ur
'[/<>:"\|\?\*]', u
'#', filename
) 
 110                 # An exception here should be caught in the caller 
 111                 stream 
= open(filename
, open_mode
) 
 112                 return (stream
, filename
) 
 114 class DownloadError(Exception): 
 115         """Download Error exception. 
 117         This exception may be thrown by FileDownloader objects if they are not 
 118         configured to continue on errors. They will contain the appropriate 
 123 class SameFileError(Exception): 
 124         """Same File exception. 
 126         This exception will be thrown by FileDownloader objects if they detect 
 127         multiple files would have to be downloaded to the same file on disk. 
 131 class PostProcessingError(Exception): 
 132         """Post Processing exception. 
 134         This exception may be raised by PostProcessor's .run() method to 
 135         indicate an error in the postprocessing task. 
 139 class UnavailableVideoError(Exception): 
 140         """Unavailable Format exception. 
 142         This exception will be thrown when a video is requested 
 143         in a format that is not available for that video. 
 147 class ContentTooShortError(Exception): 
 148         """Content Too Short exception. 
 150         This exception may be raised by FileDownloader objects when a file they 
 151         download is too small for what the server announced first, indicating 
 152         the connection was probably interrupted. 
 158         def __init__(self
, downloaded
, expected
): 
 159                 self
.downloaded 
= downloaded
 
 160                 self
.expected 
= expected
 
 162 class FileDownloader(object): 
 163         """File Downloader class. 
 165         File downloader objects are the ones responsible of downloading the 
 166         actual video file and writing it to disk if the user has requested 
 167         it, among some other tasks. In most cases there should be one per 
 168         program. As, given a video URL, the downloader doesn't know how to 
 169         extract all the needed information, task that InfoExtractors do, it 
 170         has to pass the URL to one of them. 
 172         For this, file downloader objects have a method that allows 
 173         InfoExtractors to be registered in a given order. When it is passed 
 174         a URL, the file downloader handles it to the first InfoExtractor it 
 175         finds that reports being able to handle it. The InfoExtractor extracts 
 176         all the information about the video or videos the URL refers to, and 
 177         asks the FileDownloader to process the video information, possibly 
 178         downloading the video. 
 180         File downloaders accept a lot of parameters. In order not to saturate 
 181         the object constructor with arguments, it receives a dictionary of 
 182         options instead. These options are available through the params 
 183         attribute for the InfoExtractors to use. The FileDownloader also 
 184         registers itself as the downloader in charge for the InfoExtractors 
 185         that are added to it, so this is a "mutual registration". 
 189         username:         Username for authentication purposes. 
 190         password:         Password for authentication purposes. 
 191         usenetrc:         Use netrc for authentication instead. 
 192         quiet:            Do not print messages to stdout. 
 193         forceurl:         Force printing final URL. 
 194         forcetitle:       Force printing title. 
 195         forcethumbnail:   Force printing thumbnail URL. 
 196         forcedescription: Force printing description. 
 197         simulate:         Do not download the video files. 
 198         format:           Video format code. 
 199         format_limit:     Highest quality format to try. 
 200         outtmpl:          Template for output names. 
 201         ignoreerrors:     Do not stop on download errors. 
 202         ratelimit:        Download speed limit, in bytes/sec. 
 203         nooverwrites:     Prevent overwriting files. 
 204         retries:          Number of times to retry for HTTP error 5xx 
 205         continuedl:       Try to continue downloads if possible. 
 206         noprogress:       Do not print the progress bar. 
 207         playliststart:    Playlist item to start at. 
 208         playlistend:      Playlist item to end at. 
 209         logtostderr:      Log messages to stderr instead of stdout. 
	_download_retcode = None
	_num_downloads = None

	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		self._ies = []
		self._pps = []
		self._download_retcode = 0
		self._num_downloads = 0
		# Route screen output to stderr when 'logtostderr' is set.
		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
		self.params = params
	@staticmethod
	def pmkdir(filename):
		"""Create directory components in filename. Similar to Unix "mkdir -p"."""
		components = filename.split(os.sep)
		# All directory prefixes of filename, shortest first.
		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
		aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
		# NOTE: renamed loop variable; the original used "dir", shadowing the builtin.
		for dirname in aggregate:
			if not os.path.exists(dirname):
				os.mkdir(dirname)
	@staticmethod
	def temp_name(filename):
		"""Returns a temporary filename for the given filename."""
		# No temp file when streaming to stdout, or when the target exists
		# but is not a regular file (e.g. a named pipe or device).
		if filename == u'-' or (os.path.exists(filename) and not os.path.isfile(filename)):
			return filename
		return filename + u'.part'
	@staticmethod
	def format_bytes(bytes):
		"""Format a byte count as a human-readable string, e.g. '1.21M'."""
		if bytes is None:
			return 'N/A'
		if type(bytes) is str:
			bytes = float(bytes)
		if bytes == 0.0:
			exponent = 0
		else:
			exponent = long(math.log(bytes, 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		converted = float(bytes) / float(1024**exponent)
		return '%.2f%s' % (converted, suffix)
	@staticmethod
	def calc_percent(byte_counter, data_len):
		"""Return a right-aligned percentage string; placeholder when total is unknown."""
		if data_len is None:
			return '---.-%'
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
	@staticmethod
	def calc_eta(start, now, total, current):
		"""Estimate remaining time as MM:SS; '--:--' when it cannot be computed."""
		if total is None:
			return '--:--'
		dif = now - start
		if current == 0 or dif < 0.001: # One millisecond
			return '--:--'
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		if eta_mins > 99:
			return '--:--'
		return '%02d:%02d' % (eta_mins, eta_secs)
	@staticmethod
	def calc_speed(start, now, bytes):
		"""Format the average transfer speed since start, right-aligned."""
		dif = now - start
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%10s' % '---b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
	@staticmethod
	def best_block_size(elapsed_time, bytes):
		"""Pick the next read size, clamped between half and double the last one."""
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
			return long(new_max)
		rate = bytes / elapsed_time
		if rate > new_max:
			return long(new_max)
		if rate < new_min:
			return long(new_min)
		return long(rate)
	@staticmethod
	def parse_bytes(bytestr):
		"""Parse a string indicating a byte quantity into a long integer."""
		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
		if matchobj is None:
			return None
		number = float(matchobj.group(1))
		# Suffix position in 'bkmgtpezy' is the power of 1024 to apply.
		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
		return long(round(number * multiplier))
	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		# Mutual registration: give the IE a back-reference to us.
		ie.set_downloader(self)
	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		self._pps.append(pp)
		# Mutual registration: give the post-processor a back-reference to us.
		pp.set_downloader(self)
 319         def to_screen(self
, message
, skip_eol
=False, ignore_encoding_errors
=False): 
 320                 """Print message to stdout if not in quiet mode.""" 
 322                         if not self
.params
.get('quiet', False): 
 323                                 terminator 
= [u
'\n', u
''][skip_eol
] 
 324                                 print >>self
._screen
_file
, (u
'%s%s' % (message
, terminator
)).encode(preferredencoding()), 
 325                         self
._screen
_file
.flush() 
 326                 except (UnicodeEncodeError), err
: 
 327                         if not ignore_encoding_errors
: 
	def to_stderr(self, message):
		"""Print message to stderr, encoded for the current locale."""
		print >>sys.stderr, message.encode(preferredencoding())
 334         def fixed_template(self
): 
 335                 """Checks if the output template is fixed.""" 
 336                 return (re
.search(ur
'(?u)%\(.+?\)s', self
.params
['outtmpl']) is None) 
	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on if the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		# Only reached when errors are ignored: record a non-zero exit code.
		self._download_retcode = 1
	def slow_down(self, start_time, byte_counter):
		"""Sleep if the download speed is over the rate limit."""
		rate_limit = self.params.get('ratelimit', None)
		if rate_limit is None or byte_counter == 0:
			return
		now = time.time()
		elapsed = now - start_time
		if elapsed <= 0.0:
			return
		speed = float(byte_counter) / elapsed
		if speed > rate_limit:
			# Sleep just long enough for the average speed to fall to the limit.
			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
 364         def try_rename(self
, old_filename
, new_filename
): 
 366                         if old_filename 
== new_filename
: 
 368                         os
.rename(old_filename
, new_filename
) 
 369                 except (IOError, OSError), err
: 
 370                         self
.trouble(u
'ERROR: unable to rename file') 
	def report_destination(self, filename):
		"""Report destination filename."""
		self.to_screen(u'[download] Destination: %s' % filename, ignore_encoding_errors=True)
	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress."""
		if self.params.get('noprogress', False):
			return
		# '\r' + skip_eol redraws the progress line in place.
		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx"""
		self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
 391         def report_file_already_downloaded(self
, file_name
): 
 392                 """Report file has already been fully downloaded.""" 
 394                         self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 395                 except (UnicodeEncodeError), err
: 
 396                         self
.to_screen(u
'[download] The file has already been downloaded') 
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		self.to_screen(u'[download] Unable to resume')
	def report_finish(self):
		"""Report download finished."""
		if self.params.get('noprogress', False):
			self.to_screen(u'[download] Download completed')
		else:
			# Progress bar was being redrawn in place; finish its line.
			self.to_screen(u'')
	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		self._num_downloads += 1
 413         def process_info(self
, info_dict
): 
 414                 """Process a single dictionary returned by an InfoExtractor.""" 
 415                 # Do nothing else if in simulate mode 
 416                 if self
.params
.get('simulate', False): 
 418                         if self
.params
.get('forcetitle', False): 
 419                                 print info_dict
['title'].encode(preferredencoding(), 'xmlcharrefreplace') 
 420                         if self
.params
.get('forceurl', False): 
 421                                 print info_dict
['url'].encode(preferredencoding(), 'xmlcharrefreplace') 
 422                         if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
: 
 423                                 print info_dict
['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') 
 424                         if self
.params
.get('forcedescription', False) and 'description' in info_dict
: 
 425                                 print info_dict
['description'].encode(preferredencoding(), 'xmlcharrefreplace') 
 430                         template_dict 
= dict(info_dict
) 
 431                         template_dict
['epoch'] = unicode(long(time
.time())) 
 432                         template_dict
['autonumber'] = unicode('%05d' % self
._num
_downloads
) 
 433                         filename 
= self
.params
['outtmpl'] % template_dict
 
 434                 except (ValueError, KeyError), err
: 
 435                         self
.trouble(u
'ERROR: invalid system charset or erroneous output template') 
 437                 if self
.params
.get('nooverwrites', False) and os
.path
.exists(filename
): 
 438                         self
.to_stderr(u
'WARNING: file exists and will be skipped') 
 442                         self
.pmkdir(filename
) 
 443                 except (OSError, IOError), err
: 
 444                         self
.trouble(u
'ERROR: unable to create directories: %s' % str(err
)) 
 448                         success 
= self
._do
_download
(filename
, info_dict
['url'].encode('utf-8'), info_dict
.get('player_url', None)) 
 449                 except (OSError, IOError), err
: 
 450                         raise UnavailableVideoError
 
 451                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 452                         self
.trouble(u
'ERROR: unable to download video data: %s' % str(err
)) 
 454                 except (ContentTooShortError
, ), err
: 
 455                         self
.trouble(u
'ERROR: content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
)) 
 460                                 self
.post_process(filename
, info_dict
) 
 461                         except (PostProcessingError
), err
: 
 462                                 self
.trouble(u
'ERROR: postprocessing: %s' % str(err
)) 
	def download(self, url_list):
		"""Download a given list of URLs."""
		if len(url_list) > 1 and self.fixed_template():
			# A fixed template would make every URL overwrite the same file.
			raise SameFileError(self.params['outtmpl'])
		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				# Go to next InfoExtractor if not suitable
				if not ie.suitable(url):
					continue
				# Suitable InfoExtractor found
				suitable_found = True
				# Extract information from URL and process it
				ie.extract(url)
				# Suitable InfoExtractor had been found; go to next URL
				break
			if not suitable_found:
				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
		return self._download_retcode
	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file."""
		info = dict(ie_info)
		info['filepath'] = filename
		for pp in self._pps:
			info = pp.run(info)
			# A post-processor returning None stops the chain.
			if info is None:
				break
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by shelling out to the rtmpdump tool."""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrumpted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			# No progress between retries: stop looping on the resume code.
			if prevsize == cursize and retval == 1:
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
 532         def _do_download(self
, filename
, url
, player_url
): 
 533                 # Check file already present 
 534                 if self
.params
.get('continuedl', False) and os
.path
.isfile(filename
): 
 535                         self
.report_file_already_downloaded(filename
) 
 538                 # Attempt to download using rtmpdump 
 539                 if url
.startswith('rtmp'): 
 540                         return self
._download
_with
_rtmpdump
(filename
, url
, player_url
) 
 542                 tmpfilename 
= self
.temp_name(filename
) 
 545                 basic_request 
= urllib2
.Request(url
, None, std_headers
) 
 546                 request 
= urllib2
.Request(url
, None, std_headers
) 
 548                 # Establish possible resume length 
 549                 if os
.path
.isfile(tmpfilename
): 
 550                         resume_len 
= os
.path
.getsize(tmpfilename
) 
 554                 # Request parameters in case of being able to resume 
 555                 if self
.params
.get('continuedl', False) and resume_len 
!= 0: 
 556                         self
.report_resuming_byte(resume_len
) 
 557                         request
.add_header('Range','bytes=%d-' % resume_len
) 
 561                 retries 
= self
.params
.get('retries', 0) 
 562                 while count 
<= retries
: 
 563                         # Establish connection 
 565                                 data 
= urllib2
.urlopen(request
) 
 567                         except (urllib2
.HTTPError
, ), err
: 
 568                                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 569                                         # Unexpected HTTP error 
 571                                 elif err
.code 
== 416: 
 572                                         # Unable to resume (requested range not satisfiable) 
 574                                                 # Open the connection again without the range header 
 575                                                 data 
= urllib2
.urlopen(basic_request
) 
 576                                                 content_length 
= data
.info()['Content-Length'] 
 577                                         except (urllib2
.HTTPError
, ), err
: 
 578                                                 if err
.code 
< 500 or err
.code 
>= 600: 
 581                                                 # Examine the reported length 
 582                                                 if (content_length 
is not None and 
 583                                                     (resume_len 
- 100 < long(content_length
) < resume_len 
+ 100)): 
 584                                                         # The file had already been fully downloaded. 
 585                                                         # Explanation to the above condition: in issue #175 it was revealed that 
 586                                                         # YouTube sometimes adds or removes a few bytes from the end of the file, 
 587                                                         # changing the file size slightly and causing problems for some users. So 
 588                                                         # I decided to implement a suggested change and consider the file 
 589                                                         # completely downloaded if the file size differs less than 100 bytes from 
 590                                                         # the one in the hard drive. 
 591                                                         self
.report_file_already_downloaded(filename
) 
 592                                                         self
.try_rename(tmpfilename
, filename
) 
 595                                                         # The length does not match, we start the download over 
 596                                                         self
.report_unable_to_resume() 
 602                                 self
.report_retry(count
, retries
) 
 605                         self
.trouble(u
'ERROR: giving up after %s retries' % retries
) 
 608                 data_len 
= data
.info().get('Content-length', None) 
 609                 data_len_str 
= self
.format_bytes(data_len
) 
 616                         data_block 
= data
.read(block_size
) 
 618                         data_block_len 
= len(data_block
) 
 619                         if data_block_len 
== 0: 
 621                         byte_counter 
+= data_block_len
 
 623                         # Open file just in time 
 626                                         (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
 627                                         self
.report_destination(filename
) 
 628                                 except (OSError, IOError), err
: 
 629                                         self
.trouble(u
'ERROR: unable to open for writing: %s' % str(err
)) 
 632                                 stream
.write(data_block
) 
 633                         except (IOError, OSError), err
: 
 634                                 self
.trouble(u
'\nERROR: unable to write data: %s' % str(err
)) 
 636                         block_size 
= self
.best_block_size(after 
- before
, data_block_len
) 
 639                         percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
 640                         eta_str 
= self
.calc_eta(start
, time
.time(), data_len
, byte_counter
) 
 641                         speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter
) 
 642                         self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
 645                         self
.slow_down(start
, byte_counter
) 
 649                 if data_len 
is not None and str(byte_counter
) != data_len
: 
 650                         raise ContentTooShortError(byte_counter
, long(data_len
)) 
 651                 self
.try_rename(tmpfilename
, filename
) 
 654 class InfoExtractor(object): 
 655         """Information Extractor class. 
 657         Information extractors are the classes that, given a URL, extract 
 658         information from the video (or videos) the URL refers to. This 
 659         information includes the real video URL, the video title and simplified 
 660         title, author and others. The information is stored in a dictionary 
 661         which is then passed to the FileDownloader. The FileDownloader 
 662         processes this information possibly downloading the video to the file 
 663         system, among other possible outcomes. The dictionaries must include 
 664         the following fields: 
 666         id:             Video identifier. 
 667         url:            Final video URL. 
 668         uploader:       Nickname of the video uploader. 
 669         title:          Literal title. 
 670         stitle:         Simplified title. 
 671         ext:            Video filename extension. 
 672         format:         Video format. 
 673         player_url:     SWF Player URL (may be None). 
 675         The following fields are optional. Their primary purpose is to allow 
 676         youtube-dl to serve as the backend for a video search function, such 
 677         as the one in youtube2mp3.  They are only used when their respective 
 678         forced printing functions are called: 
 680         thumbnail:      Full URL to a video thumbnail image. 
 681         description:    One-line video description. 
 683         Subclasses of this one should re-define the _real_initialize() and 
 684         _real_extract() methods, as well as the suitable() static method. 
 685         Probably, they should also be instantiated and added to the main 
	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)
 699                 """Receives a URL and returns True if suitable for this IE.""" 
	def initialize(self):
		"""Initializes an instance (authentication, etc)."""
		# Run the real initialization only once per instance.
		if not self._ready:
			self._real_initialize()
			self._ready = True
	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		# Make sure authentication/setup has run before the real extraction.
		self.initialize()
		return self._real_extract(url)
	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader
	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass
	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass
 725 class YoutubeIE(InfoExtractor
): 
 726         """Information extractor for youtube.com.""" 
 728         _VALID_URL 
= r
'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/(?:(?:v/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))))?([0-9A-Za-z_-]+)(?(1).+)?$' 
 729         _LANG_URL 
= r
'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
 730         _LOGIN_URL 
= 'https://www.youtube.com/signup?next=/&gl=US&hl=en' 
 731         _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
 732         _NETRC_MACHINE 
= 'youtube' 
 733         # Listed in order of quality 
 734         _available_formats 
= ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] 
 735         _video_extensions 
= { 
 741                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever 
	@staticmethod
	def suitable(url):
		"""Receives a URL and returns True if suitable for this IE."""
		return (re.match(YoutubeIE._VALID_URL, url) is not None)
	def report_lang(self):
		"""Report attempt to set language."""
		self._downloader.to_screen(u'[youtube] Setting language')
	def report_login(self):
		"""Report attempt to log in."""
		self._downloader.to_screen(u'[youtube] Logging in')
 758         def report_age_confirmation(self
): 
 759                 """Report attempt to confirm age.""" 
 760                 self
._downloader
.to_screen(u
'[youtube] Confirming age') 
 762         def report_video_webpage_download(self
, video_id
): 
 763                 """Report attempt to download video webpage.""" 
 764                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video webpage' % video_id
) 
 766         def report_video_info_webpage_download(self
, video_id
): 
 767                 """Report attempt to download video info webpage.""" 
 768                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video info webpage' % video_id
) 
 770         def report_information_extraction(self
, video_id
): 
 771                 """Report attempt to extract video information.""" 
 772                 self
._downloader
.to_screen(u
'[youtube] %s: Extracting video information' % video_id
) 
 774         def report_unavailable_format(self
, video_id
, format
): 
 775                 """Report extracted video URL.""" 
 776                 self
._downloader
.to_screen(u
'[youtube] %s: Format %s not available' % (video_id
, format
)) 
 778         def report_rtmp_download(self
): 
 779                 """Indicate the download will use the RTMP protocol.""" 
 780                 self
._downloader
.to_screen(u
'[youtube] RTMP download detected') 
 782         def _real_initialize(self
): 
 783                 if self
._downloader 
is None: 
 788                 downloader_params 
= self
._downloader
.params
 
 790                 # Attempt to use provided username and password or .netrc data 
 791                 if downloader_params
.get('username', None) is not None: 
 792                         username 
= downloader_params
['username'] 
 793                         password 
= downloader_params
['password'] 
 794                 elif downloader_params
.get('usenetrc', False): 
 796                                 info 
= netrc
.netrc().authenticators(self
._NETRC
_MACHINE
) 
 801                                         raise netrc
.NetrcParseError('No authenticators for %s' % self
._NETRC
_MACHINE
) 
 802                         except (IOError, netrc
.NetrcParseError
), err
: 
 803                                 self
._downloader
.to_stderr(u
'WARNING: parsing .netrc: %s' % str(err
)) 
 807                 request 
= urllib2
.Request(self
._LANG
_URL
, None, std_headers
) 
 810                         urllib2
.urlopen(request
).read() 
 811                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 812                         self
._downloader
.to_stderr(u
'WARNING: unable to set language: %s' % str(err
)) 
 815                 # No authentication to be performed 
 821                                 'current_form': 'loginForm', 
 823                                 'action_login': 'Log In', 
 824                                 'username':     username
, 
 825                                 'password':     password
, 
 827                 request 
= urllib2
.Request(self
._LOGIN
_URL
, urllib
.urlencode(login_form
), std_headers
) 
 830                         login_results 
= urllib2
.urlopen(request
).read() 
 831                         if re
.search(r
'(?i)<form[^>]* name="loginForm"', login_results
) is not None: 
 832                                 self
._downloader
.to_stderr(u
'WARNING: unable to log in: bad username or password') 
 834                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 835                         self
._downloader
.to_stderr(u
'WARNING: unable to log in: %s' % str(err
)) 
 841                                 'action_confirm':       'Confirm', 
 843                 request 
= urllib2
.Request(self
._AGE
_URL
, urllib
.urlencode(age_form
), std_headers
) 
 845                         self
.report_age_confirmation() 
 846                         age_results 
= urllib2
.urlopen(request
).read() 
 847                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 848                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
 851         def _real_extract(self
, url
): 
 852                 # Extract video id from URL 
 853                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
 855                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
 857                 video_id 
= mobj
.group(2) 
 860                 self
.report_video_webpage_download(video_id
) 
 861                 request 
= urllib2
.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
, None, std_headers
) 
 863                         video_webpage 
= urllib2
.urlopen(request
).read() 
 864                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 865                         self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
 868                 # Attempt to extract SWF player URL 
 869                 mobj 
= re
.search(r
'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
) 
 871                         player_url 
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1)) 
 876                 self
.report_video_info_webpage_download(video_id
) 
 877                 for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
 878                         video_info_url 
= ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
 879                                            % (video_id
, el_type
)) 
 880                         request 
= urllib2
.Request(video_info_url
, None, std_headers
) 
 882                                 video_info_webpage 
= urllib2
.urlopen(request
).read() 
 883                                 video_info 
= parse_qs(video_info_webpage
) 
 884                                 if 'token' in video_info
: 
 886                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 887                                 self
._downloader
.trouble(u
'ERROR: unable to download video info webpage: %s' % str(err
)) 
 889                 if 'token' not in video_info
: 
 890                         if 'reason' in video_info
: 
 891                                 self
._downloader
.trouble(u
'ERROR: YouTube said: %s' % video_info
['reason'][0].decode('utf-8')) 
 893                                 self
._downloader
.trouble(u
'ERROR: "token" parameter not in video info for unknown reason') 
 896                 # Start extracting information 
 897                 self
.report_information_extraction(video_id
) 
 900                 if 'author' not in video_info
: 
 901                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
 903                 video_uploader 
= urllib
.unquote_plus(video_info
['author'][0]) 
 906                 if 'title' not in video_info
: 
 907                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
 909                 video_title 
= urllib
.unquote_plus(video_info
['title'][0]) 
 910                 video_title 
= video_title
.decode('utf-8') 
 911                 video_title 
= sanitize_title(video_title
) 
 914                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
 915                 simple_title 
= simple_title
.strip(ur
'_') 
 918                 if 'thumbnail_url' not in video_info
: 
 919                         self
._downloader
.trouble(u
'WARNING: unable to extract video thumbnail') 
 921                 else:   # don't panic if we can't find it 
 922                         video_thumbnail 
= urllib
.unquote_plus(video_info
['thumbnail_url'][0]) 
 926                 mobj 
= re
.search(r
'id="eow-date".*?>(.*?)</span>', video_webpage
, re
.DOTALL
) 
 928                         upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
 929                         format_expressions 
= ['%d %B %Y', '%B %d %Y'] 
 930                         for expression 
in format_expressions
: 
 932                                         upload_date 
= datetime
.datetime
.strptime(upload_date
, expression
).strftime('%Y%m%d') 
 937                 video_description 
= 'No description available.' 
 938                 if self
._downloader
.params
.get('forcedescription', False): 
 939                         mobj 
= re
.search(r
'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage
) 
 941                                 video_description 
= mobj
.group(1) 
 944                 video_token 
= urllib
.unquote_plus(video_info
['token'][0]) 
 946                 # Decide which formats to download 
 947                 req_format 
= self
._downloader
.params
.get('format', None) 
 948                 get_video_template 
= 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id
, video_token
) 
 950                 if 'fmt_url_map' in video_info
: 
 951                         url_map 
= dict(tuple(pair
.split('|')) for pair 
in video_info
['fmt_url_map'][0].split(',')) 
 952                         format_limit 
= self
._downloader
.params
.get('format_limit', None) 
 953                         if format_limit 
is not None and format_limit 
in self
._available
_formats
: 
 954                                 format_list 
= self
._available
_formats
[self
._available
_formats
.index(format_limit
):] 
 956                                 format_list 
= self
._available
_formats
 
 957                         existing_formats 
= [x 
for x 
in format_list 
if x 
in url_map
] 
 958                         if len(existing_formats
) == 0: 
 959                                 self
._downloader
.trouble(u
'ERROR: no known formats available for video') 
 961                         if req_format 
is None: 
 962                                 video_url_list 
= [(existing_formats
[0], url_map
[existing_formats
[0]])] # Best quality 
 963                         elif req_format 
== '-1': 
 964                                 video_url_list 
= [(f
, url_map
[f
]) for f 
in existing_formats
] # All formats 
 966                                 if req_format 
in url_map
: 
 967                                         video_url_list 
= [(req_format
, url_map
[req_format
])] # Specific format 
 969                                         video_url_list 
= [(req_format
, get_video_template 
% req_format
)] # Specific format 
 971                 elif 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
 972                         self
.report_rtmp_download() 
 973                         video_url_list 
= [(None, video_info
['conn'][0])] 
 976                         self
._downloader
.trouble(u
'ERROR: no fmt_url_map or conn information found in video info') 
 979                 for format_param
, video_real_url 
in video_url_list
: 
 980                         # At this point we have a new video 
 981                         self
._downloader
.increment_downloads() 
 984                         video_extension 
= self
._video
_extensions
.get(format_param
, 'flv') 
 986                         # Find the video URL in fmt_url_map or conn paramters 
 988                                 # Process video information 
 989                                 self
._downloader
.process_info({ 
 990                                         'id':           video_id
.decode('utf-8'), 
 991                                         'url':          video_real_url
.decode('utf-8'), 
 992                                         'uploader':     video_uploader
.decode('utf-8'), 
 993                                         'upload_date':  upload_date
, 
 994                                         'title':        video_title
, 
 995                                         'stitle':       simple_title
, 
 996                                         'ext':          video_extension
.decode('utf-8'), 
 997                                         'format':       (format_param 
is None and u
'NA' or format_param
.decode('utf-8')), 
 998                                         'thumbnail':    video_thumbnail
.decode('utf-8'), 
 999                                         'description':  video_description
.decode('utf-8'), 
1000                                         'player_url':   player_url
, 
1002                         except UnavailableVideoError
, err
: 
1003                                 self
._downloader
.trouble(u
'ERROR: unable to download video (format may not be available)') 
1006 class MetacafeIE(InfoExtractor
): 
1007         """Information Extractor for metacafe.com.""" 
1009         _VALID_URL 
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' 
1010         _DISCLAIMER 
= 'http://www.metacafe.com/family_filter/' 
1011         _FILTER_POST 
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' 
1014         def __init__(self
, youtube_ie
, downloader
=None): 
1015                 InfoExtractor
.__init
__(self
, downloader
) 
1016                 self
._youtube
_ie 
= youtube_ie
 
1020                 return (re
.match(MetacafeIE
._VALID
_URL
, url
) is not None) 
1022         def report_disclaimer(self
): 
1023                 """Report disclaimer retrieval.""" 
1024                 self
._downloader
.to_screen(u
'[metacafe] Retrieving disclaimer') 
1026         def report_age_confirmation(self
): 
1027                 """Report attempt to confirm age.""" 
1028                 self
._downloader
.to_screen(u
'[metacafe] Confirming age') 
1030         def report_download_webpage(self
, video_id
): 
1031                 """Report webpage download.""" 
1032                 self
._downloader
.to_screen(u
'[metacafe] %s: Downloading webpage' % video_id
) 
1034         def report_extraction(self
, video_id
): 
1035                 """Report information extraction.""" 
1036                 self
._downloader
.to_screen(u
'[metacafe] %s: Extracting information' % video_id
) 
1038         def _real_initialize(self
): 
1039                 # Retrieve disclaimer 
1040                 request 
= urllib2
.Request(self
._DISCLAIMER
, None, std_headers
) 
1042                         self
.report_disclaimer() 
1043                         disclaimer 
= urllib2
.urlopen(request
).read() 
1044                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1045                         self
._downloader
.trouble(u
'ERROR: unable to retrieve disclaimer: %s' % str(err
)) 
1051                         'submit': "Continue - I'm over 18", 
1053                 request 
= urllib2
.Request(self
._FILTER
_POST
, urllib
.urlencode(disclaimer_form
), std_headers
) 
1055                         self
.report_age_confirmation() 
1056                         disclaimer 
= urllib2
.urlopen(request
).read() 
1057                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1058                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1061         def _real_extract(self
, url
): 
1062                 # Extract id and simplified title from URL 
1063                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1065                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1068                 video_id 
= mobj
.group(1) 
1070                 # Check if video comes from YouTube 
1071                 mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
1072                 if mobj2 
is not None: 
1073                         self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % mobj2
.group(1)) 
1076                 # At this point we have a new video 
1077                 self
._downloader
.increment_downloads() 
1079                 simple_title 
= mobj
.group(2).decode('utf-8') 
1081                 # Retrieve video webpage to extract further information 
1082                 request 
= urllib2
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
1084                         self
.report_download_webpage(video_id
) 
1085                         webpage 
= urllib2
.urlopen(request
).read() 
1086                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1087                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1090                 # Extract URL, uploader and title from webpage 
1091                 self
.report_extraction(video_id
) 
1092                 mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
1093                 if mobj 
is not None: 
1094                         mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1095                         video_extension 
= mediaURL
[-3:] 
1097                         # Extract gdaKey if available 
1098                         mobj 
= re
.search(r
'(?m)&gdaKey=(.*?)&', webpage
) 
1100                                 video_url 
= mediaURL
 
1102                                 gdaKey 
= mobj
.group(1) 
1103                                 video_url 
= '%s?__gda__=%s' % (mediaURL
, gdaKey
) 
1105                         mobj 
= re
.search(r
' name="flashvars" value="(.*?)"', webpage
) 
1107                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1109                         vardict 
= parse_qs(mobj
.group(1)) 
1110                         if 'mediaData' not in vardict
: 
1111                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1113                         mobj 
= re
.search(r
'"mediaURL":"(http.*?)","key":"(.*?)"', vardict
['mediaData'][0]) 
1115                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1117                         mediaURL 
= mobj
.group(1).replace('\\/', '/') 
1118                         video_extension 
= mediaURL
[-3:] 
1119                         video_url 
= '%s?__gda__=%s' % (mediaURL
, mobj
.group(2)) 
1121                 mobj 
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
) 
1123                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1125                 video_title 
= mobj
.group(1).decode('utf-8') 
1126                 video_title 
= sanitize_title(video_title
) 
1128                 mobj 
= re
.search(r
'(?ms)By:\s*<a .*?>(.+?)<', webpage
) 
1130                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1132                 video_uploader 
= mobj
.group(1) 
1135                         # Process video information 
1136                         self
._downloader
.process_info({ 
1137                                 'id':           video_id
.decode('utf-8'), 
1138                                 'url':          video_url
.decode('utf-8'), 
1139                                 'uploader':     video_uploader
.decode('utf-8'), 
1140                                 'upload_date':  u
'NA', 
1141                                 'title':        video_title
, 
1142                                 'stitle':       simple_title
, 
1143                                 'ext':          video_extension
.decode('utf-8'), 
1147                 except UnavailableVideoError
: 
1148                         self
._downloader
.trouble(u
'ERROR: unable to download video') 
1151 class DailymotionIE(InfoExtractor
): 
1152         """Information Extractor for Dailymotion""" 
1154         _VALID_URL 
= r
'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' 
1156         def __init__(self
, downloader
=None): 
1157                 InfoExtractor
.__init
__(self
, downloader
) 
1161                 return (re
.match(DailymotionIE
._VALID
_URL
, url
) is not None) 
1163         def report_download_webpage(self
, video_id
): 
1164                 """Report webpage download.""" 
1165                 self
._downloader
.to_screen(u
'[dailymotion] %s: Downloading webpage' % video_id
) 
1167         def report_extraction(self
, video_id
): 
1168                 """Report information extraction.""" 
1169                 self
._downloader
.to_screen(u
'[dailymotion] %s: Extracting information' % video_id
) 
1171         def _real_initialize(self
): 
1174         def _real_extract(self
, url
): 
1175                 # Extract id and simplified title from URL 
1176                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1178                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1181                 # At this point we have a new video 
1182                 self
._downloader
.increment_downloads() 
1183                 video_id 
= mobj
.group(1) 
1185                 simple_title 
= mobj
.group(2).decode('utf-8') 
1186                 video_extension 
= 'flv' 
1188                 # Retrieve video webpage to extract further information 
1189                 request 
= urllib2
.Request(url
) 
1191                         self
.report_download_webpage(video_id
) 
1192                         webpage 
= urllib2
.urlopen(request
).read() 
1193                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1194                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1197                 # Extract URL, uploader and title from webpage 
1198                 self
.report_extraction(video_id
) 
1199                 mobj 
= re
.search(r
'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage
) 
1201                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1203                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1205                 # if needed add http://www.dailymotion.com/ if relative URL 
1207                 video_url 
= mediaURL
 
1209                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>' 
1210                 mobj 
= re
.search(r
'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage
) 
1212                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1214                 video_title 
= mobj
.group(1).decode('utf-8') 
1215                 video_title 
= sanitize_title(video_title
) 
1217                 mobj 
= re
.search(r
'(?im)<div class="dmco_html owner">.*?<a class="name" href="/.+?">(.+?)</a>', webpage
) 
1219                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1221                 video_uploader 
= mobj
.group(1) 
1224                         # Process video information 
1225                         self
._downloader
.process_info({ 
1226                                 'id':           video_id
.decode('utf-8'), 
1227                                 'url':          video_url
.decode('utf-8'), 
1228                                 'uploader':     video_uploader
.decode('utf-8'), 
1229                                 'upload_date':  u
'NA', 
1230                                 'title':        video_title
, 
1231                                 'stitle':       simple_title
, 
1232                                 'ext':          video_extension
.decode('utf-8'), 
1236                 except UnavailableVideoError
: 
1237                         self
._downloader
.trouble(u
'ERROR: unable to download video') 
1239 class GoogleIE(InfoExtractor
): 
1240         """Information extractor for video.google.com.""" 
1242         _VALID_URL 
= r
'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' 
1244         def __init__(self
, downloader
=None): 
1245                 InfoExtractor
.__init
__(self
, downloader
) 
1249                 return (re
.match(GoogleIE
._VALID
_URL
, url
) is not None) 
1251         def report_download_webpage(self
, video_id
): 
1252                 """Report webpage download.""" 
1253                 self
._downloader
.to_screen(u
'[video.google] %s: Downloading webpage' % video_id
) 
1255         def report_extraction(self
, video_id
): 
1256                 """Report information extraction.""" 
1257                 self
._downloader
.to_screen(u
'[video.google] %s: Extracting information' % video_id
) 
1259         def _real_initialize(self
): 
1262         def _real_extract(self
, url
): 
1263                 # Extract id from URL 
1264                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1266                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1269                 # At this point we have a new video 
1270                 self
._downloader
.increment_downloads() 
1271                 video_id 
= mobj
.group(1) 
1273                 video_extension 
= 'mp4' 
1275                 # Retrieve video webpage to extract further information 
1276                 request 
= urllib2
.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id
) 
1278                         self
.report_download_webpage(video_id
) 
1279                         webpage 
= urllib2
.urlopen(request
).read() 
1280                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1281                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1284                 # Extract URL, uploader, and title from webpage 
1285                 self
.report_extraction(video_id
) 
1286                 mobj 
= re
.search(r
"download_url:'([^']+)'", webpage
) 
1288                         video_extension 
= 'flv' 
1289                         mobj 
= re
.search(r
"(?i)videoUrl\\x3d(.+?)\\x26", webpage
) 
1291                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1293                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1294                 mediaURL 
= mediaURL
.replace('\\x3d', '\x3d') 
1295                 mediaURL 
= mediaURL
.replace('\\x26', '\x26') 
1297                 video_url 
= mediaURL
 
1299                 mobj 
= re
.search(r
'<title>(.*)</title>', webpage
) 
1301                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1303                 video_title 
= mobj
.group(1).decode('utf-8') 
1304                 video_title 
= sanitize_title(video_title
) 
1305                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1307                 # Extract video description 
1308                 mobj 
= re
.search(r
'<span id=short-desc-content>([^<]*)</span>', webpage
) 
1310                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1312                 video_description 
= mobj
.group(1).decode('utf-8') 
1313                 if not video_description
: 
1314                         video_description 
= 'No description available.' 
1316                 # Extract video thumbnail 
1317                 if self
._downloader
.params
.get('forcethumbnail', False): 
1318                         request 
= urllib2
.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id
))) 
1320                                 webpage 
= urllib2
.urlopen(request
).read() 
1321                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1322                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1324                         mobj 
= re
.search(r
'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage
) 
1326                                 self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1328                         video_thumbnail 
= mobj
.group(1) 
1329                 else:   # we need something to pass to process_info 
1330                         video_thumbnail 
= '' 
1334                         # Process video information 
1335                         self
._downloader
.process_info({ 
1336                                 'id':           video_id
.decode('utf-8'), 
1337                                 'url':          video_url
.decode('utf-8'), 
1339                                 'upload_date':  u
'NA', 
1340                                 'title':        video_title
, 
1341                                 'stitle':       simple_title
, 
1342                                 'ext':          video_extension
.decode('utf-8'), 
1346                 except UnavailableVideoError
: 
1347                         self
._downloader
.trouble(u
'ERROR: unable to download video') 
1350 class PhotobucketIE(InfoExtractor
): 
1351         """Information extractor for photobucket.com.""" 
1353         _VALID_URL 
= r
'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' 
1355         def __init__(self
, downloader
=None): 
1356                 InfoExtractor
.__init
__(self
, downloader
) 
1360                 return (re
.match(PhotobucketIE
._VALID
_URL
, url
) is not None) 
1362         def report_download_webpage(self
, video_id
): 
1363                 """Report webpage download.""" 
1364                 self
._downloader
.to_screen(u
'[photobucket] %s: Downloading webpage' % video_id
) 
1366         def report_extraction(self
, video_id
): 
1367                 """Report information extraction.""" 
1368                 self
._downloader
.to_screen(u
'[photobucket] %s: Extracting information' % video_id
) 
1370         def _real_initialize(self
): 
1373         def _real_extract(self
, url
): 
1374                 # Extract id from URL 
1375                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1377                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1380                 # At this point we have a new video 
1381                 self
._downloader
.increment_downloads() 
1382                 video_id 
= mobj
.group(1) 
1384                 video_extension 
= 'flv' 
1386                 # Retrieve video webpage to extract further information 
1387                 request 
= urllib2
.Request(url
) 
1389                         self
.report_download_webpage(video_id
) 
1390                         webpage 
= urllib2
.urlopen(request
).read() 
1391                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1392                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1395                 # Extract URL, uploader, and title from webpage 
1396                 self
.report_extraction(video_id
) 
1397                 mobj 
= re
.search(r
'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage
) 
1399                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1401                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1403                 video_url 
= mediaURL
 
1405                 mobj 
= re
.search(r
'<title>(.*) video by (.*) - Photobucket</title>', webpage
) 
1407                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1409                 video_title 
= mobj
.group(1).decode('utf-8') 
1410                 video_title 
= sanitize_title(video_title
) 
1411                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1413                 video_uploader 
= mobj
.group(2).decode('utf-8') 
1416                         # Process video information 
1417                         self
._downloader
.process_info({ 
1418                                 'id':           video_id
.decode('utf-8'), 
1419                                 'url':          video_url
.decode('utf-8'), 
1420                                 'uploader':     video_uploader
, 
1421                                 'upload_date':  u
'NA', 
1422                                 'title':        video_title
, 
1423                                 'stitle':       simple_title
, 
1424                                 'ext':          video_extension
.decode('utf-8'), 
1428                 except UnavailableVideoError
: 
1429                         self
._downloader
.trouble(u
'ERROR: unable to download video') 
1432 class YahooIE(InfoExtractor
): 
1433         """Information extractor for video.yahoo.com.""" 
1435         # _VALID_URL matches all Yahoo! Video URLs 
1436         # _VPAGE_URL matches only the extractable '/watch/' URLs 
1437         _VALID_URL 
= r
'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' 
1438         _VPAGE_URL 
= r
'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
1440         def __init__(self
, downloader
=None): 
1441                 InfoExtractor
.__init
__(self
, downloader
) 
1445                 return (re
.match(YahooIE
._VALID
_URL
, url
) is not None) 
1447         def report_download_webpage(self
, video_id
): 
1448                 """Report webpage download.""" 
1449                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Downloading webpage' % video_id
) 
1451         def report_extraction(self
, video_id
): 
1452                 """Report information extraction.""" 
1453                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Extracting information' % video_id
) 
1455         def _real_initialize(self
): 
1458         def _real_extract(self
, url
, new_video
=True): 
1459                 # Extract ID from URL 
1460                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1462                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1465                 # At this point we have a new video 
1466                 self
._downloader
.increment_downloads() 
1467                 video_id 
= mobj
.group(2) 
1468                 video_extension 
= 'flv' 
1470                 # Rewrite valid but non-extractable URLs as 
1471                 # extractable English language /watch/ URLs 
1472                 if re
.match(self
._VPAGE
_URL
, url
) is None: 
1473                         request 
= urllib2
.Request(url
) 
1475                                 webpage 
= urllib2
.urlopen(request
).read() 
1476                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1477                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1480                         mobj 
= re
.search(r
'\("id", "([0-9]+)"\);', webpage
) 
1482                                 self
._downloader
.trouble(u
'ERROR: Unable to extract id field') 
1484                         yahoo_id 
= mobj
.group(1) 
1486                         mobj 
= re
.search(r
'\("vid", "([0-9]+)"\);', webpage
) 
1488                                 self
._downloader
.trouble(u
'ERROR: Unable to extract vid field') 
1490                         yahoo_vid 
= mobj
.group(1) 
1492                         url 
= 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid
, yahoo_id
) 
1493                         return self
._real
_extract
(url
, new_video
=False) 
1495                 # Retrieve video webpage to extract further information 
1496                 request 
= urllib2
.Request(url
) 
1498                         self
.report_download_webpage(video_id
) 
1499                         webpage 
= urllib2
.urlopen(request
).read() 
1500                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1501                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1504                 # Extract uploader and title from webpage 
1505                 self
.report_extraction(video_id
) 
1506                 mobj 
= re
.search(r
'<meta name="title" content="(.*)" />', webpage
) 
1508                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1510                 video_title 
= mobj
.group(1).decode('utf-8') 
1511                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1513                 mobj 
= re
.search(r
'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage
) 
1515                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
1517                 video_uploader 
= mobj
.group(1).decode('utf-8') 
1519                 # Extract video thumbnail 
1520                 mobj 
= re
.search(r
'<link rel="image_src" href="(.*)" />', webpage
) 
1522                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1524                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
1526                 # Extract video description 
1527                 mobj 
= re
.search(r
'<meta name="description" content="(.*)" />', webpage
) 
1529                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1531                 video_description 
= mobj
.group(1).decode('utf-8') 
1532                 if not video_description
: video_description 
= 'No description available.' 
1534                 # Extract video height and width 
1535                 mobj 
= re
.search(r
'<meta name="video_height" content="([0-9]+)" />', webpage
) 
1537                         self
._downloader
.trouble(u
'ERROR: unable to extract video height') 
1539                 yv_video_height 
= mobj
.group(1) 
1541                 mobj 
= re
.search(r
'<meta name="video_width" content="([0-9]+)" />', webpage
) 
1543                         self
._downloader
.trouble(u
'ERROR: unable to extract video width') 
1545                 yv_video_width 
= mobj
.group(1) 
1547                 # Retrieve video playlist to extract media URL 
1548                 # I'm not completely sure what all these options are, but we 
1549                 # seem to need most of them, otherwise the server sends a 401. 
1550                 yv_lg 
= 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents 
1551                 yv_bitrate 
= '700'  # according to Wikipedia this is hard-coded 
1552                 request 
= urllib2
.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id 
+ 
1553                                           '&tech=flash&mode=playlist&lg=' + yv_lg 
+ '&bitrate=' + yv_bitrate 
+ '&vidH=' + yv_video_height 
+ 
1554                                           '&vidW=' + yv_video_width 
+ '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') 
1556                         self
.report_download_webpage(video_id
) 
1557                         webpage 
= urllib2
.urlopen(request
).read() 
1558                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1559                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1562                 # Extract media URL from playlist XML 
1563                 mobj 
= re
.search(r
'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage
) 
1565                         self
._downloader
.trouble(u
'ERROR: Unable to extract media URL') 
1567                 video_url 
= urllib
.unquote(mobj
.group(1) + mobj
.group(2)).decode('utf-8') 
1568                 video_url 
= re
.sub(r
'(?u)&(.+?);', htmlentity_transform
, video_url
) 
1571                         # Process video information 
1572                         self
._downloader
.process_info({ 
1573                                 'id':           video_id
.decode('utf-8'), 
1575                                 'uploader':     video_uploader
, 
1576                                 'upload_date':  u
'NA', 
1577                                 'title':        video_title
, 
1578                                 'stitle':       simple_title
, 
1579                                 'ext':          video_extension
.decode('utf-8'), 
1580                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
1581                                 'description':  video_description
, 
1582                                 'thumbnail':    video_thumbnail
, 
1583                                 'description':  video_description
, 
1586                 except UnavailableVideoError
: 
1587                         self
._downloader
.trouble(u
'ERROR: unable to download video') 
1590 class GenericIE(InfoExtractor
): 
1591         """Generic last-resort information extractor.""" 
1593         def __init__(self
, downloader
=None): 
1594                 InfoExtractor
.__init
__(self
, downloader
) 
1600         def report_download_webpage(self
, video_id
): 
1601                 """Report webpage download.""" 
1602                 self
._downloader
.to_screen(u
'WARNING: Falling back on generic information extractor.') 
1603                 self
._downloader
.to_screen(u
'[generic] %s: Downloading webpage' % video_id
) 
1605         def report_extraction(self
, video_id
): 
1606                 """Report information extraction.""" 
1607                 self
._downloader
.to_screen(u
'[generic] %s: Extracting information' % video_id
) 
1609         def _real_initialize(self
): 
1612         def _real_extract(self
, url
): 
1613                 # At this point we have a new video 
1614                 self
._downloader
.increment_downloads() 
1616                 video_id 
= url
.split('/')[-1] 
1617                 request 
= urllib2
.Request(url
) 
1619                         self
.report_download_webpage(video_id
) 
1620                         webpage 
= urllib2
.urlopen(request
).read() 
1621                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1622                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1624                 except ValueError, err
: 
1625                         # since this is the last-resort InfoExtractor, if 
1626                         # this error is thrown, it'll be thrown here 
1627                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1630                 self
.report_extraction(video_id
) 
1631                 # Start with something easy: JW Player in SWFObject 
1632                 mobj 
= re
.search(r
'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) 
1634                         # Broaden the search a little bit 
1635                         mobj = re.search(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage) 
1637                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
1640                 # It's possible that one of the regexes 
1641                 # matched, but returned an empty group: 
1642                 if mobj.group(1) is None: 
1643                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
1646                 video_url = urllib.unquote(mobj.group(1)) 
1647                 video_id  = os.path.basename(video_url) 
1649                 # here's a fun little line of code for you: 
1650                 video_extension = os.path.splitext(video_id)[1][1:] 
1651                 video_id        = os.path.splitext(video_id)[0] 
1653                 # it's tempting to parse this further, but you would 
1654                 # have to take into account all the variations like 
1655                 #   Video Title - Site Name 
1656                 #   Site Name | Video Title 
1657                 #   Video Title - Tagline | Site Name 
1658                 # and so on and so forth; it's just not practical 
1659                 mobj = re.search(r'<title>(.*)</title>', webpage) 
1661                         self._downloader.trouble(u'ERROR: unable to extract title') 
1663                 video_title = mobj.group(1).decode('utf-8') 
1664                 video_title = sanitize_title(video_title) 
1665                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) 
1667                 # video uploader is domain name 
1668                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) 
1670                         self._downloader.trouble(u'ERROR: unable to extract title') 
1672                 video_uploader = mobj.group(1).decode('utf-8') 
1675                         # Process video information 
1676                         self._downloader.process_info({ 
1677                                 'id':           video_id.decode('utf-8'), 
1678                                 'url':          video_url.decode('utf-8'), 
1679                                 'uploader':     video_uploader, 
1680                                 'upload_date':  u'NA', 
1681                                 'title':        video_title, 
1682                                 'stitle':       simple_title, 
1683                                 'ext':          video_extension.decode('utf-8'), 
1687                 except UnavailableVideoError, err: 
1688                         self._downloader.trouble(u'ERROR: unable to download video') 
1691 class YoutubeSearchIE(InfoExtractor): 
1692         """Information Extractor for YouTube search queries.""" 
1693         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' 
1694         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' 
1695         _VIDEO_INDICATOR = r'href="/watch
\?v
=.+?
"' 
1696         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
1698         _max_youtube_results = 1000 
1700         def __init__(self, youtube_ie, downloader=None): 
1701                 InfoExtractor.__init__(self, downloader) 
1702                 self._youtube_ie = youtube_ie 
1706                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) 
1708         def report_download_page(self, query, pagenum): 
1709                 """Report attempt to download playlist page with given number.""" 
1710                 query = query.decode(preferredencoding()) 
1711                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) 
1713         def _real_initialize(self): 
1714                 self._youtube_ie.initialize() 
1716         def _real_extract(self, query): 
1717                 mobj = re.match(self._VALID_QUERY, query) 
1719                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
1722                 prefix, query = query.split(':') 
1724                 query  = query.encode('utf-8') 
1726                         self._download_n_results(query, 1) 
1728                 elif prefix == 'all': 
1729                         self._download_n_results(query, self._max_youtube_results) 
1735                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
1737                                 elif n > self._max_youtube_results: 
1738                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n)) 
1739                                         n = self._max_youtube_results 
1740                                 self._download_n_results(query, n) 
1742                         except ValueError: # parsing prefix as integer fails 
1743                                 self._download_n_results(query, 1) 
1746         def _download_n_results(self, query, n): 
1747                 """Downloads a specified number of results for a query""" 
1750                 already_seen = set() 
1754                         self.report_download_page(query, pagenum) 
1755                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
1756                         request = urllib2.Request(result_url, None, std_headers) 
1758                                 page = urllib2.urlopen(request).read() 
1759                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
1760                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
1763                         # Extract video identifiers 
1764                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
1765                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] 
1766                                 if video_id not in already_seen: 
1767                                         video_ids.append(video_id) 
1768                                         already_seen.add(video_id) 
1769                                         if len(video_ids) == n: 
1770                                                 # Specified n videos reached 
1771                                                 for id in video_ids: 
1772                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
1775                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
1776                                 for id in video_ids: 
1777                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
1780                         pagenum = pagenum + 1 
1782 class GoogleSearchIE(InfoExtractor): 
1783         """Information Extractor for Google Video search queries.""" 
1784         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' 
1785         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' 
1786         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' 
1787         _MORE_PAGES_INDICATOR = r'<span>Next</span>' 
1789         _max_google_results = 1000 
1791         def __init__(self, google_ie, downloader=None): 
1792                 InfoExtractor.__init__(self, downloader) 
1793                 self._google_ie = google_ie 
1797                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) 
1799         def report_download_page(self, query, pagenum): 
1800                 """Report attempt to download playlist page with given number.""" 
1801                 query = query.decode(preferredencoding()) 
1802                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) 
1804         def _real_initialize(self): 
1805                 self._google_ie.initialize() 
1807         def _real_extract(self, query): 
1808                 mobj = re.match(self._VALID_QUERY, query) 
1810                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
1813                 prefix, query = query.split(':') 
1815                 query  = query.encode('utf-8') 
1817                         self._download_n_results(query, 1) 
1819                 elif prefix == 'all': 
1820                         self._download_n_results(query, self._max_google_results) 
1826                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
1828                                 elif n > self._max_google_results: 
1829                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n)) 
1830                                         n = self._max_google_results 
1831                                 self._download_n_results(query, n) 
1833                         except ValueError: # parsing prefix as integer fails 
1834                                 self._download_n_results(query, 1) 
1837         def _download_n_results(self, query, n): 
1838                 """Downloads a specified number of results for a query""" 
1841                 already_seen = set() 
1845                         self.report_download_page(query, pagenum) 
1846                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
1847                         request = urllib2.Request(result_url, None, std_headers) 
1849                                 page = urllib2.urlopen(request).read() 
1850                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
1851                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
1854                         # Extract video identifiers 
1855                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
1856                                 video_id = mobj.group(1) 
1857                                 if video_id not in already_seen: 
1858                                         video_ids.append(video_id) 
1859                                         already_seen.add(video_id) 
1860                                         if len(video_ids) == n: 
1861                                                 # Specified n videos reached 
1862                                                 for id in video_ids: 
1863                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
1866                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
1867                                 for id in video_ids: 
1868                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
1871                         pagenum = pagenum + 1 
1873 class YahooSearchIE(InfoExtractor): 
1874         """Information Extractor for Yahoo! Video search queries.""" 
1875         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' 
1876         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' 
1877         _VIDEO_INDICATOR = r'href="http
://video\
.yahoo\
.com
/watch
/([0-9]+/[0-9]+)"' 
1878         _MORE_PAGES_INDICATOR = r'\s*Next' 
1880         _max_yahoo_results = 1000 
1882         def __init__(self, yahoo_ie, downloader=None): 
1883                 InfoExtractor.__init__(self, downloader) 
1884                 self._yahoo_ie = yahoo_ie 
1888                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) 
1890         def report_download_page(self, query, pagenum): 
1891                 """Report attempt to download playlist page with given number.""" 
1892                 query = query.decode(preferredencoding()) 
1893                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) 
1895         def _real_initialize(self): 
1896                 self._yahoo_ie.initialize() 
1898         def _real_extract(self, query): 
1899                 mobj = re.match(self._VALID_QUERY, query) 
1901                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
1904                 prefix, query = query.split(':') 
1906                 query  = query.encode('utf-8') 
1908                         self._download_n_results(query, 1) 
1910                 elif prefix == 'all': 
1911                         self._download_n_results(query, self._max_yahoo_results) 
1917                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
1919                                 elif n > self._max_yahoo_results: 
1920                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n)) 
1921                                         n = self._max_yahoo_results 
1922                                 self._download_n_results(query, n) 
1924                         except ValueError: # parsing prefix as integer fails 
1925                                 self._download_n_results(query, 1) 
1928         def _download_n_results(self, query, n): 
1929                 """Downloads a specified number of results for a query""" 
1932                 already_seen = set() 
1936                         self.report_download_page(query, pagenum) 
1937                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
1938                         request = urllib2.Request(result_url, None, std_headers) 
1940                                 page = urllib2.urlopen(request).read() 
1941                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
1942                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
1945                         # Extract video identifiers 
1946                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
1947                                 video_id = mobj.group(1) 
1948                                 if video_id not in already_seen: 
1949                                         video_ids.append(video_id) 
1950                                         already_seen.add(video_id) 
1951                                         if len(video_ids) == n: 
1952                                                 # Specified n videos reached 
1953                                                 for id in video_ids: 
1954                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
1957                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
1958                                 for id in video_ids: 
1959                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
1962                         pagenum = pagenum + 1 
1964 class YoutubePlaylistIE(InfoExtractor): 
1965         """Information Extractor for YouTube playlists.""" 
1967         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/)([^&]+).*' 
1968         _TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en' 
1969         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
1970         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
1973         def __init__(self, youtube_ie, downloader=None): 
1974                 InfoExtractor.__init__(self, downloader) 
1975                 self._youtube_ie = youtube_ie 
1979                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) 
1981         def report_download_page(self, playlist_id, pagenum): 
1982                 """Report attempt to download playlist page with given number.""" 
1983                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) 
1985         def _real_initialize(self): 
1986                 self._youtube_ie.initialize() 
1988         def _real_extract(self, url): 
1989                 # Extract playlist id 
1990                 mobj = re.match(self._VALID_URL, url) 
1992                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
1995                 # Download playlist pages 
1996                 playlist_id = mobj.group(1) 
2001                         self.report_download_page(playlist_id, pagenum) 
2002                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum), None, std_headers) 
2004                                 page = urllib2.urlopen(request).read() 
2005                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2006                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2009                         # Extract video identifiers 
2011                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2012                                 if mobj.group(1) not in ids_in_page: 
2013                                         ids_in_page.append(mobj.group(1)) 
2014                         video_ids.extend(ids_in_page) 
2016                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2018                         pagenum = pagenum + 1 
2020                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2021                 playlistend = self._downloader.params.get('playlistend', -1) 
2022                 video_ids = video_ids[playliststart:playlistend] 
2024                 for id in video_ids: 
2025                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2028 class YoutubeUserIE(InfoExtractor): 
2029         """Information Extractor for YouTube users.""" 
2031         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/user/(.*)' 
2032         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' 
2033         _VIDEO_INDICATOR = r'http://gdata.youtube.com/feeds/api/videos/(.*)' # XXX Fix this. 
2036         def __init__(self, youtube_ie, downloader=None): 
2037                 InfoExtractor.__init__(self, downloader) 
2038                 self._youtube_ie = youtube_ie 
2042                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None) 
2044         def report_download_page(self, username): 
2045                 """Report attempt to download user page.""" 
2046                 self._downloader.to_screen(u'[youtube] user %s: Downloading page ' % (username)) 
2048         def _real_initialize(self): 
2049                 self._youtube_ie.initialize() 
2051         def _real_extract(self, url): 
2053                 mobj = re.match(self._VALID_URL, url) 
2055                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2058                 # Download user page 
2059                 username = mobj.group(1) 
2063                 self.report_download_page(username) 
2064                 request = urllib2.Request(self._TEMPLATE_URL % (username), None, std_headers) 
2066                         page = urllib2.urlopen(request).read() 
2067                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2068                         self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2071                 # Extract video identifiers 
2074                 for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2075                         if mobj.group(1) not in ids_in_page: 
2076                                 ids_in_page.append(mobj.group(1)) 
2077                 video_ids.extend(ids_in_page) 
2079                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2080                 playlistend = self._downloader.params.get('playlistend', -1) 
2081                 video_ids = video_ids[playliststart:playlistend] 
2083                 for id in video_ids: 
2084                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2087 class DepositFilesIE(InfoExtractor): 
2088         """Information extractor for depositfiles.com""" 
2090         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' 
2092         def __init__(self, downloader=None): 
2093                 InfoExtractor.__init__(self, downloader) 
2097                 return (re.match(DepositFilesIE._VALID_URL, url) is not None) 
2099         def report_download_webpage(self, file_id): 
2100                 """Report webpage download.""" 
2101                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) 
2103         def report_extraction(self, file_id): 
2104                 """Report information extraction.""" 
2105                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) 
2107         def _real_initialize(self): 
2110         def _real_extract(self, url): 
2111                 # At this point we have a new file 
2112                 self._downloader.increment_downloads() 
2114                 file_id = url.split('/')[-1] 
2115                 # Rebuild url in english locale 
2116                 url = 'http://depositfiles.com/en/files/' + file_id 
2118                 # Retrieve file webpage with 'Free download' button pressed 
2119                 free_download_indication = { 'gateway_result' : '1' } 
2120                 request = urllib2.Request(url, urllib.urlencode(free_download_indication), std_headers) 
2122                         self.report_download_webpage(file_id) 
2123                         webpage = urllib2.urlopen(request).read() 
2124                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2125                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) 
2128                 # Search for the real file URL 
2129                 mobj = re.search(r'<form action="(http
://fileshare
.+?
)"', webpage) 
2130                 if (mobj is None) or (mobj.group(1) is None): 
2131                         # Try to figure out reason of the error. 
2132                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) 
2133                         if (mobj is not None) and (mobj.group(1) is not None): 
2134                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() 
2135                                 self._downloader.trouble(u'ERROR: %s' % restriction_message) 
2137                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) 
2140                 file_url = mobj.group(1) 
2141                 file_extension = os.path.splitext(file_url)[1][1:] 
2143                 # Search for file title 
2144                 mobj = re.search(r'<b title="(.*?
)">', webpage) 
2146                         self._downloader.trouble(u'ERROR: unable to extract title') 
2148                 file_title = mobj.group(1).decode('utf-8') 
2151                         # Process file information 
2152                         self._downloader.process_info({ 
2153                                 'id':           file_id.decode('utf-8'), 
2154                                 'url':          file_url.decode('utf-8'), 
2156                                 'upload_date':  u'NA', 
2157                                 'title':        file_title, 
2158                                 'stitle':       file_title, 
2159                                 'ext':          file_extension.decode('utf-8'), 
2163                 except UnavailableVideoError, err: 
2164                         self._downloader.trouble(u'ERROR: unable to download file') 
class PostProcessor(object):
	"""Post Processor class.

	PostProcessor objects can be added to downloaders with their
	add_post_processor() method. When the downloader has finished a
	successful download, it will take its internal chain of PostProcessors
	and start calling the run() method on each one of them, first with
	an initial argument and then with the returned value of the previous
	PostProcessor.

	The chain will be stopped if one of them ever returns None or the end
	of the chain is reached.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	"""

	# Downloader instance this PP is registered with (set via __init__
	# or set_downloader); only used for options/reporting.
	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		composed by InfoExtractors. The only difference is that this
		one has an extra field called "filepath" that points to the
		downloaded file.

		When this method returns None, the postprocessing chain is
		stopped. However, this method may return an information
		dictionary that will be passed to the next postprocessing
		object in the chain. It can be the one it received after
		changing some fields.

		In addition, this method may raise a PostProcessingError
		exception that will be taken into account by the downloader
		it was called from.
		"""
		return information # by default, do nothing
2212 ### MAIN PROGRAM ### 
2213 if __name__ == '__main__': 
2215                 # Modules needed only when running the main program 
2219                 # Function to update the program file with the latest version from bitbucket.org 
2220                 def update_self(downloader, filename): 
2221                         # Note: downloader only used for options 
2222                         if not os.access (filename, os.W_OK): 
2223                                 sys.exit('ERROR: no write permissions on %s' % filename) 
2225                         downloader.to_screen('Updating to latest stable version...') 
2226                         latest_url = 'http://github.com/rg3/youtube-dl/raw/master/LATEST_VERSION' 
2227                         latest_version = urllib.urlopen(latest_url).read().strip() 
2228                         prog_url = 'http://github.com/rg3/youtube-dl/raw/%s/youtube-dl' % latest_version 
2229                         newcontent = urllib.urlopen(prog_url).read() 
2230                         stream = open(filename, 'w') 
2231                         stream.write(newcontent) 
2233                         downloader.to_screen('Updated to version %s' % latest_version) 
2235                 # Parse command line 
2236                 parser = optparse.OptionParser( 
2237                         usage='Usage: %prog [options] url...', 
2238                         version='2010.12.09', 
2239                         conflict_handler='resolve', 
2242                 parser.add_option('-h', '--help', 
2243                                 action='help', help='print this help text and exit') 
2244                 parser.add_option('-v', '--version', 
2245                                 action='version', help='print program version and exit') 
2246                 parser.add_option('-U', '--update', 
2247                                 action='store_true', dest='update_self', help='update this program to latest stable version') 
2248                 parser.add_option('-i', '--ignore-errors', 
2249                                 action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) 
2250                 parser.add_option('-r', '--rate-limit', 
2251                                 dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') 
2252                 parser.add_option('-R', '--retries', 
2253                                 dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) 
2254                 parser.add_option('--playlist-start', 
2255                                 dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) 
2256                 parser.add_option('--playlist-end', 
2257                                 dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) 
2259                 authentication = optparse.OptionGroup(parser, 'Authentication Options') 
2260                 authentication.add_option('-u', '--username', 
2261                                 dest='username', metavar='USERNAME', help='account username') 
2262                 authentication.add_option('-p', '--password', 
2263                                 dest='password', metavar='PASSWORD', help='account password') 
2264                 authentication.add_option('-n', '--netrc', 
2265                                 action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) 
2266                 parser.add_option_group(authentication) 
2268                 video_format = optparse.OptionGroup(parser, 'Video Format Options') 
2269                 video_format.add_option('-f', '--format', 
2270                                 action='store', dest='format', metavar='FORMAT', help='video format code') 
2271                 video_format.add_option('-m', '--mobile-version', 
2272                                 action='store_const', dest='format', help='alias for -f 17', const='17') 
2273                 video_format.add_option('--all-formats', 
2274                                 action='store_const', dest='format', help='download all available video formats', const='-1') 
2275                 video_format.add_option('--max-quality', 
2276                                 action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') 
2277                 video_format.add_option('-b', '--best-quality', 
2278                                 action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') 
2279                 parser.add_option_group(video_format) 
2281                 verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') 
2282                 verbosity.add_option('-q', '--quiet', 
2283                                 action='store_true', dest='quiet', help='activates quiet mode', default=False) 
2284                 verbosity.add_option('-s', '--simulate', 
2285                                 action='store_true', dest='simulate', help='do not download video', default=False) 
2286                 verbosity.add_option('-g', '--get-url', 
2287                                 action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) 
2288                 verbosity.add_option('-e', '--get-title', 
2289                                 action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) 
2290                 verbosity.add_option('--get-thumbnail', 
2291                                 action='store_true', dest='getthumbnail', help='simulate, quiet but print thumbnail URL', default=False) 
2292                 verbosity.add_option('--get-description', 
2293                                 action='store_true', dest='getdescription', help='simulate, quiet but print video description', default=False) 
2294                 verbosity.add_option('--no-progress', 
2295                                 action='store_true', dest='noprogress', help='do not print progress bar', default=False) 
2296                 parser.add_option_group(verbosity) 
2298                 filesystem = optparse.OptionGroup(parser, 'Filesystem Options') 
2299                 filesystem.add_option('-t', '--title', 
2300                                 action='store_true', dest='usetitle', help='use title in file name', default=False) 
2301                 filesystem.add_option('-l', '--literal', 
2302                                 action='store_true', dest='useliteral', help='use literal title in file name', default=False) 
2303                 filesystem.add_option('-A', '--auto-number', 
2304                                 action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) 
2305                 filesystem.add_option('-o', '--output', 
2306                                 dest='outtmpl', metavar='TEMPLATE', help='output filename template') 
2307                 filesystem.add_option('-a', '--batch-file', 
2308                                 dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') 
2309                 filesystem.add_option('-w', '--no-overwrites', 
2310                                 action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) 
2311                 filesystem.add_option('-c', '--continue', 
2312                                 action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) 
2313                 filesystem.add_option('--cookies', 
2314                                 dest='cookiefile', metavar='FILE', help='file to dump cookie jar to') 
2315                 parser.add_option_group(filesystem) 
2317                 (opts, args) = parser.parse_args() 
2319                 # Open appropriate CookieJar 
2320                 if opts.cookiefile is None: 
2321                         jar = cookielib.CookieJar() 
2324                                 jar = cookielib.MozillaCookieJar(opts.cookiefile) 
2325                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): 
2327                         except (IOError, OSError), err: 
2328                                 sys.exit(u'ERROR: unable to open cookie file') 
2330                 # General configuration 
2331                 cookie_processor = urllib2.HTTPCookieProcessor(jar) 
2332                 urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler())) 
2333                 urllib2.install_opener(urllib2.build_opener(cookie_processor)) 
2334                 socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) 
2336                 # Batch file verification 
2338                 if opts.batchfile is not None: 
2340                                 if opts.batchfile == '-': 
2343                                         batchfd = open(opts.batchfile, 'r') 
2344                                 batchurls = batchfd.readlines() 
2345                                 batchurls = [x.strip() for x in batchurls] 
2346                                 batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] 
2348                                 sys.exit(u'ERROR: batch file could not be read') 
2349                 all_urls = batchurls + args 
2351                 # Conflicting, missing and erroneous options 
2352                 if opts.bestquality: 
2353                         print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' 
2354                 if opts.usenetrc and (opts.username is not None or opts.password is not None): 
2355                         parser.error(u'using .netrc conflicts with giving username/password') 
2356                 if opts.password is not None and opts.username is None: 
2357                         parser.error(u'account username missing') 
2358                 if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): 
2359                         parser.error(u'using output template conflicts with using title, literal title or auto number') 
2360                 if opts.usetitle and opts.useliteral: 
2361                         parser.error(u'using title conflicts with using literal title') 
2362                 if opts.username is not None and opts.password is None: 
2363                         opts.password = getpass.getpass(u'Type account password and press return:') 
2364                 if opts.ratelimit is not None: 
2365                         numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) 
2366                         if numeric_limit is None: 
2367                                 parser.error(u'invalid rate limit specified') 
2368                         opts.ratelimit = numeric_limit 
2369                 if opts.retries is not None: 
2371                                 opts.retries = long(opts.retries) 
2372                         except (TypeError, ValueError), err: 
2373                                 parser.error(u'invalid retry count specified') 
2375                         opts.playliststart = long(opts.playliststart) 
2376                         if opts.playliststart <= 0: 
2378                 except (TypeError, ValueError), err: 
2379                         parser.error(u'invalid playlist start number specified') 
2381                         opts.playlistend = long(opts.playlistend) 
2382                         if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): 
2384                 except (TypeError, ValueError), err: 
2385                         parser.error(u'invalid playlist end number specified') 
2387                 # Information extractors 
2388                 youtube_ie = YoutubeIE() 
2389                 metacafe_ie = MetacafeIE(youtube_ie) 
2390                 dailymotion_ie = DailymotionIE() 
2391                 youtube_pl_ie = YoutubePlaylistIE(youtube_ie) 
2392                 youtube_user_ie = YoutubeUserIE(youtube_ie) 
2393                 youtube_search_ie = YoutubeSearchIE(youtube_ie) 
2394                 google_ie = GoogleIE() 
2395                 google_search_ie = GoogleSearchIE(google_ie) 
2396                 photobucket_ie = PhotobucketIE() 
2397                 yahoo_ie = YahooIE() 
2398                 yahoo_search_ie = YahooSearchIE(yahoo_ie) 
2399                 deposit_files_ie = DepositFilesIE() 
2400                 generic_ie = GenericIE() 
2403                 fd = FileDownloader({ 
2404                         'usenetrc': opts.usenetrc, 
2405                         'username': opts.username, 
2406                         'password': opts.password, 
2407                         'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 
2408                         'forceurl': opts.geturl, 
2409                         'forcetitle': opts.gettitle, 
2410                         'forcethumbnail': opts.getthumbnail, 
2411                         'forcedescription': opts.getdescription, 
2412                         'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription), 
2413                         'format': opts.format, 
2414                         'format_limit': opts.format_limit, 
2415                         'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) 
2416                                 or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') 
2417                                 or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') 
2418                                 or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') 
2419                                 or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') 
2420                                 or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') 
2421                                 or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') 
2422                                 or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') 
2423                                 or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') 
2424                                 or u'%(id)s.%(ext)s'), 
2425                         'ignoreerrors': opts.ignoreerrors, 
2426                         'ratelimit': opts.ratelimit, 
2427                         'nooverwrites': opts.nooverwrites, 
2428                         'retries': opts.retries, 
2429                         'continuedl': opts.continue_dl, 
2430                         'noprogress': opts.noprogress, 
2431                         'playliststart': opts.playliststart, 
2432                         'playlistend': opts.playlistend, 
2433                         'logtostderr': opts.outtmpl == '-', 
2435                 fd.add_info_extractor(youtube_search_ie) 
2436                 fd.add_info_extractor(youtube_pl_ie) 
2437                 fd.add_info_extractor(youtube_user_ie) 
2438                 fd.add_info_extractor(metacafe_ie) 
2439                 fd.add_info_extractor(dailymotion_ie) 
2440                 fd.add_info_extractor(youtube_ie) 
2441                 fd.add_info_extractor(google_ie) 
2442                 fd.add_info_extractor(google_search_ie) 
2443                 fd.add_info_extractor(photobucket_ie) 
2444                 fd.add_info_extractor(yahoo_ie) 
2445                 fd.add_info_extractor(yahoo_search_ie) 
2446                 fd.add_info_extractor(deposit_files_ie) 
2448                 # This must come last since it's the 
2449                 # fallback if none of the others work 
2450                 fd.add_info_extractor(generic_ie) 
2453                 if opts.update_self: 
2454                         update_self(fd, sys.argv[0]) 
2457                 if len(all_urls) < 1: 
2458                         if not opts.update_self: 
2459                                 parser.error(u'you must provide at least one URL') 
2462                 retcode = fd.download(all_urls) 
2464                 # Dump cookie jar if requested 
2465                 if opts.cookiefile is not None: 
2468                         except (IOError, OSError), err: 
2469                                 sys.exit(u'ERROR: unable to save cookie jar') 
2473         except DownloadError: 
2475         except SameFileError: 
2476                 sys.exit(u'ERROR: fixed output name but more than one file to download') 
2477         except KeyboardInterrupt: 
2478                 sys.exit(u'\nERROR: Interrupted by user')