2 # -*- coding: utf-8 -*- 
   3 # Author: Ricardo Garcia Gonzalez 
   4 # Author: Danny Colligan 
   5 # Author: Benjamin Johnson 
   6 # Author: Vasyl' Vavrychuk 
   7 # Author: Witold Baryluk 
   8 # Author: Paweł Paprota 
   9 # Author: Gergely Imreh 
  10 # License: Public domain code 
  34 # parse_qs was moved from the cgi module to the urlparse module recently. 
  36         from urlparse 
import parse_qs
 
  38         from cgi 
import parse_qs
 
  41         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11', 
  42         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  43         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  44         'Accept-Encoding': 'gzip, deflate', 
  45         'Accept-Language': 'en-us,en;q=0.5', 
# Character whitelist used when simplifying titles: ASCII letters and digits,
# decoded to unicode so concatenation with unicode titles is safe (Python 2).
simple_title_chars = string.ascii_letters.decode('ascii') + string.digits.decode('ascii')
  50 def preferredencoding(): 
  51         """Get preferred encoding. 
  53         Returns the best encoding scheme for the system, based on 
  54         locale.getpreferredencoding() and some further tweaks. 
  56         def yield_preferredencoding(): 
  58                         pref 
= locale
.getpreferredencoding() 
  64         return yield_preferredencoding().next() 
  66 def htmlentity_transform(matchobj
): 
  67         """Transforms an HTML entity to a Unicode character. 
  69         This function receives a match object and is intended to be used with 
  70         the re.sub() function. 
  72         entity 
= matchobj
.group(1) 
  74         # Known non-numeric HTML entity 
  75         if entity 
in htmlentitydefs
.name2codepoint
: 
  76                 return unichr(htmlentitydefs
.name2codepoint
[entity
]) 
  79         mobj 
= re
.match(ur
'(?u)#(x?\d+)', entity
) 
  81                 numstr 
= mobj
.group(1) 
  82                 if numstr
.startswith(u
'x'): 
  84                         numstr 
= u
'0%s' % numstr
 
  87                 return unichr(long(numstr
, base
)) 
  89         # Unknown entity in name, return its literal representation 
  90         return (u
'&%s;' % entity
) 
  92 def sanitize_title(utitle
): 
  93         """Sanitizes a video title so it could be used as part of a filename.""" 
  94         utitle 
= re
.sub(ur
'(?u)&(.+?);', htmlentity_transform
, utitle
) 
  95         return utitle
.replace(unicode(os
.sep
), u
'%') 
  97 def sanitize_open(filename
, open_mode
): 
  98         """Try to open the given filename, and slightly tweak it if this fails. 
 100         Attempts to open the given filename. If this fails, it tries to change 
 101         the filename slightly, step by step, until it's either able to open it 
 102         or it fails and raises a final exception, like the standard open() 
 105         It returns the tuple (stream, definitive_file_name). 
 109                         if sys
.platform 
== 'win32': 
 111                                 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
) 
 112                         return (sys
.stdout
, filename
) 
 113                 stream 
= open(filename
, open_mode
) 
 114                 return (stream
, filename
) 
 115         except (IOError, OSError), err
: 
 116                 # In case of error, try to remove win32 forbidden chars 
 117                 filename 
= re
.sub(ur
'[/<>:"\|\?\*]', u
'#', filename
) 
 119                 # An exception here should be caught in the caller 
 120                 stream 
= open(filename
, open_mode
) 
 121                 return (stream
, filename
) 
 123 def timeconvert(timestr
): 
 124     """Convert RFC 2822 defined time string into system timestamp""" 
 126     timetuple 
= email
.utils
.parsedate_tz(timestr
) 
 127     if timetuple 
is not None: 
 128         timestamp 
= email
.utils
.mktime_tz(timetuple
) 
 131 class DownloadError(Exception): 
 132         """Download Error exception. 
 134         This exception may be thrown by FileDownloader objects if they are not 
 135         configured to continue on errors. They will contain the appropriate 
 140 class SameFileError(Exception): 
 141         """Same File exception. 
 143         This exception will be thrown by FileDownloader objects if they detect 
 144         multiple files would have to be downloaded to the same file on disk. 
 148 class PostProcessingError(Exception): 
 149         """Post Processing exception. 
 151         This exception may be raised by PostProcessor's .run() method to 
 152         indicate an error in the postprocessing task. 
 156 class UnavailableVideoError(Exception): 
 157         """Unavailable Format exception. 
 159         This exception will be thrown when a video is requested 
 160         in a format that is not available for that video. 
 164 class ContentTooShortError(Exception): 
 165         """Content Too Short exception. 
 167         This exception may be raised by FileDownloader objects when a file they 
 168         download is too small for what the server announced first, indicating 
 169         the connection was probably interrupted. 
 175         def __init__(self
, downloaded
, expected
): 
 176                 self
.downloaded 
= downloaded
 
 177                 self
.expected 
= expected
 
 179 class YoutubeDLHandler(urllib2
.HTTPHandler
): 
 180         """Handler for HTTP requests and responses. 
 182         This class, when installed with an OpenerDirector, automatically adds 
 183         the standard headers to every HTTP request and handles gzipped and 
 184         deflated responses from web servers. If compression is to be avoided in 
 185         a particular request, the original request in the program code only has 
 186         to include the HTTP header "Youtubedl-No-Compression", which will be 
 187         removed before making the real request. 
 189         Part of this code was copied from: 
 191           http://techknack.net/python-urllib2-handlers/ 
 193         Andrew Rowls, the author of that code, agreed to release it to the 
 200                         return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 202                         return zlib
.decompress(data
) 
 205         def addinfourl_wrapper(stream
, headers
, url
, code
): 
 206                 if hasattr(urllib2
.addinfourl
, 'getcode'): 
 207                         return urllib2
.addinfourl(stream
, headers
, url
, code
) 
 208                 ret 
= urllib2
.addinfourl(stream
, headers
, url
) 
 212         def http_request(self
, req
): 
 213                 for h 
in std_headers
: 
 216                         req
.add_header(h
, std_headers
[h
]) 
 217                 if 'Youtubedl-no-compression' in req
.headers
: 
 218                         if 'Accept-encoding' in req
.headers
: 
 219                                 del req
.headers
['Accept-encoding'] 
 220                         del req
.headers
['Youtubedl-no-compression'] 
 223         def http_response(self
, req
, resp
): 
 226                 if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 227                         gz 
= gzip
.GzipFile(fileobj
=StringIO
.StringIO(resp
.read()), mode
='r') 
 228                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 229                         resp
.msg 
= old_resp
.msg
 
 231                 if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 232                         gz 
= StringIO
.StringIO(self
.deflate(resp
.read())) 
 233                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 234                         resp
.msg 
= old_resp
.msg
 
class FileDownloader(object):
	"""File Downloader class.

	File downloader objects are the ones responsible of downloading the
	actual video file and writing it to disk if the user has requested
	it, among some other tasks. In most cases there should be one per
	program. As, given a video URL, the downloader doesn't know how to
	extract all the needed information, task that InfoExtractors do, it
	has to pass the URL to one of them.

	For this, file downloader objects have a method that allows
	InfoExtractors to be registered in a given order. When it is passed
	a URL, the file downloader handles it to the first InfoExtractor it
	finds that reports being able to handle it. The InfoExtractor extracts
	all the information about the video or videos the URL refers to, and
	asks the FileDownloader to process the video information, possibly
	downloading the video.

	File downloaders accept a lot of parameters. In order not to saturate
	the object constructor with arguments, it receives a dictionary of
	options instead. These options are available through the params
	attribute for the InfoExtractors to use. The FileDownloader also
	registers itself as the downloader in charge for the InfoExtractors
	that are added to it, so this is a "mutual registration".

	Available options:

	username:         Username for authentication purposes.
	password:         Password for authentication purposes.
	usenetrc:         Use netrc for authentication instead.
	quiet:            Do not print messages to stdout.
	forceurl:         Force printing final URL.
	forcetitle:       Force printing title.
	forcethumbnail:   Force printing thumbnail URL.
	forcedescription: Force printing description.
	forcefilename:    Force printing final filename.
	simulate:         Do not download the video files.
	format:           Video format code.
	format_limit:     Highest quality format to try.
	outtmpl:          Template for output names.
	ignoreerrors:     Do not stop on download errors.
	ratelimit:        Download speed limit, in bytes/sec.
	nooverwrites:     Prevent overwriting files.
	retries:          Number of times to retry for HTTP error 5xx
	continuedl:       Try to continue downloads if possible.
	noprogress:       Do not print the progress bar.
	playliststart:    Playlist item to start at.
	playlistend:      Playlist item to end at.
	logtostderr:      Log messages to stderr instead of stdout.
	consoletitle:     Display progress in console window's titlebar.
	nopart:           Do not use temporary .part files.
	updatetime:       Use the Last-modified header to set output file timestamps.
	"""

	# Class-level placeholders; real values are assigned per-instance in __init__.
	_download_retcode = None
	_num_downloads = None
	def __init__(self, params):
		"""Create a FileDownloader object with the given options."""
		self._ies = []		# registered InfoExtractors, tried in order
		self._pps = []		# registered PostProcessors, run as a chain
		self._download_retcode = 0
		self._num_downloads = 0
		# Route screen output to stderr when logtostderr is set.
		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
		self.params = params
	@staticmethod
	def pmkdir(filename):
		"""Create directory components in filename. Similar to Unix "mkdir -p"."""
		components = filename.split(os.sep)
		# Build every ancestor path: "a", "a/b", "a/b/c"... (excludes the file itself)
		aggregate = [os.sep.join(components[0:x]) for x in xrange(1, len(components))]
		aggregate = ['%s%s' % (x, os.sep) for x in aggregate] # Finish names with separator
		for dir in aggregate:
			if not os.path.exists(dir):
				os.mkdir(dir)
	@staticmethod
	def format_bytes(bytes):
		"""Format a byte count as a human-readable string (e.g. '1.50M')."""
		if bytes is None:
			return 'N/A'
		if type(bytes) is str:
			bytes = float(bytes)
		if bytes == 0.0:
			exponent = 0
		else:
			exponent = long(math.log(bytes, 1024.0))
		suffix = 'bkMGTPEZY'[exponent]
		converted = float(bytes) / float(1024**exponent)
		return '%.2f%s' % (converted, suffix)
	@staticmethod
	def calc_percent(byte_counter, data_len):
		"""Return a right-aligned percentage string, or a placeholder when the total is unknown."""
		if data_len is None:
			return '---.-%'
		return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
	@staticmethod
	def calc_eta(start, now, total, current):
		"""Estimate remaining download time as 'MM:SS', or '--:--' when unknown."""
		if total is None:
			return '--:--'
		dif = now - start
		if current == 0 or dif < 0.001: # One millisecond
			return '--:--'
		rate = float(current) / dif
		eta = long((float(total) - float(current)) / rate)
		(eta_mins, eta_secs) = divmod(eta, 60)
		# The field only fits two digits of minutes.
		if eta_mins > 99:
			return '--:--'
		return '%02d:%02d' % (eta_mins, eta_secs)
	@staticmethod
	def calc_speed(start, now, bytes):
		"""Return the average download speed as a right-aligned string."""
		dif = now - start
		if bytes == 0 or dif < 0.001: # One millisecond
			return '%10s' % '---b/s'
		return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
	@staticmethod
	def best_block_size(elapsed_time, bytes):
		"""Pick the next read size, adapting to the measured transfer rate."""
		new_min = max(bytes / 2.0, 1.0)
		new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
		if elapsed_time < 0.001:
			return long(new_max)
		rate = bytes / elapsed_time
		# Clamp the new block size to [new_min, new_max].
		if rate > new_max:
			return long(new_max)
		if rate < new_min:
			return long(new_min)
		return long(rate)
	@staticmethod
	def parse_bytes(bytestr):
		"""Parse a string indicating a byte quantity into a long integer.

		Accepts an optional one-letter binary suffix (k, M, G, ...);
		returns None when the string does not match.
		"""
		matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
		if matchobj is None:
			return None
		number = float(matchobj.group(1))
		# Empty suffix maps to index 0 via the leading 'b' -> multiplier 1.
		multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
		return long(round(number * multiplier))
	def add_info_extractor(self, ie):
		"""Add an InfoExtractor object to the end of the list."""
		self._ies.append(ie)
		# Mutual registration: the IE also learns who its downloader is.
		ie.set_downloader(self)
	def add_post_processor(self, pp):
		"""Add a PostProcessor object to the end of the chain."""
		self._pps.append(pp)
		# Mutual registration: the PP also learns who its downloader is.
		pp.set_downloader(self)
 391         def to_screen(self
, message
, skip_eol
=False, ignore_encoding_errors
=False): 
 392                 """Print message to stdout if not in quiet mode.""" 
 394                         if not self
.params
.get('quiet', False): 
 395                                 terminator 
= [u
'\n', u
''][skip_eol
] 
 396                                 print >>self
._screen
_file
, (u
'%s%s' % (message
, terminator
)).encode(preferredencoding()), 
 397                         self
._screen
_file
.flush() 
 398                 except (UnicodeEncodeError), err
: 
 399                         if not ignore_encoding_errors
: 
	def to_stderr(self, message):
		"""Print message to stderr."""
		encoded = message.encode(preferredencoding())
		print >>sys.stderr, encoded
	def to_cons_title(self, message):
		"""Set console/terminal window title to message."""
		if not self.params.get('consoletitle', False):
			return
		if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
			# c_wchar_p() might not be necessary if `message` is
			# already of type unicode()
			ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
		elif 'TERM' in os.environ:
			# xterm-compatible escape sequence that sets the window title.
			sys.stderr.write('\033]0;%s\007' % message.encode(preferredencoding()))
 417         def fixed_template(self
): 
 418                 """Checks if the output template is fixed.""" 
 419                 return (re
.search(ur
'(?u)%\(.+?\)s', self
.params
['outtmpl']) is None) 
	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on if the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		self._download_retcode = 1
	def slow_down(self, start_time, byte_counter):
		"""Sleep if the download speed is over the rate limit."""
		rate_limit = self.params.get('ratelimit', None)
		if rate_limit is None or byte_counter == 0:
			return
		now = time.time()
		elapsed = now - start_time
		if elapsed <= 0.0:
			return
		speed = float(byte_counter) / elapsed
		if speed > rate_limit:
			# Sleep just long enough that the average speed drops to the limit.
			time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
	def temp_name(self, filename):
		"""Returns a temporary filename for the given filename."""
		# Keep the real name when .part files are disabled, when writing to
		# stdout, or when the path exists but is not a regular file.
		if self.params.get('nopart', False) or filename == u'-' or \
				(os.path.exists(filename) and not os.path.isfile(filename)):
			return filename
		return filename + u'.part'
	def undo_temp_name(self, filename):
		"""Strip the .part suffix, if present, returning the final filename."""
		if filename.endswith(u'.part'):
			return filename[:-len(u'.part')]
		return filename
 459         def try_rename(self
, old_filename
, new_filename
): 
 461                         if old_filename 
== new_filename
: 
 463                         os
.rename(old_filename
, new_filename
) 
 464                 except (IOError, OSError), err
: 
 465                         self
.trouble(u
'ERROR: unable to rename file') 
	def try_utime(self, filename, last_modified_hdr):
		"""Try to set the last-modified time of the given file."""
		if last_modified_hdr is None:
			return
		if not os.path.isfile(filename):
			return
		timestr = last_modified_hdr
		if timestr is None:
			return
		filetime = timeconvert(timestr)
		if filetime is None:
			return
		# Best-effort: keep atime current, set mtime from the header.
		try:
			os.utime(filename, (time.time(), filetime))
		except:
			pass
	def report_destination(self, filename):
		"""Report destination filename."""
		message = u'[download] Destination: %s' % filename
		self.to_screen(message, ignore_encoding_errors=True)
	def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
		"""Report download progress."""
		if self.params.get('noprogress', False):
			return
		# \r rewrites the current line so the bar updates in place.
		self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
				(percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
		self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
				(percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		message = u'[download] Resuming download at byte %s' % resume_len
		self.to_screen(message)
	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx"""
		message = u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)
		self.to_screen(message)
 505         def report_file_already_downloaded(self
, file_name
): 
 506                 """Report file has already been fully downloaded.""" 
 508                         self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 509                 except (UnicodeEncodeError), err
: 
 510                         self
.to_screen(u
'[download] The file has already been downloaded') 
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		message = u'[download] Unable to resume'
		self.to_screen(message)
	def report_finish(self):
		"""Report download finished."""
		if self.params.get('noprogress', False):
			self.to_screen(u'[download] Download completed')
		else:
			# The progress line ends without a newline; emit one to close it.
			self.to_screen(u'')
	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		self._num_downloads = self._num_downloads + 1
 527         def prepare_filename(self
, info_dict
): 
 528                 """Generate the output filename.""" 
 530                         template_dict 
= dict(info_dict
) 
 531                         template_dict
['epoch'] = unicode(long(time
.time())) 
 532                         template_dict
['autonumber'] = unicode('%05d' % self
._num
_downloads
) 
 533                         filename 
= self
.params
['outtmpl'] % template_dict
 
 535                 except (ValueError, KeyError), err
: 
 536                         self
.trouble(u
'ERROR: invalid system charset or erroneous output template') 
 539         def process_info(self
, info_dict
): 
 540                 """Process a single dictionary returned by an InfoExtractor.""" 
 541                 filename 
= self
.prepare_filename(info_dict
) 
 542                 # Do nothing else if in simulate mode 
 543                 if self
.params
.get('simulate', False): 
 545                         if self
.params
.get('forcetitle', False): 
 546                                 print info_dict
['title'].encode(preferredencoding(), 'xmlcharrefreplace') 
 547                         if self
.params
.get('forceurl', False): 
 548                                 print info_dict
['url'].encode(preferredencoding(), 'xmlcharrefreplace') 
 549                         if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
: 
 550                                 print info_dict
['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') 
 551                         if self
.params
.get('forcedescription', False) and 'description' in info_dict
: 
 552                                 print info_dict
['description'].encode(preferredencoding(), 'xmlcharrefreplace') 
 553                         if self
.params
.get('forcefilename', False) and filename 
is not None: 
 554                                 print filename
.encode(preferredencoding(), 'xmlcharrefreplace') 
 560                 if self
.params
.get('nooverwrites', False) and os
.path
.exists(filename
): 
 561                         self
.to_stderr(u
'WARNING: file exists and will be skipped') 
 565                         self
.pmkdir(filename
) 
 566                 except (OSError, IOError), err
: 
 567                         self
.trouble(u
'ERROR: unable to create directories: %s' % str(err
)) 
 571                         success 
= self
._do
_download
(filename
, info_dict
['url'].encode('utf-8'), info_dict
.get('player_url', None)) 
 572                 except (OSError, IOError), err
: 
 573                         raise UnavailableVideoError
 
 574                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 575                         self
.trouble(u
'ERROR: unable to download video data: %s' % str(err
)) 
 577                 except (ContentTooShortError
, ), err
: 
 578                         self
.trouble(u
'ERROR: content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
)) 
 583                                 self
.post_process(filename
, info_dict
) 
 584                         except (PostProcessingError
), err
: 
 585                                 self
.trouble(u
'ERROR: postprocessing: %s' % str(err
)) 
	def download(self, url_list):
		"""Download a given list of URLs."""
		# A fixed template would make every URL write to the same file.
		if len(url_list) > 1 and self.fixed_template():
			raise SameFileError(self.params['outtmpl'])

		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				# Go to next InfoExtractor if not suitable
				if not ie.suitable(url):
					continue

				# Suitable InfoExtractor found
				suitable_found = True

				# Extract information from URL and process it
				ie.extract(url)

				# Suitable InfoExtractor had been found; go to next URL
				break

			if not suitable_found:
				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

		return self._download_retcode
	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file."""
		info = dict(ie_info)
		info['filepath'] = filename
		for pp in self._pps:
			info = pp.run(info)
			# A postprocessor returning None stops the chain.
			if info is None:
				break
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an rtmp:// URL by shelling out to rtmpdump.

		Returns True on success, False on failure.
		"""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrumpted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		retval = subprocess.call(basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)])
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(tmpfilename)
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(tmpfilename)
			# No progress between retries: stop trying to resume.
			if prevsize == cursize and retval == 1:
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(tmpfilename))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
 655         def _do_download(self
, filename
, url
, player_url
): 
 656                 # Check file already present 
 657                 if self
.params
.get('continuedl', False) and os
.path
.isfile(filename
) and not self
.params
.get('nopart', False): 
 658                         self
.report_file_already_downloaded(filename
) 
 661                 # Attempt to download using rtmpdump 
 662                 if url
.startswith('rtmp'): 
 663                         return self
._download
_with
_rtmpdump
(filename
, url
, player_url
) 
 665                 tmpfilename 
= self
.temp_name(filename
) 
 669                 # Do not include the Accept-Encoding header 
 670                 headers 
= {'Youtubedl-no-compression': 'True'} 
 671                 basic_request 
= urllib2
.Request(url
, None, headers
) 
 672                 request 
= urllib2
.Request(url
, None, headers
) 
 674                 # Establish possible resume length 
 675                 if os
.path
.isfile(tmpfilename
): 
 676                         resume_len 
= os
.path
.getsize(tmpfilename
) 
 680                 # Request parameters in case of being able to resume 
 681                 if self
.params
.get('continuedl', False) and resume_len 
!= 0: 
 682                         self
.report_resuming_byte(resume_len
) 
 683                         request
.add_header('Range','bytes=%d-' % resume_len
) 
 687                 retries 
= self
.params
.get('retries', 0) 
 688                 while count 
<= retries
: 
 689                         # Establish connection 
 691                                 data 
= urllib2
.urlopen(request
) 
 693                         except (urllib2
.HTTPError
, ), err
: 
 694                                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 695                                         # Unexpected HTTP error 
 697                                 elif err
.code 
== 416: 
 698                                         # Unable to resume (requested range not satisfiable) 
 700                                                 # Open the connection again without the range header 
 701                                                 data 
= urllib2
.urlopen(basic_request
) 
 702                                                 content_length 
= data
.info()['Content-Length'] 
 703                                         except (urllib2
.HTTPError
, ), err
: 
 704                                                 if err
.code 
< 500 or err
.code 
>= 600: 
 707                                                 # Examine the reported length 
 708                                                 if (content_length 
is not None and 
 709                                                     (resume_len 
- 100 < long(content_length
) < resume_len 
+ 100)): 
 710                                                         # The file had already been fully downloaded. 
 711                                                         # Explanation to the above condition: in issue #175 it was revealed that 
 712                                                         # YouTube sometimes adds or removes a few bytes from the end of the file, 
 713                                                         # changing the file size slightly and causing problems for some users. So 
 714                                                         # I decided to implement a suggested change and consider the file 
 715                                                         # completely downloaded if the file size differs less than 100 bytes from 
 716                                                         # the one in the hard drive. 
 717                                                         self
.report_file_already_downloaded(filename
) 
 718                                                         self
.try_rename(tmpfilename
, filename
) 
 721                                                         # The length does not match, we start the download over 
 722                                                         self
.report_unable_to_resume() 
 728                                 self
.report_retry(count
, retries
) 
 731                         self
.trouble(u
'ERROR: giving up after %s retries' % retries
) 
 734                 data_len 
= data
.info().get('Content-length', None) 
 735                 if data_len 
is not None: 
 736                         data_len 
= long(data_len
) + resume_len
 
 737                 data_len_str 
= self
.format_bytes(data_len
) 
 738                 byte_counter 
= 0 + resume_len
 
 744                         data_block 
= data
.read(block_size
) 
 746                         if len(data_block
) == 0: 
 748                         byte_counter 
+= len(data_block
) 
 750                         # Open file just in time 
 753                                         (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
 754                                         filename 
= self
.undo_temp_name(tmpfilename
) 
 755                                         self
.report_destination(filename
) 
 756                                 except (OSError, IOError), err
: 
 757                                         self
.trouble(u
'ERROR: unable to open for writing: %s' % str(err
)) 
 760                                 stream
.write(data_block
) 
 761                         except (IOError, OSError), err
: 
 762                                 self
.trouble(u
'\nERROR: unable to write data: %s' % str(err
)) 
 764                         block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
 767                         percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
 768                         eta_str 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
 769                         speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
 770                         self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
 773                         self
.slow_down(start
, byte_counter 
- resume_len
) 
 777                 if data_len 
is not None and byte_counter 
!= data_len
: 
 778                         raise ContentTooShortError(byte_counter
, long(data_len
)) 
 779                 self
.try_rename(tmpfilename
, filename
) 
 781                 # Update file modification time 
 782                 if self
.params
.get('updatetime', True): 
 783                         self
.try_utime(filename
, data
.info().get('last-modified', None)) 
 787 class InfoExtractor(object): 
 788         """Information Extractor class. 
 790         Information extractors are the classes that, given a URL, extract 
 791         information from the video (or videos) the URL refers to. This 
 792         information includes the real video URL, the video title and simplified 
 793         title, author and others. The information is stored in a dictionary 
 794         which is then passed to the FileDownloader. The FileDownloader 
 795         processes this information possibly downloading the video to the file 
 796         system, among other possible outcomes. The dictionaries must include 
 797         the following fields: 
 799         id:             Video identifier. 
 800         url:            Final video URL. 
 801         uploader:       Nickname of the video uploader. 
 802         title:          Literal title. 
 803         stitle:         Simplified title. 
 804         ext:            Video filename extension. 
 805         format:         Video format. 
 806         player_url:     SWF Player URL (may be None). 
 808         The following fields are optional. Their primary purpose is to allow 
 809         youtube-dl to serve as the backend for a video search function, such 
 810         as the one in youtube2mp3.  They are only used when their respective 
 811         forced printing functions are called: 
 813         thumbnail:      Full URL to a video thumbnail image. 
 814         description:    One-line video description. 
 816         Subclasses of this one should re-define the _real_initialize() and 
 817         _real_extract() methods, as well as the suitable() static method. 
 818         Probably, they should also be instantiated and added to the main 
 825         def __init__(self
, downloader
=None): 
 826                 """Constructor. Receives an optional downloader.""" 
 828                 self
.set_downloader(downloader
) 
 832                 """Receives a URL and returns True if suitable for this IE.""" 
 835         def initialize(self
): 
 836                 """Initializes an instance (authentication, etc).""" 
 838                         self
._real
_initialize
() 
 841         def extract(self
, url
): 
 842                 """Extracts URL information and returns it in list of dicts.""" 
 844                 return self
._real
_extract
(url
) 
 846         def set_downloader(self
, downloader
): 
 847                 """Sets the downloader for this IE.""" 
 848                 self
._downloader 
= downloader
 
 850         def _real_initialize(self
): 
 851                 """Real initialization process. Redefine in subclasses.""" 
 854         def _real_extract(self
, url
): 
 855                 """Real extraction process. Redefine in subclasses.""" 
 858 class YoutubeIE(InfoExtractor
): 
 859         """Information extractor for youtube.com.""" 
 861         _VALID_URL 
= r
'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=)))?([0-9A-Za-z_-]+)(?(1).+)?$' 
 862         _LANG_URL 
= r
'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
 863         _LOGIN_URL 
= 'https://www.youtube.com/signup?next=/&gl=US&hl=en' 
 864         _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
 865         _NETRC_MACHINE 
= 'youtube' 
 866         # Listed in order of quality 
 867         _available_formats 
= ['38', '37', '22', '45', '35', '34', '43', '18', '6', '5', '17', '13'] 
 868         _video_extensions 
= { 
 874                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever 
 881                 return (re
.match(YoutubeIE
._VALID
_URL
, url
) is not None) 
	def report_lang(self):
		"""Announce that the interface language is being forced to English."""
		message = u'[youtube] Setting language'
		self._downloader.to_screen(message)
	def report_login(self):
		"""Announce that a login attempt is starting."""
		message = u'[youtube] Logging in'
		self._downloader.to_screen(message)
	def report_age_confirmation(self):
		"""Announce that the age-verification form is being submitted."""
		message = u'[youtube] Confirming age'
		self._downloader.to_screen(message)
	def report_video_webpage_download(self, video_id):
		"""Announce the download of the watch page for *video_id*."""
		message = u'[youtube] %s: Downloading video webpage' % video_id
		self._downloader.to_screen(message)
	def report_video_info_webpage_download(self, video_id):
		"""Announce the download of the get_video_info page for *video_id*."""
		message = u'[youtube] %s: Downloading video info webpage' % video_id
		self._downloader.to_screen(message)
	def report_information_extraction(self, video_id):
		"""Announce that metadata extraction for *video_id* has begun."""
		message = u'[youtube] %s: Extracting video information' % video_id
		self._downloader.to_screen(message)
	def report_unavailable_format(self, video_id, format):
		"""Announce that the requested format is missing for *video_id*.

		Note: the parameter is named ``format`` (shadowing the builtin)
		because callers may pass it by keyword.
		"""
		message = u'[youtube] %s: Format %s not available' % (video_id, format)
		self._downloader.to_screen(message)
	def report_rtmp_download(self):
		"""Announce that the video will be fetched over RTMP."""
		message = u'[youtube] RTMP download detected'
		self._downloader.to_screen(message)
 915         def _real_initialize(self
): 
 916                 if self
._downloader 
is None: 
 921                 downloader_params 
= self
._downloader
.params
 
 923                 # Attempt to use provided username and password or .netrc data 
 924                 if downloader_params
.get('username', None) is not None: 
 925                         username 
= downloader_params
['username'] 
 926                         password 
= downloader_params
['password'] 
 927                 elif downloader_params
.get('usenetrc', False): 
 929                                 info 
= netrc
.netrc().authenticators(self
._NETRC
_MACHINE
) 
 934                                         raise netrc
.NetrcParseError('No authenticators for %s' % self
._NETRC
_MACHINE
) 
 935                         except (IOError, netrc
.NetrcParseError
), err
: 
 936                                 self
._downloader
.to_stderr(u
'WARNING: parsing .netrc: %s' % str(err
)) 
 940                 request 
= urllib2
.Request(self
._LANG
_URL
) 
 943                         urllib2
.urlopen(request
).read() 
 944                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 945                         self
._downloader
.to_stderr(u
'WARNING: unable to set language: %s' % str(err
)) 
 948                 # No authentication to be performed 
 954                                 'current_form': 'loginForm', 
 956                                 'action_login': 'Log In', 
 957                                 'username':     username
, 
 958                                 'password':     password
, 
 960                 request 
= urllib2
.Request(self
._LOGIN
_URL
, urllib
.urlencode(login_form
)) 
 963                         login_results 
= urllib2
.urlopen(request
).read() 
 964                         if re
.search(r
'(?i)<form[^>]* name="loginForm"', login_results
) is not None: 
 965                                 self
._downloader
.to_stderr(u
'WARNING: unable to log in: bad username or password') 
 967                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 968                         self
._downloader
.to_stderr(u
'WARNING: unable to log in: %s' % str(err
)) 
 974                                 'action_confirm':       'Confirm', 
 976                 request 
= urllib2
.Request(self
._AGE
_URL
, urllib
.urlencode(age_form
)) 
 978                         self
.report_age_confirmation() 
 979                         age_results 
= urllib2
.urlopen(request
).read() 
 980                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 981                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
 984         def _real_extract(self
, url
): 
 985                 # Extract video id from URL 
 986                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
 988                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
 990                 video_id 
= mobj
.group(2) 
 993                 self
.report_video_webpage_download(video_id
) 
 994                 request 
= urllib2
.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
) 
 996                         video_webpage 
= urllib2
.urlopen(request
).read() 
 997                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 998                         self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
1001                 # Attempt to extract SWF player URL 
1002                 mobj 
= re
.search(r
'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
) 
1003                 if mobj 
is not None: 
1004                         player_url 
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1)) 
1009                 self
.report_video_info_webpage_download(video_id
) 
1010                 for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
1011                         video_info_url 
= ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
1012                                            % (video_id
, el_type
)) 
1013                         request 
= urllib2
.Request(video_info_url
) 
1015                                 video_info_webpage 
= urllib2
.urlopen(request
).read() 
1016                                 video_info 
= parse_qs(video_info_webpage
) 
1017                                 if 'token' in video_info
: 
1019                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1020                                 self
._downloader
.trouble(u
'ERROR: unable to download video info webpage: %s' % str(err
)) 
1022                 if 'token' not in video_info
: 
1023                         if 'reason' in video_info
: 
1024                                 self
._downloader
.trouble(u
'ERROR: YouTube said: %s' % video_info
['reason'][0].decode('utf-8')) 
1026                                 self
._downloader
.trouble(u
'ERROR: "token" parameter not in video info for unknown reason') 
1029                 # Start extracting information 
1030                 self
.report_information_extraction(video_id
) 
1033                 if 'author' not in video_info
: 
1034                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1036                 video_uploader 
= urllib
.unquote_plus(video_info
['author'][0]) 
1039                 if 'title' not in video_info
: 
1040                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1042                 video_title 
= urllib
.unquote_plus(video_info
['title'][0]) 
1043                 video_title 
= video_title
.decode('utf-8') 
1044                 video_title 
= sanitize_title(video_title
) 
1047                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1048                 simple_title 
= simple_title
.strip(ur
'_') 
1051                 if 'thumbnail_url' not in video_info
: 
1052                         self
._downloader
.trouble(u
'WARNING: unable to extract video thumbnail') 
1053                         video_thumbnail 
= '' 
1054                 else:   # don't panic if we can't find it 
1055                         video_thumbnail 
= urllib
.unquote_plus(video_info
['thumbnail_url'][0]) 
1059                 mobj 
= re
.search(r
'id="eow-date".*?>(.*?)</span>', video_webpage
, re
.DOTALL
) 
1060                 if mobj 
is not None: 
1061                         upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
1062                         format_expressions 
= ['%d %B %Y', '%B %d %Y', '%b %d %Y'] 
1063                         for expression 
in format_expressions
: 
1065                                         upload_date 
= datetime
.datetime
.strptime(upload_date
, expression
).strftime('%Y%m%d') 
1070                 video_description 
= 'No description available.' 
1071                 if self
._downloader
.params
.get('forcedescription', False): 
1072                         mobj 
= re
.search(r
'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage
) 
1073                         if mobj 
is not None: 
1074                                 video_description 
= mobj
.group(1) 
1077                 video_token 
= urllib
.unquote_plus(video_info
['token'][0]) 
1079                 # Decide which formats to download 
1080                 req_format 
= self
._downloader
.params
.get('format', None) 
1082                 if 'fmt_url_map' in video_info
: 
1083                         url_map 
= dict(tuple(pair
.split('|')) for pair 
in video_info
['fmt_url_map'][0].split(',')) 
1084                         format_limit 
= self
._downloader
.params
.get('format_limit', None) 
1085                         if format_limit 
is not None and format_limit 
in self
._available
_formats
: 
1086                                 format_list 
= self
._available
_formats
[self
._available
_formats
.index(format_limit
):] 
1088                                 format_list 
= self
._available
_formats
 
1089                         existing_formats 
= [x 
for x 
in format_list 
if x 
in url_map
] 
1090                         if len(existing_formats
) == 0: 
1091                                 self
._downloader
.trouble(u
'ERROR: no known formats available for video') 
1093                         if req_format 
is None: 
1094                                 video_url_list 
= [(existing_formats
[0], url_map
[existing_formats
[0]])] # Best quality 
1095                         elif req_format 
== '-1': 
1096                                 video_url_list 
= [(f
, url_map
[f
]) for f 
in existing_formats
] # All formats 
1099                                 if req_format 
not in url_map
: 
1100                                         self
._downloader
.trouble(u
'ERROR: requested format not available') 
1102                                 video_url_list 
= [(req_format
, url_map
[req_format
])] # Specific format 
1104                 elif 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
1105                         self
.report_rtmp_download() 
1106                         video_url_list 
= [(None, video_info
['conn'][0])] 
1109                         self
._downloader
.trouble(u
'ERROR: no fmt_url_map or conn information found in video info') 
1112                 for format_param
, video_real_url 
in video_url_list
: 
1113                         # At this point we have a new video 
1114                         self
._downloader
.increment_downloads() 
1117                         video_extension 
= self
._video
_extensions
.get(format_param
, 'flv') 
 1119                         # Find the video URL in fmt_url_map or conn parameters 
1121                                 # Process video information 
1122                                 self
._downloader
.process_info({ 
1123                                         'id':           video_id
.decode('utf-8'), 
1124                                         'url':          video_real_url
.decode('utf-8'), 
1125                                         'uploader':     video_uploader
.decode('utf-8'), 
1126                                         'upload_date':  upload_date
, 
1127                                         'title':        video_title
, 
1128                                         'stitle':       simple_title
, 
1129                                         'ext':          video_extension
.decode('utf-8'), 
1130                                         'format':       (format_param 
is None and u
'NA' or format_param
.decode('utf-8')), 
1131                                         'thumbnail':    video_thumbnail
.decode('utf-8'), 
1132                                         'description':  video_description
.decode('utf-8'), 
1133                                         'player_url':   player_url
, 
1135                         except UnavailableVideoError
, err
: 
1136                                 self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1139 class MetacafeIE(InfoExtractor
): 
1140         """Information Extractor for metacafe.com.""" 
1142         _VALID_URL 
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' 
1143         _DISCLAIMER 
= 'http://www.metacafe.com/family_filter/' 
1144         _FILTER_POST 
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' 
1147         def __init__(self
, youtube_ie
, downloader
=None): 
1148                 InfoExtractor
.__init
__(self
, downloader
) 
1149                 self
._youtube
_ie 
= youtube_ie
 
1153                 return (re
.match(MetacafeIE
._VALID
_URL
, url
) is not None) 
1155         def report_disclaimer(self
): 
1156                 """Report disclaimer retrieval.""" 
1157                 self
._downloader
.to_screen(u
'[metacafe] Retrieving disclaimer') 
1159         def report_age_confirmation(self
): 
1160                 """Report attempt to confirm age.""" 
1161                 self
._downloader
.to_screen(u
'[metacafe] Confirming age') 
1163         def report_download_webpage(self
, video_id
): 
1164                 """Report webpage download.""" 
1165                 self
._downloader
.to_screen(u
'[metacafe] %s: Downloading webpage' % video_id
) 
1167         def report_extraction(self
, video_id
): 
1168                 """Report information extraction.""" 
1169                 self
._downloader
.to_screen(u
'[metacafe] %s: Extracting information' % video_id
) 
1171         def _real_initialize(self
): 
1172                 # Retrieve disclaimer 
1173                 request 
= urllib2
.Request(self
._DISCLAIMER
) 
1175                         self
.report_disclaimer() 
1176                         disclaimer 
= urllib2
.urlopen(request
).read() 
1177                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1178                         self
._downloader
.trouble(u
'ERROR: unable to retrieve disclaimer: %s' % str(err
)) 
1184                         'submit': "Continue - I'm over 18", 
1186                 request 
= urllib2
.Request(self
._FILTER
_POST
, urllib
.urlencode(disclaimer_form
)) 
1188                         self
.report_age_confirmation() 
1189                         disclaimer 
= urllib2
.urlopen(request
).read() 
1190                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1191                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1194         def _real_extract(self
, url
): 
1195                 # Extract id and simplified title from URL 
1196                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1198                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1201                 video_id 
= mobj
.group(1) 
1203                 # Check if video comes from YouTube 
1204                 mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
1205                 if mobj2 
is not None: 
1206                         self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % mobj2
.group(1)) 
1209                 # At this point we have a new video 
1210                 self
._downloader
.increment_downloads() 
1212                 simple_title 
= mobj
.group(2).decode('utf-8') 
1214                 # Retrieve video webpage to extract further information 
1215                 request 
= urllib2
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
1217                         self
.report_download_webpage(video_id
) 
1218                         webpage 
= urllib2
.urlopen(request
).read() 
1219                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1220                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1223                 # Extract URL, uploader and title from webpage 
1224                 self
.report_extraction(video_id
) 
1225                 mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
1226                 if mobj 
is not None: 
1227                         mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1228                         video_extension 
= mediaURL
[-3:] 
1230                         # Extract gdaKey if available 
1231                         mobj 
= re
.search(r
'(?m)&gdaKey=(.*?)&', webpage
) 
1233                                 video_url 
= mediaURL
 
1235                                 gdaKey 
= mobj
.group(1) 
1236                                 video_url 
= '%s?__gda__=%s' % (mediaURL
, gdaKey
) 
1238                         mobj 
= re
.search(r
' name="flashvars" value="(.*?)"', webpage
) 
1240                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1242                         vardict 
= parse_qs(mobj
.group(1)) 
1243                         if 'mediaData' not in vardict
: 
1244                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1246                         mobj 
= re
.search(r
'"mediaURL":"(http.*?)","key":"(.*?)"', vardict
['mediaData'][0]) 
1248                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1250                         mediaURL 
= mobj
.group(1).replace('\\/', '/') 
1251                         video_extension 
= mediaURL
[-3:] 
1252                         video_url 
= '%s?__gda__=%s' % (mediaURL
, mobj
.group(2)) 
1254                 mobj 
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
) 
1256                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1258                 video_title 
= mobj
.group(1).decode('utf-8') 
1259                 video_title 
= sanitize_title(video_title
) 
1261                 mobj 
= re
.search(r
'(?ms)By:\s*<a .*?>(.+?)<', webpage
) 
1263                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1265                 video_uploader 
= mobj
.group(1) 
1268                         # Process video information 
1269                         self
._downloader
.process_info({ 
1270                                 'id':           video_id
.decode('utf-8'), 
1271                                 'url':          video_url
.decode('utf-8'), 
1272                                 'uploader':     video_uploader
.decode('utf-8'), 
1273                                 'upload_date':  u
'NA', 
1274                                 'title':        video_title
, 
1275                                 'stitle':       simple_title
, 
1276                                 'ext':          video_extension
.decode('utf-8'), 
1280                 except UnavailableVideoError
: 
1281                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1284 class DailymotionIE(InfoExtractor
): 
1285         """Information Extractor for Dailymotion""" 
1287         _VALID_URL 
= r
'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' 
1289         def __init__(self
, downloader
=None): 
1290                 InfoExtractor
.__init
__(self
, downloader
) 
1294                 return (re
.match(DailymotionIE
._VALID
_URL
, url
) is not None) 
1296         def report_download_webpage(self
, video_id
): 
1297                 """Report webpage download.""" 
1298                 self
._downloader
.to_screen(u
'[dailymotion] %s: Downloading webpage' % video_id
) 
1300         def report_extraction(self
, video_id
): 
1301                 """Report information extraction.""" 
1302                 self
._downloader
.to_screen(u
'[dailymotion] %s: Extracting information' % video_id
) 
1304         def _real_initialize(self
): 
1307         def _real_extract(self
, url
): 
1308                 # Extract id and simplified title from URL 
1309                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1311                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1314                 # At this point we have a new video 
1315                 self
._downloader
.increment_downloads() 
1316                 video_id 
= mobj
.group(1) 
1318                 simple_title 
= mobj
.group(2).decode('utf-8') 
1319                 video_extension 
= 'flv' 
1321                 # Retrieve video webpage to extract further information 
1322                 request 
= urllib2
.Request(url
) 
1324                         self
.report_download_webpage(video_id
) 
1325                         webpage 
= urllib2
.urlopen(request
).read() 
1326                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1327                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1330                 # Extract URL, uploader and title from webpage 
1331                 self
.report_extraction(video_id
) 
1332                 mobj 
= re
.search(r
'(?i)addVariable\(\"video\"\s*,\s*\"([^\"]*)\"\)', webpage
) 
1334                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1336                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1338                 # if needed add http://www.dailymotion.com/ if relative URL 
1340                 video_url 
= mediaURL
 
1342                 # '<meta\s+name="title"\s+content="Dailymotion\s*[:\-]\s*(.*?)"\s*\/\s*>' 
1343                 mobj 
= re
.search(r
'(?im)<title>Dailymotion\s*[\-:]\s*(.+?)</title>', webpage
) 
1345                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1347                 video_title 
= mobj
.group(1).decode('utf-8') 
1348                 video_title 
= sanitize_title(video_title
) 
1350                 mobj 
= re
.search(r
'(?im)<Attribute name="owner">(.+?)</Attribute>', webpage
) 
1352                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1354                 video_uploader 
= mobj
.group(1) 
1357                         # Process video information 
1358                         self
._downloader
.process_info({ 
1359                                 'id':           video_id
.decode('utf-8'), 
1360                                 'url':          video_url
.decode('utf-8'), 
1361                                 'uploader':     video_uploader
.decode('utf-8'), 
1362                                 'upload_date':  u
'NA', 
1363                                 'title':        video_title
, 
1364                                 'stitle':       simple_title
, 
1365                                 'ext':          video_extension
.decode('utf-8'), 
1369                 except UnavailableVideoError
: 
1370                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1372 class GoogleIE(InfoExtractor
): 
1373         """Information extractor for video.google.com.""" 
1375         _VALID_URL 
= r
'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' 
1377         def __init__(self
, downloader
=None): 
1378                 InfoExtractor
.__init
__(self
, downloader
) 
1382                 return (re
.match(GoogleIE
._VALID
_URL
, url
) is not None) 
1384         def report_download_webpage(self
, video_id
): 
1385                 """Report webpage download.""" 
1386                 self
._downloader
.to_screen(u
'[video.google] %s: Downloading webpage' % video_id
) 
1388         def report_extraction(self
, video_id
): 
1389                 """Report information extraction.""" 
1390                 self
._downloader
.to_screen(u
'[video.google] %s: Extracting information' % video_id
) 
1392         def _real_initialize(self
): 
1395         def _real_extract(self
, url
): 
1396                 # Extract id from URL 
1397                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1399                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1402                 # At this point we have a new video 
1403                 self
._downloader
.increment_downloads() 
1404                 video_id 
= mobj
.group(1) 
1406                 video_extension 
= 'mp4' 
1408                 # Retrieve video webpage to extract further information 
1409                 request 
= urllib2
.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id
) 
1411                         self
.report_download_webpage(video_id
) 
1412                         webpage 
= urllib2
.urlopen(request
).read() 
1413                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1414                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1417                 # Extract URL, uploader, and title from webpage 
1418                 self
.report_extraction(video_id
) 
1419                 mobj 
= re
.search(r
"download_url:'([^']+)'", webpage
) 
1421                         video_extension 
= 'flv' 
1422                         mobj 
= re
.search(r
"(?i)videoUrl\\x3d(.+?)\\x26", webpage
) 
1424                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1426                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1427                 mediaURL 
= mediaURL
.replace('\\x3d', '\x3d') 
1428                 mediaURL 
= mediaURL
.replace('\\x26', '\x26') 
1430                 video_url 
= mediaURL
 
1432                 mobj 
= re
.search(r
'<title>(.*)</title>', webpage
) 
1434                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1436                 video_title 
= mobj
.group(1).decode('utf-8') 
1437                 video_title 
= sanitize_title(video_title
) 
1438                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1440                 # Extract video description 
1441                 mobj 
= re
.search(r
'<span id=short-desc-content>([^<]*)</span>', webpage
) 
1443                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1445                 video_description 
= mobj
.group(1).decode('utf-8') 
1446                 if not video_description
: 
1447                         video_description 
= 'No description available.' 
1449                 # Extract video thumbnail 
1450                 if self
._downloader
.params
.get('forcethumbnail', False): 
1451                         request 
= urllib2
.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id
))) 
1453                                 webpage 
= urllib2
.urlopen(request
).read() 
1454                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1455                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1457                         mobj 
= re
.search(r
'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage
) 
1459                                 self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1461                         video_thumbnail 
= mobj
.group(1) 
1462                 else:   # we need something to pass to process_info 
1463                         video_thumbnail 
= '' 
1467                         # Process video information 
1468                         self
._downloader
.process_info({ 
1469                                 'id':           video_id
.decode('utf-8'), 
1470                                 'url':          video_url
.decode('utf-8'), 
1472                                 'upload_date':  u
'NA', 
1473                                 'title':        video_title
, 
1474                                 'stitle':       simple_title
, 
1475                                 'ext':          video_extension
.decode('utf-8'), 
1479                 except UnavailableVideoError
: 
1480                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1483 class PhotobucketIE(InfoExtractor
): 
1484         """Information extractor for photobucket.com.""" 
1486         _VALID_URL 
= r
'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' 
1488         def __init__(self
, downloader
=None): 
1489                 InfoExtractor
.__init
__(self
, downloader
) 
1493                 return (re
.match(PhotobucketIE
._VALID
_URL
, url
) is not None) 
1495         def report_download_webpage(self
, video_id
): 
1496                 """Report webpage download.""" 
1497                 self
._downloader
.to_screen(u
'[photobucket] %s: Downloading webpage' % video_id
) 
1499         def report_extraction(self
, video_id
): 
1500                 """Report information extraction.""" 
1501                 self
._downloader
.to_screen(u
'[photobucket] %s: Extracting information' % video_id
) 
1503         def _real_initialize(self
): 
1506         def _real_extract(self
, url
): 
1507                 # Extract id from URL 
1508                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1510                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1513                 # At this point we have a new video 
1514                 self
._downloader
.increment_downloads() 
1515                 video_id 
= mobj
.group(1) 
1517                 video_extension 
= 'flv' 
1519                 # Retrieve video webpage to extract further information 
1520                 request 
= urllib2
.Request(url
) 
1522                         self
.report_download_webpage(video_id
) 
1523                         webpage 
= urllib2
.urlopen(request
).read() 
1524                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1525                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1528                 # Extract URL, uploader, and title from webpage 
1529                 self
.report_extraction(video_id
) 
1530                 mobj 
= re
.search(r
'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage
) 
1532                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1534                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1536                 video_url 
= mediaURL
 
1538                 mobj 
= re
.search(r
'<title>(.*) video by (.*) - Photobucket</title>', webpage
) 
1540                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1542                 video_title 
= mobj
.group(1).decode('utf-8') 
1543                 video_title 
= sanitize_title(video_title
) 
1544                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1546                 video_uploader 
= mobj
.group(2).decode('utf-8') 
1549                         # Process video information 
1550                         self
._downloader
.process_info({ 
1551                                 'id':           video_id
.decode('utf-8'), 
1552                                 'url':          video_url
.decode('utf-8'), 
1553                                 'uploader':     video_uploader
, 
1554                                 'upload_date':  u
'NA', 
1555                                 'title':        video_title
, 
1556                                 'stitle':       simple_title
, 
1557                                 'ext':          video_extension
.decode('utf-8'), 
1561                 except UnavailableVideoError
: 
1562                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1565 class YahooIE(InfoExtractor
): 
1566         """Information extractor for video.yahoo.com.""" 
1568         # _VALID_URL matches all Yahoo! Video URLs 
1569         # _VPAGE_URL matches only the extractable '/watch/' URLs 
1570         _VALID_URL 
= r
'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' 
1571         _VPAGE_URL 
= r
'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
1573         def __init__(self
, downloader
=None): 
1574                 InfoExtractor
.__init
__(self
, downloader
) 
1578                 return (re
.match(YahooIE
._VALID
_URL
, url
) is not None) 
1580         def report_download_webpage(self
, video_id
): 
1581                 """Report webpage download.""" 
1582                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Downloading webpage' % video_id
) 
1584         def report_extraction(self
, video_id
): 
1585                 """Report information extraction.""" 
1586                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Extracting information' % video_id
) 
1588         def _real_initialize(self
): 
1591         def _real_extract(self
, url
, new_video
=True): 
1592                 # Extract ID from URL 
1593                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1595                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1598                 # At this point we have a new video 
1599                 self
._downloader
.increment_downloads() 
1600                 video_id 
= mobj
.group(2) 
1601                 video_extension 
= 'flv' 
1603                 # Rewrite valid but non-extractable URLs as 
1604                 # extractable English language /watch/ URLs 
1605                 if re
.match(self
._VPAGE
_URL
, url
) is None: 
1606                         request 
= urllib2
.Request(url
) 
1608                                 webpage 
= urllib2
.urlopen(request
).read() 
1609                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1610                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1613                         mobj 
= re
.search(r
'\("id", "([0-9]+)"\);', webpage
) 
1615                                 self
._downloader
.trouble(u
'ERROR: Unable to extract id field') 
1617                         yahoo_id 
= mobj
.group(1) 
1619                         mobj 
= re
.search(r
'\("vid", "([0-9]+)"\);', webpage
) 
1621                                 self
._downloader
.trouble(u
'ERROR: Unable to extract vid field') 
1623                         yahoo_vid 
= mobj
.group(1) 
1625                         url 
= 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid
, yahoo_id
) 
1626                         return self
._real
_extract
(url
, new_video
=False) 
1628                 # Retrieve video webpage to extract further information 
1629                 request 
= urllib2
.Request(url
) 
1631                         self
.report_download_webpage(video_id
) 
1632                         webpage 
= urllib2
.urlopen(request
).read() 
1633                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1634                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1637                 # Extract uploader and title from webpage 
1638                 self
.report_extraction(video_id
) 
1639                 mobj 
= re
.search(r
'<meta name="title" content="(.*)" />', webpage
) 
1641                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1643                 video_title 
= mobj
.group(1).decode('utf-8') 
1644                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1646                 mobj 
= re
.search(r
'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage
) 
1648                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
1650                 video_uploader 
= mobj
.group(1).decode('utf-8') 
1652                 # Extract video thumbnail 
1653                 mobj 
= re
.search(r
'<link rel="image_src" href="(.*)" />', webpage
) 
1655                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1657                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
1659                 # Extract video description 
1660                 mobj 
= re
.search(r
'<meta name="description" content="(.*)" />', webpage
) 
1662                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1664                 video_description 
= mobj
.group(1).decode('utf-8') 
1665                 if not video_description
: video_description 
= 'No description available.' 
1667                 # Extract video height and width 
1668                 mobj 
= re
.search(r
'<meta name="video_height" content="([0-9]+)" />', webpage
) 
1670                         self
._downloader
.trouble(u
'ERROR: unable to extract video height') 
1672                 yv_video_height 
= mobj
.group(1) 
1674                 mobj 
= re
.search(r
'<meta name="video_width" content="([0-9]+)" />', webpage
) 
1676                         self
._downloader
.trouble(u
'ERROR: unable to extract video width') 
1678                 yv_video_width 
= mobj
.group(1) 
1680                 # Retrieve video playlist to extract media URL 
1681                 # I'm not completely sure what all these options are, but we 
1682                 # seem to need most of them, otherwise the server sends a 401. 
1683                 yv_lg 
= 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents 
1684                 yv_bitrate 
= '700'  # according to Wikipedia this is hard-coded 
1685                 request 
= urllib2
.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id 
+ 
1686                                           '&tech=flash&mode=playlist&lg=' + yv_lg 
+ '&bitrate=' + yv_bitrate 
+ '&vidH=' + yv_video_height 
+ 
1687                                           '&vidW=' + yv_video_width 
+ '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') 
1689                         self
.report_download_webpage(video_id
) 
1690                         webpage 
= urllib2
.urlopen(request
).read() 
1691                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1692                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1695                 # Extract media URL from playlist XML 
1696                 mobj 
= re
.search(r
'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage
) 
1698                         self
._downloader
.trouble(u
'ERROR: Unable to extract media URL') 
1700                 video_url 
= urllib
.unquote(mobj
.group(1) + mobj
.group(2)).decode('utf-8') 
1701                 video_url 
= re
.sub(r
'(?u)&(.+?);', htmlentity_transform
, video_url
) 
1704                         # Process video information 
1705                         self
._downloader
.process_info({ 
1706                                 'id':           video_id
.decode('utf-8'), 
1708                                 'uploader':     video_uploader
, 
1709                                 'upload_date':  u
'NA', 
1710                                 'title':        video_title
, 
1711                                 'stitle':       simple_title
, 
1712                                 'ext':          video_extension
.decode('utf-8'), 
1713                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
1714                                 'description':  video_description
, 
1715                                 'thumbnail':    video_thumbnail
, 
1716                                 'description':  video_description
, 
1719                 except UnavailableVideoError
: 
1720                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1723 class GenericIE(InfoExtractor
): 
1724         """Generic last-resort information extractor.""" 
1726         def __init__(self
, downloader
=None): 
1727                 InfoExtractor
.__init
__(self
, downloader
) 
1733         def report_download_webpage(self
, video_id
): 
1734                 """Report webpage download.""" 
1735                 self
._downloader
.to_screen(u
'WARNING: Falling back on generic information extractor.') 
1736                 self
._downloader
.to_screen(u
'[generic] %s: Downloading webpage' % video_id
) 
1738         def report_extraction(self
, video_id
): 
1739                 """Report information extraction.""" 
1740                 self
._downloader
.to_screen(u
'[generic] %s: Extracting information' % video_id
) 
1742         def _real_initialize(self
): 
1745         def _real_extract(self
, url
): 
1746                 # At this point we have a new video 
1747                 self
._downloader
.increment_downloads() 
1749                 video_id 
= url
.split('/')[-1] 
1750                 request 
= urllib2
.Request(url
) 
1752                         self
.report_download_webpage(video_id
) 
1753                         webpage 
= urllib2
.urlopen(request
).read() 
1754                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1755                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1757                 except ValueError, err
: 
1758                         # since this is the last-resort InfoExtractor, if 
1759                         # this error is thrown, it'll be thrown here 
1760                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1763                 self
.report_extraction(video_id
) 
1764                 # Start with something easy: JW Player in SWFObject 
1765                 mobj 
= re
.search(r
'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) 
1767                         # Broaden the search a little bit 
1768                         mobj = re.search(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage) 
1770                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
1773                 # It's possible that one of the regexes 
1774                 # matched, but returned an empty group: 
1775                 if mobj.group(1) is None: 
1776                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
1779                 video_url = urllib.unquote(mobj.group(1)) 
1780                 video_id  = os.path.basename(video_url) 
1782                 # here's a fun little line of code for you: 
1783                 video_extension = os.path.splitext(video_id)[1][1:] 
1784                 video_id        = os.path.splitext(video_id)[0] 
1786                 # it's tempting to parse this further, but you would 
1787                 # have to take into account all the variations like 
1788                 #   Video Title - Site Name 
1789                 #   Site Name | Video Title 
1790                 #   Video Title - Tagline | Site Name 
1791                 # and so on and so forth; it's just not practical 
1792                 mobj = re.search(r'<title>(.*)</title>', webpage) 
1794                         self._downloader.trouble(u'ERROR: unable to extract title') 
1796                 video_title = mobj.group(1).decode('utf-8') 
1797                 video_title = sanitize_title(video_title) 
1798                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) 
1800                 # video uploader is domain name 
1801                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) 
1803                         self._downloader.trouble(u'ERROR: unable to extract title') 
1805                 video_uploader = mobj.group(1).decode('utf-8') 
1808                         # Process video information 
1809                         self._downloader.process_info({ 
1810                                 'id':           video_id.decode('utf-8'), 
1811                                 'url':          video_url.decode('utf-8'), 
1812                                 'uploader':     video_uploader, 
1813                                 'upload_date':  u'NA', 
1814                                 'title':        video_title, 
1815                                 'stitle':       simple_title, 
1816                                 'ext':          video_extension.decode('utf-8'), 
1820                 except UnavailableVideoError, err: 
1821                         self._downloader.trouble(u'\nERROR: unable to download video') 
1824 class YoutubeSearchIE(InfoExtractor): 
1825         """Information Extractor for YouTube search queries.""" 
1826         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' 
1827         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' 
1828         _VIDEO_INDICATOR = r'href="/watch
\?v
=.+?
"' 
1829         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
1831         _max_youtube_results = 1000 
1833         def __init__(self, youtube_ie, downloader=None): 
1834                 InfoExtractor.__init__(self, downloader) 
1835                 self._youtube_ie = youtube_ie 
1839                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) 
1841         def report_download_page(self, query, pagenum): 
1842                 """Report attempt to download playlist page with given number.""" 
1843                 query = query.decode(preferredencoding()) 
1844                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) 
1846         def _real_initialize(self): 
1847                 self._youtube_ie.initialize() 
1849         def _real_extract(self, query): 
1850                 mobj = re.match(self._VALID_QUERY, query) 
1852                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
1855                 prefix, query = query.split(':') 
1857                 query  = query.encode('utf-8') 
1859                         self._download_n_results(query, 1) 
1861                 elif prefix == 'all': 
1862                         self._download_n_results(query, self._max_youtube_results) 
1868                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
1870                                 elif n > self._max_youtube_results: 
1871                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)'  % (self._max_youtube_results, n)) 
1872                                         n = self._max_youtube_results 
1873                                 self._download_n_results(query, n) 
1875                         except ValueError: # parsing prefix as integer fails 
1876                                 self._download_n_results(query, 1) 
1879         def _download_n_results(self, query, n): 
1880                 """Downloads a specified number of results for a query""" 
1883                 already_seen = set() 
1887                         self.report_download_page(query, pagenum) 
1888                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
1889                         request = urllib2.Request(result_url) 
1891                                 page = urllib2.urlopen(request).read() 
1892                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
1893                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
1896                         # Extract video identifiers 
1897                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
1898                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] 
1899                                 if video_id not in already_seen: 
1900                                         video_ids.append(video_id) 
1901                                         already_seen.add(video_id) 
1902                                         if len(video_ids) == n: 
1903                                                 # Specified n videos reached 
1904                                                 for id in video_ids: 
1905                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
1908                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
1909                                 for id in video_ids: 
1910                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
1913                         pagenum = pagenum + 1 
1915 class GoogleSearchIE(InfoExtractor): 
1916         """Information Extractor for Google Video search queries.""" 
1917         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' 
1918         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' 
1919         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' 
1920         _MORE_PAGES_INDICATOR = r'<span>Next</span>' 
1922         _max_google_results = 1000 
1924         def __init__(self, google_ie, downloader=None): 
1925                 InfoExtractor.__init__(self, downloader) 
1926                 self._google_ie = google_ie 
1930                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) 
1932         def report_download_page(self, query, pagenum): 
1933                 """Report attempt to download playlist page with given number.""" 
1934                 query = query.decode(preferredencoding()) 
1935                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) 
1937         def _real_initialize(self): 
1938                 self._google_ie.initialize() 
1940         def _real_extract(self, query): 
1941                 mobj = re.match(self._VALID_QUERY, query) 
1943                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
1946                 prefix, query = query.split(':') 
1948                 query  = query.encode('utf-8') 
1950                         self._download_n_results(query, 1) 
1952                 elif prefix == 'all': 
1953                         self._download_n_results(query, self._max_google_results) 
1959                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
1961                                 elif n > self._max_google_results: 
1962                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)'  % (self._max_google_results, n)) 
1963                                         n = self._max_google_results 
1964                                 self._download_n_results(query, n) 
1966                         except ValueError: # parsing prefix as integer fails 
1967                                 self._download_n_results(query, 1) 
1970         def _download_n_results(self, query, n): 
1971                 """Downloads a specified number of results for a query""" 
1974                 already_seen = set() 
1978                         self.report_download_page(query, pagenum) 
1979                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
1980                         request = urllib2.Request(result_url) 
1982                                 page = urllib2.urlopen(request).read() 
1983                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
1984                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
1987                         # Extract video identifiers 
1988                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
1989                                 video_id = mobj.group(1) 
1990                                 if video_id not in already_seen: 
1991                                         video_ids.append(video_id) 
1992                                         already_seen.add(video_id) 
1993                                         if len(video_ids) == n: 
1994                                                 # Specified n videos reached 
1995                                                 for id in video_ids: 
1996                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
1999                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2000                                 for id in video_ids: 
2001                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
2004                         pagenum = pagenum + 1 
2006 class YahooSearchIE(InfoExtractor): 
2007         """Information Extractor for Yahoo! Video search queries.""" 
2008         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' 
2009         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' 
2010         _VIDEO_INDICATOR = r'href="http
://video\
.yahoo\
.com
/watch
/([0-9]+/[0-9]+)"' 
2011         _MORE_PAGES_INDICATOR = r'\s*Next' 
2013         _max_yahoo_results = 1000 
2015         def __init__(self, yahoo_ie, downloader=None): 
2016                 InfoExtractor.__init__(self, downloader) 
2017                 self._yahoo_ie = yahoo_ie 
2021                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) 
2023         def report_download_page(self, query, pagenum): 
2024                 """Report attempt to download playlist page with given number.""" 
2025                 query = query.decode(preferredencoding()) 
2026                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) 
2028         def _real_initialize(self): 
2029                 self._yahoo_ie.initialize() 
2031         def _real_extract(self, query): 
2032                 mobj = re.match(self._VALID_QUERY, query) 
2034                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2037                 prefix, query = query.split(':') 
2039                 query  = query.encode('utf-8') 
2041                         self._download_n_results(query, 1) 
2043                 elif prefix == 'all': 
2044                         self._download_n_results(query, self._max_yahoo_results) 
2050                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2052                                 elif n > self._max_yahoo_results: 
2053                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)'  % (self._max_yahoo_results, n)) 
2054                                         n = self._max_yahoo_results 
2055                                 self._download_n_results(query, n) 
2057                         except ValueError: # parsing prefix as integer fails 
2058                                 self._download_n_results(query, 1) 
2061         def _download_n_results(self, query, n): 
2062                 """Downloads a specified number of results for a query""" 
2065                 already_seen = set() 
2069                         self.report_download_page(query, pagenum) 
2070                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2071                         request = urllib2.Request(result_url) 
2073                                 page = urllib2.urlopen(request).read() 
2074                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2075                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2078                         # Extract video identifiers 
2079                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2080                                 video_id = mobj.group(1) 
2081                                 if video_id not in already_seen: 
2082                                         video_ids.append(video_id) 
2083                                         already_seen.add(video_id) 
2084                                         if len(video_ids) == n: 
2085                                                 # Specified n videos reached 
2086                                                 for id in video_ids: 
2087                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2090                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2091                                 for id in video_ids: 
2092                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2095                         pagenum = pagenum + 1 
2097 class YoutubePlaylistIE(InfoExtractor): 
2098         """Information Extractor for YouTube playlists.""" 
2100         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' 
2101         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' 
2102         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
2103         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
2106         def __init__(self, youtube_ie, downloader=None): 
2107                 InfoExtractor.__init__(self, downloader) 
2108                 self._youtube_ie = youtube_ie 
2112                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) 
2114         def report_download_page(self, playlist_id, pagenum): 
2115                 """Report attempt to download playlist page with given number.""" 
2116                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) 
2118         def _real_initialize(self): 
2119                 self._youtube_ie.initialize() 
2121         def _real_extract(self, url): 
2122                 # Extract playlist id 
2123                 mobj = re.match(self._VALID_URL, url) 
2125                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2129                 if mobj.group(3) is not None: 
2130                         self._youtube_ie.extract(mobj.group(3)) 
2133                 # Download playlist pages 
2134                 # prefix is 'p' as default for playlists but there are other types that need extra care 
2135                 playlist_prefix = mobj.group(1) 
2136                 if playlist_prefix == 'a': 
2137                         playlist_access = 'artist' 
2139                         playlist_prefix = 'p' 
2140                         playlist_access = 'view_play_list' 
2141                 playlist_id = mobj.group(2) 
2146                         self.report_download_page(playlist_id, pagenum) 
2147                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)) 
2149                                 page = urllib2.urlopen(request).read() 
2150                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2151                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2154                         # Extract video identifiers 
2156                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2157                                 if mobj.group(1) not in ids_in_page: 
2158                                         ids_in_page.append(mobj.group(1)) 
2159                         video_ids.extend(ids_in_page) 
2161                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2163                         pagenum = pagenum + 1 
2165                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2166                 playlistend = self._downloader.params.get('playlistend', -1) 
2167                 video_ids = video_ids[playliststart:playlistend] 
2169                 for id in video_ids: 
2170                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2173 class YoutubeUserIE(InfoExtractor): 
2174         """Information Extractor for YouTube users.""" 
2176         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)' 
2177         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' 
2178         _GDATA_PAGE_SIZE = 50 
2179         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' 
2180         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
2183         def __init__(self, youtube_ie, downloader=None): 
2184                 InfoExtractor.__init__(self, downloader) 
2185                 self._youtube_ie = youtube_ie 
2189                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None) 
2191         def report_download_page(self, username, start_index): 
2192                 """Report attempt to download user page.""" 
2193                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % 
2194                                            (username, start_index, start_index + self._GDATA_PAGE_SIZE)) 
2196         def _real_initialize(self): 
2197                 self._youtube_ie.initialize() 
2199         def _real_extract(self, url): 
2201                 mobj = re.match(self._VALID_URL, url) 
2203                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2206                 username = mobj.group(1) 
2208                 # Download video ids using YouTube Data API. Result size per 
2209                 # query is limited (currently to 50 videos) so we need to query 
2210                 # page by page until there are no video ids - it means we got 
2217                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1 
2218                         self.report_download_page(username, start_index) 
2220                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)) 
2223                                 page = urllib2.urlopen(request).read() 
2224                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2225                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2228                         # Extract video identifiers 
2231                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2232                                 if mobj.group(1) not in ids_in_page: 
2233                                         ids_in_page.append(mobj.group(1)) 
2235                         video_ids.extend(ids_in_page) 
2237                         # A little optimization - if current page is not 
2238                         # "full
", ie. does not contain PAGE_SIZE video ids then 
2239                         # we can assume that this page is the last one - there 
2240                         # are no more ids on further pages - no need to query 
2243                         if len(ids_in_page) < self._GDATA_PAGE_SIZE: 
2248                 all_ids_count = len(video_ids) 
2249                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2250                 playlistend = self._downloader.params.get('playlistend', -1) 
2252                 if playlistend == -1: 
2253                         video_ids = video_ids[playliststart:] 
2255                         video_ids = video_ids[playliststart:playlistend] 
2257                 self._downloader.to_screen("[youtube
] user 
%s: Collected 
%d video 
ids (downloading 
%d of them
)" % 
2258                                            (username, all_ids_count, len(video_ids))) 
2260                 for video_id in video_ids: 
2261                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) 
2264 class DepositFilesIE(InfoExtractor): 
2265         """Information extractor for depositfiles.com""" 
2267         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' 
2269         def __init__(self, downloader=None): 
2270                 InfoExtractor.__init__(self, downloader) 
2274                 return (re.match(DepositFilesIE._VALID_URL, url) is not None) 
2276         def report_download_webpage(self, file_id): 
2277                 """Report webpage download.""" 
2278                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) 
2280         def report_extraction(self, file_id): 
2281                 """Report information extraction.""" 
2282                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) 
2284         def _real_initialize(self): 
2287         def _real_extract(self, url): 
2288                 # At this point we have a new file 
2289                 self._downloader.increment_downloads() 
2291                 file_id = url.split('/')[-1] 
2292                 # Rebuild url in english locale 
2293                 url = 'http://depositfiles.com/en/files/' + file_id 
2295                 # Retrieve file webpage with 'Free download' button pressed 
2296                 free_download_indication = { 'gateway_result' : '1' } 
2297                 request = urllib2.Request(url, urllib.urlencode(free_download_indication)) 
2299                         self.report_download_webpage(file_id) 
2300                         webpage = urllib2.urlopen(request).read() 
2301                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2302                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) 
2305                 # Search for the real file URL 
2306                 mobj = re.search(r'<form action="(http
://fileshare
.+?
)"', webpage) 
2307                 if (mobj is None) or (mobj.group(1) is None): 
2308                         # Try to figure out reason of the error. 
2309                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) 
2310                         if (mobj is not None) and (mobj.group(1) is not None): 
2311                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() 
2312                                 self._downloader.trouble(u'ERROR: %s' % restriction_message) 
2314                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) 
2317                 file_url = mobj.group(1) 
2318                 file_extension = os.path.splitext(file_url)[1][1:] 
2320                 # Search for file title 
2321                 mobj = re.search(r'<b title="(.*?
)">', webpage) 
2323                         self._downloader.trouble(u'ERROR: unable to extract title') 
2325                 file_title = mobj.group(1).decode('utf-8') 
2328                         # Process file information 
2329                         self._downloader.process_info({ 
2330                                 'id':           file_id.decode('utf-8'), 
2331                                 'url':          file_url.decode('utf-8'), 
2333                                 'upload_date':  u'NA', 
2334                                 'title':        file_title, 
2335                                 'stitle':       file_title, 
2336                                 'ext':          file_extension.decode('utf-8'), 
2340                 except UnavailableVideoError, err: 
2341                         self._downloader.trouble(u'ERROR: unable to download file') 
2343 class FacebookIE(InfoExtractor): 
2344         """Information Extractor for Facebook""" 
2346         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' 
2347         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' 
2348         _NETRC_MACHINE = 'facebook' 
2349         _available_formats = ['highqual', 'lowqual'] 
2350         _video_extensions = { 
2355         def __init__(self, downloader=None): 
2356                 InfoExtractor.__init__(self, downloader) 
2360                 return (re.match(FacebookIE._VALID_URL, url) is not None) 
2362         def _reporter(self, message): 
2363                 """Add header and report message.""" 
2364                 self._downloader.to_screen(u'[facebook] %s' % message) 
2366         def report_login(self): 
2367                 """Report attempt to log in.""" 
2368                 self._reporter(u'Logging in') 
2370         def report_video_webpage_download(self, video_id): 
2371                 """Report attempt to download video webpage.""" 
2372                 self._reporter(u'%s: Downloading video webpage' % video_id) 
2374         def report_information_extraction(self, video_id): 
2375                 """Report attempt to extract video information.""" 
2376                 self._reporter(u'%s: Extracting video information' % video_id) 
2378         def _parse_page(self, video_webpage): 
2379                 """Extract video information from page""" 
2381                 data = {'title': r'class="video_title datawrap
">(.*?)</', 
2382                         'description': r'<div class="datawrap
">(.*?)</div>', 
2383                         'owner': r'\("video_owner_name
", "(.*?
)"\)', 
2384                         'upload_date': r'data-date="(.*?
)"', 
2385                         'thumbnail':  r'\("thumb_url
", "(?P
<THUMB
>.*?
)"\)', 
2388                 for piece in data.keys(): 
2389                         mobj = re.search(data[piece], video_webpage) 
2390                         if mobj is not None: 
2391                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape
")) 
2395                 for fmt in self._available_formats: 
2396                         mobj = re.search(r'\("%s_src
\", "(.+?)"\
)' % fmt, video_webpage) 
2397                         if mobj is not None: 
2398                                 # URL is in a Javascript segment inside an escaped Unicode format within 
2399                                 # the generally utf-8 page 
2400                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape")) 
2401                 video_info['video_urls
'] = video_urls 
2405         def _real_initialize(self): 
2406                 if self._downloader is None: 
2411                 downloader_params = self._downloader.params 
2413                 # Attempt to use provided username and password or .netrc data 
2414                 if downloader_params.get('username
', None) is not None: 
2415                         useremail = downloader_params['username
'] 
2416                         password = downloader_params['password
'] 
2417                 elif downloader_params.get('usenetrc
', False): 
2419                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE) 
2420                                 if info is not None: 
2424                                         raise netrc.NetrcParseError('No authenticators 
for %s' % self._NETRC_MACHINE) 
2425                         except (IOError, netrc.NetrcParseError), err: 
2426                                 self._downloader.to_stderr(u'WARNING
: parsing 
.netrc
: %s' % str(err)) 
2429                 if useremail is None: 
2438                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) 
2441                         login_results = urllib2.urlopen(request).read() 
2442                         if re.search(r'<form(.*)name
="login"(.*)</form
>', login_results) is not None: 
2443                                 self._downloader.to_stderr(u'WARNING
: unable to log 
in: bad username
/password
, or exceded login rate 
limit (~
3/min). Check credentials 
or wait
.') 
2445                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2446                         self._downloader.to_stderr(u'WARNING
: unable to log 
in: %s' % str(err)) 
2449         def _real_extract(self, url): 
2450                 mobj = re.match(self._VALID_URL, url) 
2452                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
2454                 video_id = mobj.group('ID
') 
2457                 self.report_video_webpage_download(video_id) 
2458                 request = urllib2.Request('https
://www
.facebook
.com
/video
/video
.php?v
=%s' % video_id) 
2460                         page = urllib2.urlopen(request) 
2461                         video_webpage = page.read() 
2462                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2463                         self._downloader.trouble(u'ERROR
: unable to download video webpage
: %s' % str(err)) 
2466                 # Start extracting information 
2467                 self.report_information_extraction(video_id) 
2469                 # Extract information 
2470                 video_info = self._parse_page(video_webpage) 
2473                 if 'owner
' not in video_info: 
2474                         self._downloader.trouble(u'ERROR
: unable to extract uploader nickname
') 
2476                 video_uploader = video_info['owner
'] 
2479                 if 'title
' not in video_info: 
2480                         self._downloader.trouble(u'ERROR
: unable to extract video title
') 
2482                 video_title = video_info['title
'] 
2483                 video_title = video_title.decode('utf
-8') 
2484                 video_title = sanitize_title(video_title) 
2487                 simple_title = re.sub(ur'(?u
)([^
%s]+)' % simple_title_chars, ur'_
', video_title) 
2488                 simple_title = simple_title.strip(ur'_
') 
2491                 if 'thumbnail
' not in video_info: 
2492                         self._downloader.trouble(u'WARNING
: unable to extract video thumbnail
') 
2493                         video_thumbnail = '' 
2495                         video_thumbnail = video_info['thumbnail
'] 
2499                 if 'upload_date
' in video_info: 
2500                         upload_time = video_info['upload_date
'] 
2501                         timetuple = email.utils.parsedate_tz(upload_time) 
2502                         if timetuple is not None: 
2504                                         upload_date = time.strftime('%Y
%m
%d', timetuple[0:9]) 
2509                 video_description = 'No description available
.' 
2510                 if (self._downloader.params.get('forcedescription
', False) and 
2511                     'description
' in video_info): 
2512                         video_description = video_info['description
'] 
2514                 url_map = video_info['video_urls
'] 
2515                 if len(url_map.keys()) > 0: 
2516                         # Decide which formats to download 
2517                         req_format = self._downloader.params.get('format
', None) 
2518                         format_limit = self._downloader.params.get('format_limit
', None) 
2520                         if format_limit is not None and format_limit in self._available_formats: 
2521                                 format_list = self._available_formats[self._available_formats.index(format_limit):] 
2523                                 format_list = self._available_formats 
2524                         existing_formats = [x for x in format_list if x in url_map] 
2525                         if len(existing_formats) == 0: 
2526                                 self._downloader.trouble(u'ERROR
: no known formats available 
for video
') 
2528                         if req_format is None: 
2529                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality 
2530                         elif req_format == '-1': 
2531                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats 
2534                                 if req_format not in url_map: 
2535                                         self._downloader.trouble(u'ERROR
: requested format 
not available
') 
2537                                 video_url_list = [(req_format, url_map[req_format])] # Specific format 
2539                 for format_param, video_real_url in video_url_list: 
2541                         # At this point we have a new video 
2542                         self._downloader.increment_downloads() 
2545                         video_extension = self._video_extensions.get(format_param, 'mp4
') 
2547                         # Find the video URL in fmt_url_map or conn paramters 
2549                                 # Process video information 
2550                                 self._downloader.process_info({ 
2551                                         'id':           video_id.decode('utf
-8'), 
2552                                         'url
':          video_real_url.decode('utf
-8'), 
2553                                         'uploader
':     video_uploader.decode('utf
-8'), 
2554                                         'upload_date
':  upload_date, 
2555                                         'title
':        video_title, 
2556                                         'stitle
':       simple_title, 
2557                                         'ext
':          video_extension.decode('utf
-8'), 
2558                                         'format
':       (format_param is None and u'NA
' or format_param.decode('utf
-8')), 
2559                                         'thumbnail
':    video_thumbnail.decode('utf
-8'), 
2560                                         'description
':  video_description.decode('utf
-8'), 
2563                         except UnavailableVideoError, err: 
2564                                 self._downloader.trouble(u'\nERROR
: unable to download video
') 
class PostProcessor(object):
	"""Post Processor class.

	PostProcessor objects can be added to downloaders with their
	add_post_processor() method. When the downloader has finished a
	successful download, it will take its internal chain of PostProcessors
	and start calling the run() method on each one of them, first with
	an initial argument and then with the returned value of the previous
	PostProcessor.

	The chain will be stopped if one of them ever returns None or the end
	of the chain is reached.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		composed by InfoExtractors. The only difference is that this
		one has an extra field called "filepath" that points to the
		downloaded file.

		When this method returns None, the postprocessing chain is
		stopped. However, this method may return an information
		dictionary that will be passed to the next postprocessing
		object in the chain. It can be the one it received after
		changing some fields.

		In addition, this method may raise a PostProcessingError
		exception that will be taken into account by the downloader
		it was called from.
		"""
		return information # by default, do nothing
class FFmpegExtractAudioPP(PostProcessor):
	"""Post processor that extracts the audio track of a downloaded
	video into a standalone audio file using ffmpeg/ffprobe.

	preferredcodec may be 'best' (keep aac/mp3 streams losslessly,
	transcode anything else to mp3), 'aac' or 'mp3'.
	"""

	def __init__(self, downloader=None, preferredcodec=None):
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec

	@staticmethod
	def get_audio_codec(path):
		"""Return the codec name of the file's audio stream, or None
		if ffprobe fails or no audio stream is found."""
		try:
			handle = subprocess.Popen(['ffprobe', '-show_streams', path],
					stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
			output = handle.communicate()[0]
			if handle.wait() != 0:
				return None
		except (IOError, OSError):
			return None
		audio_codec = None
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
				# codec_name precedes codec_type inside each stream
				# block, so this pairs the name with the audio stream.
				return audio_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		"""Run ffmpeg to write the audio-only file; return success flag."""
		try:
			ret = subprocess.call(['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + [out_path],
					stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
			return (ret == 0)
		except (IOError, OSError):
			return False

	def run(self, information):
		path = information['filepath']

		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		more_opts = []
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
			if filecodec == 'aac' or filecodec == 'mp3':
				# Lossless if possible
				acodec = 'copy'
				extension = filecodec
				if filecodec == 'aac':
					more_opts = ['-f', 'adts']
			else:
				# MP3 otherwise.
				acodec = 'libmp3lame'
				extension = 'mp3'
				more_opts = ['-ab', '128k']
		else:
			# We convert the audio (lossy)
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
			extension = self._preferredcodec
			more_opts = ['-ab', '128k']
			if self._preferredcodec == 'aac':
				more_opts += ['-f', 'adts']

		(prefix, ext) = os.path.splitext(path)
		new_path = prefix + '.' + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
		status = self.run_ffmpeg(path, new_path, acodec, more_opts)

		if not status:
			self._downloader.to_stderr(u'WARNING: error running ffmpeg')
			return None

		# Try to delete the original file so only the audio remains.
		try:
			os.remove(path)
		except (IOError, OSError):
			self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
			return None

		information['filepath'] = new_path
		return information
2694 ### MAIN PROGRAM ### 
2695 if __name__ == '__main__
': 
2697                 # Modules needed only when running the main program 
2701                 # Function to update the program file with the latest version from the repository. 
2702                 def update_self(downloader, filename): 
2703                         # Note: downloader only used for options 
2704                         if not os.access(filename, os.W_OK): 
2705                                 sys.exit('ERROR
: no write permissions on 
%s' % filename) 
2707                         downloader.to_screen('Updating to latest stable version
...') 
2709                                 latest_url = 'http
://github
.com
/rg3
/youtube
-dl
/raw
/master
/LATEST_VERSION
' 
2710                                 latest_version = urllib.urlopen(latest_url).read().strip() 
2711                                 prog_url = 'http
://github
.com
/rg3
/youtube
-dl
/raw
/%s/youtube
-dl
' % latest_version 
2712                                 newcontent = urllib.urlopen(prog_url).read() 
2713                         except (IOError, OSError), err: 
2714                                 sys.exit('ERROR
: unable to download latest version
') 
2716                                 stream = open(filename, 'w
') 
2717                                 stream.write(newcontent) 
2719                         except (IOError, OSError), err: 
2720                                 sys.exit('ERROR
: unable to overwrite current version
') 
2721                         downloader.to_screen('Updated to version 
%s' % latest_version) 
2723                 # Parse command line 
2724                 parser = optparse.OptionParser( 
2725                         usage='Usage
: %prog 
[options
] url
...', 
2726                         version='2011.02.25b
', 
2727                         conflict_handler='resolve
', 
2730                 parser.add_option('-h
', '--help', 
2731                                 action='help', help='print this 
help text 
and exit
') 
2732                 parser.add_option('-v
', '--version
', 
2733                                 action='version
', help='print program version 
and exit
') 
2734                 parser.add_option('-U
', '--update
', 
2735                                 action='store_true
', dest='update_self
', help='update this program to latest stable version
') 
2736                 parser.add_option('-i
', '--ignore
-errors
', 
2737                                 action='store_true
', dest='ignoreerrors
', help='continue on download errors
', default=False) 
2738                 parser.add_option('-r
', '--rate
-limit
', 
2739                                 dest='ratelimit
', metavar='LIMIT
', help='download rate 
limit (e
.g
. 50k 
or 44.6m
)') 
2740                 parser.add_option('-R
', '--retries
', 
2741                                 dest='retries
', metavar='RETRIES
', help='number of 
retries (default 
is 10)', default=10) 
2742                 parser.add_option('--playlist
-start
', 
2743                                 dest='playliststart
', metavar='NUMBER
', help='playlist video to start 
at (default 
is 1)', default=1) 
2744                 parser.add_option('--playlist
-end
', 
2745                                 dest='playlistend
', metavar='NUMBER
', help='playlist video to end 
at (default 
is last
)', default=-1) 
2746                 parser.add_option('--dump
-user
-agent
', 
2747                                 action='store_true
', dest='dump_user_agent
', 
2748                                 help='display the current browser identification
', default=False) 
2750                 authentication = optparse.OptionGroup(parser, 'Authentication Options
') 
2751                 authentication.add_option('-u
', '--username
', 
2752                                 dest='username
', metavar='USERNAME
', help='account username
') 
2753                 authentication.add_option('-p
', '--password
', 
2754                                 dest='password
', metavar='PASSWORD
', help='account password
') 
2755                 authentication.add_option('-n
', '--netrc
', 
2756                                 action='store_true
', dest='usenetrc
', help='use 
.netrc authentication data
', default=False) 
2757                 parser.add_option_group(authentication) 
2759                 video_format = optparse.OptionGroup(parser, 'Video Format Options
') 
2760                 video_format.add_option('-f
', '--format
', 
2761                                 action='store
', dest='format
', metavar='FORMAT
', help='video format code
') 
2762                 video_format.add_option('--all
-formats
', 
2763                                 action='store_const
', dest='format
', help='download all available video formats
', const='-1') 
2764                 video_format.add_option('--max-quality
', 
2765                                 action='store
', dest='format_limit
', metavar='FORMAT
', help='highest quality format to download
') 
2766                 parser.add_option_group(video_format) 
2768                 verbosity = optparse.OptionGroup(parser, 'Verbosity 
/ Simulation Options
') 
2769                 verbosity.add_option('-q
', '--quiet
', 
2770                                 action='store_true
', dest='quiet
', help='activates quiet mode
', default=False) 
2771                 verbosity.add_option('-s
', '--simulate
', 
2772                                 action='store_true
', dest='simulate
', help='do 
not download video
', default=False) 
2773                 verbosity.add_option('-g
', '--get
-url
', 
2774                                 action='store_true
', dest='geturl
', help='simulate
, quiet but 
print URL
', default=False) 
2775                 verbosity.add_option('-e
', '--get
-title
', 
2776                                 action='store_true
', dest='gettitle
', help='simulate
, quiet but 
print title
', default=False) 
2777                 verbosity.add_option('--get
-thumbnail
', 
2778                                 action='store_true
', dest='getthumbnail
', 
2779                                 help='simulate
, quiet but 
print thumbnail URL
', default=False) 
2780                 verbosity.add_option('--get
-description
', 
2781                                 action='store_true
', dest='getdescription
', 
2782                                 help='simulate
, quiet but 
print video description
', default=False) 
2783                 verbosity.add_option('--get
-filename
', 
2784                                 action='store_true
', dest='getfilename
', 
2785                                 help='simulate
, quiet but 
print output filename
', default=False) 
2786                 verbosity.add_option('--no
-progress
', 
2787                                 action='store_true
', dest='noprogress
', help='do 
not print progress bar
', default=False) 
2788                 verbosity.add_option('--console
-title
', 
2789                                 action='store_true
', dest='consoletitle
', 
2790                                 help='display progress 
in console titlebar
', default=False) 
2791                 parser.add_option_group(verbosity) 
# Filesystem group: everything that controls where and how files land on disk.
filesystem = optparse.OptionGroup(parser, 'Filesystem Options')
filesystem.add_option('-t', '--title',
        action='store_true', dest='usetitle', help='use title in file name', default=False)
filesystem.add_option('-l', '--literal',
        action='store_true', dest='useliteral', help='use literal title in file name', default=False)
filesystem.add_option('-A', '--auto-number',
        action='store_true', dest='autonumber',
        help='number downloaded files starting from 00000', default=False)
filesystem.add_option('-o', '--output',
        dest='outtmpl', metavar='TEMPLATE', help='output filename template')
filesystem.add_option('-a', '--batch-file',
        dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
filesystem.add_option('-w', '--no-overwrites',
        action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
filesystem.add_option('-c', '--continue',
        action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
filesystem.add_option('--cookies',
        dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
filesystem.add_option('--no-part',
        action='store_true', dest='nopart', help='do not use .part files', default=False)
# NOTE: store_false here — the flag *disables* mtime propagation, default is on.
filesystem.add_option('--no-mtime',
        action='store_false', dest='updatetime',
        help='do not use the Last-modified header to set the file modification time', default=True)
parser.add_option_group(filesystem)
# Post-processing group: audio extraction via external ffmpeg/ffprobe.
postproc = optparse.OptionGroup(parser, 'Post-processing Options')
postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
        help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
        help='"best", "aac" or "mp3"; best by default')
parser.add_option_group(postproc)
2825                 (opts, args) = parser.parse_args() 
2827                 # Open appropriate CookieJar 
2828                 if opts.cookiefile is None: 
2829                         jar = cookielib.CookieJar() 
2832                                 jar = cookielib.MozillaCookieJar(opts.cookiefile) 
2833                                 if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): 
2835                         except (IOError, OSError), err: 
2836                                 sys.exit(u'ERROR
: unable to 
open cookie 
file') 
2839                 if opts.dump_user_agent: 
2840                         print std_headers['User
-Agent
'] 
# General configuration: install a global urllib2 opener that routes every
# request through the proxy handler, the cookie jar, and the project's
# gzip/deflate-aware YoutubeDLHandler.
cookie_processor = urllib2.HTTPCookieProcessor(jar)
opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler())
urllib2.install_opener(opener)
socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
# Batch file verification.
# Defects fixed: the extracted code had `sys.exit(...)` for a read failure with
# no enclosing `try:`, an empty branch for the '-' (stdin) case, and left
# `batchurls` unbound when no batch file was given — making the later
# `batchurls + args` a NameError.
batchurls = []
if opts.batchfile is not None:
    try:
        if opts.batchfile == '-':
            batchfd = sys.stdin
        else:
            batchfd = open(opts.batchfile, 'r')
        batchurls = batchfd.readlines()
        batchurls = [x.strip() for x in batchurls]
        # Drop blank lines and comment lines starting with '#', '/' or ';'.
        batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
    except IOError:
        sys.exit(u'ERROR: batch file could not be read')
all_urls = batchurls + args
2863                 # Conflicting, missing and erroneous options 
2864                 if opts
.usenetrc 
and (opts
.username 
is not None or opts
.password 
is not None): 
2865                         parser
.error(u
'using .netrc conflicts with giving username/password') 
2866                 if opts
.password 
is not None and opts
.username 
is None: 
2867                         parser
.error(u
'account username missing') 
2868                 if opts
.outtmpl 
is not None and (opts
.useliteral 
or opts
.usetitle 
or opts
.autonumber
): 
2869                         parser
.error(u
'using output template conflicts with using title, literal title or auto number') 
2870                 if opts
.usetitle 
and opts
.useliteral
: 
2871                         parser
.error(u
'using title conflicts with using literal title') 
2872                 if opts
.username 
is not None and opts
.password 
is None: 
2873                         opts
.password 
= getpass
.getpass(u
'Type account password and press return:') 
2874                 if opts
.ratelimit 
is not None: 
2875                         numeric_limit 
= FileDownloader
.parse_bytes(opts
.ratelimit
) 
2876                         if numeric_limit 
is None: 
2877                                 parser
.error(u
'invalid rate limit specified') 
2878                         opts
.ratelimit 
= numeric_limit
 
2879                 if opts
.retries 
is not None: 
2881                                 opts
.retries 
= long(opts
.retries
) 
2882                         except (TypeError, ValueError), err
: 
2883                                 parser
.error(u
'invalid retry count specified') 
2885                         opts
.playliststart 
= long(opts
.playliststart
) 
2886                         if opts
.playliststart 
<= 0: 
2888                 except (TypeError, ValueError), err
: 
2889                         parser
.error(u
'invalid playlist start number specified') 
2891                         opts
.playlistend 
= long(opts
.playlistend
) 
2892                         if opts
.playlistend 
!= -1 and (opts
.playlistend 
<= 0 or opts
.playlistend 
< opts
.playliststart
): 
2894                 except (TypeError, ValueError), err
: 
2895                         parser
.error(u
'invalid playlist end number specified') 
2896                 if opts
.extractaudio
: 
2897                         if opts
.audioformat 
not in ['best', 'aac', 'mp3']: 
2898                                 parser
.error(u
'invalid audio format specified') 
# Information extractors.
# The YouTube extractor is shared: playlist/user/search extractors delegate
# individual video extraction to it; likewise Google/Yahoo search wrap their
# site extractors.
youtube_ie = YoutubeIE()
metacafe_ie = MetacafeIE(youtube_ie)
dailymotion_ie = DailymotionIE()
youtube_pl_ie = YoutubePlaylistIE(youtube_ie)
youtube_user_ie = YoutubeUserIE(youtube_ie)
youtube_search_ie = YoutubeSearchIE(youtube_ie)
google_ie = GoogleIE()
google_search_ie = GoogleSearchIE(google_ie)
photobucket_ie = PhotobucketIE()
yahoo_ie = YahooIE()
yahoo_search_ie = YahooSearchIE(yahoo_ie)
deposit_files_ie = DepositFilesIE()
facebook_ie = FacebookIE()
generic_ie = GenericIE()
# File downloader.
# Defect fixed: the extracted code opened `FileDownloader({` but the closing
# `})` was missing, leaving the call (and the whole module) syntactically
# broken. Restored the closing of the params dict and the call.
fd = FileDownloader({
    'usenetrc': opts.usenetrc,
    'username': opts.username,
    'password': opts.password,
    # Any --get-* flag implies quiet operation and simulation.
    'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
    'forceurl': opts.geturl,
    'forcetitle': opts.gettitle,
    'forcethumbnail': opts.getthumbnail,
    'forcedescription': opts.getdescription,
    'forcefilename': opts.getfilename,
    'simulate': (opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename),
    'format': opts.format,
    'format_limit': opts.format_limit,
    # Output template: an explicit -o wins; otherwise pick a template from the
    # title/literal/autonumber flag combination, falling back to id.ext.
    'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
        or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
        or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
        or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
        or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
        or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
        or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
        or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
        or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
        or u'%(id)s.%(ext)s'),
    'ignoreerrors': opts.ignoreerrors,
    'ratelimit': opts.ratelimit,
    'nooverwrites': opts.nooverwrites,
    'retries': opts.retries,
    'continuedl': opts.continue_dl,
    'noprogress': opts.noprogress,
    'playliststart': opts.playliststart,
    'playlistend': opts.playlistend,
    # Writing the video to stdout forces progress/log output to stderr.
    'logtostderr': opts.outtmpl == '-',
    'consoletitle': opts.consoletitle,
    'nopart': opts.nopart,
    'updatetime': opts.updatetime,
    })
# Register the extractors with the downloader. Order matters: more specific
# extractors (search, playlist, user) are consulted before the plain site
# extractors they wrap.
for ie in (youtube_search_ie, youtube_pl_ie, youtube_user_ie, metacafe_ie,
        dailymotion_ie, youtube_ie, google_ie, google_search_ie,
        photobucket_ie, yahoo_ie, yahoo_search_ie, deposit_files_ie,
        facebook_ie):
    fd.add_info_extractor(ie)

# This must come last since it's the
# fallback if none of the others work
fd.add_info_extractor(generic_ie)
# PostProcessors: when --extract-audio was given, attach the ffmpeg-based
# audio extractor using the requested --audio-format (validated earlier).
if opts.extractaudio:
    audio_pp = FFmpegExtractAudioPP(preferredcodec=opts.audioformat)
    fd.add_post_processor(audio_pp)
# Self-update: when --update-self was given, replace this script
# (sys.argv[0]) with the latest released version.
if opts.update_self:
    update_self(fd, sys.argv[0])
# Require at least one URL — unless we only ran a self-update.
# Defect fixed: the extracted code had no `else:` branch, so a bare
# --update-self invocation fell through into fd.download([]) instead of
# exiting cleanly after the update.
if len(all_urls) < 1:
    if not opts.update_self:
        parser.error(u'you must provide at least one URL')
    else:
        sys.exit()

retcode = fd.download(all_urls)
2987                 # Dump cookie jar if requested 
2988                 if opts
.cookiefile 
is not None: 
2991                         except (IOError, OSError), err
: 
2992                                 sys
.exit(u
'ERROR: unable to save cookie jar') 
2996         except DownloadError
: 
2998         except SameFileError
: 
2999                 sys
.exit(u
'ERROR: fixed output name but more than one file to download') 
3000         except KeyboardInterrupt: 
3001                 sys
.exit(u
'\nERROR: Interrupted by user')