2 # -*- coding: utf-8 -*- 
   5         'Ricardo Garcia Gonzalez', 
  13         'Philipp Hagemeister', 
  20 __license__ 
= 'Public Domain' 
  21 __version__ 
= '2012.02.27' 
  23 UPDATE_URL 
= 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' 
  56 except ImportError: # Python 2.4 
  59         import cStringIO 
as StringIO
 
  63 # parse_qs was moved from the cgi module to the urlparse module recently. 
  65         from urlparse 
import parse_qs
 
  67         from cgi 
import parse_qs
 
  75         import xml
.etree
.ElementTree
 
  76 except ImportError: # Python<2.5: Not officially supported, but let it slip 
  77         warnings
.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.') 
  80         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 
  81         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  82         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  83         'Accept-Encoding': 'gzip, deflate', 
  84         'Accept-Language': 'en-us,en;q=0.5', 
  89 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): 
  95                         def raiseError(msg
, i
): 
  96                                 raise ValueError(msg 
+ ' at position ' + str(i
) + ' of ' + repr(s
) + ': ' + repr(s
[i
:])) 
  97                         def skipSpace(i
, expectMore
=True): 
  98                                 while i 
< len(s
) and s
[i
] in ' \t\r\n': 
 102                                                 raiseError('Premature end', i
) 
 104                         def decodeEscape(match
): 
 120                                                 return unichr(int(esc
[1:5], 16)) 
 121                                         if len(esc
) == 5+6 and esc
[5:7] == '\\u': 
 122                                                 hi 
= int(esc
[1:5], 16) 
 123                                                 low 
= int(esc
[7:11], 16) 
 124                                                 return unichr((hi 
- 0xd800) * 0x400 + low 
- 0xdc00 + 0x10000) 
 125                                 raise ValueError('Unknown escape ' + str(esc
)) 
 132                                         while s
[e
-bslashes
-1] == '\\': 
 134                                         if bslashes 
% 2 == 1: 
 138                                 rexp 
= re
.compile(r
'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') 
 139                                 stri 
= rexp
.sub(decodeEscape
, s
[i
:e
]) 
 145                                 if s
[i
] == '}': # Empty dictionary 
 149                                                 raiseError('Expected a string object key', i
) 
 150                                         i
,key 
= parseString(i
) 
 152                                         if i 
>= len(s
) or s
[i
] != ':': 
 153                                                 raiseError('Expected a colon', i
) 
 160                                                 raiseError('Expected comma or closing curly brace', i
) 
 165                                 if s
[i
] == ']': # Empty array 
 170                                         i 
= skipSpace(i
) # Raise exception if premature end 
 174                                                 raiseError('Expected a comma or closing bracket', i
) 
 176                         def parseDiscrete(i
): 
 177                                 for k
,v 
in {'true': True, 'false': False, 'null': None}.items(): 
 178                                         if s
.startswith(k
, i
): 
 180                                 raiseError('Not a boolean (or null)', i
) 
 182                                 mobj 
= re
.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s
[i
:]) 
 184                                         raiseError('Not a number', i
) 
 186                                 if '.' in nums 
or 'e' in nums 
or 'E' in nums
: 
 187                                         return (i
+len(nums
), float(nums
)) 
 188                                 return (i
+len(nums
), int(nums
)) 
 189                         CHARMAP 
= {'{': parseObj
, '[': parseArray
, '"': parseString
, 't': parseDiscrete
, 'f': parseDiscrete
, 'n': parseDiscrete
} 
 192                                 i
,res 
= CHARMAP
.get(s
[i
], parseNumber
)(i
) 
 193                                 i 
= skipSpace(i
, False) 
 197                                 raise ValueError('Extra data at end of input (index ' + str(i
) + ' of ' + repr(s
) + ': ' + repr(s
[i
:]) + ')') 
 200 def preferredencoding(): 
 201         """Get preferred encoding. 
 203         Returns the best encoding scheme for the system, based on 
 204         locale.getpreferredencoding() and some further tweaks. 
 206         def yield_preferredencoding(): 
 208                         pref 
= locale
.getpreferredencoding() 
 214         return yield_preferredencoding().next() 
 217 def htmlentity_transform(matchobj
): 
 218         """Transforms an HTML entity to a Unicode character. 
 220         This function receives a match object and is intended to be used with 
 221         the re.sub() function. 
 223         entity 
= matchobj
.group(1) 
 225         # Known non-numeric HTML entity 
 226         if entity 
in htmlentitydefs
.name2codepoint
: 
 227                 return unichr(htmlentitydefs
.name2codepoint
[entity
]) 
 230         mobj 
= re
.match(ur
'(?u)#(x?\d+)', entity
) 
 232                 numstr 
= mobj
.group(1) 
 233                 if numstr
.startswith(u
'x'): 
 235                         numstr 
= u
'0%s' % numstr
 
 238                 return unichr(long(numstr
, base
)) 
 240         # Unknown entity in name, return its literal representation 
 241         return (u
'&%s;' % entity
) 
 244 def sanitize_title(utitle
): 
 245         """Sanitizes a video title so it could be used as part of a filename.""" 
 246         utitle 
= re
.sub(ur
'(?u)&(.+?);', htmlentity_transform
, utitle
) 
 247         return utitle
.replace(unicode(os
.sep
), u
'%') 
 250 def sanitize_open(filename
, open_mode
): 
 251         """Try to open the given filename, and slightly tweak it if this fails. 
 253         Attempts to open the given filename. If this fails, it tries to change 
 254         the filename slightly, step by step, until it's either able to open it 
 255         or it fails and raises a final exception, like the standard open() 
 258         It returns the tuple (stream, definitive_file_name). 
 262                         if sys
.platform 
== 'win32': 
 264                                 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
) 
 265                         return (sys
.stdout
, filename
) 
 266                 stream 
= open(_encodeFilename(filename
), open_mode
) 
 267                 return (stream
, filename
) 
 268         except (IOError, OSError), err
: 
 269                 # In case of error, try to remove win32 forbidden chars 
 270                 filename 
= re
.sub(ur
'[/<>:"\|\?\*]', u
'#', filename
) 
 272                 # An exception here should be caught in the caller 
 273                 stream 
= open(_encodeFilename(filename
), open_mode
) 
 274                 return (stream
, filename
) 
 277 def timeconvert(timestr
): 
 278         """Convert RFC 2822 defined time string into system timestamp""" 
 280         timetuple 
= email
.utils
.parsedate_tz(timestr
) 
 281         if timetuple 
is not None: 
 282                 timestamp 
= email
.utils
.mktime_tz(timetuple
) 
 285 def _simplify_title(title
): 
 286         expr 
= re
.compile(ur
'[^\w\d_\-]+', flags
=re
.UNICODE
) 
 287         return expr
.sub(u
'_', title
).strip(u
'_') 
 289 def _orderedSet(iterable
): 
 290         """ Remove all duplicates from the input iterable """ 
 297 def _unescapeHTML(s
): 
 299         @param s a string (of type unicode) 
 301         assert type(s
) == type(u
'') 
 303         htmlParser 
= HTMLParser
.HTMLParser() 
 304         return htmlParser
.unescape(s
) 
 306 def _encodeFilename(s
): 
 308         @param s The name of the file (of type unicode) 
 311         assert type(s
) == type(u
'') 
 313         if sys
.platform 
== 'win32' and sys
.getwindowsversion().major 
>= 5: 
 314                 # Pass u'' directly to use Unicode APIs on Windows 2000 and up 
 315                 # (Detecting Windows NT 4 is tricky because 'major >= 4' would 
 316                 # match Windows 9x series as well. Besides, NT 4 is obsolete.) 
 319                 return s
.encode(sys
.getfilesystemencoding(), 'ignore') 
 321 class DownloadError(Exception): 
 322         """Download Error exception. 
 324         This exception may be thrown by FileDownloader objects if they are not 
 325         configured to continue on errors. They will contain the appropriate 
 331 class SameFileError(Exception): 
 332         """Same File exception. 
 334         This exception will be thrown by FileDownloader objects if they detect 
 335         multiple files would have to be downloaded to the same file on disk. 
 340 class PostProcessingError(Exception): 
 341         """Post Processing exception. 
 343         This exception may be raised by PostProcessor's .run() method to 
 344         indicate an error in the postprocessing task. 
 348 class MaxDownloadsReached(Exception): 
 349         """ --max-downloads limit has been reached. """ 
 353 class UnavailableVideoError(Exception): 
 354         """Unavailable Format exception. 
 356         This exception will be thrown when a video is requested 
 357         in a format that is not available for that video. 
 362 class ContentTooShortError(Exception): 
 363         """Content Too Short exception. 
 365         This exception may be raised by FileDownloader objects when a file they 
 366         download is too small for what the server announced first, indicating 
 367         the connection was probably interrupted. 
	def __init__(self, downloaded, expected):
		"""Remember how many bytes actually arrived versus how many the
		server announced, so callers can report the mismatch."""
		self.expected = expected
		self.downloaded = downloaded
 378 class YoutubeDLHandler(urllib2
.HTTPHandler
): 
 379         """Handler for HTTP requests and responses. 
 381         This class, when installed with an OpenerDirector, automatically adds 
 382         the standard headers to every HTTP request and handles gzipped and 
 383         deflated responses from web servers. If compression is to be avoided in 
 384         a particular request, the original request in the program code only has 
 385         to include the HTTP header "Youtubedl-No-Compression", which will be 
 386         removed before making the real request. 
 388         Part of this code was copied from: 
 390         http://techknack.net/python-urllib2-handlers/ 
 392         Andrew Rowls, the author of that code, agreed to release it to the 
 399                         return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 401                         return zlib
.decompress(data
) 
 404         def addinfourl_wrapper(stream
, headers
, url
, code
): 
 405                 if hasattr(urllib2
.addinfourl
, 'getcode'): 
 406                         return urllib2
.addinfourl(stream
, headers
, url
, code
) 
 407                 ret 
= urllib2
.addinfourl(stream
, headers
, url
) 
 411         def http_request(self
, req
): 
 412                 for h 
in std_headers
: 
 415                         req
.add_header(h
, std_headers
[h
]) 
 416                 if 'Youtubedl-no-compression' in req
.headers
: 
 417                         if 'Accept-encoding' in req
.headers
: 
 418                                 del req
.headers
['Accept-encoding'] 
 419                         del req
.headers
['Youtubedl-no-compression'] 
 422         def http_response(self
, req
, resp
): 
 425                 if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 426                         gz 
= gzip
.GzipFile(fileobj
=StringIO
.StringIO(resp
.read()), mode
='r') 
 427                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 428                         resp
.msg 
= old_resp
.msg
 
 430                 if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 431                         gz 
= StringIO
.StringIO(self
.deflate(resp
.read())) 
 432                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 433                         resp
.msg 
= old_resp
.msg
 
 437 class FileDownloader(object): 
 438         """File Downloader class. 
 440         File downloader objects are the ones responsible of downloading the 
 441         actual video file and writing it to disk if the user has requested 
 442         it, among some other tasks. In most cases there should be one per 
 443         program. As, given a video URL, the downloader doesn't know how to 
 444         extract all the needed information, task that InfoExtractors do, it 
 445         has to pass the URL to one of them. 
 447         For this, file downloader objects have a method that allows 
 448         InfoExtractors to be registered in a given order. When it is passed 
 449         a URL, the file downloader handles it to the first InfoExtractor it 
 450         finds that reports being able to handle it. The InfoExtractor extracts 
 451         all the information about the video or videos the URL refers to, and 
 452         asks the FileDownloader to process the video information, possibly 
 453         downloading the video. 
 455         File downloaders accept a lot of parameters. In order not to saturate 
 456         the object constructor with arguments, it receives a dictionary of 
 457         options instead. These options are available through the params 
 458         attribute for the InfoExtractors to use. The FileDownloader also 
 459         registers itself as the downloader in charge for the InfoExtractors 
 460         that are added to it, so this is a "mutual registration". 
 464         username:         Username for authentication purposes. 
 465         password:         Password for authentication purposes. 
 466         usenetrc:         Use netrc for authentication instead. 
 467         quiet:            Do not print messages to stdout. 
 468         forceurl:         Force printing final URL. 
 469         forcetitle:       Force printing title. 
 470         forcethumbnail:   Force printing thumbnail URL. 
 471         forcedescription: Force printing description. 
 472         forcefilename:    Force printing final filename. 
 473         simulate:         Do not download the video files. 
 474         format:           Video format code. 
 475         format_limit:     Highest quality format to try. 
 476         outtmpl:          Template for output names. 
 477         ignoreerrors:     Do not stop on download errors. 
 478         ratelimit:        Download speed limit, in bytes/sec. 
 479         nooverwrites:     Prevent overwriting files. 
 480         retries:          Number of times to retry for HTTP error 5xx 
 481         continuedl:       Try to continue downloads if possible. 
 482         noprogress:       Do not print the progress bar. 
 483         playliststart:    Playlist item to start at. 
 484         playlistend:      Playlist item to end at. 
 485         matchtitle:       Download only matching titles. 
 486         rejecttitle:      Reject downloads for matching titles. 
 487         logtostderr:      Log messages to stderr instead of stdout. 
 488         consoletitle:     Display progress in console window's titlebar. 
 489         nopart:           Do not use temporary .part files. 
 490         updatetime:       Use the Last-modified header to set output file timestamps. 
 491         writedescription: Write the video description to a .description file 
 492         writeinfojson:    Write the video description to a .info.json file 
 498         _download_retcode 
= None 
 499         _num_downloads 
= None 
 502         def __init__(self
, params
): 
 503                 """Create a FileDownloader object with the given options.""" 
 506                 self
._download
_retcode 
= 0 
 507                 self
._num
_downloads 
= 0 
 508                 self
._screen
_file 
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)] 
 512         def format_bytes(bytes): 
 515                 if type(bytes) is str: 
 520                         exponent 
= long(math
.log(bytes, 1024.0)) 
 521                 suffix 
= 'bkMGTPEZY'[exponent
] 
 522                 converted 
= float(bytes) / float(1024 ** exponent
) 
 523                 return '%.2f%s' % (converted
, suffix
) 
 526         def calc_percent(byte_counter
, data_len
): 
 529                 return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0)) 
 532         def calc_eta(start
, now
, total
, current
): 
 536                 if current 
== 0 or dif 
< 0.001: # One millisecond 
 538                 rate 
= float(current
) / dif
 
 539                 eta 
= long((float(total
) - float(current
)) / rate
) 
 540                 (eta_mins
, eta_secs
) = divmod(eta
, 60) 
 543                 return '%02d:%02d' % (eta_mins
, eta_secs
) 
 546         def calc_speed(start
, now
, bytes): 
 548                 if bytes == 0 or dif 
< 0.001: # One millisecond 
 549                         return '%10s' % '---b/s' 
 550                 return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
)) 
 553         def best_block_size(elapsed_time
, bytes): 
 554                 new_min 
= max(bytes / 2.0, 1.0) 
 555                 new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
 556                 if elapsed_time 
< 0.001: 
 558                 rate 
= bytes / elapsed_time
 
 566         def parse_bytes(bytestr
): 
 567                 """Parse a string indicating a byte quantity into a long integer.""" 
 568                 matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 571                 number 
= float(matchobj
.group(1)) 
 572                 multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 573                 return long(round(number 
* multiplier
)) 
 575         def add_info_extractor(self
, ie
): 
 576                 """Add an InfoExtractor object to the end of the list.""" 
 578                 ie
.set_downloader(self
) 
 580         def add_post_processor(self
, pp
): 
 581                 """Add a PostProcessor object to the end of the chain.""" 
 583                 pp
.set_downloader(self
) 
	def to_screen(self, message, skip_eol=False):
		"""Write message to the screen file unless the 'quiet' option is set.

		message must be a unicode object; it is encoded with the preferred
		system encoding before being written on a non-binary stream.
		"""
		assert type(message) == type(u'')
		if self.params.get('quiet', False):
			return
		if skip_eol:
			output = message
		else:
			output = message + u'\n'
		# Python 2 lies about the mode of sys.stdout/sys.stderr
		if 'b' not in self._screen_file.mode or sys.version_info[0] < 3:
			output = output.encode(preferredencoding(), 'ignore')
		self._screen_file.write(output)
		self._screen_file.flush()
	def to_stderr(self, message):
		"""Print message to stderr."""
		encoded = message.encode(preferredencoding())
		sys.stderr.write(encoded + '\n')
 601         def to_cons_title(self
, message
): 
 602                 """Set console/terminal window title to message.""" 
 603                 if not self
.params
.get('consoletitle', False): 
 605                 if os
.name 
== 'nt' and ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 606                         # c_wchar_p() might not be necessary if `message` is 
 607                         # already of type unicode() 
 608                         ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 609                 elif 'TERM' in os
.environ
: 
 610                         sys
.stderr
.write('\033]0;%s\007' % message
.encode(preferredencoding())) 
 612         def fixed_template(self
): 
 613                 """Checks if the output template is fixed.""" 
 614                 return (re
.search(ur
'(?u)%\(.+?\)s', self
.params
['outtmpl']) is None) 
	def trouble(self, message=None):
		"""React to a download problem.

		Prints the message (if any) to stderr; then either raises a
		DownloadError or, when the 'ignoreerrors' option is set, records a
		non-zero return code and carries on.
		"""
		if message is not None:
			self.to_stderr(message)
		if self.params.get('ignoreerrors', False):
			self._download_retcode = 1
		else:
			raise DownloadError(message)
 629         def slow_down(self
, start_time
, byte_counter
): 
 630                 """Sleep if the download speed is over the rate limit.""" 
 631                 rate_limit 
= self
.params
.get('ratelimit', None) 
 632                 if rate_limit 
is None or byte_counter 
== 0: 
 635                 elapsed 
= now 
- start_time
 
 638                 speed 
= float(byte_counter
) / elapsed
 
 639                 if speed 
> rate_limit
: 
 640                         time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 642         def temp_name(self
, filename
): 
 643                 """Returns a temporary filename for the given filename.""" 
 644                 if self
.params
.get('nopart', False) or filename 
== u
'-' or \
 
 645                                 (os
.path
.exists(_encodeFilename(filename
)) and not os
.path
.isfile(_encodeFilename(filename
))): 
 647                 return filename 
+ u
'.part' 
 649         def undo_temp_name(self
, filename
): 
 650                 if filename
.endswith(u
'.part'): 
 651                         return filename
[:-len(u
'.part')] 
 654         def try_rename(self
, old_filename
, new_filename
): 
 656                         if old_filename 
== new_filename
: 
 658                         os
.rename(_encodeFilename(old_filename
), _encodeFilename(new_filename
)) 
 659                 except (IOError, OSError), err
: 
 660                         self
.trouble(u
'ERROR: unable to rename file') 
 662         def try_utime(self
, filename
, last_modified_hdr
): 
 663                 """Try to set the last-modified time of the given file.""" 
 664                 if last_modified_hdr 
is None: 
 666                 if not os
.path
.isfile(_encodeFilename(filename
)): 
 668                 timestr 
= last_modified_hdr
 
 671                 filetime 
= timeconvert(timestr
) 
 675                         os
.utime(filename
, (time
.time(), filetime
)) 
	def report_writedescription(self, descfn):
		"""Announce that the video description is being saved to descfn."""
		notice = u'[info] Writing video description to: ' + descfn
		self.to_screen(notice)
	def report_writeinfojson(self, infofn):
		"""Announce that the JSON metadata file has been written to infofn."""
		notice = u'[info] Video description metadata as JSON to: ' + infofn
		self.to_screen(notice)
	def report_destination(self, filename):
		"""Announce the filename the video will be written to."""
		notice = u'[download] Destination: ' + filename
		self.to_screen(notice)
 692         def report_progress(self
, percent_str
, data_len_str
, speed_str
, eta_str
): 
 693                 """Report download progress.""" 
 694                 if self
.params
.get('noprogress', False): 
 696                 self
.to_screen(u
'\r[download] %s of %s at %s ETA %s' % 
 697                                 (percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 698                 self
.to_cons_title(u
'youtube-dl - %s of %s at %s ETA %s' % 
 699                                 (percent_str
.strip(), data_len_str
.strip(), speed_str
.strip(), eta_str
.strip())) 
	def report_resuming_byte(self, resume_len):
		"""Announce that the download resumes at byte resume_len."""
		notice = u'[download] Resuming download at byte %s' % resume_len
		self.to_screen(notice)
	def report_retry(self, count, retries):
		"""Announce a retry after an HTTP 5xx server error."""
		notice = u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)
		self.to_screen(notice)
 709         def report_file_already_downloaded(self
, file_name
): 
 710                 """Report file has already been fully downloaded.""" 
 712                         self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 713                 except (UnicodeEncodeError), err
: 
 714                         self
.to_screen(u
'[download] The file has already been downloaded') 
	def report_unable_to_resume(self):
		"""Announce that the partial download could not be resumed."""
		self.to_screen(u'[download] Unable to resume')
 720         def report_finish(self
): 
 721                 """Report download finished.""" 
 722                 if self
.params
.get('noprogress', False): 
 723                         self
.to_screen(u
'[download] Download completed') 
	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		self._num_downloads = self._num_downloads + 1
 731         def prepare_filename(self
, info_dict
): 
 732                 """Generate the output filename.""" 
 734                         template_dict 
= dict(info_dict
) 
 735                         template_dict
['epoch'] = unicode(long(time
.time())) 
 736                         template_dict
['autonumber'] = unicode('%05d' % self
._num
_downloads
) 
 737                         filename 
= self
.params
['outtmpl'] % template_dict
 
 739                 except (ValueError, KeyError), err
: 
 740                         self
.trouble(u
'ERROR: invalid system charset or erroneous output template') 
	def _match_entry(self, info_dict):
		"""Return None iff the file should be downloaded.

		Otherwise return a human-readable reason string explaining why the
		title was skipped (failed 'matchtitle' or hit 'rejecttitle').
		"""
		title = info_dict['title']
		matchtitle = self.params.get('matchtitle', False)
		rejecttitle = self.params.get('rejecttitle', False)
		if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
			return (u'[download] "' + title
				+ '" title did not match pattern "' + matchtitle + '"')
		if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
			return (u'"' + title
				+ '" title matched reject pattern "' + rejecttitle + '"')
		return None
 755         def process_info(self
, info_dict
): 
 756                 """Process a single dictionary returned by an InfoExtractor.""" 
 758                 reason 
= self
._match
_entry
(info_dict
) 
 759                 if reason 
is not None: 
 760                         self
.to_screen(u
'[download] ' + reason
) 
 763                 max_downloads 
= self
.params
.get('max_downloads') 
 764                 if max_downloads 
is not None: 
 765                         if self
._num
_downloads 
> int(max_downloads
): 
 766                                 raise MaxDownloadsReached() 
 768                 filename 
= self
.prepare_filename(info_dict
) 
 771                 if self
.params
.get('forcetitle', False): 
 772                         print info_dict
['title'].encode(preferredencoding(), 'xmlcharrefreplace') 
 773                 if self
.params
.get('forceurl', False): 
 774                         print info_dict
['url'].encode(preferredencoding(), 'xmlcharrefreplace') 
 775                 if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
: 
 776                         print info_dict
['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') 
 777                 if self
.params
.get('forcedescription', False) and 'description' in info_dict
: 
 778                         print info_dict
['description'].encode(preferredencoding(), 'xmlcharrefreplace') 
 779                 if self
.params
.get('forcefilename', False) and filename 
is not None: 
 780                         print filename
.encode(preferredencoding(), 'xmlcharrefreplace') 
 781                 if self
.params
.get('forceformat', False): 
 782                         print info_dict
['format'].encode(preferredencoding(), 'xmlcharrefreplace') 
 784                 # Do nothing else if in simulate mode 
 785                 if self
.params
.get('simulate', False): 
 792                         dn 
= os
.path
.dirname(_encodeFilename(filename
)) 
 793                         if dn 
!= '' and not os
.path
.exists(dn
): # dn is already encoded 
 795                 except (OSError, IOError), err
: 
 796                         self
.trouble(u
'ERROR: unable to create directory ' + unicode(err
)) 
 799                 if self
.params
.get('writedescription', False): 
 801                                 descfn 
= filename 
+ u
'.description' 
 802                                 self
.report_writedescription(descfn
) 
 803                                 descfile 
= open(_encodeFilename(descfn
), 'wb') 
 805                                         descfile
.write(info_dict
['description'].encode('utf-8')) 
 808                         except (OSError, IOError): 
 809                                 self
.trouble(u
'ERROR: Cannot write description file ' + descfn
) 
 812                 if self
.params
.get('writeinfojson', False): 
 813                         infofn 
= filename 
+ u
'.info.json' 
 814                         self
.report_writeinfojson(infofn
) 
 817                         except (NameError,AttributeError): 
 818                                 self
.trouble(u
'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') 
 821                                 infof 
= open(_encodeFilename(infofn
), 'wb') 
 823                                         json_info_dict 
= dict((k
,v
) for k
,v 
in info_dict
.iteritems() if not k 
in ('urlhandle',)) 
 824                                         json
.dump(json_info_dict
, infof
) 
 827                         except (OSError, IOError): 
 828                                 self
.trouble(u
'ERROR: Cannot write metadata to JSON file ' + infofn
) 
 831                 if not self
.params
.get('skip_download', False): 
 832                         if self
.params
.get('nooverwrites', False) and os
.path
.exists(_encodeFilename(filename
)): 
 836                                         success 
= self
._do
_download
(filename
, info_dict
) 
 837                                 except (OSError, IOError), err
: 
 838                                         raise UnavailableVideoError
 
 839                                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 840                                         self
.trouble(u
'ERROR: unable to download video data: %s' % str(err
)) 
 842                                 except (ContentTooShortError
, ), err
: 
 843                                         self
.trouble(u
'ERROR: content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
)) 
 848                                         self
.post_process(filename
, info_dict
) 
 849                                 except (PostProcessingError
), err
: 
 850                                         self
.trouble(u
'ERROR: postprocessing: %s' % str(err
)) 
	def download(self, url_list):
		"""Download a given list of URLs.

		Tries each registered InfoExtractor in order against every URL and
		returns the accumulated download return code.  Raises SameFileError
		when more than one URL would be written to a single fixed output
		template.

		NOTE(review): source text was corrupted (missing lines); body
		reconstructed from the surviving fragments.
		"""
		if len(url_list) > 1 and self.fixed_template():
			raise SameFileError(self.params['outtmpl'])

		for url in url_list:
			suitable_found = False
			for ie in self._ies:
				# Go to next InfoExtractor if not suitable
				if not ie.suitable(url):
					continue

				# Suitable InfoExtractor found
				suitable_found = True

				# Extract information from URL and process it
				ie.extract(url)

				# Suitable InfoExtractor had been found; go to next URL
				break

			if not suitable_found:
				self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

		return self._download_retcode
	def post_process(self, filename, ie_info):
		"""Run the postprocessing chain on the given file.

		Copies the extractor info dict, records the final file path under
		'filepath', and feeds it through each postprocessor in turn.  A
		postprocessor returning None stops the chain.

		NOTE(review): source text was corrupted (missing lines); the
		postprocessor loop is reconstructed — confirm against upstream.
		"""
		info = dict(ie_info)
		info['filepath'] = filename
		for pp in self._pps:
			info = pp.run(info)
			if info is None:
				break
	def _download_with_rtmpdump(self, filename, url, player_url):
		"""Download an RTMP stream by shelling out to the rtmpdump binary.

		Returns True on success, False on failure (after reporting via
		self.trouble).  rtmpdump exit code 2 means the connection was
		interrupted and resuming may be possible; code 1 is retried as well.

		NOTE(review): source text was corrupted (missing lines); control
		flow reconstructed from the surviving fragments.
		"""
		self.report_destination(filename)
		tmpfilename = self.temp_name(filename)

		# Check for rtmpdump first
		try:
			subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
		except (OSError, IOError):
			self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
			return False

		# Download using rtmpdump. rtmpdump returns exit code 2 when
		# the connection was interrumpted and resuming appears to be
		# possible. This is part of rtmpdump's normal usage, AFAIK.
		basic_args = ['rtmpdump', '-q'] + [[], ['-W', player_url]][player_url is not None] + ['-r', url, '-o', tmpfilename]
		args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
		if self.params.get('verbose', False):
			try:
				import pipes
				shell_quote = lambda args: ' '.join(map(pipes.quote, args))
			except ImportError:
				shell_quote = repr
			self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
		retval = subprocess.call(args)
		while retval == 2 or retval == 1:
			prevsize = os.path.getsize(_encodeFilename(tmpfilename))
			self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
			time.sleep(5.0) # This seems to be needed
			retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
			cursize = os.path.getsize(_encodeFilename(tmpfilename))
			if prevsize == cursize and retval == 1:
				break
			# Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
			if prevsize == cursize and retval == 2 and cursize > 1024:
				self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
				retval = 0
				break
		if retval == 0:
			self.to_screen(u'\r[rtmpdump] %s bytes' % os.path.getsize(_encodeFilename(tmpfilename)))
			self.try_rename(tmpfilename, filename)
			return True
		else:
			self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
			return False
 933         def _do_download(self
, filename
, info_dict
): 
 934                 url 
= info_dict
['url'] 
 935                 player_url 
= info_dict
.get('player_url', None) 
 937                 # Check file already present 
 938                 if self
.params
.get('continuedl', False) and os
.path
.isfile(_encodeFilename(filename
)) and not self
.params
.get('nopart', False): 
 939                         self
.report_file_already_downloaded(filename
) 
 942                 # Attempt to download using rtmpdump 
 943                 if url
.startswith('rtmp'): 
 944                         return self
._download
_with
_rtmpdump
(filename
, url
, player_url
) 
 946                 tmpfilename 
= self
.temp_name(filename
) 
 949                 # Do not include the Accept-Encoding header 
 950                 headers 
= {'Youtubedl-no-compression': 'True'} 
 951                 basic_request 
= urllib2
.Request(url
, None, headers
) 
 952                 request 
= urllib2
.Request(url
, None, headers
) 
 954                 # Establish possible resume length 
 955                 if os
.path
.isfile(_encodeFilename(tmpfilename
)): 
 956                         resume_len 
= os
.path
.getsize(_encodeFilename(tmpfilename
)) 
 962                         if self
.params
.get('continuedl', False): 
 963                                 self
.report_resuming_byte(resume_len
) 
 964                                 request
.add_header('Range','bytes=%d-' % resume_len
) 
 970                 retries 
= self
.params
.get('retries', 0) 
 971                 while count 
<= retries
: 
 972                         # Establish connection 
 974                                 if count 
== 0 and 'urlhandle' in info_dict
: 
 975                                         data 
= info_dict
['urlhandle'] 
 976                                 data 
= urllib2
.urlopen(request
) 
 978                         except (urllib2
.HTTPError
, ), err
: 
 979                                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 980                                         # Unexpected HTTP error 
 982                                 elif err
.code 
== 416: 
 983                                         # Unable to resume (requested range not satisfiable) 
 985                                                 # Open the connection again without the range header 
 986                                                 data 
= urllib2
.urlopen(basic_request
) 
 987                                                 content_length 
= data
.info()['Content-Length'] 
 988                                         except (urllib2
.HTTPError
, ), err
: 
 989                                                 if err
.code 
< 500 or err
.code 
>= 600: 
 992                                                 # Examine the reported length 
 993                                                 if (content_length 
is not None and 
 994                                                                 (resume_len 
- 100 < long(content_length
) < resume_len 
+ 100)): 
 995                                                         # The file had already been fully downloaded. 
 996                                                         # Explanation to the above condition: in issue #175 it was revealed that 
 997                                                         # YouTube sometimes adds or removes a few bytes from the end of the file, 
 998                                                         # changing the file size slightly and causing problems for some users. So 
 999                                                         # I decided to implement a suggested change and consider the file 
1000                                                         # completely downloaded if the file size differs less than 100 bytes from 
1001                                                         # the one in the hard drive. 
1002                                                         self
.report_file_already_downloaded(filename
) 
1003                                                         self
.try_rename(tmpfilename
, filename
) 
1006                                                         # The length does not match, we start the download over 
1007                                                         self
.report_unable_to_resume() 
1012                         if count 
<= retries
: 
1013                                 self
.report_retry(count
, retries
) 
1016                         self
.trouble(u
'ERROR: giving up after %s retries' % retries
) 
1019                 data_len 
= data
.info().get('Content-length', None) 
1020                 if data_len 
is not None: 
1021                         data_len 
= long(data_len
) + resume_len
 
1022                 data_len_str 
= self
.format_bytes(data_len
) 
1023                 byte_counter 
= 0 + resume_len
 
1027                         # Download and write 
1028                         before 
= time
.time() 
1029                         data_block 
= data
.read(block_size
) 
1031                         if len(data_block
) == 0: 
1033                         byte_counter 
+= len(data_block
) 
1035                         # Open file just in time 
1038                                         (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
1039                                         assert stream 
is not None 
1040                                         filename 
= self
.undo_temp_name(tmpfilename
) 
1041                                         self
.report_destination(filename
) 
1042                                 except (OSError, IOError), err
: 
1043                                         self
.trouble(u
'ERROR: unable to open for writing: %s' % str(err
)) 
1046                                 stream
.write(data_block
) 
1047                         except (IOError, OSError), err
: 
1048                                 self
.trouble(u
'\nERROR: unable to write data: %s' % str(err
)) 
1050                         block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
1053                         speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
1054                         if data_len 
is None: 
1055                                 self
.report_progress('Unknown %', data_len_str
, speed_str
, 'Unknown ETA') 
1057                                 percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
1058                                 eta_str 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
1059                                 self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
1062                         self
.slow_down(start
, byte_counter 
- resume_len
) 
1065                         self
.trouble(u
'\nERROR: Did not get any data blocks') 
1068                 self
.report_finish() 
1069                 if data_len 
is not None and byte_counter 
!= data_len
: 
1070                         raise ContentTooShortError(byte_counter
, long(data_len
)) 
1071                 self
.try_rename(tmpfilename
, filename
) 
1073                 # Update file modification time 
1074                 if self
.params
.get('updatetime', True): 
1075                         info_dict
['filetime'] = self
.try_utime(filename
, data
.info().get('last-modified', None)) 
class InfoExtractor(object):
	"""Information Extractor class.

	Information extractors are the classes that, given a URL, extract
	information from the video (or videos) the URL refers to. This
	information includes the real video URL, the video title and simplified
	title, author and others. The information is stored in a dictionary
	which is then passed to the FileDownloader. The FileDownloader
	processes this information possibly downloading the video to the file
	system, among other possible outcomes. The dictionaries must include
	the following fields:

	id:             Video identifier.
	url:            Final video URL.
	uploader:       Nickname of the video uploader.
	title:          Literal title.
	stitle:         Simplified title.
	ext:            Video filename extension.
	format:         Video format.
	player_url:     SWF Player URL (may be None).

	The following fields are optional. Their primary purpose is to allow
	youtube-dl to serve as the backend for a video search function, such
	as the one in youtube2mp3.  They are only used when their respective
	forced printing functions are called:

	thumbnail:      Full URL to a video thumbnail image.
	description:    One-line video description.

	Subclasses of this one should re-define the _real_initialize() and
	_real_extract() methods and define a _VALID_URL regexp.
	Probably, they should also be added to the list of extractors.
	"""

	_ready = False		# True once _real_initialize() has run
	_downloader = None	# FileDownloader instance, set via set_downloader()

	def __init__(self, downloader=None):
		"""Constructor. Receives an optional downloader."""
		self._ready = False
		self.set_downloader(downloader)

	def suitable(self, url):
		"""Receives a URL and returns True if suitable for this IE."""
		return re.match(self._VALID_URL, url) is not None

	def initialize(self):
		"""Initializes an instance (authentication, etc)."""
		if not self._ready:
			self._real_initialize()
			self._ready = True

	def extract(self, url):
		"""Extracts URL information and returns it in list of dicts."""
		self.initialize()
		return self._real_extract(url)

	def set_downloader(self, downloader):
		"""Sets the downloader for this IE."""
		self._downloader = downloader

	def _real_initialize(self):
		"""Real initialization process. Redefine in subclasses."""
		pass

	def _real_extract(self, url):
		"""Real extraction process. Redefine in subclasses."""
		pass
1150 class YoutubeIE(InfoExtractor
): 
1151         """Information extractor for youtube.com.""" 
1153         _VALID_URL 
= r
'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' 
1154         _LANG_URL 
= r
'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
1155         _LOGIN_URL 
= 'https://www.youtube.com/signup?next=/&gl=US&hl=en' 
1156         _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
1157         _NETRC_MACHINE 
= 'youtube' 
1158         # Listed in order of quality 
1159         _available_formats 
= ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] 
1160         _available_formats_prefer_free 
= ['38', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13'] 
1161         _video_extensions 
= { 
1167                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever 
1172         _video_dimensions 
= { 
1187         IE_NAME 
= u
'youtube' 
1189         def report_lang(self
): 
1190                 """Report attempt to set language.""" 
1191                 self
._downloader
.to_screen(u
'[youtube] Setting language') 
1193         def report_login(self
): 
1194                 """Report attempt to log in.""" 
1195                 self
._downloader
.to_screen(u
'[youtube] Logging in') 
1197         def report_age_confirmation(self
): 
1198                 """Report attempt to confirm age.""" 
1199                 self
._downloader
.to_screen(u
'[youtube] Confirming age') 
1201         def report_video_webpage_download(self
, video_id
): 
1202                 """Report attempt to download video webpage.""" 
1203                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video webpage' % video_id
) 
1205         def report_video_info_webpage_download(self
, video_id
): 
1206                 """Report attempt to download video info webpage.""" 
1207                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video info webpage' % video_id
) 
1209         def report_information_extraction(self
, video_id
): 
1210                 """Report attempt to extract video information.""" 
1211                 self
._downloader
.to_screen(u
'[youtube] %s: Extracting video information' % video_id
) 
1213         def report_unavailable_format(self
, video_id
, format
): 
1214                 """Report extracted video URL.""" 
1215                 self
._downloader
.to_screen(u
'[youtube] %s: Format %s not available' % (video_id
, format
)) 
1217         def report_rtmp_download(self
): 
1218                 """Indicate the download will use the RTMP protocol.""" 
1219                 self
._downloader
.to_screen(u
'[youtube] RTMP download detected') 
1221         def _print_formats(self
, formats
): 
1222                 print 'Available formats:' 
1224                         print '%s\t:\t%s\t[%s]' %(x
, self
._video
_extensions
.get(x
, 'flv'), self
._video
_dimensions
.get(x
, '???')) 
1226         def _real_initialize(self
): 
1227                 if self
._downloader 
is None: 
1232                 downloader_params 
= self
._downloader
.params
 
1234                 # Attempt to use provided username and password or .netrc data 
1235                 if downloader_params
.get('username', None) is not None: 
1236                         username 
= downloader_params
['username'] 
1237                         password 
= downloader_params
['password'] 
1238                 elif downloader_params
.get('usenetrc', False): 
1240                                 info 
= netrc
.netrc().authenticators(self
._NETRC
_MACHINE
) 
1241                                 if info 
is not None: 
1245                                         raise netrc
.NetrcParseError('No authenticators for %s' % self
._NETRC
_MACHINE
) 
1246                         except (IOError, netrc
.NetrcParseError
), err
: 
1247                                 self
._downloader
.to_stderr(u
'WARNING: parsing .netrc: %s' % str(err
)) 
1251                 request 
= urllib2
.Request(self
._LANG
_URL
) 
1254                         urllib2
.urlopen(request
).read() 
1255                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1256                         self
._downloader
.to_stderr(u
'WARNING: unable to set language: %s' % str(err
)) 
1259                 # No authentication to be performed 
1260                 if username 
is None: 
1265                                 'current_form': 'loginForm', 
1267                                 'action_login': 'Log In', 
1268                                 'username':     username
, 
1269                                 'password':     password
, 
1271                 request 
= urllib2
.Request(self
._LOGIN
_URL
, urllib
.urlencode(login_form
)) 
1274                         login_results 
= urllib2
.urlopen(request
).read() 
1275                         if re
.search(r
'(?i)<form[^>]* name="loginForm"', login_results
) is not None: 
1276                                 self
._downloader
.to_stderr(u
'WARNING: unable to log in: bad username or password') 
1278                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1279                         self
._downloader
.to_stderr(u
'WARNING: unable to log in: %s' % str(err
)) 
1285                                 'action_confirm':       'Confirm', 
1287                 request 
= urllib2
.Request(self
._AGE
_URL
, urllib
.urlencode(age_form
)) 
1289                         self
.report_age_confirmation() 
1290                         age_results 
= urllib2
.urlopen(request
).read() 
1291                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1292                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1295         def _real_extract(self
, url
): 
1296                 # Extract video id from URL 
1297                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1299                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1301                 video_id 
= mobj
.group(2) 
1304                 self
.report_video_webpage_download(video_id
) 
1305                 request 
= urllib2
.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
) 
1307                         video_webpage 
= urllib2
.urlopen(request
).read() 
1308                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1309                         self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
1312                 # Attempt to extract SWF player URL 
1313                 mobj 
= re
.search(r
'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
) 
1314                 if mobj 
is not None: 
1315                         player_url 
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1)) 
1320                 self
.report_video_info_webpage_download(video_id
) 
1321                 for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
1322                         video_info_url 
= ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
1323                                         % (video_id
, el_type
)) 
1324                         request 
= urllib2
.Request(video_info_url
) 
1326                                 video_info_webpage 
= urllib2
.urlopen(request
).read() 
1327                                 video_info 
= parse_qs(video_info_webpage
) 
1328                                 if 'token' in video_info
: 
1330                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1331                                 self
._downloader
.trouble(u
'ERROR: unable to download video info webpage: %s' % str(err
)) 
1333                 if 'token' not in video_info
: 
1334                         if 'reason' in video_info
: 
1335                                 self
._downloader
.trouble(u
'ERROR: YouTube said: %s' % video_info
['reason'][0].decode('utf-8')) 
1337                                 self
._downloader
.trouble(u
'ERROR: "token" parameter not in video info for unknown reason') 
1340                 # Start extracting information 
1341                 self
.report_information_extraction(video_id
) 
1344                 if 'author' not in video_info
: 
1345                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1347                 video_uploader 
= urllib
.unquote_plus(video_info
['author'][0]) 
1350                 if 'title' not in video_info
: 
1351                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1353                 video_title 
= urllib
.unquote_plus(video_info
['title'][0]) 
1354                 video_title 
= video_title
.decode('utf-8') 
1355                 video_title 
= sanitize_title(video_title
) 
1358                 simple_title 
= _simplify_title(video_title
) 
1361                 if 'thumbnail_url' not in video_info
: 
1362                         self
._downloader
.trouble(u
'WARNING: unable to extract video thumbnail') 
1363                         video_thumbnail 
= '' 
1364                 else:   # don't panic if we can't find it 
1365                         video_thumbnail 
= urllib
.unquote_plus(video_info
['thumbnail_url'][0]) 
1369                 mobj 
= re
.search(r
'id="eow-date.*?>(.*?)</span>', video_webpage
, re
.DOTALL
) 
1370                 if mobj 
is not None: 
1371                         upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
1372                         format_expressions 
= ['%d %B %Y', '%B %d %Y', '%b %d %Y'] 
1373                         for expression 
in format_expressions
: 
1375                                         upload_date 
= datetime
.datetime
.strptime(upload_date
, expression
).strftime('%Y%m%d') 
1383                         video_description 
= u
'No description available.' 
1384                         mobj 
= re
.search(r
'<meta name="description" content="(.*?)">', video_webpage
) 
1385                         if mobj 
is not None: 
1386                                 video_description 
= mobj
.group(1).decode('utf-8') 
1388                         html_parser 
= lxml
.etree
.HTMLParser(encoding
='utf-8') 
1389                         vwebpage_doc 
= lxml
.etree
.parse(StringIO
.StringIO(video_webpage
), html_parser
) 
1390                         video_description 
= u
''.join(vwebpage_doc
.xpath('id("eow-description")//text()')) 
1391                         # TODO use another parser 
1394                 video_token 
= urllib
.unquote_plus(video_info
['token'][0]) 
1396                 # Decide which formats to download 
1397                 req_format 
= self
._downloader
.params
.get('format', None) 
1399                 if 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
1400                         self
.report_rtmp_download() 
1401                         video_url_list 
= [(None, video_info
['conn'][0])] 
1402                 elif 'url_encoded_fmt_stream_map' in video_info 
and len(video_info
['url_encoded_fmt_stream_map']) >= 1: 
1403                         url_data_strs 
= video_info
['url_encoded_fmt_stream_map'][0].split(',') 
1404                         url_data 
= [parse_qs(uds
) for uds 
in url_data_strs
] 
1405                         url_data 
= filter(lambda ud
: 'itag' in ud 
and 'url' in ud
, url_data
) 
1406                         url_map 
= dict((ud
['itag'][0], ud
['url'][0]) for ud 
in url_data
) 
1408                         format_limit 
= self
._downloader
.params
.get('format_limit', None) 
1409                         available_formats 
= self
._available
_formats
_prefer
_free 
if self
._downloader
.params
.get('prefer_free_formats', False) else self
._available
_formats
 
1410                         if format_limit 
is not None and format_limit 
in available_formats
: 
1411                                 format_list 
= available_formats
[available_formats
.index(format_limit
):] 
1413                                 format_list 
= available_formats
 
1414                         existing_formats 
= [x 
for x 
in format_list 
if x 
in url_map
] 
1415                         if len(existing_formats
) == 0: 
1416                                 self
._downloader
.trouble(u
'ERROR: no known formats available for video') 
1418                         if self
._downloader
.params
.get('listformats', None): 
1419                                 self
._print
_formats
(existing_formats
) 
1421                         if req_format 
is None or req_format 
== 'best': 
1422                                 video_url_list 
= [(existing_formats
[0], url_map
[existing_formats
[0]])] # Best quality 
1423                         elif req_format 
== 'worst': 
1424                                 video_url_list 
= [(existing_formats
[len(existing_formats
)-1], url_map
[existing_formats
[len(existing_formats
)-1]])] # worst quality 
1425                         elif req_format 
in ('-1', 'all'): 
1426                                 video_url_list 
= [(f
, url_map
[f
]) for f 
in existing_formats
] # All formats 
1428                                 # Specific formats. We pick the first in a slash-delimeted sequence. 
1429                                 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'. 
1430                                 req_formats 
= req_format
.split('/') 
1431                                 video_url_list 
= None 
1432                                 for rf 
in req_formats
: 
1434                                                 video_url_list 
= [(rf
, url_map
[rf
])] 
1436                                 if video_url_list 
is None: 
1437                                         self
._downloader
.trouble(u
'ERROR: requested format not available') 
1440                         self
._downloader
.trouble(u
'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') 
1443                 for format_param
, video_real_url 
in video_url_list
: 
1444                         # At this point we have a new video 
1445                         self
._downloader
.increment_downloads() 
1448                         video_extension 
= self
._video
_extensions
.get(format_param
, 'flv') 
1451                                 # Process video information 
1452                                 self
._downloader
.process_info({ 
1453                                         'id':           video_id
.decode('utf-8'), 
1454                                         'url':          video_real_url
.decode('utf-8'), 
1455                                         'uploader':     video_uploader
.decode('utf-8'), 
1456                                         'upload_date':  upload_date
, 
1457                                         'title':        video_title
, 
1458                                         'stitle':       simple_title
, 
1459                                         'ext':          video_extension
.decode('utf-8'), 
1460                                         'format':       (format_param 
is None and u
'NA' or format_param
.decode('utf-8')), 
1461                                         'thumbnail':    video_thumbnail
.decode('utf-8'), 
1462                                         'description':  video_description
, 
1463                                         'player_url':   player_url
, 
1465                         except UnavailableVideoError
, err
: 
1466                                 self
._downloader
.trouble(u
'\nERROR: unable to download video') 
class MetacafeIE(InfoExtractor):
	"""Information Extractor for metacafe.com."""

	_VALID_URL = r'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'
	_DISCLAIMER = 'http://www.metacafe.com/family_filter/'
	_FILTER_POST = 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'
	# YoutubeIE instance used for yt-hosted metacafe videos; set in __init__
	_youtube_ie = None
	IE_NAME = u'metacafe'
1478         def __init__(self
, youtube_ie
, downloader
=None): 
1479                 InfoExtractor
.__init
__(self
, downloader
) 
1480                 self
._youtube
_ie 
= youtube_ie
 
1482         def report_disclaimer(self
): 
1483                 """Report disclaimer retrieval.""" 
1484                 self
._downloader
.to_screen(u
'[metacafe] Retrieving disclaimer') 
1486         def report_age_confirmation(self
): 
1487                 """Report attempt to confirm age.""" 
1488                 self
._downloader
.to_screen(u
'[metacafe] Confirming age') 
1490         def report_download_webpage(self
, video_id
): 
1491                 """Report webpage download.""" 
1492                 self
._downloader
.to_screen(u
'[metacafe] %s: Downloading webpage' % video_id
) 
1494         def report_extraction(self
, video_id
): 
1495                 """Report information extraction.""" 
1496                 self
._downloader
.to_screen(u
'[metacafe] %s: Extracting information' % video_id
) 
1498         def _real_initialize(self
): 
1499                 # Retrieve disclaimer 
1500                 request 
= urllib2
.Request(self
._DISCLAIMER
) 
1502                         self
.report_disclaimer() 
1503                         disclaimer 
= urllib2
.urlopen(request
).read() 
1504                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1505                         self
._downloader
.trouble(u
'ERROR: unable to retrieve disclaimer: %s' % str(err
)) 
1511                         'submit': "Continue - I'm over 18", 
1513                 request 
= urllib2
.Request(self
._FILTER
_POST
, urllib
.urlencode(disclaimer_form
)) 
1515                         self
.report_age_confirmation() 
1516                         disclaimer 
= urllib2
.urlopen(request
).read() 
1517                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1518                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1521         def _real_extract(self
, url
): 
1522                 # Extract id and simplified title from URL 
1523                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1525                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1528                 video_id 
= mobj
.group(1) 
1530                 # Check if video comes from YouTube 
1531                 mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
1532                 if mobj2 
is not None: 
1533                         self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % mobj2
.group(1)) 
1536                 # At this point we have a new video 
1537                 self
._downloader
.increment_downloads() 
1539                 simple_title 
= mobj
.group(2).decode('utf-8') 
1541                 # Retrieve video webpage to extract further information 
1542                 request 
= urllib2
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
1544                         self
.report_download_webpage(video_id
) 
1545                         webpage 
= urllib2
.urlopen(request
).read() 
1546                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1547                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1550                 # Extract URL, uploader and title from webpage 
1551                 self
.report_extraction(video_id
) 
1552                 mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
1553                 if mobj 
is not None: 
1554                         mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1555                         video_extension 
= mediaURL
[-3:] 
1557                         # Extract gdaKey if available 
1558                         mobj 
= re
.search(r
'(?m)&gdaKey=(.*?)&', webpage
) 
1560                                 video_url 
= mediaURL
 
1562                                 gdaKey 
= mobj
.group(1) 
1563                                 video_url 
= '%s?__gda__=%s' % (mediaURL
, gdaKey
) 
1565                         mobj 
= re
.search(r
' name="flashvars" value="(.*?)"', webpage
) 
1567                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1569                         vardict 
= parse_qs(mobj
.group(1)) 
1570                         if 'mediaData' not in vardict
: 
1571                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1573                         mobj 
= re
.search(r
'"mediaURL":"(http.*?)","key":"(.*?)"', vardict
['mediaData'][0]) 
1575                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1577                         mediaURL 
= mobj
.group(1).replace('\\/', '/') 
1578                         video_extension 
= mediaURL
[-3:] 
1579                         video_url 
= '%s?__gda__=%s' % (mediaURL
, mobj
.group(2)) 
1581                 mobj 
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
) 
1583                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1585                 video_title 
= mobj
.group(1).decode('utf-8') 
1586                 video_title 
= sanitize_title(video_title
) 
1588                 mobj 
= re
.search(r
'(?ms)By:\s*<a .*?>(.+?)<', webpage
) 
1590                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1592                 video_uploader 
= mobj
.group(1) 
1595                         # Process video information 
1596                         self
._downloader
.process_info({ 
1597                                 'id':           video_id
.decode('utf-8'), 
1598                                 'url':          video_url
.decode('utf-8'), 
1599                                 'uploader':     video_uploader
.decode('utf-8'), 
1600                                 'upload_date':  u
'NA', 
1601                                 'title':        video_title
, 
1602                                 'stitle':       simple_title
, 
1603                                 'ext':          video_extension
.decode('utf-8'), 
1607                 except UnavailableVideoError
: 
1608                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1611 class DailymotionIE(InfoExtractor
): 
1612         """Information Extractor for Dailymotion""" 
1614         _VALID_URL 
= r
'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' 
1615         IE_NAME 
= u
'dailymotion' 
1617         def __init__(self
, downloader
=None): 
1618                 InfoExtractor
.__init
__(self
, downloader
) 
1620         def report_download_webpage(self
, video_id
): 
1621                 """Report webpage download.""" 
1622                 self
._downloader
.to_screen(u
'[dailymotion] %s: Downloading webpage' % video_id
) 
1624         def report_extraction(self
, video_id
): 
1625                 """Report information extraction.""" 
1626                 self
._downloader
.to_screen(u
'[dailymotion] %s: Extracting information' % video_id
) 
1628         def _real_extract(self
, url
): 
1629                 # Extract id and simplified title from URL 
1630                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1632                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1635                 # At this point we have a new video 
1636                 self
._downloader
.increment_downloads() 
1637                 video_id 
= mobj
.group(1) 
1639                 video_extension 
= 'flv' 
1641                 # Retrieve video webpage to extract further information 
1642                 request 
= urllib2
.Request(url
) 
1643                 request
.add_header('Cookie', 'family_filter=off') 
1645                         self
.report_download_webpage(video_id
) 
1646                         webpage 
= urllib2
.urlopen(request
).read() 
1647                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1648                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1651                 # Extract URL, uploader and title from webpage 
1652                 self
.report_extraction(video_id
) 
1653                 mobj 
= re
.search(r
'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage
) 
1655                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1657                 sequence 
= urllib
.unquote(mobj
.group(1)) 
1658                 mobj 
= re
.search(r
',\"sdURL\"\:\"([^\"]+?)\",', sequence
) 
1660                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1662                 mediaURL 
= urllib
.unquote(mobj
.group(1)).replace('\\', '') 
1664                 # if needed add http://www.dailymotion.com/ if relative URL 
1666                 video_url 
= mediaURL
 
1668                 mobj 
= re
.search(r
'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage
) 
1670                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1672                 video_title 
= _unescapeHTML(mobj
.group('title').decode('utf-8')) 
1673                 video_title 
= sanitize_title(video_title
) 
1674                 simple_title 
= _simplify_title(video_title
) 
1676                 mobj 
= re
.search(r
'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage
) 
1678                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1680                 video_uploader 
= mobj
.group(1) 
1683                         # Process video information 
1684                         self
._downloader
.process_info({ 
1685                                 'id':           video_id
.decode('utf-8'), 
1686                                 'url':          video_url
.decode('utf-8'), 
1687                                 'uploader':     video_uploader
.decode('utf-8'), 
1688                                 'upload_date':  u
'NA', 
1689                                 'title':        video_title
, 
1690                                 'stitle':       simple_title
, 
1691                                 'ext':          video_extension
.decode('utf-8'), 
1695                 except UnavailableVideoError
: 
1696                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1699 class GoogleIE(InfoExtractor
): 
1700         """Information extractor for video.google.com.""" 
1702         _VALID_URL 
= r
'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' 
1703         IE_NAME 
= u
'video.google' 
1705         def __init__(self
, downloader
=None): 
1706                 InfoExtractor
.__init
__(self
, downloader
) 
1708         def report_download_webpage(self
, video_id
): 
1709                 """Report webpage download.""" 
1710                 self
._downloader
.to_screen(u
'[video.google] %s: Downloading webpage' % video_id
) 
1712         def report_extraction(self
, video_id
): 
1713                 """Report information extraction.""" 
1714                 self
._downloader
.to_screen(u
'[video.google] %s: Extracting information' % video_id
) 
1716         def _real_extract(self
, url
): 
1717                 # Extract id from URL 
1718                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1720                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1723                 # At this point we have a new video 
1724                 self
._downloader
.increment_downloads() 
1725                 video_id 
= mobj
.group(1) 
1727                 video_extension 
= 'mp4' 
1729                 # Retrieve video webpage to extract further information 
1730                 request 
= urllib2
.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id
) 
1732                         self
.report_download_webpage(video_id
) 
1733                         webpage 
= urllib2
.urlopen(request
).read() 
1734                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1735                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1738                 # Extract URL, uploader, and title from webpage 
1739                 self
.report_extraction(video_id
) 
1740                 mobj 
= re
.search(r
"download_url:'([^']+)'", webpage
) 
1742                         video_extension 
= 'flv' 
1743                         mobj 
= re
.search(r
"(?i)videoUrl\\x3d(.+?)\\x26", webpage
) 
1745                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1747                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1748                 mediaURL 
= mediaURL
.replace('\\x3d', '\x3d') 
1749                 mediaURL 
= mediaURL
.replace('\\x26', '\x26') 
1751                 video_url 
= mediaURL
 
1753                 mobj 
= re
.search(r
'<title>(.*)</title>', webpage
) 
1755                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1757                 video_title 
= mobj
.group(1).decode('utf-8') 
1758                 video_title 
= sanitize_title(video_title
) 
1759                 simple_title 
= _simplify_title(video_title
) 
1761                 # Extract video description 
1762                 mobj 
= re
.search(r
'<span id=short-desc-content>([^<]*)</span>', webpage
) 
1764                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1766                 video_description 
= mobj
.group(1).decode('utf-8') 
1767                 if not video_description
: 
1768                         video_description 
= 'No description available.' 
1770                 # Extract video thumbnail 
1771                 if self
._downloader
.params
.get('forcethumbnail', False): 
1772                         request 
= urllib2
.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id
))) 
1774                                 webpage 
= urllib2
.urlopen(request
).read() 
1775                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1776                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1778                         mobj 
= re
.search(r
'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage
) 
1780                                 self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1782                         video_thumbnail 
= mobj
.group(1) 
1783                 else:   # we need something to pass to process_info 
1784                         video_thumbnail 
= '' 
1787                         # Process video information 
1788                         self
._downloader
.process_info({ 
1789                                 'id':           video_id
.decode('utf-8'), 
1790                                 'url':          video_url
.decode('utf-8'), 
1792                                 'upload_date':  u
'NA', 
1793                                 'title':        video_title
, 
1794                                 'stitle':       simple_title
, 
1795                                 'ext':          video_extension
.decode('utf-8'), 
1799                 except UnavailableVideoError
: 
1800                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1803 class PhotobucketIE(InfoExtractor
): 
1804         """Information extractor for photobucket.com.""" 
1806         _VALID_URL 
= r
'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' 
1807         IE_NAME 
= u
'photobucket' 
1809         def __init__(self
, downloader
=None): 
1810                 InfoExtractor
.__init
__(self
, downloader
) 
1812         def report_download_webpage(self
, video_id
): 
1813                 """Report webpage download.""" 
1814                 self
._downloader
.to_screen(u
'[photobucket] %s: Downloading webpage' % video_id
) 
1816         def report_extraction(self
, video_id
): 
1817                 """Report information extraction.""" 
1818                 self
._downloader
.to_screen(u
'[photobucket] %s: Extracting information' % video_id
) 
1820         def _real_extract(self
, url
): 
1821                 # Extract id from URL 
1822                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1824                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1827                 # At this point we have a new video 
1828                 self
._downloader
.increment_downloads() 
1829                 video_id 
= mobj
.group(1) 
1831                 video_extension 
= 'flv' 
1833                 # Retrieve video webpage to extract further information 
1834                 request 
= urllib2
.Request(url
) 
1836                         self
.report_download_webpage(video_id
) 
1837                         webpage 
= urllib2
.urlopen(request
).read() 
1838                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1839                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1842                 # Extract URL, uploader, and title from webpage 
1843                 self
.report_extraction(video_id
) 
1844                 mobj 
= re
.search(r
'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage
) 
1846                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1848                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1850                 video_url 
= mediaURL
 
1852                 mobj 
= re
.search(r
'<title>(.*) video by (.*) - Photobucket</title>', webpage
) 
1854                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1856                 video_title 
= mobj
.group(1).decode('utf-8') 
1857                 video_title 
= sanitize_title(video_title
) 
1858                 simple_title 
= _simplify_title(vide_title
) 
1860                 video_uploader 
= mobj
.group(2).decode('utf-8') 
1863                         # Process video information 
1864                         self
._downloader
.process_info({ 
1865                                 'id':           video_id
.decode('utf-8'), 
1866                                 'url':          video_url
.decode('utf-8'), 
1867                                 'uploader':     video_uploader
, 
1868                                 'upload_date':  u
'NA', 
1869                                 'title':        video_title
, 
1870                                 'stitle':       simple_title
, 
1871                                 'ext':          video_extension
.decode('utf-8'), 
1875                 except UnavailableVideoError
: 
1876                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1879 class YahooIE(InfoExtractor
): 
1880         """Information extractor for video.yahoo.com.""" 
1882         # _VALID_URL matches all Yahoo! Video URLs 
1883         # _VPAGE_URL matches only the extractable '/watch/' URLs 
1884         _VALID_URL 
= r
'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' 
1885         _VPAGE_URL 
= r
'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
1886         IE_NAME 
= u
'video.yahoo' 
1888         def __init__(self
, downloader
=None): 
1889                 InfoExtractor
.__init
__(self
, downloader
) 
1891         def report_download_webpage(self
, video_id
): 
1892                 """Report webpage download.""" 
1893                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Downloading webpage' % video_id
) 
1895         def report_extraction(self
, video_id
): 
1896                 """Report information extraction.""" 
1897                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Extracting information' % video_id
) 
1899         def _real_extract(self
, url
, new_video
=True): 
1900                 # Extract ID from URL 
1901                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1903                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1906                 # At this point we have a new video 
1907                 self
._downloader
.increment_downloads() 
1908                 video_id 
= mobj
.group(2) 
1909                 video_extension 
= 'flv' 
1911                 # Rewrite valid but non-extractable URLs as 
1912                 # extractable English language /watch/ URLs 
1913                 if re
.match(self
._VPAGE
_URL
, url
) is None: 
1914                         request 
= urllib2
.Request(url
) 
1916                                 webpage 
= urllib2
.urlopen(request
).read() 
1917                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1918                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1921                         mobj 
= re
.search(r
'\("id", "([0-9]+)"\);', webpage
) 
1923                                 self
._downloader
.trouble(u
'ERROR: Unable to extract id field') 
1925                         yahoo_id 
= mobj
.group(1) 
1927                         mobj 
= re
.search(r
'\("vid", "([0-9]+)"\);', webpage
) 
1929                                 self
._downloader
.trouble(u
'ERROR: Unable to extract vid field') 
1931                         yahoo_vid 
= mobj
.group(1) 
1933                         url 
= 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid
, yahoo_id
) 
1934                         return self
._real
_extract
(url
, new_video
=False) 
1936                 # Retrieve video webpage to extract further information 
1937                 request 
= urllib2
.Request(url
) 
1939                         self
.report_download_webpage(video_id
) 
1940                         webpage 
= urllib2
.urlopen(request
).read() 
1941                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1942                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1945                 # Extract uploader and title from webpage 
1946                 self
.report_extraction(video_id
) 
1947                 mobj 
= re
.search(r
'<meta name="title" content="(.*)" />', webpage
) 
1949                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1951                 video_title 
= mobj
.group(1).decode('utf-8') 
1952                 simple_title 
= _simplify_title(video_title
) 
1954                 mobj 
= re
.search(r
'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage
) 
1956                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
1958                 video_uploader 
= mobj
.group(1).decode('utf-8') 
1960                 # Extract video thumbnail 
1961                 mobj 
= re
.search(r
'<link rel="image_src" href="(.*)" />', webpage
) 
1963                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1965                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
1967                 # Extract video description 
1968                 mobj 
= re
.search(r
'<meta name="description" content="(.*)" />', webpage
) 
1970                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1972                 video_description 
= mobj
.group(1).decode('utf-8') 
1973                 if not video_description
: 
1974                         video_description 
= 'No description available.' 
1976                 # Extract video height and width 
1977                 mobj 
= re
.search(r
'<meta name="video_height" content="([0-9]+)" />', webpage
) 
1979                         self
._downloader
.trouble(u
'ERROR: unable to extract video height') 
1981                 yv_video_height 
= mobj
.group(1) 
1983                 mobj 
= re
.search(r
'<meta name="video_width" content="([0-9]+)" />', webpage
) 
1985                         self
._downloader
.trouble(u
'ERROR: unable to extract video width') 
1987                 yv_video_width 
= mobj
.group(1) 
1989                 # Retrieve video playlist to extract media URL 
1990                 # I'm not completely sure what all these options are, but we 
1991                 # seem to need most of them, otherwise the server sends a 401. 
1992                 yv_lg 
= 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents 
1993                 yv_bitrate 
= '700'  # according to Wikipedia this is hard-coded 
1994                 request 
= urllib2
.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id 
+ 
1995                                 '&tech=flash&mode=playlist&lg=' + yv_lg 
+ '&bitrate=' + yv_bitrate 
+ '&vidH=' + yv_video_height 
+ 
1996                                 '&vidW=' + yv_video_width 
+ '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') 
1998                         self
.report_download_webpage(video_id
) 
1999                         webpage 
= urllib2
.urlopen(request
).read() 
2000                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
2001                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
2004                 # Extract media URL from playlist XML 
2005                 mobj 
= re
.search(r
'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage
) 
2007                         self
._downloader
.trouble(u
'ERROR: Unable to extract media URL') 
2009                 video_url 
= urllib
.unquote(mobj
.group(1) + mobj
.group(2)).decode('utf-8') 
2010                 video_url 
= re
.sub(r
'(?u)&(.+?);', htmlentity_transform
, video_url
) 
2013                         # Process video information 
2014                         self
._downloader
.process_info({ 
2015                                 'id':           video_id
.decode('utf-8'), 
2017                                 'uploader':     video_uploader
, 
2018                                 'upload_date':  u
'NA', 
2019                                 'title':        video_title
, 
2020                                 'stitle':       simple_title
, 
2021                                 'ext':          video_extension
.decode('utf-8'), 
2022                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
2023                                 'description':  video_description
, 
2024                                 'thumbnail':    video_thumbnail
, 
2027                 except UnavailableVideoError
: 
2028                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
2031 class VimeoIE(InfoExtractor
): 
2032         """Information extractor for vimeo.com.""" 
2034         # _VALID_URL matches Vimeo URLs 
2035         _VALID_URL 
= r
'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' 
2038         def __init__(self
, downloader
=None): 
2039                 InfoExtractor
.__init
__(self
, downloader
) 
2041         def report_download_webpage(self
, video_id
): 
2042                 """Report webpage download.""" 
2043                 self
._downloader
.to_screen(u
'[vimeo] %s: Downloading webpage' % video_id
) 
2045         def report_extraction(self
, video_id
): 
2046                 """Report information extraction.""" 
2047                 self
._downloader
.to_screen(u
'[vimeo] %s: Extracting information' % video_id
) 
2049         def _real_extract(self
, url
, new_video
=True): 
2050                 # Extract ID from URL 
2051                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
2053                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
2056                 # At this point we have a new video 
2057                 self
._downloader
.increment_downloads() 
2058                 video_id 
= mobj
.group(1) 
2060                 # Retrieve video webpage to extract further information 
2061                 request 
= urllib2
.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id
, None, std_headers
) 
2063                         self
.report_download_webpage(video_id
) 
2064                         webpage 
= urllib2
.urlopen(request
).read() 
2065                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
2066                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
2069                 # Now we begin extracting as much information as we can from what we 
2070                 # retrieved. First we extract the information common to all extractors, 
2071                 # and latter we extract those that are Vimeo specific. 
2072                 self
.report_extraction(video_id
) 
2075                 mobj 
= re
.search(r
'<caption>(.*?)</caption>', webpage
) 
2077                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
2079                 video_title 
= mobj
.group(1).decode('utf-8') 
2080                 simple_title 
= _simplify_title(video_title
) 
2083                 mobj 
= re
.search(r
'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage
) 
2085                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
2087                 video_uploader 
= mobj
.group(1).decode('utf-8') 
2089                 # Extract video thumbnail 
2090                 mobj 
= re
.search(r
'<thumbnail>(.*?)</thumbnail>', webpage
) 
2092                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
2094                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
2096                 # # Extract video description 
2097                 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage) 
2099                 #       self._downloader.trouble(u'ERROR: unable to extract video description') 
2101                 # video_description = mobj.group(1).decode('utf-8') 
2102                 # if not video_description: video_description = 'No description available.' 
2103                 video_description 
= 'Foo.' 
2105                 # Vimeo specific: extract request signature 
2106                 mobj 
= re
.search(r
'<request_signature>(.*?)</request_signature>', webpage
) 
2108                         self
._downloader
.trouble(u
'ERROR: unable to extract request signature') 
2110                 sig 
= mobj
.group(1).decode('utf-8') 
2112                 # Vimeo specific: extract video quality information 
2113                 mobj 
= re
.search(r
'<isHD>(\d+)</isHD>', webpage
) 
2115                         self
._downloader
.trouble(u
'ERROR: unable to extract video quality information') 
2117                 quality 
= mobj
.group(1).decode('utf-8') 
2119                 if int(quality
) == 1: 
2124                 # Vimeo specific: Extract request signature expiration 
2125                 mobj 
= re
.search(r
'<request_signature_expires>(.*?)</request_signature_expires>', webpage
) 
2127                         self
._downloader
.trouble(u
'ERROR: unable to extract request signature expiration') 
2129                 sig_exp 
= mobj
.group(1).decode('utf-8') 
2131                 video_url 
= "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id
, sig
, sig_exp
, quality
) 
2134                         # Process video information 
2135                         self
._downloader
.process_info({ 
2136                                 'id':           video_id
.decode('utf-8'), 
2138                                 'uploader':     video_uploader
, 
2139                                 'upload_date':  u
'NA', 
2140                                 'title':        video_title
, 
2141                                 'stitle':       simple_title
, 
2143                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
2144                                 'description':  video_description
, 
2145                                 'thumbnail':    video_thumbnail
, 
2146                                 'description':  video_description
, 
2149                 except UnavailableVideoError
: 
2150                         self
._downloader
.trouble(u
'ERROR: unable to download video') 
2153 class GenericIE(InfoExtractor
): 
2154         """Generic last-resort information extractor.""" 
2157         IE_NAME 
= u
'generic' 
2159         def __init__(self
, downloader
=None): 
2160                 InfoExtractor
.__init
__(self
, downloader
) 
2162         def report_download_webpage(self
, video_id
): 
2163                 """Report webpage download.""" 
2164                 self
._downloader
.to_screen(u
'WARNING: Falling back on generic information extractor.') 
2165                 self
._downloader
.to_screen(u
'[generic] %s: Downloading webpage' % video_id
) 
2167         def report_extraction(self
, video_id
): 
2168                 """Report information extraction.""" 
2169                 self
._downloader
.to_screen(u
'[generic] %s: Extracting information' % video_id
) 
2171         def _real_extract(self
, url
): 
2172                 # At this point we have a new video 
2173                 self
._downloader
.increment_downloads() 
2175                 video_id 
= url
.split('/')[-1] 
2176                 request 
= urllib2
.Request(url
) 
2178                         self
.report_download_webpage(video_id
) 
2179                         webpage 
= urllib2
.urlopen(request
).read() 
2180                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
2181                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
2183                 except ValueError, err
: 
2184                         # since this is the last-resort InfoExtractor, if 
2185                         # this error is thrown, it'll be thrown here 
2186                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
2189                 self
.report_extraction(video_id
) 
2190                 # Start with something easy: JW Player in SWFObject 
2191                 mobj 
= re
.search(r
'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) 
2193                         # Broaden the search a little bit 
2194                         mobj = re.search(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage) 
2196                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
2199                 # It's possible that one of the regexes 
2200                 # matched, but returned an empty group: 
2201                 if mobj.group(1) is None: 
2202                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
2205                 video_url = urllib.unquote(mobj.group(1)) 
2206                 video_id = os.path.basename(video_url) 
2208                 # here's a fun little line of code for you: 
2209                 video_extension = os.path.splitext(video_id)[1][1:] 
2210                 video_id = os.path.splitext(video_id)[0] 
2212                 # it's tempting to parse this further, but you would 
2213                 # have to take into account all the variations like 
2214                 #   Video Title - Site Name 
2215                 #   Site Name | Video Title 
2216                 #   Video Title - Tagline | Site Name 
2217                 # and so on and so forth; it's just not practical 
2218                 mobj = re.search(r'<title>(.*)</title>', webpage) 
2220                         self._downloader.trouble(u'ERROR: unable to extract title') 
2222                 video_title = mobj.group(1).decode('utf-8') 
2223                 video_title = sanitize_title(video_title) 
2224                 simple_title = _simplify_title(video_title) 
2226                 # video uploader is domain name 
2227                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) 
2229                         self._downloader.trouble(u'ERROR: unable to extract title') 
2231                 video_uploader = mobj.group(1).decode('utf-8') 
2234                         # Process video information 
2235                         self._downloader.process_info({ 
2236                                 'id':           video_id.decode('utf-8'), 
2237                                 'url':          video_url.decode('utf-8'), 
2238                                 'uploader':     video_uploader, 
2239                                 'upload_date':  u'NA', 
2240                                 'title':        video_title, 
2241                                 'stitle':       simple_title, 
2242                                 'ext':          video_extension.decode('utf-8'), 
2246                 except UnavailableVideoError, err: 
2247                         self._downloader.trouble(u'\nERROR: unable to download video') 
2250 class YoutubeSearchIE(InfoExtractor): 
2251         """Information Extractor for YouTube search queries.""" 
2252         _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+' 
2253         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' 
2254         _VIDEO_INDICATOR = r'href="/watch
\?v
=.+?
"' 
2255         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
2257         _max_youtube_results = 1000 
2258         IE_NAME = u'youtube:search' 
2260         def __init__(self, youtube_ie, downloader=None): 
2261                 InfoExtractor.__init__(self, downloader) 
2262                 self._youtube_ie = youtube_ie 
2264         def report_download_page(self, query, pagenum): 
2265                 """Report attempt to download playlist page with given number.""" 
2266                 query = query.decode(preferredencoding()) 
2267                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) 
2269         def _real_initialize(self): 
2270                 self._youtube_ie.initialize() 
2272         def _real_extract(self, query): 
2273                 mobj = re.match(self._VALID_URL, query) 
2275                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2278                 prefix, query = query.split(':') 
2280                 query = query.encode('utf-8') 
2282                         self._download_n_results(query, 1) 
2284                 elif prefix == 'all': 
2285                         self._download_n_results(query, self._max_youtube_results) 
2291                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2293                                 elif n > self._max_youtube_results: 
2294                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) 
2295                                         n = self._max_youtube_results 
2296                                 self._download_n_results(query, n) 
2298                         except ValueError: # parsing prefix as integer fails 
2299                                 self._download_n_results(query, 1) 
2302         def _download_n_results(self, query, n): 
2303                 """Downloads a specified number of results for a query""" 
2306                 already_seen = set() 
2310                         self.report_download_page(query, pagenum) 
2311                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2312                         request = urllib2.Request(result_url) 
2314                                 page = urllib2.urlopen(request).read() 
2315                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2316                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2319                         # Extract video identifiers 
2320                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2321                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] 
2322                                 if video_id not in already_seen: 
2323                                         video_ids.append(video_id) 
2324                                         already_seen.add(video_id) 
2325                                         if len(video_ids) == n: 
2326                                                 # Specified n videos reached 
2327                                                 for id in video_ids: 
2328                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2331                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2332                                 for id in video_ids: 
2333                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2336                         pagenum = pagenum + 1 
2339 class GoogleSearchIE(InfoExtractor): 
2340         """Information Extractor for Google Video search queries.""" 
2341         _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+' 
2342         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' 
2343         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' 
2344         _MORE_PAGES_INDICATOR = r'<span>Next</span>' 
2346         _max_google_results = 1000 
2347         IE_NAME = u'video.google:search' 
2349         def __init__(self, google_ie, downloader=None): 
2350                 InfoExtractor.__init__(self, downloader) 
2351                 self._google_ie = google_ie 
2353         def report_download_page(self, query, pagenum): 
2354                 """Report attempt to download playlist page with given number.""" 
2355                 query = query.decode(preferredencoding()) 
2356                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) 
2358         def _real_initialize(self): 
2359                 self._google_ie.initialize() 
2361         def _real_extract(self, query): 
2362                 mobj = re.match(self._VALID_URL, query) 
2364                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2367                 prefix, query = query.split(':') 
2369                 query = query.encode('utf-8') 
2371                         self._download_n_results(query, 1) 
2373                 elif prefix == 'all': 
2374                         self._download_n_results(query, self._max_google_results) 
2380                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2382                                 elif n > self._max_google_results: 
2383                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) 
2384                                         n = self._max_google_results 
2385                                 self._download_n_results(query, n) 
2387                         except ValueError: # parsing prefix as integer fails 
2388                                 self._download_n_results(query, 1) 
2391         def _download_n_results(self, query, n): 
2392                 """Downloads a specified number of results for a query""" 
2395                 already_seen = set() 
2399                         self.report_download_page(query, pagenum) 
2400                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2401                         request = urllib2.Request(result_url) 
2403                                 page = urllib2.urlopen(request).read() 
2404                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2405                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2408                         # Extract video identifiers 
2409                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2410                                 video_id = mobj.group(1) 
2411                                 if video_id not in already_seen: 
2412                                         video_ids.append(video_id) 
2413                                         already_seen.add(video_id) 
2414                                         if len(video_ids) == n: 
2415                                                 # Specified n videos reached 
2416                                                 for id in video_ids: 
2417                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
2420                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2421                                 for id in video_ids: 
2422                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
2425                         pagenum = pagenum + 1 
2428 class YahooSearchIE(InfoExtractor): 
2429         """Information Extractor for Yahoo! Video search queries.""" 
2430         _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+' 
2431         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' 
2432         _VIDEO_INDICATOR = r'href="http
://video\
.yahoo\
.com
/watch
/([0-9]+/[0-9]+)"' 
2433         _MORE_PAGES_INDICATOR = r'\s*Next' 
2435         _max_yahoo_results = 1000 
2436         IE_NAME = u'video.yahoo:search' 
2438         def __init__(self, yahoo_ie, downloader=None): 
2439                 InfoExtractor.__init__(self, downloader) 
2440                 self._yahoo_ie = yahoo_ie 
2442         def report_download_page(self, query, pagenum): 
2443                 """Report attempt to download playlist page with given number.""" 
2444                 query = query.decode(preferredencoding()) 
2445                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) 
2447         def _real_initialize(self): 
2448                 self._yahoo_ie.initialize() 
2450         def _real_extract(self, query): 
2451                 mobj = re.match(self._VALID_URL, query) 
2453                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2456                 prefix, query = query.split(':') 
2458                 query = query.encode('utf-8') 
2460                         self._download_n_results(query, 1) 
2462                 elif prefix == 'all': 
2463                         self._download_n_results(query, self._max_yahoo_results) 
2469                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2471                                 elif n > self._max_yahoo_results: 
2472                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) 
2473                                         n = self._max_yahoo_results 
2474                                 self._download_n_results(query, n) 
2476                         except ValueError: # parsing prefix as integer fails 
2477                                 self._download_n_results(query, 1) 
2480         def _download_n_results(self, query, n): 
2481                 """Downloads a specified number of results for a query""" 
2484                 already_seen = set() 
2488                         self.report_download_page(query, pagenum) 
2489                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2490                         request = urllib2.Request(result_url) 
2492                                 page = urllib2.urlopen(request).read() 
2493                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2494                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2497                         # Extract video identifiers 
2498                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2499                                 video_id = mobj.group(1) 
2500                                 if video_id not in already_seen: 
2501                                         video_ids.append(video_id) 
2502                                         already_seen.add(video_id) 
2503                                         if len(video_ids) == n: 
2504                                                 # Specified n videos reached 
2505                                                 for id in video_ids: 
2506                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2509                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2510                                 for id in video_ids: 
2511                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2514                         pagenum = pagenum + 1 
2517 class YoutubePlaylistIE(InfoExtractor): 
2518         """Information Extractor for YouTube playlists.""" 
2520         _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' 
2521         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' 
2522         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
2523         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
2525         IE_NAME = u'youtube:playlist' 
2527         def __init__(self, youtube_ie, downloader=None): 
2528                 InfoExtractor.__init__(self, downloader) 
2529                 self._youtube_ie = youtube_ie 
2531         def report_download_page(self, playlist_id, pagenum): 
2532                 """Report attempt to download playlist page with given number.""" 
2533                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) 
2535         def _real_initialize(self): 
2536                 self._youtube_ie.initialize() 
2538         def _real_extract(self, url): 
2539                 # Extract playlist id 
2540                 mobj = re.match(self._VALID_URL, url) 
2542                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2546                 if mobj.group(3) is not None: 
2547                         self._youtube_ie.extract(mobj.group(3)) 
2550                 # Download playlist pages 
2551                 # prefix is 'p' as default for playlists but there are other types that need extra care 
2552                 playlist_prefix = mobj.group(1) 
2553                 if playlist_prefix == 'a': 
2554                         playlist_access = 'artist' 
2556                         playlist_prefix = 'p' 
2557                         playlist_access = 'view_play_list' 
2558                 playlist_id = mobj.group(2) 
2563                         self.report_download_page(playlist_id, pagenum) 
2564                         url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum) 
2565                         request = urllib2.Request(url) 
2567                                 page = urllib2.urlopen(request).read() 
2568                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2569                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2572                         # Extract video identifiers 
2574                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2575                                 if mobj.group(1) not in ids_in_page: 
2576                                         ids_in_page.append(mobj.group(1)) 
2577                         video_ids.extend(ids_in_page) 
2579                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2581                         pagenum = pagenum + 1 
2583                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2584                 playlistend = self._downloader.params.get('playlistend', -1) 
2585                 video_ids = video_ids[playliststart:playlistend] 
2587                 for id in video_ids: 
2588                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2592 class YoutubeUserIE(InfoExtractor): 
2593         """Information Extractor for YouTube users.""" 
2595         _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)' 
2596         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' 
2597         _GDATA_PAGE_SIZE = 50 
2598         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' 
2599         _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]' 
2601         IE_NAME = u'youtube:user' 
2603         def __init__(self, youtube_ie, downloader=None): 
2604                 InfoExtractor.__init__(self, downloader) 
2605                 self._youtube_ie = youtube_ie 
2607         def report_download_page(self, username, start_index): 
2608                 """Report attempt to download user page.""" 
2609                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % 
2610                                 (username, start_index, start_index + self._GDATA_PAGE_SIZE)) 
2612         def _real_initialize(self): 
2613                 self._youtube_ie.initialize() 
2615         def _real_extract(self, url): 
2617                 mobj = re.match(self._VALID_URL, url) 
2619                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2622                 username = mobj.group(1) 
2624                 # Download video ids using YouTube Data API. Result size per 
2625                 # query is limited (currently to 50 videos) so we need to query 
2626                 # page by page until there are no video ids - it means we got 
2633                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1 
2634                         self.report_download_page(username, start_index) 
2636                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)) 
2639                                 page = urllib2.urlopen(request).read() 
2640                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2641                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2644                         # Extract video identifiers 
2647                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2648                                 if mobj.group(1) not in ids_in_page: 
2649                                         ids_in_page.append(mobj.group(1)) 
2651                         video_ids.extend(ids_in_page) 
2653                         # A little optimization - if current page is not 
2654                         # "full
", ie. does not contain PAGE_SIZE video ids then 
2655                         # we can assume that this page is the last one - there 
2656                         # are no more ids on further pages - no need to query 
2659                         if len(ids_in_page) < self._GDATA_PAGE_SIZE: 
2664                 all_ids_count = len(video_ids) 
2665                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2666                 playlistend = self._downloader.params.get('playlistend', -1) 
2668                 if playlistend == -1: 
2669                         video_ids = video_ids[playliststart:] 
2671                         video_ids = video_ids[playliststart:playlistend] 
2673                 self._downloader.to_screen(u"[youtube
] user 
%s: Collected 
%d video 
ids (downloading 
%d of them
)" % 
2674                                 (username, all_ids_count, len(video_ids))) 
2676                 for video_id in video_ids: 
2677                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) 
2680 class DepositFilesIE(InfoExtractor): 
2681         """Information extractor for depositfiles.com""" 
2683         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' 
2684         IE_NAME = u'DepositFiles' 
2686         def __init__(self, downloader=None): 
2687                 InfoExtractor.__init__(self, downloader) 
2689         def report_download_webpage(self, file_id): 
2690                 """Report webpage download.""" 
2691                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) 
2693         def report_extraction(self, file_id): 
2694                 """Report information extraction.""" 
2695                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) 
2697         def _real_extract(self, url): 
2698                 # At this point we have a new file 
2699                 self._downloader.increment_downloads() 
2701                 file_id = url.split('/')[-1] 
2702                 # Rebuild url in english locale 
2703                 url = 'http://depositfiles.com/en/files/' + file_id 
2705                 # Retrieve file webpage with 'Free download' button pressed 
2706                 free_download_indication = { 'gateway_result' : '1' } 
2707                 request = urllib2.Request(url, urllib.urlencode(free_download_indication)) 
2709                         self.report_download_webpage(file_id) 
2710                         webpage = urllib2.urlopen(request).read() 
2711                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2712                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) 
2715                 # Search for the real file URL 
2716                 mobj = re.search(r'<form action="(http
://fileshare
.+?
)"', webpage) 
2717                 if (mobj is None) or (mobj.group(1) is None): 
2718                         # Try to figure out reason of the error. 
2719                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) 
2720                         if (mobj is not None) and (mobj.group(1) is not None): 
2721                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() 
2722                                 self._downloader.trouble(u'ERROR: %s' % restriction_message) 
2724                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) 
2727                 file_url = mobj.group(1) 
2728                 file_extension = os.path.splitext(file_url)[1][1:] 
2730                 # Search for file title 
2731                 mobj = re.search(r'<b title="(.*?
)">', webpage) 
2733                         self._downloader.trouble(u'ERROR: unable to extract title') 
2735                 file_title = mobj.group(1).decode('utf-8') 
2738                         # Process file information 
2739                         self._downloader.process_info({ 
2740                                 'id':           file_id.decode('utf-8'), 
2741                                 'url':          file_url.decode('utf-8'), 
2743                                 'upload_date':  u'NA', 
2744                                 'title':        file_title, 
2745                                 'stitle':       file_title, 
2746                                 'ext':          file_extension.decode('utf-8'), 
2750                 except UnavailableVideoError, err: 
2751                         self._downloader.trouble(u'ERROR: unable to download file') 
2754 class FacebookIE(InfoExtractor): 
2755         """Information Extractor for Facebook""" 
2757         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' 
2758         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' 
2759         _NETRC_MACHINE = 'facebook' 
2760         _available_formats = ['video', 'highqual', 'lowqual'] 
2761         _video_extensions = { 
2766         IE_NAME = u'facebook' 
2768         def __init__(self, downloader=None): 
2769                 InfoExtractor.__init__(self, downloader) 
2771         def _reporter(self, message): 
2772                 """Add header and report message.""" 
2773                 self._downloader.to_screen(u'[facebook] %s' % message) 
2775         def report_login(self): 
2776                 """Report attempt to log in.""" 
2777                 self._reporter(u'Logging in') 
2779         def report_video_webpage_download(self, video_id): 
2780                 """Report attempt to download video webpage.""" 
2781                 self._reporter(u'%s: Downloading video webpage' % video_id) 
2783         def report_information_extraction(self, video_id): 
2784                 """Report attempt to extract video information.""" 
2785                 self._reporter(u'%s: Extracting video information' % video_id) 
2787         def _parse_page(self, video_webpage): 
2788                 """Extract video information from page""" 
2790                 data = {'title': r'\("video_title
", "(.*?
)"\)', 
2791                         'description': r'<div class="datawrap
">(.*?)</div>', 
2792                         'owner': r'\("video_owner_name
", "(.*?
)"\)', 
2793                         'thumbnail':  r'\("thumb_url
", "(?P
<THUMB
>.*?
)"\)', 
2796                 for piece in data.keys(): 
2797                         mobj = re.search(data[piece], video_webpage) 
2798                         if mobj is not None: 
2799                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape
")) 
2803                 for fmt in self._available_formats: 
2804                         mobj = re.search(r'\("%s_src
\", "(.+?)"\
)' % fmt, video_webpage) 
2805                         if mobj is not None: 
2806                                 # URL is in a Javascript segment inside an escaped Unicode format within 
2807                                 # the generally utf-8 page 
2808                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape")) 
2809                 video_info['video_urls
'] = video_urls 
2813         def _real_initialize(self): 
2814                 if self._downloader is None: 
2819                 downloader_params = self._downloader.params 
2821                 # Attempt to use provided username and password or .netrc data 
2822                 if downloader_params.get('username
', None) is not None: 
2823                         useremail = downloader_params['username
'] 
2824                         password = downloader_params['password
'] 
2825                 elif downloader_params.get('usenetrc
', False): 
2827                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE) 
2828                                 if info is not None: 
2832                                         raise netrc.NetrcParseError('No authenticators 
for %s' % self._NETRC_MACHINE) 
2833                         except (IOError, netrc.NetrcParseError), err: 
2834                                 self._downloader.to_stderr(u'WARNING
: parsing 
.netrc
: %s' % str(err)) 
2837                 if useremail is None: 
2846                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) 
2849                         login_results = urllib2.urlopen(request).read() 
2850                         if re.search(r'<form(.*)name
="login"(.*)</form
>', login_results) is not None: 
2851                                 self._downloader.to_stderr(u'WARNING
: unable to log 
in: bad username
/password
, or exceded login rate 
limit (~
3/min). Check credentials 
or wait
.') 
2853                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2854                         self._downloader.to_stderr(u'WARNING
: unable to log 
in: %s' % str(err)) 
	def _real_extract(self, url):
		"""Extract and download a Facebook video given its page URL.

		Matches the video ID out of *url*, downloads the video page,
		parses it via self._parse_page(), then hands one info dict per
		selected format to the downloader.

		NOTE(review): this excerpt elides several original source lines
		(guard conditions, `return` statements, `try:` headers and parts
		of the info dict); each gap is flagged with an `[elided]` comment.
		"""
		mobj = re.match(self._VALID_URL, url)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
		# [elided: return]
		video_id = mobj.group('ID')

		# Get video webpage
		self.report_video_webpage_download(video_id)
		request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
		# [elided: try:]
		page = urllib2.urlopen(request)
		video_webpage = page.read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
		# [elided: return]

		# Start extracting information
		self.report_information_extraction(video_id)

		# Extract information
		video_info = self._parse_page(video_webpage)

		# uploader nickname is mandatory
		if 'owner' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
			# [elided: return]
		video_uploader = video_info['owner']

		# title is mandatory
		if 'title' not in video_info:
			self._downloader.trouble(u'ERROR: unable to extract video title')
			# [elided: return]
		video_title = video_info['title']
		video_title = video_title.decode('utf-8')
		video_title = sanitize_title(video_title)

		simple_title = _simplify_title(video_title)

		# thumbnail — missing thumbnail is only a WARNING, not fatal
		if 'thumbnail' not in video_info:
			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
			video_thumbnail = ''
		# [elided: else:]
			video_thumbnail = video_info['thumbnail']

		# upload date — parsed from an RFC 2822 style date string
		if 'upload_date' in video_info:
			upload_time = video_info['upload_date']
			timetuple = email.utils.parsedate_tz(upload_time)
			if timetuple is not None:
				# [elided: try:]
				upload_date = time.strftime('%Y%m%d', timetuple[0:9])

		video_description = video_info.get('description', 'No description available.')

		url_map = video_info['video_urls']
		if len(url_map.keys()) > 0:
			# Decide which formats to download
			req_format = self._downloader.params.get('format', None)
			format_limit = self._downloader.params.get('format_limit', None)

			# restrict the candidate list when --format-limit was given
			if format_limit is not None and format_limit in self._available_formats:
				format_list = self._available_formats[self._available_formats.index(format_limit):]
			# [elided: else:]
				format_list = self._available_formats
			existing_formats = [x for x in format_list if x in url_map]
			if len(existing_formats) == 0:
				self._downloader.trouble(u'ERROR: no known formats available for video')
				# [elided: return]
			if req_format is None:
				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
			elif req_format == 'worst':
				video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality
			elif req_format == '-1':
				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
			# [elided: else:]
				if req_format not in url_map:
					self._downloader.trouble(u'ERROR: requested format not available')
					# [elided: return]
				video_url_list = [(req_format, url_map[req_format])] # Specific format

		for format_param, video_real_url in video_url_list:

			# At this point we have a new video
			self._downloader.increment_downloads()

			# Extension — defaults to mp4 for unknown format codes
			video_extension = self._video_extensions.get(format_param, 'mp4')

			# [elided: try:]
				# Process video information
				self._downloader.process_info({
					'id':		video_id.decode('utf-8'),
					'url':		video_real_url.decode('utf-8'),
					'uploader':	video_uploader.decode('utf-8'),
					'upload_date':	upload_date,
					'title':	video_title,
					'stitle':	simple_title,
					'ext':		video_extension.decode('utf-8'),
					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
					'thumbnail':	video_thumbnail.decode('utf-8'),
					'description':	video_description.decode('utf-8'),
				# [elided: closing brace of dict and `})`]
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download video')
class BlipTVIE(InfoExtractor):
	"""Information extractor for blip.tv

	Fetches the JSON description of a blip.tv post (skin=json) unless the
	URL turns out to be a direct video download, in which case the file is
	grabbed as-is.

	NOTE(review): this excerpt elides several original source lines
	(guards, `return`s, `try:` headers, parts of dict literals); gaps are
	flagged with `[elided]` comments.
	"""

	# URL of a blip.tv post; group(1) keeps the path for reporting
	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?blip\.tv(/.+)$'
	# extracts the filename extension from a media URL
	_URL_EXT = r'^.*\.([a-z0-9]+)$'
	IE_NAME = u'blip.tv'

	def report_extraction(self, file_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, file_id))

	def report_direct_download(self, title):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Direct download detected' % (self.IE_NAME, title))

	def _real_extract(self, url):
		mobj = re.match(self._VALID_URL, url)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
		# [elided: return; choice of query separator `cchar`]

		# ask blip.tv for the post as JSON
		json_url = url + cchar + 'skin=json&version=2&no_wrap=1'
		request = urllib2.Request(json_url)
		self.report_extraction(mobj.group(1))
		# [elided: info = None; try:]
		urlh = urllib2.urlopen(request)
		if urlh.headers.get('Content-Type', '').startswith('video/'): # Direct download
			basename = url.split('/')[-1]
			title,ext = os.path.splitext(basename)
			title = title.decode('UTF-8')
			ext = ext.replace('.', '')
			self.report_direct_download(title)
			# [elided: start of direct-download info dict]
				'stitle': _simplify_title(title),
			# [elided: remaining entries and closing brace]
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err))

		if info is None: # Regular URL
			# [elided: try:]
			json_code = urlh.read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err))
			# [elided: return; try:]
			json_data = json.loads(json_code)
			if 'Post' in json_data:
				data = json_data['Post']
			# [elided: else: data = json_data]

			# blip.tv dates look like e.g. "09-24-10 02:33PM"
			upload_date = datetime.datetime.strptime(data['datestamp'], '%m-%d-%y %H:%M%p').strftime('%Y%m%d')
			video_url = data['media']['url']
			umobj = re.match(self._URL_EXT, video_url)
			# [elided: `if umobj is None:` guard]
				raise ValueError('Can not determine filename extension')
			ext = umobj.group(1)

			# [elided: `info = {` opening]
				'id': data['item_id'],
				'uploader': data['display_name'],
				'upload_date': upload_date,
				'title': data['title'],
				'stitle': _simplify_title(data['title']),
				'format': data['media']['mimeType'],
				'thumbnail': data['thumbnailUrl'],
				'description': data['description'],
				'player_url': data['embedUrl']
			# [elided: closing brace]
			except (ValueError,KeyError), err:
				self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))
			# [elided: return]

		self._downloader.increment_downloads()
		# [elided: try:]
		self._downloader.process_info(info)
		except UnavailableVideoError, err:
			self._downloader.trouble(u'\nERROR: unable to download video')
3064 class MyVideoIE(InfoExtractor): 
3065         """Information Extractor for myvideo.de.""" 
3067         _VALID_URL = r'(?
:http
://)?
(?
:www\
.)?myvideo\
.de
/watch
/([0-9]+)/([^?
/]+).*' 
3068         IE_NAME = u'myvideo
' 
3070         def __init__(self, downloader=None): 
3071                 InfoExtractor.__init__(self, downloader) 
3073         def report_download_webpage(self, video_id): 
3074                 """Report webpage download.""" 
3075                 self._downloader.to_screen(u'[myvideo
] %s: Downloading webpage
' % video_id) 
3077         def report_extraction(self, video_id): 
3078                 """Report information extraction.""" 
3079                 self._downloader.to_screen(u'[myvideo
] %s: Extracting information
' % video_id) 
3081         def _real_extract(self,url): 
3082                 mobj = re.match(self._VALID_URL, url) 
3084                         self._download.trouble(u'ERROR
: invalid URL
: %s' % url) 
3087                 video_id = mobj.group(1) 
3090                 request = urllib2.Request('http
://www
.myvideo
.de
/watch
/%s' % video_id) 
3092                         self.report_download_webpage(video_id) 
3093                         webpage = urllib2.urlopen(request).read() 
3094                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3095                         self._downloader.trouble(u'ERROR
: Unable to retrieve video webpage
: %s' % str(err)) 
3098                 self.report_extraction(video_id) 
3099                 mobj = re.search(r'<link rel
=\'image_src
\' href
=\'(http
://is[0-9].myvideo\
.de
/de
/movie
[0-9]+/[a
-f0
-9]+)/thumbs
/[^
.]+\
.jpg
\' />', 
3102                         self._downloader.trouble(u'ERROR
: unable to extract media URL
') 
3104                 video_url = mobj.group(1) + ('/%s.flv
' % video_id) 
3106                 mobj = re.search('<title
>([^
<]+)</title
>', webpage) 
3108                         self._downloader.trouble(u'ERROR
: unable to extract title
') 
3111                 video_title = mobj.group(1) 
3112                 video_title = sanitize_title(video_title) 
3114                 simple_title = _simplify_title(video_title) 
3117                         self._downloader.process_info({ 
3121                                 'upload_date
':  u'NA
', 
3122                                 'title
':        video_title, 
3123                                 'stitle
':       simple_title, 
3128                 except UnavailableVideoError: 
3129                         self._downloader.trouble(u'\nERROR
: Unable to download video
') 
class ComedyCentralIE(InfoExtractor):
	"""Information extractor for The Daily Show and Colbert Report

	Accepts both shortname URLs (tds, colbert, ...) and full-episode
	URLs; resolves the MRSS index and downloads every media item of an
	episode, picking the highest available bitrate.

	NOTE(review): this excerpt elides several original source lines
	(guards, `return`s, `try:` headers, loop bodies, parts of dict
	literals); gaps are flagged with `[elided]` comments.
	"""

	# either a bare shortname (":tds") or a full-episodes URL
	_VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$'
	IE_NAME = u'comedycentral'

	def report_extraction(self, episode_id):
		self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id)

	def report_config_download(self, episode_id):
		self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id)

	def report_index_download(self, episode_id):
		self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id)

	def report_player_url(self, episode_id):
		self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id)

	def _real_extract(self, url):
		mobj = re.match(self._VALID_URL, url)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
		# [elided: return]

		# shortnames redirect to the show's newest full episode
		if mobj.group('shortname'):
			if mobj.group('shortname') in ('tds', 'thedailyshow'):
				url = u'http://www.thedailyshow.com/full-episodes/'
			# [elided: else:]
				url = u'http://www.colbertnation.com/full-episodes/'
			mobj = re.match(self._VALID_URL, url)
			assert mobj is not None

		dlNewest = not mobj.group('episode')
		# [elided: if dlNewest:]
		epTitle = mobj.group('showname')
		# [elided: else:]
		epTitle = mobj.group('episode')

		req = urllib2.Request(url)
		self.report_extraction(epTitle)
		# [elided: try:]
		htmlHandle = urllib2.urlopen(req)
		html = htmlHandle.read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
		# [elided: return; `if dlNewest:` — follow the redirect to a concrete episode]
		url = htmlHandle.geturl()
		mobj = re.match(self._VALID_URL, url)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: Invalid redirected URL: ' + url)
		# [elided: return]
		if mobj.group('episode') == '':
			self._downloader.trouble(u'ERROR: Redirected URL is still not specific: ' + url)
			# [elided: return]
		epTitle = mobj.group('episode')

		# find the Flash player URL embedded in the page
		mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html)
		if len(mMovieParams) == 0:
			self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url)
			# [elided: return]

		playerUrl_raw = mMovieParams[0][0]
		self.report_player_url(epTitle)
		# [elided: try:]
		urlHandle = urllib2.urlopen(playerUrl_raw)
		playerUrl = urlHandle.geturl()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err))
		# [elided: return]

		uri = mMovieParams[0][1]
		indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri})
		self.report_index_download(epTitle)
		# [elided: try:]
		indexXml = urllib2.urlopen(indexUrl).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err))
		# [elided: return]

		idoc = xml.etree.ElementTree.fromstring(indexXml)
		itemEls = idoc.findall('.//item')
		for itemEl in itemEls:
			mediaId = itemEl.findall('./guid')[0].text
			shortMediaId = mediaId.split(':')[-1]
			showId = mediaId.split(':')[-2].replace('.com', '')
			officialTitle = itemEl.findall('./title')[0].text
			officialDate = itemEl.findall('./pubDate')[0].text

			configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' +
						urllib.urlencode({'uri': mediaId}))
			configReq = urllib2.Request(configUrl)
			self.report_config_download(epTitle)
			# [elided: try:]
			configXml = urllib2.urlopen(configReq).read()
			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err))
			# [elided: return]

			cdoc = xml.etree.ElementTree.fromstring(configXml)
			# [elided: turls = [] — (bitrate, url) candidates]
			for rendition in cdoc.findall('.//rendition'):
				finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text)
				# [elided: turls.append(finfo)]
			# [elided: `if len(turls) == 0:` guard]
				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found')
				# [elided: continue]

			# For now, just pick the highest bitrate
			format,video_url = turls[-1]

			self._downloader.increment_downloads()

			effTitle = showId + u'-' + epTitle
			# [elided: `info = {` with 'id', 'url', 'uploader', 'title', 'ext', 'format' entries]
				'upload_date': officialDate,
				'stitle': _simplify_title(effTitle),
				'description': officialTitle,
				'player_url': playerUrl
			# [elided: closing brace; try:]
				self._downloader.process_info(info)
			except UnavailableVideoError, err:
				self._downloader.trouble(u'\nERROR: unable to download ' + mediaId)
class EscapistIE(InfoExtractor):
	"""Information extractor for The Escapist

	Scrapes the og:video / og:image / description meta tags of an
	escapistmagazine.com video page, then fetches the player's
	JavaScript configuration (treated as JSON after quote substitution)
	to obtain the actual media URL.

	NOTE(review): this excerpt elides several original source lines
	(guards, `return`s, `try:` headers, parts of the info dict); gaps
	are flagged with `[elided]` comments.
	"""

	_VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$'
	IE_NAME = u'escapist'

	def report_extraction(self, showName):
		self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName)

	def report_config_download(self, showName):
		self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName)

	def _real_extract(self, url):
		# used to unescape HTML entities in the scraped meta values
		htmlParser = HTMLParser.HTMLParser()

		mobj = re.match(self._VALID_URL, url)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
		# [elided: return]
		showName = mobj.group('showname')
		videoId = mobj.group('episode')

		self.report_extraction(showName)
		# [elided: try:]
		webPage = urllib2.urlopen(url).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err))
		# [elided: return]

		descMatch = re.search('<meta name="description" content="([^"]*)"', webPage)
		description = htmlParser.unescape(descMatch.group(1))
		imgMatch = re.search('<meta property="og:image" content="([^"]*)"', webPage)
		imgUrl = htmlParser.unescape(imgMatch.group(1))
		playerUrlMatch = re.search('<meta property="og:video" content="([^"]*)"', webPage)
		playerUrl = htmlParser.unescape(playerUrlMatch.group(1))
		# the player URL carries the config location in its query string
		configUrlMatch = re.search('config=(.*)$', playerUrl)
		configUrl = urllib2.unquote(configUrlMatch.group(1))

		self.report_config_download(showName)
		# [elided: try:]
		configJSON = urllib2.urlopen(configUrl).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err))
		# [elided: return]

		# Technically, it's JavaScript, not JSON
		configJSON = configJSON.replace("'", '"')

		# [elided: try:]
		config = json.loads(configJSON)
		except (ValueError,), err:
			self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err))
		# [elided: return]

		playlist = config['playlist']
		videoUrl = playlist[1]['url']

		self._downloader.increment_downloads()
		# [elided: `info = {` with 'id', 'url', 'title', 'ext', 'format' entries]
			'uploader': showName,
			'upload_date': None,
			'stitle': _simplify_title(showName),
			'thumbnail': imgUrl,
			'description': description,
			'player_url': playerUrl,
		# [elided: closing brace; try:]
			self._downloader.process_info(info)
		except UnavailableVideoError, err:
			self._downloader.trouble(u'\nERROR: unable to download ' + videoId)
class CollegeHumorIE(InfoExtractor):
	"""Information extractor for collegehumor.com

	Finds the internal video ID in the watch page, then reads the
	moogaloop metadata XML for title, description, file URL and
	thumbnail.

	NOTE(review): this excerpt elides several original source lines
	(guards, `return`s, `try:` headers, the info dict opening); gaps are
	flagged with `[elided]` comments.
	"""

	_VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$'
	IE_NAME = u'collegehumor'

	def report_webpage(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))

	def _real_extract(self, url):
		htmlParser = HTMLParser.HTMLParser()

		mobj = re.match(self._VALID_URL, url)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
		# [elided: return]
		video_id = mobj.group('videoid')

		self.report_webpage(video_id)
		request = urllib2.Request(url)
		# [elided: try:]
		webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
		# [elided: return]

		m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage)
		# [elided: `if m is None:` guard]
		self._downloader.trouble(u'ERROR: Cannot extract internal video ID')
		# [elided: return]
		internal_video_id = m.group('internalvideoid')

		# [elided: `info = {` with 'id' entry]
			'internal_id': internal_video_id,
		# [elided: closing brace]

		self.report_extraction(video_id)
		xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
		# [elided: try:]
		metaXml = urllib2.urlopen(xmlUrl).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err))
		# [elided: return]

		mdoc = xml.etree.ElementTree.fromstring(metaXml)
		# [elided: try:]
		videoNode = mdoc.findall('./video')[0]
		info['description'] = videoNode.findall('./description')[0].text
		info['title'] = videoNode.findall('./caption')[0].text
		info['stitle'] = _simplify_title(info['title'])
		info['url'] = videoNode.findall('./file')[0].text
		info['thumbnail'] = videoNode.findall('./thumbnail')[0].text
		info['ext'] = info['url'].rpartition('.')[2]
		info['format'] = info['ext']
		# [elided: except IndexError:]
		self._downloader.trouble(u'\nERROR: Invalid metadata XML file')
		# [elided: return]

		self._downloader.increment_downloads()
		# [elided: try:]
		self._downloader.process_info(info)
		except UnavailableVideoError, err:
			self._downloader.trouble(u'\nERROR: unable to download video')
class XVideosIE(InfoExtractor):
	"""Information extractor for xvideos.com

	Scrapes the watch page for the flv URL, page title and thumbnail.

	NOTE(review): this excerpt elides several original source lines
	(guards, `return`s, `try:` headers, parts of the info dict); gaps
	are flagged with `[elided]` comments.
	"""

	_VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)'
	IE_NAME = u'xvideos'

	def report_webpage(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))

	def _real_extract(self, url):
		htmlParser = HTMLParser.HTMLParser()

		mobj = re.match(self._VALID_URL, url)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
		# [elided: return]
		video_id = mobj.group(1).decode('utf-8')

		self.report_webpage(video_id)

		request = urllib2.Request(r'http://www.xvideos.com/video' + video_id)
		# [elided: try:]
		webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
		# [elided: return]

		self.report_extraction(video_id)

		# Extract video URL (percent-encoded in a flv_url query param)
		mobj = re.search(r'flv_url=(.+?)&', webpage)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: unable to extract video url')
		# [elided: return]
		video_url = urllib2.unquote(mobj.group(1).decode('utf-8'))

		# Extract title
		mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: unable to extract video title')
		# [elided: return]
		video_title = mobj.group(1).decode('utf-8')

		# Extract video thumbnail
		mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: unable to extract video thumbnail')
		# [elided: return]
		video_thumbnail = mobj.group(1).decode('utf-8')

		self._downloader.increment_downloads()
		# [elided: `info = {` with 'id', 'url', 'uploader' entries]
			'upload_date': None,
			'title': video_title,
			'stitle': _simplify_title(video_title),
			'thumbnail': video_thumbnail,
			'description': None,
		# [elided: 'ext'/'format'/'player_url' entries, closing brace; try:]
			self._downloader.process_info(info)
		except UnavailableVideoError, err:
			self._downloader.trouble(u'\nERROR: unable to download ' + video_id)
class SoundcloudIE(InfoExtractor):
	"""Information extractor for soundcloud.com
	   To access the media, the uid of the song and a stream token
	   must be extracted from the page source and the script must make
	   a request to media.soundcloud.com/crossdomain.xml. Then
	   the media can be grabbed by requesting from an url composed
	   of the stream token and uid
	"""
	# NOTE(review): this excerpt elides several original source lines
	# (guards, `return`s, `try:` headers, parts of the info dict); gaps
	# are flagged with `[elided]` comments.

	_VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)'
	IE_NAME = u'soundcloud'

	def __init__(self, downloader=None):
		InfoExtractor.__init__(self, downloader)

	def report_webpage(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id))

	def report_extraction(self, video_id):
		"""Report information extraction."""
		self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id))

	def _real_extract(self, url):
		htmlParser = HTMLParser.HTMLParser()

		mobj = re.match(self._VALID_URL, url)
		# [elided: `if mobj is None:` guard]
		self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
		# [elided: return]

		# extract uploader (which is in the url)
		uploader = mobj.group(1).decode('utf-8')
		# extract simple title (uploader + slug of song title)
		slug_title =  mobj.group(2).decode('utf-8')
		simple_title = uploader + '-' + slug_title

		self.report_webpage('%s/%s' % (uploader, slug_title))

		request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title))
		# [elided: try:]
		webpage = urllib2.urlopen(request).read()
		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
		# [elided: return]

		self.report_extraction('%s/%s' % (uploader, slug_title))

		# extract uid and stream token that soundcloud hands out for access
		mobj = re.search('"uid":"([\w\d]+?)".*?stream_token=([\w\d]+)', webpage)
		# [elided: `if mobj:` guard]
		video_id = mobj.group(1)
		stream_token = mobj.group(2)

		# extract unsimplified title
		mobj = re.search('"title":"(.*?)",', webpage)
		# [elided: `if mobj:` guard]
		title = mobj.group(1)

		# construct media url (with uid/token)
		mediaURL = "http://media.soundcloud.com/stream/%s?stream_token=%s"
		mediaURL = mediaURL % (video_id, stream_token)

		# description
		description = u'No description available'
		mobj = re.search('track-description-value"><p>(.*?)</p>', webpage)
		# [elided: `if mobj:` guard]
		description = mobj.group(1)

		# upload date — e.g. "November 8, 2010 14:30"
		mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage)
		# [elided: `if mobj:` guard; try:]
		upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d')
		except Exception, e:
		# [elided: error-handling body]

		# for soundcloud, a request to a cross domain is required for cookies
		request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers)

		# [elided: try:]
		self._downloader.process_info({
			'id':		video_id.decode('utf-8'),
			'uploader':	uploader.decode('utf-8'),
			'upload_date':	upload_date,
			'title':	simple_title.decode('utf-8'),
			'stitle':	simple_title.decode('utf-8'),
			'description': description.decode('utf-8')
		# [elided: 'url'/'ext'/'format'/'player_url' entries, closing `})`]
		except UnavailableVideoError:
			self._downloader.trouble(u'\nERROR: unable to download video')
3596 class InfoQIE(InfoExtractor): 
3597         """Information extractor for infoq.com""" 
3599         _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$' 
3602         def report_webpage(self, video_id): 
3603                 """Report information extraction.""" 
3604                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) 
3606         def report_extraction(self, video_id): 
3607                 """Report information extraction.""" 
3608                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) 
3610         def _real_extract(self, url): 
3611                 htmlParser = HTMLParser.HTMLParser() 
3613                 mobj = re.match(self._VALID_URL, url) 
3615                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
3618                 self.report_webpage(url) 
3620                 request = urllib2.Request(url) 
3622                         webpage = urllib2.urlopen(request).read() 
3623                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3624                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) 
3627                 self.report_extraction(url) 
3631                 mobj = re.search(r"jsclassref
='([^']*)'", webpage) 
3633                         self._downloader.trouble(u'ERROR
: unable to extract video url
') 
3635                 video_url = 'rtmpe
://video
.infoq
.com
/cfx
/st
/' + urllib2.unquote(mobj.group(1).decode('base64
')) 
3639                 mobj = re.search(r'contentTitle 
= "(.*?)";', webpage) 
3641                         self._downloader.trouble(u'ERROR
: unable to extract video title
') 
3643                 video_title = mobj.group(1).decode('utf
-8') 
3645                 # Extract description 
3646                 video_description = u'No description available
.' 
3647                 mobj = re.search(r'<meta name
="description" content
="(.*)"(?
:\s
*/)?
>', webpage) 
3648                 if mobj is not None: 
3649                         video_description = mobj.group(1).decode('utf
-8') 
3651                 video_filename = video_url.split('/')[-1] 
3652                 video_id, extension = video_filename.split('.') 
3654                 self._downloader.increment_downloads() 
3659                         'upload_date
': None, 
3660                         'title
': video_title, 
3661                         'stitle
': _simplify_title(video_title), 
3663                         'format
': extension, # Extension is always(?) mp4, but seems to be flv 
3665                         'description
': video_description, 
3670                         self._downloader.process_info(info) 
3671                 except UnavailableVideoError, err: 
3672                         self._downloader.trouble(u'\nERROR
: unable to download 
' + video_url) 
3674 class MixcloudIE(InfoExtractor): 
3675         """Information extractor for www.mixcloud.com""" 
3676         _VALID_URL = r'^
(?
:https?
://)?
(?
:www\
.)?mixcloud\
.com
/([\w\d
-]+)/([\w\d
-]+)' 
3677         IE_NAME = u'mixcloud
' 
3679         def __init__(self, downloader=None): 
3680                 InfoExtractor.__init__(self, downloader) 
3682         def report_download_json(self, file_id): 
3683                 """Report JSON download.""" 
3684                 self._downloader.to_screen(u'[%s] Downloading json
' % self.IE_NAME) 
3686         def report_extraction(self, file_id): 
3687                 """Report information extraction.""" 
3688                 self._downloader.to_screen(u'[%s] %s: Extracting information
' % (self.IE_NAME, file_id)) 
3690         def get_urls(self, jsonData, fmt, bitrate='best
'): 
3691                 """Get urls from 'audio_formats
' section in json""" 
3694                         bitrate_list = jsonData[fmt] 
3695                         if bitrate is None or bitrate == 'best
' or bitrate not in bitrate_list: 
3696                                 bitrate = max(bitrate_list) # select highest 
3698                         url_list = jsonData[fmt][bitrate] 
3699                 except TypeError: # we have no bitrate info. 
3700                         url_list = jsonData[fmt] 
3704         def check_urls(self, url_list): 
3705                 """Returns 1st active url from list""" 
3706                 for url in url_list: 
3708                                 urllib2.urlopen(url) 
3710                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3715         def _print_formats(self, formats): 
3716                 print 'Available formats
:' 
3717                 for fmt in formats.keys(): 
3718                         for b in formats[fmt]: 
3720                                         ext = formats[fmt][b][0] 
3721                                         print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]) 
3722                                 except TypeError: # we have no bitrate info 
3723                                         ext = formats[fmt][0] 
3724                                         print '%s\t%s\t[%s]' % (fmt, '??
', ext.split('.')[-1]) 
3727         def _real_extract(self, url): 
3728                 mobj = re.match(self._VALID_URL, url) 
3730                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
3732                 # extract uploader & filename from url 
3733                 uploader = mobj.group(1).decode('utf
-8') 
3734                 file_id = uploader + "-" + mobj.group(2).decode('utf
-8') 
3736                 # construct API request 
3737                 file_url = 'http
://www
.mixcloud
.com
/api
/1/cloudcast
/' + '/'.join(url.split('/')[-3:-1]) + '.json
' 
3738                 # retrieve .json file with links to files 
3739                 request = urllib2.Request(file_url) 
3741                         self.report_download_json(file_url) 
3742                         jsonData = urllib2.urlopen(request).read() 
3743                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3744                         self._downloader.trouble(u'ERROR
: Unable to retrieve 
file: %s' % str(err)) 
3748                 json_data = json.loads(jsonData) 
3749                 player_url = json_data['player_swf_url
'] 
3750                 formats = dict(json_data['audio_formats
']) 
3752                 req_format = self._downloader.params.get('format
', None) 
3755                 if self._downloader.params.get('listformats
', None): 
3756                         self._print_formats(formats) 
3759                 if req_format is None or req_format == 'best
': 
3760                         for format_param in formats.keys(): 
3761                                 url_list = self.get_urls(formats, format_param) 
3763                                 file_url = self.check_urls(url_list) 
3764                                 if file_url is not None: 
3767                         if req_format not in formats.keys(): 
3768                                 self._downloader.trouble(u'ERROR
: format 
is not available
') 
3771                         url_list = self.get_urls(formats, req_format) 
3772                         file_url = self.check_urls(url_list) 
3773                         format_param = req_format 
3776                 self._downloader.increment_downloads() 
3778                         # Process file information 
3779                         self._downloader.process_info({ 
3780                                 'id': file_id.decode('utf
-8'), 
3781                                 'url
': file_url.decode('utf
-8'), 
3782                                 'uploader
':     uploader.decode('utf
-8'), 
3783                                 'upload_date
': u'NA
', 
3784                                 'title
': json_data['name
'], 
3785                                 'stitle
': _simplify_title(json_data['name
']), 
3786                                 'ext
': file_url.split('.')[-1].decode('utf
-8'), 
3787                                 'format
': (format_param is None and u'NA
' or format_param.decode('utf
-8')), 
3788                                 'thumbnail
': json_data['thumbnail_url
'], 
3789                                 'description
': json_data['description
'], 
3790                                 'player_url
': player_url.decode('utf
-8'), 
3792                 except UnavailableVideoError, err: 
3793                         self._downloader.trouble(u'ERROR
: unable to download 
file') 
3795 class StanfordOpenClassroomIE(InfoExtractor): 
3796         """Information extractor for Stanford's Open ClassRoom
""" 
3798         _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$' 
3799         IE_NAME = u'stanfordoc' 
3801         def report_download_webpage(self, objid): 
3802                 """Report information extraction
.""" 
3803                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, objid)) 
3805         def report_extraction(self, video_id): 
3806                 """Report information extraction
.""" 
3807                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) 
3809         def _real_extract(self, url): 
3810                 mobj = re.match(self._VALID_URL, url) 
3812                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
3815                 if mobj.group('course') and mobj.group('video'): # A specific video 
3816                         course = mobj.group('course') 
3817                         video = mobj.group('video') 
3819                                 'id': _simplify_title(course + '_' + video), 
3822                         self.report_extraction(info['id']) 
3823                         baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/' 
3824                         xmlUrl = baseUrl + video + '.xml' 
3826                                 metaXml = urllib2.urlopen(xmlUrl).read() 
3827                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3828                                 self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err)) 
3830                         mdoc = xml.etree.ElementTree.fromstring(metaXml) 
3832                                 info['title'] = mdoc.findall('./title')[0].text 
3833                                 info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text 
3835                                 self._downloader.trouble(u'\nERROR: Invalid metadata XML file') 
3837                         info['stitle'] = _simplify_title(info['title']) 
3838                         info['ext'] = info['url'].rpartition('.')[2] 
3839                         info['format'] = info['ext'] 
3840                         self._downloader.increment_downloads() 
3842                                 self._downloader.process_info(info) 
3843                         except UnavailableVideoError, err: 
3844                                 self._downloader.trouble(u'\nERROR: unable to download video') 
3845                 elif mobj.group('course'): # A course page 
3846                         unescapeHTML = HTMLParser.HTMLParser().unescape 
3848                         course = mobj.group('course') 
3850                                 'id': _simplify_title(course), 
3854                         self.report_download_webpage(info['id']) 
3856                                 coursepage = urllib2.urlopen(url).read() 
3857                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3858                                 self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err)) 
3861                         m = re.search('<h1>([^<]+)</h1>', coursepage) 
3863                                 info['title'] = unescapeHTML(m.group(1)) 
3865                                 info['title'] = info['id'] 
3866                         info['stitle'] = _simplify_title(info['title']) 
3868                         m = re.search('<description>([^<]+)</description>', coursepage) 
3870                                 info['description'] = unescapeHTML(m.group(1)) 
3872                         links = _orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage)) 
3875                                         'type': 'reference', 
3876                                         'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage), 
3880                         for entry in info['list']: 
3881                                 assert entry['type'] == 'reference' 
3882                                 self.extract(entry['url']) 
3884                         unescapeHTML = HTMLParser.HTMLParser().unescape 
3887                                 'id': 'Stanford OpenClassroom', 
3891                         self.report_download_webpage(info['id']) 
3892                         rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php' 
3894                                 rootpage = urllib2.urlopen(rootURL).read() 
3895                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3896                                 self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err)) 
3899                         info['title'] = info['id'] 
3900                         info['stitle'] = _simplify_title(info['title']) 
3902                         links = _orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage)) 
3905                                         'type': 'reference', 
3906                                         'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage), 
3910                         for entry in info['list']: 
3911                                 assert entry['type'] == 'reference' 
3912                                 self.extract(entry['url']) 
3914 class MTVIE(InfoExtractor): 
3915         """Information extractor 
for MTV
.com
""" 
3917         _VALID_URL = r'^(?P<proto>https?://)?(?:www\.)?mtv\.com/videos/[^/]+/(?P<videoid>[0-9]+)/[^/]+$' 
3920         def report_webpage(self, video_id): 
3921                 """Report information extraction
.""" 
3922                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) 
3924         def report_extraction(self, video_id): 
3925                 """Report information extraction
.""" 
3926                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) 
3928         def _real_extract(self, url): 
3929                 mobj = re.match(self._VALID_URL, url) 
3931                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
3933                 if not mobj.group('proto'): 
3934                         url = 'http://' + url 
3935                 video_id = mobj.group('videoid') 
3936                 self.report_webpage(video_id) 
3938                 request = urllib2.Request(url) 
3940                         webpage = urllib2.urlopen(request).read() 
3941                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3942                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) 
3945                 mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage) 
3947                         self._downloader.trouble(u'ERROR: unable to extract song name') 
3949                 song_name = _unescapeHTML(mobj.group(1).decode('iso-8859-1')) 
3950                 mobj = re.search(r'<meta name="mtv_an" content="([^"]+)"/>', webpage) 
3952                         self._downloader.trouble(u'ERROR: unable to extract performer') 
3954                 performer = _unescapeHTML(mobj.group(1).decode('iso-8859-1')) 
3955                 video_title = performer + ' - ' + song_name  
3957                 mobj = re.search(r'<meta name="mtvn_uri" content="([^"]+)"/>', webpage) 
3959                         self._downloader.trouble(u'ERROR: unable to mtvn_uri') 
3961                 mtvn_uri = mobj.group(1) 
3963                 mobj = re.search(r'MTVN.Player.defaultPlaylistId = ([0-9]+);', webpage) 
3965                         self._downloader.trouble(u'ERROR: unable to extract content id') 
3967                 content_id = mobj.group(1) 
3969                 videogen_url = 'http://www.mtv.com/player/includes/mediaGen.jhtml?uri=' + mtvn_uri + '&id=' + content_id + '&vid=' + video_id + '&ref=www.mtvn.com&viewUri=' + mtvn_uri 
3970                 self.report_extraction(video_id) 
3971                 request = urllib2.Request(videogen_url) 
3973                         metadataXml = urllib2.urlopen(request).read() 
3974                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3975                         self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err)) 
3978                 mdoc = xml.etree.ElementTree.fromstring(metadataXml) 
3979                 renditions = mdoc.findall('.//rendition') 
3981                 # For now, always pick the highest quality. 
3982                 rendition = renditions[-1] 
3985                         _,_,ext = rendition.attrib['type'].partition('/') 
3986                         format = ext + '-' + rendition.attrib['width'] + 'x' + rendition.attrib['height'] + '_' + rendition.attrib['bitrate'] 
3987                         video_url = rendition.find('./src').text 
3989                         self._downloader.trouble('Invalid rendition field.') 
3992                 self._downloader.increment_downloads() 
3996                         'uploader': performer, 
3997                         'title': video_title, 
3998                         'stitle': _simplify_title(video_title), 
4004                         self._downloader.process_info(info) 
4005                 except UnavailableVideoError, err: 
4006                         self._downloader.trouble(u'\nERROR: unable to download ' + video_id) 
class PostProcessor(object):
	"""Post Processor class.

	PostProcessor objects can be added to downloaders with their
	add_post_processor() method. When the downloader has finished a
	successful download, it will take its internal chain of PostProcessors
	and start calling the run() method on each one of them, first with
	an initial argument and then with the returned value of the previous
	PostProcessor.

	The chain will be stopped if one of them ever returns None or the end
	of the chain is reached.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		composed by InfoExtractors. The only difference is that this
		one has an extra field called "filepath" that points to the
		downloaded file.

		When this method returns None, the postprocessing chain is
		stopped. However, this method may return an information
		dictionary that will be passed to the next postprocessing
		object in the chain. It can be the one it received after
		changing some fields.

		In addition, this method may raise a PostProcessingError
		exception that will be taken into account by the downloader
		it was called from.
		"""
		return information # by default, do nothing
class AudioConversionError(Exception):
	"""Raised when ffmpeg/ffprobe fails during audio extraction.

	Derives from Exception rather than BaseException: BaseException is
	reserved for interpreter-exit exceptions (SystemExit,
	KeyboardInterrupt), so generic ``except Exception`` handlers should
	be able to catch conversion failures.
	"""

	def __init__(self, message):
		# Call the base constructor so str(exc) shows the message.
		Exception.__init__(self, message)
		# Human-readable failure reason (read by FFmpegExtractAudioPP.run).
		self.message = message
class FFmpegExtractAudioPP(PostProcessor):
	"""Post-processor that extracts the audio track of a downloaded
	video into a standalone audio file using ffmpeg/ffprobe.

	Copies the stream losslessly when the source codec already matches
	the preferred codec; otherwise transcodes (lossy) to the preferred
	codec at the preferred quality.
	"""

	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec
		self._preferredquality = preferredquality
		self._keepvideo = keepvideo

	@staticmethod
	def get_audio_codec(path):
		"""Return the audio codec name of the file at path via ffprobe,
		or None if it cannot be determined."""
		try:
			cmd = ['ffprobe', '-show_streams', '--', _encodeFilename(path)]
			handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
			output = handle.communicate()[0]
			if handle.wait() != 0:
				return None
		except (IOError, OSError):
			return None
		# ffprobe prints codec_name= before codec_type= for each stream;
		# remember the last codec_name and report it for the audio stream.
		audio_codec = None
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
				return audio_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		"""Run ffmpeg to convert path into out_path; raises
		AudioConversionError on failure (codec=None means no -acodec)."""
		if codec is None:
			acodec_opts = []
		else:
			acodec_opts = ['-acodec', codec]
		cmd = ['ffmpeg', '-y', '-i', _encodeFilename(path), '-vn'] + acodec_opts + more_opts + ['--', _encodeFilename(out_path)]
		try:
			p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			stdout,stderr = p.communicate()
		except (IOError, OSError):
			e = sys.exc_info()[1]
			if isinstance(e, OSError) and e.errno == 2:
				raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
			else:
				raise e
		if p.returncode != 0:
			msg = stderr.strip().split('\n')[-1]
			raise AudioConversionError(msg)

	def run(self, information):
		path = information['filepath']

		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		more_opts = []
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
			if self._preferredcodec == 'm4a' and filecodec == 'aac':
				# Lossless, but in another container
				acodec = 'copy'
				extension = self._preferredcodec
				more_opts = ['-absf', 'aac_adtstoasc']
			elif filecodec in ['aac', 'mp3', 'vorbis']:
				# Lossless if possible
				acodec = 'copy'
				extension = filecodec
				if filecodec == 'aac':
					more_opts = ['-f', 'adts']
				if filecodec == 'vorbis':
					extension = 'ogg'
			else:
				# MP3 otherwise.
				acodec = 'libmp3lame'
				extension = 'mp3'
				more_opts = []
				if self._preferredquality is not None:
					more_opts += ['-ab', self._preferredquality]
		else:
			# We convert the audio (lossy)
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
			extension = self._preferredcodec
			more_opts = []
			if self._preferredquality is not None:
				more_opts += ['-ab', self._preferredquality]
			if self._preferredcodec == 'aac':
				more_opts += ['-f', 'adts']
			if self._preferredcodec == 'm4a':
				more_opts += ['-absf', 'aac_adtstoasc']
			if self._preferredcodec == 'vorbis':
				extension = 'ogg'
			if self._preferredcodec == 'wav':
				extension = 'wav'
				more_opts += ['-f', 'wav']

		prefix, sep, ext = path.rpartition(u'.') # not os.path.splitext, since the latter does not work on unicode in all setups
		new_path = prefix + sep + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: ' + new_path)
		try:
			self.run_ffmpeg(path, new_path, acodec, more_opts)
		except:
			etype,e,tb = sys.exc_info()
			if isinstance(e, AudioConversionError):
				self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
			else:
				self._downloader.to_stderr(u'ERROR: error running ffmpeg')
			return None

		# Try to update the date time for extracted audio file.
		if information.get('filetime') is not None:
			try:
				os.utime(_encodeFilename(new_path), (time.time(), information['filetime']))
			except:
				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')

		if not self._keepvideo:
			try:
				os.remove(_encodeFilename(path))
			except (IOError, OSError):
				self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
				return None

		information['filepath'] = new_path
		return information
4185 def updateSelf(downloader, filename): 
4186         ''' Update the program file with the latest version from the repository ''' 
4187         # Note: downloader only used for options 
4188         if not os.access(filename, os.W_OK): 
4189                 sys.exit('ERROR: no write permissions on %s' % filename) 
4191         downloader.to_screen(u'Updating to latest version...') 
4195                         urlh = urllib.urlopen(UPDATE_URL) 
4196                         newcontent = urlh.read() 
4198                         vmatch = re.search("__version__ = '([^']+)'", newcontent) 
4199                         if vmatch is not None and vmatch.group(1) == __version__: 
4200                                 downloader.to_screen(u'youtube-dl is up-to-date (' + __version__ + ')') 
4204         except (IOError, OSError), err: 
4205                 sys.exit('ERROR: unable to download latest version') 
4208                 outf = open(filename, 'wb') 
4210                         outf.write(newcontent) 
4213         except (IOError, OSError), err: 
4214                 sys.exit('ERROR: unable to overwrite current version') 
4216         downloader.to_screen(u'Updated youtube-dl. Restart youtube-dl to use the new version.') 
4219         def _readOptions(filename_bytes): 
4221                         optionf = open(filename_bytes) 
4223                         return [] # silently skip if file is not present 
4227                                 res += shlex.split(l, comments=True) 
4232         def _format_option_string(option): 
4233                 ''' ('-o', '--option') -> -o, --format METAVAR''' 
4237                 if option._short_opts: opts.append(option._short_opts[0]) 
4238                 if option._long_opts: opts.append(option._long_opts[0]) 
4239                 if len(opts) > 1: opts.insert(1, ', ') 
4241                 if option.takes_value(): opts.append(' %s' % option.metavar) 
4243                 return "".join(opts) 
4245         def _find_term_columns(): 
4246                 columns = os.environ.get('COLUMNS', None) 
4251                         sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 
4252                         out,err = sp.communicate() 
4253                         return int(out.split()[1]) 
4259         max_help_position = 80 
4261         # No need to wrap help messages if we're on a wide console 
4262         columns = _find_term_columns() 
4263         if columns: max_width = columns 
4265         fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) 
4266         fmt.format_option_strings = _format_option_string 
4269                 'version'   : __version__, 
4271                 'usage' : '%prog [options] url [url...]', 
4272                 'conflict_handler' : 'resolve', 
4275         parser = optparse.OptionParser(**kw) 
4278         general        = optparse.OptionGroup(parser, 'General Options') 
4279         selection      = optparse.OptionGroup(parser, 'Video Selection') 
4280         authentication = optparse.OptionGroup(parser, 'Authentication Options') 
4281         video_format   = optparse.OptionGroup(parser, 'Video Format Options') 
4282         postproc       = optparse.OptionGroup(parser, 'Post-processing Options') 
4283         filesystem     = optparse.OptionGroup(parser, 'Filesystem Options') 
4284         verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') 
4286         general.add_option('-h', '--help', 
4287                         action='help', help='print this help text and exit') 
4288         general.add_option('-v', '--version', 
4289                         action='version', help='print program version and exit') 
4290         general.add_option('-U', '--update', 
4291                         action='store_true', dest='update_self', help='update this program to latest version') 
4292         general.add_option('-i', '--ignore-errors', 
4293                         action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) 
4294         general.add_option('-r', '--rate-limit', 
4295                         dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') 
4296         general.add_option('-R', '--retries', 
4297                         dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) 
4298         general.add_option('--dump-user-agent', 
4299                         action='store_true', dest='dump_user_agent', 
4300                         help='display the current browser identification', default=False) 
4301         general.add_option('--list-extractors', 
4302                         action='store_true', dest='list_extractors', 
4303                         help='List all supported extractors and the URLs they would handle', default=False) 
4305         selection.add_option('--playlist-start', 
4306                         dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) 
4307         selection.add_option('--playlist-end', 
4308                         dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) 
4309         selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') 
4310         selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') 
4311         selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) 
4313         authentication.add_option('-u', '--username', 
4314                         dest='username', metavar='USERNAME', help='account username') 
4315         authentication.add_option('-p', '--password', 
4316                         dest='password', metavar='PASSWORD', help='account password') 
4317         authentication.add_option('-n', '--netrc', 
4318                         action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) 
4321         video_format.add_option('-f', '--format', 
4322                         action='store', dest='format', metavar='FORMAT', help='video format code') 
4323         video_format.add_option('--all-formats', 
4324                         action='store_const', dest='format', help='download all available video formats', const='all') 
4325         video_format.add_option('--prefer-free-formats', 
4326                         action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested') 
4327         video_format.add_option('--max-quality', 
4328                         action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') 
4329         video_format.add_option('-F', '--list-formats', 
4330                         action='store_true', dest='listformats', help='list all available formats (currently youtube only)') 
4333         verbosity.add_option('-q', '--quiet', 
4334                         action='store_true', dest='quiet', help='activates quiet mode', default=False) 
4335         verbosity.add_option('-s', '--simulate', 
4336                         action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) 
4337         verbosity.add_option('--skip-download', 
4338                         action='store_true', dest='skip_download', help='do not download the video', default=False) 
4339         verbosity.add_option('-g', '--get-url', 
4340                         action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) 
4341         verbosity.add_option('-e', '--get-title', 
4342                         action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) 
4343         verbosity.add_option('--get-thumbnail', 
4344                         action='store_true', dest='getthumbnail', 
4345                         help='simulate, quiet but print thumbnail URL', default=False) 
4346         verbosity.add_option('--get-description', 
4347                         action='store_true', dest='getdescription', 
4348                         help='simulate, quiet but print video description', default=False) 
4349         verbosity.add_option('--get-filename', 
4350                         action='store_true', dest='getfilename', 
4351                         help='simulate, quiet but print output filename', default=False) 
4352         verbosity.add_option('--get-format', 
4353                         action='store_true', dest='getformat', 
4354                         help='simulate, quiet but print output format', default=False) 
4355         verbosity.add_option('--no-progress', 
4356                         action='store_true', dest='noprogress', help='do not print progress bar', default=False) 
4357         verbosity.add_option('--console-title', 
4358                         action='store_true', dest='consoletitle', 
4359                         help='display progress in console titlebar', default=False) 
4360         verbosity.add_option('-v', '--verbose', 
4361                         action='store_true', dest='verbose', help='print various debugging information', default=False) 
4364         filesystem.add_option('-t', '--title', 
4365                         action='store_true', dest='usetitle', help='use title in file name', default=False) 
4366         filesystem.add_option('-l', '--literal', 
4367                         action='store_true', dest='useliteral', help='use literal title in file name', default=False) 
4368         filesystem.add_option('-A', '--auto-number', 
4369                         action='store_true', dest='autonumber', 
4370                         help='number downloaded files starting from 00000', default=False) 
4371         filesystem.add_option('-o', '--output', 
4372                         dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.') 
4373         filesystem.add_option('-a', '--batch-file', 
4374                         dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') 
4375         filesystem.add_option('-w', '--no-overwrites', 
4376                         action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) 
4377         filesystem.add_option('-c', '--continue', 
4378                         action='store_true', dest='continue_dl', help='resume partially downloaded files', default=True) 
4379         filesystem.add_option('--no-continue', 
4380                         action='store_false', dest='continue_dl', 
4381                         help='do not resume partially downloaded files (restart from beginning)') 
4382         filesystem.add_option('--cookies', 
4383                         dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') 
4384         filesystem.add_option('--no-part', 
4385                         action='store_true', dest='nopart', help='do not use .part files', default=False) 
4386         filesystem.add_option('--no-mtime', 
4387                         action='store_false', dest='updatetime', 
4388                         help='do not use the Last-modified header to set the file modification time', default=True) 
4389         filesystem.add_option('--write-description', 
4390                         action='store_true', dest='writedescription', 
4391                         help='write video description to a .description file', default=False) 
4392         filesystem.add_option('--write-info-json', 
4393                         action='store_true', dest='writeinfojson', 
4394                         help='write video metadata to a .info.json file', default=False) 
4397         postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, 
4398                         help='convert video files to audio-only files (requires ffmpeg and ffprobe)') 
4399         postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', 
4400                         help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default') 
4401         postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K', 
4402                         help='ffmpeg audio bitrate specification, 128k by default') 
4403         postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, 
4404                         help='keeps the video file on disk after the post-processing; the video is erased by default') 
4407         parser.add_option_group(general) 
4408         parser.add_option_group(selection) 
4409         parser.add_option_group(filesystem) 
4410         parser.add_option_group(verbosity) 
4411         parser.add_option_group(video_format) 
4412         parser.add_option_group(authentication) 
4413         parser.add_option_group(postproc) 
4415         xdg_config_home = os.environ.get('XDG_CONFIG_HOME') 
4417                 userConf = os.path.join(xdg_config_home, 'youtube-dl.conf') 
4419                 userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') 
4420         argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:] 
4421         opts, args = parser.parse_args(argv) 
4423         return parser, opts, args 
def gen_extractors():
	"""Return a list of an instance of every supported extractor.

	The order does matter; the first extractor matched is the one
	handling the URL.
	"""
	# Shared single-video extractors: the playlist/user/search extractors
	# below delegate the actual per-video download to these instances.
	youtube_ie = YoutubeIE()
	google_ie = GoogleIE()
	yahoo_ie = YahooIE()
	# NOTE(review): the `return [` opening this list — and a number of
	# extractor entries (gaps in the original listing) — are missing from
	# this extraction-garbled copy; restore them from the original source
	# before running. Order is significant (see docstring).
		YoutubePlaylistIE(youtube_ie),
		YoutubeUserIE(youtube_ie),
		YoutubeSearchIE(youtube_ie),
		MetacafeIE(youtube_ie),
		GoogleSearchIE(google_ie),
		YahooSearchIE(yahoo_ie),
		StanfordOpenClassroomIE(),
	# --- CLI driver body (the enclosing `def` header lies in an elided gap
	# just above this point). NOTE(review): this is Python 2 code
	# (`except X, err`, `print` statement, `long`); several original lines
	# (`try:`, `else:`, a closing `})`, …) are missing from this
	# extraction-garbled copy and are flagged inline below.
	parser, opts, args = parseOpts()

	# Open appropriate CookieJar
	if opts.cookiefile is None:
		jar = cookielib.CookieJar()
			# NOTE(review): the `else:`/`try:` lines introducing this
			# branch, and the `jar.load()` call under the isfile() check,
			# are missing from this copy.
			jar = cookielib.MozillaCookieJar(opts.cookiefile)
			if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK):
		except (IOError, OSError), err:
			sys.exit(u'ERROR: unable to open cookie file')

	# --dump-user-agent: print the spoofed browser UA and (presumably) exit;
	# the line following the print is not visible here — TODO confirm.
	if opts.dump_user_agent:
		print std_headers['User-Agent']

	# Batch file verification
	if opts.batchfile is not None:
		# NOTE(review): the `try:` opening this block, the stdin branch
		# body, an `else:`, and the `except` introducing the error exit
		# are missing from this copy.
			if opts.batchfile == '-':
				batchfd = open(opts.batchfile, 'r')
			batchurls = batchfd.readlines()
			batchurls = [x.strip() for x in batchurls]
			# Drop blank lines and comment lines (starting with #, / or ;).
			batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)]
			sys.exit(u'ERROR: batch file could not be read')
	all_urls = batchurls + args

	# General configuration
	cookie_processor = urllib2.HTTPCookieProcessor(jar)
	proxy_handler = urllib2.ProxyHandler()
	opener = urllib2.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())
	urllib2.install_opener(opener)
	socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words)
		# NOTE(review): the guard (presumably `if opts.verbose:`) for this
		# debug print is missing from this copy — confirm against original.
		print(u'[debug] Proxy map: ' + str(proxy_handler.proxies))

	extractors = gen_extractors()

	# --list-extractors: show each extractor and which of the given URLs it
	# would claim; each URL is consumed by the first matching extractor.
	if opts.list_extractors:
		for ie in extractors:
			# NOTE(review): the line printing the extractor name, the body
			# of the inner loop, and the final exit are missing from this copy.
			matchedUrls = filter(lambda url: ie.suitable(url), all_urls)
			all_urls = filter(lambda url: url not in matchedUrls, all_urls)
			for mu in matchedUrls:

	# Conflicting, missing and erroneous options
	if opts.usenetrc and (opts.username is not None or opts.password is not None):
		parser.error(u'using .netrc conflicts with giving username/password')
	if opts.password is not None and opts.username is None:
		parser.error(u'account username missing')
	if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber):
		parser.error(u'using output template conflicts with using title, literal title or auto number')
	if opts.usetitle and opts.useliteral:
		parser.error(u'using title conflicts with using literal title')
	# Username given without password: prompt interactively (never echoed).
	if opts.username is not None and opts.password is None:
		opts.password = getpass.getpass(u'Type account password and press return:')
	# Normalize the textual rate limit (e.g. "50k") to a byte count.
	if opts.ratelimit is not None:
		numeric_limit = FileDownloader.parse_bytes(opts.ratelimit)
		if numeric_limit is None:
			parser.error(u'invalid rate limit specified')
		opts.ratelimit = numeric_limit
	if opts.retries is not None:
		# NOTE(review): the `try:` opening this conversion is missing here.
			opts.retries = long(opts.retries)
		except (TypeError, ValueError), err:
			parser.error(u'invalid retry count specified')
	# NOTE(review): the `try:` for the playliststart validation is missing.
		opts.playliststart = int(opts.playliststart)
		if opts.playliststart <= 0:
			raise ValueError(u'Playlist start must be positive')
	except (TypeError, ValueError), err:
		parser.error(u'invalid playlist start number specified')
	# NOTE(review): the `try:` for the playlistend validation is missing.
		opts.playlistend = int(opts.playlistend)
		# -1 is the sentinel for "until the end of the playlist".
		if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart):
			raise ValueError(u'Playlist end must be greater than playlist start')
	except (TypeError, ValueError), err:
		parser.error(u'invalid playlist end number specified')
	if opts.extractaudio:
		if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']:
			parser.error(u'invalid audio format specified')

	# Map parsed CLI flags onto the FileDownloader configuration dict.
	# NOTE(review): the closing `})` of this call is missing from this copy.
	fd = FileDownloader({
		'usenetrc': opts.usenetrc,
		'username': opts.username,
		'password': opts.password,
		# Any of the --get-* "simulate and print" flags implies quiet mode.
		'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
		'forceurl': opts.geturl,
		'forcetitle': opts.gettitle,
		'forcethumbnail': opts.getthumbnail,
		'forcedescription': opts.getdescription,
		'forcefilename': opts.getfilename,
		'forceformat': opts.getformat,
		'simulate': opts.simulate,
		# Likewise, simulation or any --get-* flag skips the actual download.
		'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat),
		'format': opts.format,
		'format_limit': opts.format_limit,
		'listformats': opts.listformats,
		# Output template: an explicit -o wins; otherwise pick a default
		# shaped by --format -1 / --title / --literal / --auto-number
		# (first truthy clause in this `or` chain is used).
		'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding()))
			or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s')
			or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s')
			or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s')
			or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s')
			or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s')
			or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s')
			or u'%(id)s.%(ext)s'),
		'ignoreerrors': opts.ignoreerrors,
		'ratelimit': opts.ratelimit,
		'nooverwrites': opts.nooverwrites,
		'retries': opts.retries,
		'continuedl': opts.continue_dl,
		'noprogress': opts.noprogress,
		'playliststart': opts.playliststart,
		'playlistend': opts.playlistend,
		# Writing the video to stdout forces log output to stderr.
		'logtostderr': opts.outtmpl == '-',
		'consoletitle': opts.consoletitle,
		'nopart': opts.nopart,
		'updatetime': opts.updatetime,
		'writedescription': opts.writedescription,
		'writeinfojson': opts.writeinfojson,
		'matchtitle': opts.matchtitle,
		'rejecttitle': opts.rejecttitle,
		'max_downloads': opts.max_downloads,
		'prefer_free_formats': opts.prefer_free_formats,
		'verbose': opts.verbose,
	# Register every extractor, in priority order (see gen_extractors).
	for extractor in extractors:
		fd.add_info_extractor(extractor)

	# Optional ffmpeg-based audio extraction post-processor.
	if opts.extractaudio:
		fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo))

	# -U/--update: self-update the script in place.
	if opts.update_self:
		updateSelf(fd, sys.argv[0])

	if len(all_urls) < 1:
		if not opts.update_self:
			parser.error(u'you must provide at least one URL')
	# NOTE(review): the `else:`/`sys.exit()` after the URL check and the
	# `try:` opening the download are missing from this copy.
		retcode = fd.download(all_urls)
	except MaxDownloadsReached:
		fd.to_screen(u'--max-download limit reached, aborting.')

	# Dump cookie jar if requested
	if opts.cookiefile is not None:
		# NOTE(review): the `try:`/`jar.save()` lines are missing here.
		except (IOError, OSError), err:
			sys.exit(u'ERROR: unable to save cookie jar')
	# NOTE(review): these handlers belong to an enclosing `try:` (and a
	# wrapper-function header) that are elided from this copy; the body of
	# the DownloadError handler (presumably a non-zero exit — TODO confirm
	# against the original) is also missing.
	except DownloadError:
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')
4644 if __name__ == '__main__': 
4647 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: