2 # -*- coding: utf-8 -*- 
   5         'Ricardo Garcia Gonzalez', 
  13         'Philipp Hagemeister', 
  17 __license__ 
= 'Public Domain' 
  18 __version__ 
= '2011.10.19' 
  20 UPDATE_URL 
= 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' 
  49 except ImportError: # Python 2.4 
  52         import cStringIO 
as StringIO
 
  56 # parse_qs was moved from the cgi module to the urlparse module recently. 
  58         from urlparse 
import parse_qs
 
  60         from cgi 
import parse_qs
 
  68         import xml
.etree
.ElementTree
 
  69 except ImportError: # Python<2.5: Not officially supported, but let it slip 
  70         warnings
.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.') 
  73         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 
  74         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  75         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  76         'Accept-Encoding': 'gzip, deflate', 
  77         'Accept-Language': 'en-us,en;q=0.5', 
  80 simple_title_chars 
= string
.ascii_letters
.decode('ascii') + string
.digits
.decode('ascii') 
  84 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): 
  90                         def raiseError(msg
, i
): 
  91                                 raise ValueError(msg 
+ ' at position ' + str(i
) + ' of ' + repr(s
) + ': ' + repr(s
[i
:])) 
  92                         def skipSpace(i
, expectMore
=True): 
  93                                 while i 
< len(s
) and s
[i
] in ' \t\r\n': 
  97                                                 raiseError('Premature end', i
) 
  99                         def decodeEscape(match
): 
 115                                                 return unichr(int(esc
[1:5], 16)) 
 116                                         if len(esc
) == 5+6 and esc
[5:7] == '\\u': 
 117                                                 hi 
= int(esc
[1:5], 16) 
 118                                                 low 
= int(esc
[7:11], 16) 
 119                                                 return unichr((hi 
- 0xd800) * 0x400 + low 
- 0xdc00 + 0x10000) 
 120                                 raise ValueError('Unknown escape ' + str(esc
)) 
 127                                         while s
[e
-bslashes
-1] == '\\': 
 129                                         if bslashes 
% 2 == 1: 
 133                                 rexp 
= re
.compile(r
'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') 
 134                                 stri 
= rexp
.sub(decodeEscape
, s
[i
:e
]) 
 140                                 if s
[i
] == '}': # Empty dictionary 
 144                                                 raiseError('Expected a string object key', i
) 
 145                                         i
,key 
= parseString(i
) 
 147                                         if i 
>= len(s
) or s
[i
] != ':': 
 148                                                 raiseError('Expected a colon', i
) 
 155                                                 raiseError('Expected comma or closing curly brace', i
) 
 160                                 if s
[i
] == ']': # Empty array 
 165                                         i 
= skipSpace(i
) # Raise exception if premature end 
 169                                                 raiseError('Expected a comma or closing bracket', i
) 
 171                         def parseDiscrete(i
): 
 172                                 for k
,v 
in {'true': True, 'false': False, 'null': None}.items(): 
 173                                         if s
.startswith(k
, i
): 
 175                                 raiseError('Not a boolean (or null)', i
) 
 177                                 mobj 
= re
.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s
[i
:]) 
 179                                         raiseError('Not a number', i
) 
 181                                 if '.' in nums 
or 'e' in nums 
or 'E' in nums
: 
 182                                         return (i
+len(nums
), float(nums
)) 
 183                                 return (i
+len(nums
), int(nums
)) 
 184                         CHARMAP 
= {'{': parseObj
, '[': parseArray
, '"': parseString
, 't': parseDiscrete
, 'f': parseDiscrete
, 'n': parseDiscrete
} 
 187                                 i
,res 
= CHARMAP
.get(s
[i
], parseNumber
)(i
) 
 188                                 i 
= skipSpace(i
, False) 
 192                                 raise ValueError('Extra data at end of input (index ' + str(i
) + ' of ' + repr(s
) + ': ' + repr(s
[i
:]) + ')') 
 195 def preferredencoding(): 
 196         """Get preferred encoding. 
 198         Returns the best encoding scheme for the system, based on 
 199         locale.getpreferredencoding() and some further tweaks. 
 201         def yield_preferredencoding(): 
 203                         pref 
= locale
.getpreferredencoding() 
 209         return yield_preferredencoding().next() 
 212 def htmlentity_transform(matchobj
): 
 213         """Transforms an HTML entity to a Unicode character. 
 215         This function receives a match object and is intended to be used with 
 216         the re.sub() function. 
 218         entity 
= matchobj
.group(1) 
 220         # Known non-numeric HTML entity 
 221         if entity 
in htmlentitydefs
.name2codepoint
: 
 222                 return unichr(htmlentitydefs
.name2codepoint
[entity
]) 
 225         mobj 
= re
.match(ur
'(?u)#(x?\d+)', entity
) 
 227                 numstr 
= mobj
.group(1) 
 228                 if numstr
.startswith(u
'x'): 
 230                         numstr 
= u
'0%s' % numstr
 
 233                 return unichr(long(numstr
, base
)) 
 235         # Unknown entity in name, return its literal representation 
 236         return (u
'&%s;' % entity
) 
 239 def sanitize_title(utitle
): 
 240         """Sanitizes a video title so it could be used as part of a filename.""" 
 241         utitle 
= re
.sub(ur
'(?u)&(.+?);', htmlentity_transform
, utitle
) 
 242         return utitle
.replace(unicode(os
.sep
), u
'%') 
 245 def sanitize_open(filename
, open_mode
): 
 246         """Try to open the given filename, and slightly tweak it if this fails. 
 248         Attempts to open the given filename. If this fails, it tries to change 
 249         the filename slightly, step by step, until it's either able to open it 
 250         or it fails and raises a final exception, like the standard open() 
 253         It returns the tuple (stream, definitive_file_name). 
 257                         if sys
.platform 
== 'win32': 
 259                                 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
) 
 260                         return (sys
.stdout
, filename
) 
 261                 stream 
= open(filename
, open_mode
) 
 262                 return (stream
, filename
) 
 263         except (IOError, OSError), err
: 
 264                 # In case of error, try to remove win32 forbidden chars 
 265                 filename 
= re
.sub(ur
'[/<>:"\|\?\*]', u
'#', filename
) 
 267                 # An exception here should be caught in the caller 
 268                 stream 
= open(filename
, open_mode
) 
 269                 return (stream
, filename
) 
 272 def timeconvert(timestr
): 
 273         """Convert RFC 2822 defined time string into system timestamp""" 
 275         timetuple 
= email
.utils
.parsedate_tz(timestr
) 
 276         if timetuple 
is not None: 
 277                 timestamp 
= email
.utils
.mktime_tz(timetuple
) 
 281 class DownloadError(Exception): 
 282         """Download Error exception. 
 284         This exception may be thrown by FileDownloader objects if they are not 
 285         configured to continue on errors. They will contain the appropriate 
 291 class SameFileError(Exception): 
 292         """Same File exception. 
 294         This exception will be thrown by FileDownloader objects if they detect 
 295         multiple files would have to be downloaded to the same file on disk. 
 300 class PostProcessingError(Exception): 
 301         """Post Processing exception. 
 303         This exception may be raised by PostProcessor's .run() method to 
 304         indicate an error in the postprocessing task. 
 309 class UnavailableVideoError(Exception): 
 310         """Unavailable Format exception. 
 312         This exception will be thrown when a video is requested 
 313         in a format that is not available for that video. 
 318 class ContentTooShortError(Exception): 
 319         """Content Too Short exception. 
 321         This exception may be raised by FileDownloader objects when a file they 
 322         download is too small for what the server announced first, indicating 
 323         the connection was probably interrupted. 
 329         def __init__(self
, downloaded
, expected
): 
 330                 self
.downloaded 
= downloaded
 
 331                 self
.expected 
= expected
 
 334 class YoutubeDLHandler(urllib2
.HTTPHandler
): 
 335         """Handler for HTTP requests and responses. 
 337         This class, when installed with an OpenerDirector, automatically adds 
 338         the standard headers to every HTTP request and handles gzipped and 
 339         deflated responses from web servers. If compression is to be avoided in 
 340         a particular request, the original request in the program code only has 
 341         to include the HTTP header "Youtubedl-No-Compression", which will be 
 342         removed before making the real request. 
 344         Part of this code was copied from: 
 346         http://techknack.net/python-urllib2-handlers/ 
 348         Andrew Rowls, the author of that code, agreed to release it to the 
 355                         return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 357                         return zlib
.decompress(data
) 
 360         def addinfourl_wrapper(stream
, headers
, url
, code
): 
 361                 if hasattr(urllib2
.addinfourl
, 'getcode'): 
 362                         return urllib2
.addinfourl(stream
, headers
, url
, code
) 
 363                 ret 
= urllib2
.addinfourl(stream
, headers
, url
) 
 367         def http_request(self
, req
): 
 368                 for h 
in std_headers
: 
 371                         req
.add_header(h
, std_headers
[h
]) 
 372                 if 'Youtubedl-no-compression' in req
.headers
: 
 373                         if 'Accept-encoding' in req
.headers
: 
 374                                 del req
.headers
['Accept-encoding'] 
 375                         del req
.headers
['Youtubedl-no-compression'] 
 378         def http_response(self
, req
, resp
): 
 381                 if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 382                         gz 
= gzip
.GzipFile(fileobj
=StringIO
.StringIO(resp
.read()), mode
='r') 
 383                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 384                         resp
.msg 
= old_resp
.msg
 
 386                 if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 387                         gz 
= StringIO
.StringIO(self
.deflate(resp
.read())) 
 388                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 389                         resp
.msg 
= old_resp
.msg
 
 393 class FileDownloader(object): 
 394         """File Downloader class. 
 396         File downloader objects are the ones responsible of downloading the 
 397         actual video file and writing it to disk if the user has requested 
 398         it, among some other tasks. In most cases there should be one per 
 399         program. As, given a video URL, the downloader doesn't know how to 
 400         extract all the needed information, task that InfoExtractors do, it 
 401         has to pass the URL to one of them. 
 403         For this, file downloader objects have a method that allows 
 404         InfoExtractors to be registered in a given order. When it is passed 
 405         a URL, the file downloader handles it to the first InfoExtractor it 
 406         finds that reports being able to handle it. The InfoExtractor extracts 
 407         all the information about the video or videos the URL refers to, and 
 408         asks the FileDownloader to process the video information, possibly 
 409         downloading the video. 
 411         File downloaders accept a lot of parameters. In order not to saturate 
 412         the object constructor with arguments, it receives a dictionary of 
 413         options instead. These options are available through the params 
 414         attribute for the InfoExtractors to use. The FileDownloader also 
 415         registers itself as the downloader in charge for the InfoExtractors 
 416         that are added to it, so this is a "mutual registration". 
 420         username:         Username for authentication purposes. 
 421         password:         Password for authentication purposes. 
 422         usenetrc:         Use netrc for authentication instead. 
 423         quiet:            Do not print messages to stdout. 
 424         forceurl:         Force printing final URL. 
 425         forcetitle:       Force printing title. 
 426         forcethumbnail:   Force printing thumbnail URL. 
 427         forcedescription: Force printing description. 
 428         forcefilename:    Force printing final filename. 
 429         simulate:         Do not download the video files. 
 430         format:           Video format code. 
 431         format_limit:     Highest quality format to try. 
 432         outtmpl:          Template for output names. 
 433         ignoreerrors:     Do not stop on download errors. 
 434         ratelimit:        Download speed limit, in bytes/sec. 
 435         nooverwrites:     Prevent overwriting files. 
 436         retries:          Number of times to retry for HTTP error 5xx 
 437         continuedl:       Try to continue downloads if possible. 
 438         noprogress:       Do not print the progress bar. 
 439         playliststart:    Playlist item to start at. 
 440         playlistend:      Playlist item to end at. 
 441         matchtitle:       Download only matching titles. 
 442         rejecttitle:      Reject downloads for matching titles. 
 443         logtostderr:      Log messages to stderr instead of stdout. 
 444         consoletitle:     Display progress in console window's titlebar. 
 445         nopart:           Do not use temporary .part files. 
 446         updatetime:       Use the Last-modified header to set output file timestamps. 
 447         writedescription: Write the video description to a .description file 
 448         writeinfojson:    Write the video description to a .info.json file 
 454         _download_retcode 
= None 
 455         _num_downloads 
= None 
 458         def __init__(self
, params
): 
 459                 """Create a FileDownloader object with the given options.""" 
 462                 self
._download
_retcode 
= 0 
 463                 self
._num
_downloads 
= 0 
 464                 self
._screen
_file 
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)] 
 468         def format_bytes(bytes): 
 471                 if type(bytes) is str: 
 476                         exponent 
= long(math
.log(bytes, 1024.0)) 
 477                 suffix 
= 'bkMGTPEZY'[exponent
] 
 478                 converted 
= float(bytes) / float(1024 ** exponent
) 
 479                 return '%.2f%s' % (converted
, suffix
) 
 482         def calc_percent(byte_counter
, data_len
): 
 485                 return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0)) 
 488         def calc_eta(start
, now
, total
, current
): 
 492                 if current 
== 0 or dif 
< 0.001: # One millisecond 
 494                 rate 
= float(current
) / dif
 
 495                 eta 
= long((float(total
) - float(current
)) / rate
) 
 496                 (eta_mins
, eta_secs
) = divmod(eta
, 60) 
 499                 return '%02d:%02d' % (eta_mins
, eta_secs
) 
 502         def calc_speed(start
, now
, bytes): 
 504                 if bytes == 0 or dif 
< 0.001: # One millisecond 
 505                         return '%10s' % '---b/s' 
 506                 return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
)) 
 509         def best_block_size(elapsed_time
, bytes): 
 510                 new_min 
= max(bytes / 2.0, 1.0) 
 511                 new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
 512                 if elapsed_time 
< 0.001: 
 514                 rate 
= bytes / elapsed_time
 
 522         def parse_bytes(bytestr
): 
 523                 """Parse a string indicating a byte quantity into a long integer.""" 
 524                 matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 527                 number 
= float(matchobj
.group(1)) 
 528                 multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 529                 return long(round(number 
* multiplier
)) 
 531         def add_info_extractor(self
, ie
): 
 532                 """Add an InfoExtractor object to the end of the list.""" 
 534                 ie
.set_downloader(self
) 
 536         def add_post_processor(self
, pp
): 
 537                 """Add a PostProcessor object to the end of the chain.""" 
 539                 pp
.set_downloader(self
) 
 541         def to_screen(self
, message
, skip_eol
=False, ignore_encoding_errors
=False): 
 542                 """Print message to stdout if not in quiet mode.""" 
 544                         if not self
.params
.get('quiet', False): 
 545                                 terminator 
= [u
'\n', u
''][skip_eol
] 
 546                                 print >>self
._screen
_file
, (u
'%s%s' % (message
, terminator
)).encode(preferredencoding()), 
 547                         self
._screen
_file
.flush() 
 548                 except (UnicodeEncodeError), err
: 
 549                         if not ignore_encoding_errors
: 
	def to_stderr(self, message):
		"""Print message to stderr."""
		encoded = message.encode(preferredencoding())
		print >>sys.stderr, encoded
 556         def to_cons_title(self
, message
): 
 557                 """Set console/terminal window title to message.""" 
 558                 if not self
.params
.get('consoletitle', False): 
 560                 if os
.name 
== 'nt' and ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 561                         # c_wchar_p() might not be necessary if `message` is 
 562                         # already of type unicode() 
 563                         ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 564                 elif 'TERM' in os
.environ
: 
 565                         sys
.stderr
.write('\033]0;%s\007' % message
.encode(preferredencoding())) 
 567         def fixed_template(self
): 
 568                 """Checks if the output template is fixed.""" 
 569                 return (re
.search(ur
'(?u)%\(.+?\)s', self
.params
['outtmpl']) is None) 
	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on if the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		if not self.params.get('ignoreerrors', False):
			raise DownloadError(message)
		self._download_retcode = 1
 584         def slow_down(self
, start_time
, byte_counter
): 
 585                 """Sleep if the download speed is over the rate limit.""" 
 586                 rate_limit 
= self
.params
.get('ratelimit', None) 
 587                 if rate_limit 
is None or byte_counter 
== 0: 
 590                 elapsed 
= now 
- start_time
 
 593                 speed 
= float(byte_counter
) / elapsed
 
 594                 if speed 
> rate_limit
: 
 595                         time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 597         def temp_name(self
, filename
): 
 598                 """Returns a temporary filename for the given filename.""" 
 599                 if self
.params
.get('nopart', False) or filename 
== u
'-' or \
 
 600                                 (os
.path
.exists(filename
) and not os
.path
.isfile(filename
)): 
 602                 return filename 
+ u
'.part' 
 604         def undo_temp_name(self
, filename
): 
 605                 if filename
.endswith(u
'.part'): 
 606                         return filename
[:-len(u
'.part')] 
 609         def try_rename(self
, old_filename
, new_filename
): 
 611                         if old_filename 
== new_filename
: 
 613                         os
.rename(old_filename
, new_filename
) 
 614                 except (IOError, OSError), err
: 
 615                         self
.trouble(u
'ERROR: unable to rename file') 
	def try_utime(self, filename, last_modified_hdr):
		"""Try to set the last-modified time of the given file."""
		if last_modified_hdr is None:
			return
		if not os.path.isfile(filename):
			return
		timestr = last_modified_hdr
		if timestr is None:
			return
		filetime = timeconvert(timestr)
		if filetime is None:
			return
		try:
			os.utime(filename, (time.time(), filetime))
		except:
			# Best-effort: failing to set the mtime is never fatal.
			pass
 635         def report_writedescription(self
, descfn
): 
 636                 """ Report that the description file is being written """ 
 637                 self
.to_screen(u
'[info] Writing video description to: %s' % descfn
, ignore_encoding_errors
=True) 
 639         def report_writeinfojson(self
, infofn
): 
 640                 """ Report that the metadata file has been written """ 
 641                 self
.to_screen(u
'[info] Video description metadata as JSON to: %s' % infofn
, ignore_encoding_errors
=True) 
 643         def report_destination(self
, filename
): 
 644                 """Report destination filename.""" 
 645                 self
.to_screen(u
'[download] Destination: %s' % filename
, ignore_encoding_errors
=True) 
 647         def report_progress(self
, percent_str
, data_len_str
, speed_str
, eta_str
): 
 648                 """Report download progress.""" 
 649                 if self
.params
.get('noprogress', False): 
 651                 self
.to_screen(u
'\r[download] %s of %s at %s ETA %s' % 
 652                                 (percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 653                 self
.to_cons_title(u
'youtube-dl - %s of %s at %s ETA %s' % 
 654                                 (percent_str
.strip(), data_len_str
.strip(), speed_str
.strip(), eta_str
.strip())) 
 656         def report_resuming_byte(self
, resume_len
): 
 657                 """Report attempt to resume at given byte.""" 
 658                 self
.to_screen(u
'[download] Resuming download at byte %s' % resume_len
) 
 660         def report_retry(self
, count
, retries
): 
 661                 """Report retry in case of HTTP error 5xx""" 
 662                 self
.to_screen(u
'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count
, retries
)) 
 664         def report_file_already_downloaded(self
, file_name
): 
 665                 """Report file has already been fully downloaded.""" 
 667                         self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 668                 except (UnicodeEncodeError), err
: 
 669                         self
.to_screen(u
'[download] The file has already been downloaded') 
 671         def report_unable_to_resume(self
): 
 672                 """Report it was impossible to resume download.""" 
 673                 self
.to_screen(u
'[download] Unable to resume') 
 675         def report_finish(self
): 
 676                 """Report download finished.""" 
 677                 if self
.params
.get('noprogress', False): 
 678                         self
.to_screen(u
'[download] Download completed') 
 682         def increment_downloads(self
): 
 683                 """Increment the ordinal that assigns a number to each file.""" 
 684                 self
._num
_downloads 
+= 1 
 686         def prepare_filename(self
, info_dict
): 
 687                 """Generate the output filename.""" 
 689                         template_dict 
= dict(info_dict
) 
 690                         template_dict
['epoch'] = unicode(long(time
.time())) 
 691                         template_dict
['autonumber'] = unicode('%05d' % self
._num
_downloads
) 
 692                         filename 
= self
.params
['outtmpl'] % template_dict
 
 694                 except (ValueError, KeyError), err
: 
 695                         self
.trouble(u
'ERROR: invalid system charset or erroneous output template') 
 698         def process_info(self
, info_dict
): 
 699                 """Process a single dictionary returned by an InfoExtractor.""" 
 700                 filename 
= self
.prepare_filename(info_dict
) 
 703                 if self
.params
.get('forcetitle', False): 
 704                         print info_dict
['title'].encode(preferredencoding(), 'xmlcharrefreplace') 
 705                 if self
.params
.get('forceurl', False): 
 706                         print info_dict
['url'].encode(preferredencoding(), 'xmlcharrefreplace') 
 707                 if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
: 
 708                         print info_dict
['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') 
 709                 if self
.params
.get('forcedescription', False) and 'description' in info_dict
: 
 710                         print info_dict
['description'].encode(preferredencoding(), 'xmlcharrefreplace') 
 711                 if self
.params
.get('forcefilename', False) and filename 
is not None: 
 712                         print filename
.encode(preferredencoding(), 'xmlcharrefreplace') 
 713                 if self
.params
.get('forceformat', False): 
 714                         print info_dict
['format'].encode(preferredencoding(), 'xmlcharrefreplace') 
 716                 # Do nothing else if in simulate mode 
 717                 if self
.params
.get('simulate', False): 
 723                 matchtitle
=self
.params
.get('matchtitle',False) 
 724                 rejecttitle
=self
.params
.get('rejecttitle',False) 
 725                 title
=info_dict
['title'].encode(preferredencoding(), 'xmlcharrefreplace') 
 726                 if matchtitle 
and not re
.search(matchtitle
, title
, re
.IGNORECASE
): 
 727                         self
.to_screen(u
'[download] "%s" title did not match pattern "%s"' % (title
, matchtitle
)) 
 729                 if rejecttitle 
and re
.search(rejecttitle
, title
, re
.IGNORECASE
): 
 730                         self
.to_screen(u
'[download] "%s" title matched reject pattern "%s"' % (title
, rejecttitle
)) 
 733                 if self
.params
.get('nooverwrites', False) and os
.path
.exists(filename
): 
 734                         self
.to_stderr(u
'WARNING: file exists and will be skipped') 
 738                         dn 
= os
.path
.dirname(filename
) 
 739                         if dn 
!= '' and not os
.path
.exists(dn
): 
 741                 except (OSError, IOError), err
: 
 742                         self
.trouble(u
'ERROR: unable to create directory ' + unicode(err
)) 
 745                 if self
.params
.get('writedescription', False): 
 747                                 descfn 
= filename 
+ '.description' 
 748                                 self
.report_writedescription(descfn
) 
 749                                 descfile 
= open(descfn
, 'wb') 
 751                                         descfile
.write(info_dict
['description'].encode('utf-8')) 
 754                         except (OSError, IOError): 
 755                                 self
.trouble(u
'ERROR: Cannot write description file ' + descfn
) 
 758                 if self
.params
.get('writeinfojson', False): 
 759                         infofn 
= filename 
+ '.info.json' 
 760                         self
.report_writeinfojson(infofn
) 
 763                         except (NameError,AttributeError): 
 764                                 self
.trouble(u
'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') 
 767                                 infof 
= open(infofn
, 'wb') 
 769                                         json_info_dict 
= dict((k
,v
) for k
,v 
in info_dict
.iteritems() if not k 
in ('urlhandle',)) 
 770                                         json
.dump(json_info_dict
, infof
) 
 773                         except (OSError, IOError): 
 774                                 self
.trouble(u
'ERROR: Cannot write metadata to JSON file ' + infofn
) 
 777                 if not self
.params
.get('skip_download', False): 
 779                                 success 
= self
._do
_download
(filename
, info_dict
) 
 780                         except (OSError, IOError), err
: 
 781                                 raise UnavailableVideoError
 
 782                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 783                                 self
.trouble(u
'ERROR: unable to download video data: %s' % str(err
)) 
 785                         except (ContentTooShortError
, ), err
: 
 786                                 self
.trouble(u
'ERROR: content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
)) 
 791                                         self
.post_process(filename
, info_dict
) 
 792                                 except (PostProcessingError
), err
: 
 793                                         self
.trouble(u
'ERROR: postprocessing: %s' % str(err
)) 
 796         def download(self
, url_list
): 
 797                 """Download a given list of URLs.""" 
 798                 if len(url_list
) > 1 and self
.fixed_template(): 
 799                         raise SameFileError(self
.params
['outtmpl']) 
 802                         suitable_found 
= False 
 804                                 # Go to next InfoExtractor if not suitable 
 805                                 if not ie
.suitable(url
): 
 808                                 # Suitable InfoExtractor found 
 809                                 suitable_found 
= True 
 811                                 # Extract information from URL and process it 
 814                                 # Suitable InfoExtractor had been found; go to next URL 
 817                         if not suitable_found
: 
 818                                 self
.trouble(u
'ERROR: no suitable InfoExtractor: %s' % url
) 
 820                 return self
._download
_retcode
 
 822         def post_process(self
, filename
, ie_info
): 
 823                 """Run the postprocessing chain on the given file.""" 
 825                 info
['filepath'] = filename
 
 831         def _download_with_rtmpdump(self
, filename
, url
, player_url
): 
 832                 self
.report_destination(filename
) 
 833                 tmpfilename 
= self
.temp_name(filename
) 
 835                 # Check for rtmpdump first 
 837                         subprocess
.call(['rtmpdump', '-h'], stdout
=(file(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 838                 except (OSError, IOError): 
 839                         self
.trouble(u
'ERROR: RTMP download detected but "rtmpdump" could not be run') 
 842                 # Download using rtmpdump. rtmpdump returns exit code 2 when 
 843                 # the connection was interrumpted and resuming appears to be 
 844                 # possible. This is part of rtmpdump's normal usage, AFAIK. 
 845                 basic_args 
= ['rtmpdump', '-q'] + [[], ['-W', player_url
]][player_url 
is not None] + ['-r', url
, '-o', tmpfilename
] 
 846                 retval 
= subprocess
.call(basic_args 
+ [[], ['-e', '-k', '1']][self
.params
.get('continuedl', False)]) 
 847                 while retval 
== 2 or retval 
== 1: 
 848                         prevsize 
= os
.path
.getsize(tmpfilename
) 
 849                         self
.to_screen(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 850                         time
.sleep(5.0) # This seems to be needed 
 851                         retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 852                         cursize 
= os
.path
.getsize(tmpfilename
) 
 853                         if prevsize 
== cursize 
and retval 
== 1: 
 855                          # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those 
 856                         if prevsize 
== cursize 
and retval 
== 2 and cursize 
> 1024: 
 857                                 self
.to_screen(u
'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') 
 861                         self
.to_screen(u
'\r[rtmpdump] %s bytes' % os
.path
.getsize(tmpfilename
)) 
 862                         self
.try_rename(tmpfilename
, filename
) 
 865                         self
.trouble(u
'\nERROR: rtmpdump exited with code %d' % retval
) 
 868         def _do_download(self
, filename
, info_dict
): 
 869                 url 
= info_dict
['url'] 
 870                 player_url 
= info_dict
.get('player_url', None) 
 872                 # Check file already present 
 873                 if self
.params
.get('continuedl', False) and os
.path
.isfile(filename
) and not self
.params
.get('nopart', False): 
 874                         self
.report_file_already_downloaded(filename
) 
 877                 # Attempt to download using rtmpdump 
 878                 if url
.startswith('rtmp'): 
 879                         return self
._download
_with
_rtmpdump
(filename
, url
, player_url
) 
 881                 tmpfilename 
= self
.temp_name(filename
) 
 884                 # Do not include the Accept-Encoding header 
 885                 headers 
= {'Youtubedl-no-compression': 'True'} 
 886                 basic_request 
= urllib2
.Request(url
, None, headers
) 
 887                 request 
= urllib2
.Request(url
, None, headers
) 
 889                 # Establish possible resume length 
 890                 if os
.path
.isfile(tmpfilename
): 
 891                         resume_len 
= os
.path
.getsize(tmpfilename
) 
 897                         if self
.params
.get('continuedl', False): 
 898                                 self
.report_resuming_byte(resume_len
) 
 899                                 request
.add_header('Range','bytes=%d-' % resume_len
) 
 905                 retries 
= self
.params
.get('retries', 0) 
 906                 while count 
<= retries
: 
 907                         # Establish connection 
 909                                 if count 
== 0 and 'urlhandle' in info_dict
: 
 910                                         data 
= info_dict
['urlhandle'] 
 911                                 data 
= urllib2
.urlopen(request
) 
 913                         except (urllib2
.HTTPError
, ), err
: 
 914                                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 915                                         # Unexpected HTTP error 
 917                                 elif err
.code 
== 416: 
 918                                         # Unable to resume (requested range not satisfiable) 
 920                                                 # Open the connection again without the range header 
 921                                                 data 
= urllib2
.urlopen(basic_request
) 
 922                                                 content_length 
= data
.info()['Content-Length'] 
 923                                         except (urllib2
.HTTPError
, ), err
: 
 924                                                 if err
.code 
< 500 or err
.code 
>= 600: 
 927                                                 # Examine the reported length 
 928                                                 if (content_length 
is not None and 
 929                                                                 (resume_len 
- 100 < long(content_length
) < resume_len 
+ 100)): 
 930                                                         # The file had already been fully downloaded. 
 931                                                         # Explanation to the above condition: in issue #175 it was revealed that 
 932                                                         # YouTube sometimes adds or removes a few bytes from the end of the file, 
 933                                                         # changing the file size slightly and causing problems for some users. So 
 934                                                         # I decided to implement a suggested change and consider the file 
 935                                                         # completely downloaded if the file size differs less than 100 bytes from 
 936                                                         # the one in the hard drive. 
 937                                                         self
.report_file_already_downloaded(filename
) 
 938                                                         self
.try_rename(tmpfilename
, filename
) 
 941                                                         # The length does not match, we start the download over 
 942                                                         self
.report_unable_to_resume() 
 948                                 self
.report_retry(count
, retries
) 
 951                         self
.trouble(u
'ERROR: giving up after %s retries' % retries
) 
 954                 data_len 
= data
.info().get('Content-length', None) 
 955                 if data_len 
is not None: 
 956                         data_len 
= long(data_len
) + resume_len
 
 957                 data_len_str 
= self
.format_bytes(data_len
) 
 958                 byte_counter 
= 0 + resume_len
 
 964                         data_block 
= data
.read(block_size
) 
 966                         if len(data_block
) == 0: 
 968                         byte_counter 
+= len(data_block
) 
 970                         # Open file just in time 
 973                                         (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
 974                                         assert stream 
is not None 
 975                                         filename 
= self
.undo_temp_name(tmpfilename
) 
 976                                         self
.report_destination(filename
) 
 977                                 except (OSError, IOError), err
: 
 978                                         self
.trouble(u
'ERROR: unable to open for writing: %s' % str(err
)) 
 981                                 stream
.write(data_block
) 
 982                         except (IOError, OSError), err
: 
 983                                 self
.trouble(u
'\nERROR: unable to write data: %s' % str(err
)) 
 985                         block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
 988                         speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
 990                                 self
.report_progress('Unknown %', data_len_str
, speed_str
, 'Unknown ETA') 
 992                                 percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
 993                                 eta_str 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
 994                                 self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
 997                         self
.slow_down(start
, byte_counter 
- resume_len
) 
1000                         self
.trouble(u
'\nERROR: Did not get any data blocks') 
1003                 self
.report_finish() 
1004                 if data_len 
is not None and byte_counter 
!= data_len
: 
1005                         raise ContentTooShortError(byte_counter
, long(data_len
)) 
1006                 self
.try_rename(tmpfilename
, filename
) 
1008                 # Update file modification time 
1009                 if self
.params
.get('updatetime', True): 
1010                         info_dict
['filetime'] = self
.try_utime(filename
, data
.info().get('last-modified', None)) 
1015 class InfoExtractor(object): 
1016         """Information Extractor class. 
1018         Information extractors are the classes that, given a URL, extract 
1019         information from the video (or videos) the URL refers to. This 
1020         information includes the real video URL, the video title and simplified 
1021         title, author and others. The information is stored in a dictionary 
1022         which is then passed to the FileDownloader. The FileDownloader 
1023         processes this information possibly downloading the video to the file 
1024         system, among other possible outcomes. The dictionaries must include 
1025         the following fields: 
1027         id:             Video identifier. 
1028         url:            Final video URL. 
1029         uploader:       Nickname of the video uploader. 
1030         title:          Literal title. 
1031         stitle:         Simplified title. 
1032         ext:            Video filename extension. 
1033         format:         Video format. 
1034         player_url:     SWF Player URL (may be None). 
1036         The following fields are optional. Their primary purpose is to allow 
1037         youtube-dl to serve as the backend for a video search function, such 
1038         as the one in youtube2mp3.  They are only used when their respective 
1039         forced printing functions are called: 
1041         thumbnail:      Full URL to a video thumbnail image. 
1042         description:    One-line video description. 
1044         Subclasses of this one should re-define the _real_initialize() and 
1045         _real_extract() methods and define a _VALID_URL regexp. 
1046         Probably, they should also be added to the list of extractors. 
1052         def __init__(self
, downloader
=None): 
1053                 """Constructor. Receives an optional downloader.""" 
1055                 self
.set_downloader(downloader
) 
1057         def suitable(self
, url
): 
1058                 """Receives a URL and returns True if suitable for this IE.""" 
1059                 return re
.match(self
._VALID
_URL
, url
) is not None 
1061         def initialize(self
): 
1062                 """Initializes an instance (authentication, etc).""" 
1064                         self
._real
_initialize
() 
1067         def extract(self
, url
): 
1068                 """Extracts URL information and returns it in list of dicts.""" 
1070                 return self
._real
_extract
(url
) 
1072         def set_downloader(self
, downloader
): 
1073                 """Sets the downloader for this IE.""" 
1074                 self
._downloader 
= downloader
 
1076         def _real_initialize(self
): 
1077                 """Real initialization process. Redefine in subclasses.""" 
1080         def _real_extract(self
, url
): 
1081                 """Real extraction process. Redefine in subclasses.""" 
1085 class YoutubeIE(InfoExtractor
): 
1086         """Information extractor for youtube.com.""" 
1088         _VALID_URL 
= r
'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' 
1089         _LANG_URL 
= r
'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
1090         _LOGIN_URL 
= 'https://www.youtube.com/signup?next=/&gl=US&hl=en' 
1091         _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
1092         _NETRC_MACHINE 
= 'youtube' 
1093         # Listed in order of quality 
1094         _available_formats 
= ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] 
1095         _video_extensions 
= { 
1101                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever 
1106         _video_dimensions 
= { 
1121         IE_NAME 
= u
'youtube' 
1123         def report_lang(self
): 
1124                 """Report attempt to set language.""" 
1125                 self
._downloader
.to_screen(u
'[youtube] Setting language') 
1127         def report_login(self
): 
1128                 """Report attempt to log in.""" 
1129                 self
._downloader
.to_screen(u
'[youtube] Logging in') 
1131         def report_age_confirmation(self
): 
1132                 """Report attempt to confirm age.""" 
1133                 self
._downloader
.to_screen(u
'[youtube] Confirming age') 
1135         def report_video_webpage_download(self
, video_id
): 
1136                 """Report attempt to download video webpage.""" 
1137                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video webpage' % video_id
) 
1139         def report_video_info_webpage_download(self
, video_id
): 
1140                 """Report attempt to download video info webpage.""" 
1141                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video info webpage' % video_id
) 
1143         def report_information_extraction(self
, video_id
): 
1144                 """Report attempt to extract video information.""" 
1145                 self
._downloader
.to_screen(u
'[youtube] %s: Extracting video information' % video_id
) 
1147         def report_unavailable_format(self
, video_id
, format
): 
1148                 """Report extracted video URL.""" 
1149                 self
._downloader
.to_screen(u
'[youtube] %s: Format %s not available' % (video_id
, format
)) 
1151         def report_rtmp_download(self
): 
1152                 """Indicate the download will use the RTMP protocol.""" 
1153                 self
._downloader
.to_screen(u
'[youtube] RTMP download detected') 
1155         def _print_formats(self
, formats
): 
1156                 print 'Available formats:' 
1158                         print '%s\t:\t%s\t[%s]' %(x
, self
._video
_extensions
.get(x
, 'flv'), self
._video
_dimensions
.get(x
, '???')) 
1160         def _real_initialize(self
): 
1161                 if self
._downloader 
is None: 
1166                 downloader_params 
= self
._downloader
.params
 
1168                 # Attempt to use provided username and password or .netrc data 
1169                 if downloader_params
.get('username', None) is not None: 
1170                         username 
= downloader_params
['username'] 
1171                         password 
= downloader_params
['password'] 
1172                 elif downloader_params
.get('usenetrc', False): 
1174                                 info 
= netrc
.netrc().authenticators(self
._NETRC
_MACHINE
) 
1175                                 if info 
is not None: 
1179                                         raise netrc
.NetrcParseError('No authenticators for %s' % self
._NETRC
_MACHINE
) 
1180                         except (IOError, netrc
.NetrcParseError
), err
: 
1181                                 self
._downloader
.to_stderr(u
'WARNING: parsing .netrc: %s' % str(err
)) 
1185                 request 
= urllib2
.Request(self
._LANG
_URL
) 
1188                         urllib2
.urlopen(request
).read() 
1189                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1190                         self
._downloader
.to_stderr(u
'WARNING: unable to set language: %s' % str(err
)) 
1193                 # No authentication to be performed 
1194                 if username 
is None: 
1199                                 'current_form': 'loginForm', 
1201                                 'action_login': 'Log In', 
1202                                 'username':     username
, 
1203                                 'password':     password
, 
1205                 request 
= urllib2
.Request(self
._LOGIN
_URL
, urllib
.urlencode(login_form
)) 
1208                         login_results 
= urllib2
.urlopen(request
).read() 
1209                         if re
.search(r
'(?i)<form[^>]* name="loginForm"', login_results
) is not None: 
1210                                 self
._downloader
.to_stderr(u
'WARNING: unable to log in: bad username or password') 
1212                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1213                         self
._downloader
.to_stderr(u
'WARNING: unable to log in: %s' % str(err
)) 
1219                                 'action_confirm':       'Confirm', 
1221                 request 
= urllib2
.Request(self
._AGE
_URL
, urllib
.urlencode(age_form
)) 
1223                         self
.report_age_confirmation() 
1224                         age_results 
= urllib2
.urlopen(request
).read() 
1225                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1226                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1229         def _real_extract(self
, url
): 
1230                 # Extract video id from URL 
1231                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1233                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1235                 video_id 
= mobj
.group(2) 
1238                 self
.report_video_webpage_download(video_id
) 
1239                 request 
= urllib2
.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
) 
1241                         video_webpage 
= urllib2
.urlopen(request
).read() 
1242                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1243                         self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
1246                 # Attempt to extract SWF player URL 
1247                 mobj 
= re
.search(r
'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
) 
1248                 if mobj 
is not None: 
1249                         player_url 
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1)) 
1254                 self
.report_video_info_webpage_download(video_id
) 
1255                 for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
1256                         video_info_url 
= ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
1257                                         % (video_id
, el_type
)) 
1258                         request 
= urllib2
.Request(video_info_url
) 
1260                                 video_info_webpage 
= urllib2
.urlopen(request
).read() 
1261                                 video_info 
= parse_qs(video_info_webpage
) 
1262                                 if 'token' in video_info
: 
1264                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1265                                 self
._downloader
.trouble(u
'ERROR: unable to download video info webpage: %s' % str(err
)) 
1267                 if 'token' not in video_info
: 
1268                         if 'reason' in video_info
: 
1269                                 self
._downloader
.trouble(u
'ERROR: YouTube said: %s' % video_info
['reason'][0].decode('utf-8')) 
1271                                 self
._downloader
.trouble(u
'ERROR: "token" parameter not in video info for unknown reason') 
1274                 # Start extracting information 
1275                 self
.report_information_extraction(video_id
) 
1278                 if 'author' not in video_info
: 
1279                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1281                 video_uploader 
= urllib
.unquote_plus(video_info
['author'][0]) 
1284                 if 'title' not in video_info
: 
1285                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1287                 video_title 
= urllib
.unquote_plus(video_info
['title'][0]) 
1288                 video_title 
= video_title
.decode('utf-8') 
1289                 video_title 
= sanitize_title(video_title
) 
1292                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1293                 simple_title 
= simple_title
.strip(ur
'_') 
1296                 if 'thumbnail_url' not in video_info
: 
1297                         self
._downloader
.trouble(u
'WARNING: unable to extract video thumbnail') 
1298                         video_thumbnail 
= '' 
1299                 else:   # don't panic if we can't find it 
1300                         video_thumbnail 
= urllib
.unquote_plus(video_info
['thumbnail_url'][0]) 
1304                 mobj 
= re
.search(r
'id="eow-date.*?>(.*?)</span>', video_webpage
, re
.DOTALL
) 
1305                 if mobj 
is not None: 
1306                         upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
1307                         format_expressions 
= ['%d %B %Y', '%B %d %Y', '%b %d %Y'] 
1308                         for expression 
in format_expressions
: 
1310                                         upload_date 
= datetime
.datetime
.strptime(upload_date
, expression
).strftime('%Y%m%d') 
1318                         video_description 
= u
'No description available.' 
1319                         if self
._downloader
.params
.get('forcedescription', False) or self
._downloader
.params
.get('writedescription', False): 
1320                                 mobj 
= re
.search(r
'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage
) 
1321                                 if mobj 
is not None: 
1322                                         video_description 
= mobj
.group(1).decode('utf-8') 
1324                         html_parser 
= lxml
.etree
.HTMLParser(encoding
='utf-8') 
1325                         vwebpage_doc 
= lxml
.etree
.parse(StringIO
.StringIO(video_webpage
), html_parser
) 
1326                         video_description 
= u
''.join(vwebpage_doc
.xpath('id("eow-description")//text()')) 
1327                         # TODO use another parser 
1330                 video_token 
= urllib
.unquote_plus(video_info
['token'][0]) 
1332                 # Decide which formats to download 
1333                 req_format 
= self
._downloader
.params
.get('format', None) 
1335                 if 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
1336                         self
.report_rtmp_download() 
1337                         video_url_list 
= [(None, video_info
['conn'][0])] 
1338                 elif 'url_encoded_fmt_stream_map' in video_info 
and len(video_info
['url_encoded_fmt_stream_map']) >= 1: 
1339                         url_data_strs 
= video_info
['url_encoded_fmt_stream_map'][0].split(',') 
1340                         url_data 
= [parse_qs(uds
) for uds 
in url_data_strs
] 
1341                         url_data 
= filter(lambda ud
: 'itag' in ud 
and 'url' in ud
, url_data
) 
1342                         url_map 
= dict((ud
['itag'][0], ud
['url'][0]) for ud 
in url_data
) 
1344                         format_limit 
= self
._downloader
.params
.get('format_limit', None) 
1345                         if format_limit 
is not None and format_limit 
in self
._available
_formats
: 
1346                                 format_list 
= self
._available
_formats
[self
._available
_formats
.index(format_limit
):] 
1348                                 format_list 
= self
._available
_formats
 
1349                         existing_formats 
= [x 
for x 
in format_list 
if x 
in url_map
] 
1350                         if len(existing_formats
) == 0: 
1351                                 self
._downloader
.trouble(u
'ERROR: no known formats available for video') 
1353                         if self
._downloader
.params
.get('listformats', None): 
1354                                 self
._print
_formats
(existing_formats
) 
1356                         if req_format 
is None or req_format 
== 'best': 
1357                                 video_url_list 
= [(existing_formats
[0], url_map
[existing_formats
[0]])] # Best quality 
1358                         elif req_format 
== 'worst': 
1359                                 video_url_list 
= [(existing_formats
[len(existing_formats
)-1], url_map
[existing_formats
[len(existing_formats
)-1]])] # worst quality 
1360                         elif req_format 
in ('-1', 'all'): 
1361                                 video_url_list 
= [(f
, url_map
[f
]) for f 
in existing_formats
] # All formats 
1363                                 # Specific formats. We pick the first in a slash-delimeted sequence. 
1364                                 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'. 
1365                                 req_formats 
= req_format
.split('/') 
1366                                 video_url_list 
= None 
1367                                 for rf 
in req_formats
: 
1369                                                 video_url_list 
= [(rf
, url_map
[rf
])] 
1371                                 if video_url_list 
is None: 
1372                                         self
._downloader
.trouble(u
'ERROR: requested format not available') 
1375                         self
._downloader
.trouble(u
'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') 
1378                 for format_param
, video_real_url 
in video_url_list
: 
1379                         # At this point we have a new video 
1380                         self
._downloader
.increment_downloads() 
1383                         video_extension 
= self
._video
_extensions
.get(format_param
, 'flv') 
1386                                 # Process video information 
1387                                 self
._downloader
.process_info({ 
1388                                         'id':           video_id
.decode('utf-8'), 
1389                                         'url':          video_real_url
.decode('utf-8'), 
1390                                         'uploader':     video_uploader
.decode('utf-8'), 
1391                                         'upload_date':  upload_date
, 
1392                                         'title':        video_title
, 
1393                                         'stitle':       simple_title
, 
1394                                         'ext':          video_extension
.decode('utf-8'), 
1395                                         'format':       (format_param 
is None and u
'NA' or format_param
.decode('utf-8')), 
1396                                         'thumbnail':    video_thumbnail
.decode('utf-8'), 
1397                                         'description':  video_description
, 
1398                                         'player_url':   player_url
, 
1400                         except UnavailableVideoError
, err
: 
1401                                 self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1404 class MetacafeIE(InfoExtractor
): 
1405         """Information Extractor for metacafe.com.""" 
1407         _VALID_URL 
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' 
1408         _DISCLAIMER 
= 'http://www.metacafe.com/family_filter/' 
1409         _FILTER_POST 
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' 
1411         IE_NAME 
= u
'metacafe' 
1413         def __init__(self
, youtube_ie
, downloader
=None): 
1414                 InfoExtractor
.__init
__(self
, downloader
) 
1415                 self
._youtube
_ie 
= youtube_ie
 
1417         def report_disclaimer(self
): 
1418                 """Report disclaimer retrieval.""" 
1419                 self
._downloader
.to_screen(u
'[metacafe] Retrieving disclaimer') 
1421         def report_age_confirmation(self
): 
1422                 """Report attempt to confirm age.""" 
1423                 self
._downloader
.to_screen(u
'[metacafe] Confirming age') 
1425         def report_download_webpage(self
, video_id
): 
1426                 """Report webpage download.""" 
1427                 self
._downloader
.to_screen(u
'[metacafe] %s: Downloading webpage' % video_id
) 
1429         def report_extraction(self
, video_id
): 
1430                 """Report information extraction.""" 
1431                 self
._downloader
.to_screen(u
'[metacafe] %s: Extracting information' % video_id
) 
1433         def _real_initialize(self
): 
1434                 # Retrieve disclaimer 
1435                 request 
= urllib2
.Request(self
._DISCLAIMER
) 
1437                         self
.report_disclaimer() 
1438                         disclaimer 
= urllib2
.urlopen(request
).read() 
1439                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1440                         self
._downloader
.trouble(u
'ERROR: unable to retrieve disclaimer: %s' % str(err
)) 
1446                         'submit': "Continue - I'm over 18", 
1448                 request 
= urllib2
.Request(self
._FILTER
_POST
, urllib
.urlencode(disclaimer_form
)) 
1450                         self
.report_age_confirmation() 
1451                         disclaimer 
= urllib2
.urlopen(request
).read() 
1452                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1453                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1456         def _real_extract(self
, url
): 
1457                 # Extract id and simplified title from URL 
1458                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1460                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1463                 video_id 
= mobj
.group(1) 
1465                 # Check if video comes from YouTube 
1466                 mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
1467                 if mobj2 
is not None: 
1468                         self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % mobj2
.group(1)) 
1471                 # At this point we have a new video 
1472                 self
._downloader
.increment_downloads() 
1474                 simple_title 
= mobj
.group(2).decode('utf-8') 
1476                 # Retrieve video webpage to extract further information 
1477                 request 
= urllib2
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
1479                         self
.report_download_webpage(video_id
) 
1480                         webpage 
= urllib2
.urlopen(request
).read() 
1481                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1482                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1485                 # Extract URL, uploader and title from webpage 
1486                 self
.report_extraction(video_id
) 
1487                 mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
1488                 if mobj 
is not None: 
1489                         mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1490                         video_extension 
= mediaURL
[-3:] 
1492                         # Extract gdaKey if available 
1493                         mobj 
= re
.search(r
'(?m)&gdaKey=(.*?)&', webpage
) 
1495                                 video_url 
= mediaURL
 
1497                                 gdaKey 
= mobj
.group(1) 
1498                                 video_url 
= '%s?__gda__=%s' % (mediaURL
, gdaKey
) 
1500                         mobj 
= re
.search(r
' name="flashvars" value="(.*?)"', webpage
) 
1502                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1504                         vardict 
= parse_qs(mobj
.group(1)) 
1505                         if 'mediaData' not in vardict
: 
1506                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1508                         mobj 
= re
.search(r
'"mediaURL":"(http.*?)","key":"(.*?)"', vardict
['mediaData'][0]) 
1510                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1512                         mediaURL 
= mobj
.group(1).replace('\\/', '/') 
1513                         video_extension 
= mediaURL
[-3:] 
1514                         video_url 
= '%s?__gda__=%s' % (mediaURL
, mobj
.group(2)) 
1516                 mobj 
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
) 
1518                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1520                 video_title 
= mobj
.group(1).decode('utf-8') 
1521                 video_title 
= sanitize_title(video_title
) 
1523                 mobj 
= re
.search(r
'(?ms)By:\s*<a .*?>(.+?)<', webpage
) 
1525                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1527                 video_uploader 
= mobj
.group(1) 
1530                         # Process video information 
1531                         self
._downloader
.process_info({ 
1532                                 'id':           video_id
.decode('utf-8'), 
1533                                 'url':          video_url
.decode('utf-8'), 
1534                                 'uploader':     video_uploader
.decode('utf-8'), 
1535                                 'upload_date':  u
'NA', 
1536                                 'title':        video_title
, 
1537                                 'stitle':       simple_title
, 
1538                                 'ext':          video_extension
.decode('utf-8'), 
1542                 except UnavailableVideoError
: 
1543                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1546 class DailymotionIE(InfoExtractor
): 
1547         """Information Extractor for Dailymotion""" 
1549         _VALID_URL 
= r
'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' 
1550         IE_NAME 
= u
'dailymotion' 
1552         def __init__(self
, downloader
=None): 
1553                 InfoExtractor
.__init
__(self
, downloader
) 
1555         def report_download_webpage(self
, video_id
): 
1556                 """Report webpage download.""" 
1557                 self
._downloader
.to_screen(u
'[dailymotion] %s: Downloading webpage' % video_id
) 
1559         def report_extraction(self
, video_id
): 
1560                 """Report information extraction.""" 
1561                 self
._downloader
.to_screen(u
'[dailymotion] %s: Extracting information' % video_id
) 
1563         def _real_initialize(self
): 
1566         def _real_extract(self
, url
): 
1567                 # Extract id and simplified title from URL 
1568                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1570                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1573                 # At this point we have a new video 
1574                 self
._downloader
.increment_downloads() 
1575                 video_id 
= mobj
.group(1) 
1577                 simple_title 
= mobj
.group(2).decode('utf-8') 
1578                 video_extension 
= 'flv' 
1580                 # Retrieve video webpage to extract further information 
1581                 request 
= urllib2
.Request(url
) 
1582                 request
.add_header('Cookie', 'family_filter=off') 
1584                         self
.report_download_webpage(video_id
) 
1585                         webpage 
= urllib2
.urlopen(request
).read() 
1586                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1587                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1590                 # Extract URL, uploader and title from webpage 
1591                 self
.report_extraction(video_id
) 
1592                 mobj 
= re
.search(r
'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage
) 
1594                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1596                 sequence 
= urllib
.unquote(mobj
.group(1)) 
1597                 mobj 
= re
.search(r
',\"sdURL\"\:\"([^\"]+?)\",', sequence
) 
1599                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1601                 mediaURL 
= urllib
.unquote(mobj
.group(1)).replace('\\', '') 
1603                 # if needed add http://www.dailymotion.com/ if relative URL 
1605                 video_url 
= mediaURL
 
1607                 mobj 
= re
.search(r
'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage
) 
1609                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1611                 video_title 
= mobj
.group(1).decode('utf-8') 
1612                 video_title 
= sanitize_title(video_title
) 
1614                 mobj 
= re
.search(r
'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage
) 
1616                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1618                 video_uploader 
= mobj
.group(1) 
1621                         # Process video information 
1622                         self
._downloader
.process_info({ 
1623                                 'id':           video_id
.decode('utf-8'), 
1624                                 'url':          video_url
.decode('utf-8'), 
1625                                 'uploader':     video_uploader
.decode('utf-8'), 
1626                                 'upload_date':  u
'NA', 
1627                                 'title':        video_title
, 
1628                                 'stitle':       simple_title
, 
1629                                 'ext':          video_extension
.decode('utf-8'), 
1633                 except UnavailableVideoError
: 
1634                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1637 class GoogleIE(InfoExtractor
): 
1638         """Information extractor for video.google.com.""" 
1640         _VALID_URL 
= r
'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' 
1641         IE_NAME 
= u
'video.google' 
1643         def __init__(self
, downloader
=None): 
1644                 InfoExtractor
.__init
__(self
, downloader
) 
1646         def report_download_webpage(self
, video_id
): 
1647                 """Report webpage download.""" 
1648                 self
._downloader
.to_screen(u
'[video.google] %s: Downloading webpage' % video_id
) 
1650         def report_extraction(self
, video_id
): 
1651                 """Report information extraction.""" 
1652                 self
._downloader
.to_screen(u
'[video.google] %s: Extracting information' % video_id
) 
1654         def _real_initialize(self
): 
1657         def _real_extract(self
, url
): 
1658                 # Extract id from URL 
1659                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1661                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1664                 # At this point we have a new video 
1665                 self
._downloader
.increment_downloads() 
1666                 video_id 
= mobj
.group(1) 
1668                 video_extension 
= 'mp4' 
1670                 # Retrieve video webpage to extract further information 
1671                 request 
= urllib2
.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id
) 
1673                         self
.report_download_webpage(video_id
) 
1674                         webpage 
= urllib2
.urlopen(request
).read() 
1675                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1676                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1679                 # Extract URL, uploader, and title from webpage 
1680                 self
.report_extraction(video_id
) 
1681                 mobj 
= re
.search(r
"download_url:'([^']+)'", webpage
) 
1683                         video_extension 
= 'flv' 
1684                         mobj 
= re
.search(r
"(?i)videoUrl\\x3d(.+?)\\x26", webpage
) 
1686                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1688                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1689                 mediaURL 
= mediaURL
.replace('\\x3d', '\x3d') 
1690                 mediaURL 
= mediaURL
.replace('\\x26', '\x26') 
1692                 video_url 
= mediaURL
 
1694                 mobj 
= re
.search(r
'<title>(.*)</title>', webpage
) 
1696                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1698                 video_title 
= mobj
.group(1).decode('utf-8') 
1699                 video_title 
= sanitize_title(video_title
) 
1700                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1702                 # Extract video description 
1703                 mobj 
= re
.search(r
'<span id=short-desc-content>([^<]*)</span>', webpage
) 
1705                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1707                 video_description 
= mobj
.group(1).decode('utf-8') 
1708                 if not video_description
: 
1709                         video_description 
= 'No description available.' 
1711                 # Extract video thumbnail 
1712                 if self
._downloader
.params
.get('forcethumbnail', False): 
1713                         request 
= urllib2
.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id
))) 
1715                                 webpage 
= urllib2
.urlopen(request
).read() 
1716                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1717                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1719                         mobj 
= re
.search(r
'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage
) 
1721                                 self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1723                         video_thumbnail 
= mobj
.group(1) 
1724                 else:   # we need something to pass to process_info 
1725                         video_thumbnail 
= '' 
1728                         # Process video information 
1729                         self
._downloader
.process_info({ 
1730                                 'id':           video_id
.decode('utf-8'), 
1731                                 'url':          video_url
.decode('utf-8'), 
1733                                 'upload_date':  u
'NA', 
1734                                 'title':        video_title
, 
1735                                 'stitle':       simple_title
, 
1736                                 'ext':          video_extension
.decode('utf-8'), 
1740                 except UnavailableVideoError
: 
1741                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1744 class PhotobucketIE(InfoExtractor
): 
1745         """Information extractor for photobucket.com.""" 
1747         _VALID_URL 
= r
'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' 
1748         IE_NAME 
= u
'photobucket' 
1750         def __init__(self
, downloader
=None): 
1751                 InfoExtractor
.__init
__(self
, downloader
) 
1753         def report_download_webpage(self
, video_id
): 
1754                 """Report webpage download.""" 
1755                 self
._downloader
.to_screen(u
'[photobucket] %s: Downloading webpage' % video_id
) 
1757         def report_extraction(self
, video_id
): 
1758                 """Report information extraction.""" 
1759                 self
._downloader
.to_screen(u
'[photobucket] %s: Extracting information' % video_id
) 
1761         def _real_initialize(self
): 
1764         def _real_extract(self
, url
): 
1765                 # Extract id from URL 
1766                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1768                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1771                 # At this point we have a new video 
1772                 self
._downloader
.increment_downloads() 
1773                 video_id 
= mobj
.group(1) 
1775                 video_extension 
= 'flv' 
1777                 # Retrieve video webpage to extract further information 
1778                 request 
= urllib2
.Request(url
) 
1780                         self
.report_download_webpage(video_id
) 
1781                         webpage 
= urllib2
.urlopen(request
).read() 
1782                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1783                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1786                 # Extract URL, uploader, and title from webpage 
1787                 self
.report_extraction(video_id
) 
1788                 mobj 
= re
.search(r
'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage
) 
1790                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1792                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1794                 video_url 
= mediaURL
 
1796                 mobj 
= re
.search(r
'<title>(.*) video by (.*) - Photobucket</title>', webpage
) 
1798                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1800                 video_title 
= mobj
.group(1).decode('utf-8') 
1801                 video_title 
= sanitize_title(video_title
) 
1802                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1804                 video_uploader 
= mobj
.group(2).decode('utf-8') 
1807                         # Process video information 
1808                         self
._downloader
.process_info({ 
1809                                 'id':           video_id
.decode('utf-8'), 
1810                                 'url':          video_url
.decode('utf-8'), 
1811                                 'uploader':     video_uploader
, 
1812                                 'upload_date':  u
'NA', 
1813                                 'title':        video_title
, 
1814                                 'stitle':       simple_title
, 
1815                                 'ext':          video_extension
.decode('utf-8'), 
1819                 except UnavailableVideoError
: 
1820                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1823 class YahooIE(InfoExtractor
): 
1824         """Information extractor for video.yahoo.com.""" 
1826         # _VALID_URL matches all Yahoo! Video URLs 
1827         # _VPAGE_URL matches only the extractable '/watch/' URLs 
1828         _VALID_URL 
= r
'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' 
1829         _VPAGE_URL 
= r
'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
1830         IE_NAME 
= u
'video.yahoo' 
1832         def __init__(self
, downloader
=None): 
1833                 InfoExtractor
.__init
__(self
, downloader
) 
1835         def report_download_webpage(self
, video_id
): 
1836                 """Report webpage download.""" 
1837                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Downloading webpage' % video_id
) 
1839         def report_extraction(self
, video_id
): 
1840                 """Report information extraction.""" 
1841                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Extracting information' % video_id
) 
1843         def _real_initialize(self
): 
1846         def _real_extract(self
, url
, new_video
=True): 
1847                 # Extract ID from URL 
1848                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1850                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1853                 # At this point we have a new video 
1854                 self
._downloader
.increment_downloads() 
1855                 video_id 
= mobj
.group(2) 
1856                 video_extension 
= 'flv' 
1858                 # Rewrite valid but non-extractable URLs as 
1859                 # extractable English language /watch/ URLs 
1860                 if re
.match(self
._VPAGE
_URL
, url
) is None: 
1861                         request 
= urllib2
.Request(url
) 
1863                                 webpage 
= urllib2
.urlopen(request
).read() 
1864                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1865                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1868                         mobj 
= re
.search(r
'\("id", "([0-9]+)"\);', webpage
) 
1870                                 self
._downloader
.trouble(u
'ERROR: Unable to extract id field') 
1872                         yahoo_id 
= mobj
.group(1) 
1874                         mobj 
= re
.search(r
'\("vid", "([0-9]+)"\);', webpage
) 
1876                                 self
._downloader
.trouble(u
'ERROR: Unable to extract vid field') 
1878                         yahoo_vid 
= mobj
.group(1) 
1880                         url 
= 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid
, yahoo_id
) 
1881                         return self
._real
_extract
(url
, new_video
=False) 
1883                 # Retrieve video webpage to extract further information 
1884                 request 
= urllib2
.Request(url
) 
1886                         self
.report_download_webpage(video_id
) 
1887                         webpage 
= urllib2
.urlopen(request
).read() 
1888                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1889                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1892                 # Extract uploader and title from webpage 
1893                 self
.report_extraction(video_id
) 
1894                 mobj 
= re
.search(r
'<meta name="title" content="(.*)" />', webpage
) 
1896                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1898                 video_title 
= mobj
.group(1).decode('utf-8') 
1899                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1901                 mobj 
= re
.search(r
'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage
) 
1903                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
1905                 video_uploader 
= mobj
.group(1).decode('utf-8') 
1907                 # Extract video thumbnail 
1908                 mobj 
= re
.search(r
'<link rel="image_src" href="(.*)" />', webpage
) 
1910                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1912                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
1914                 # Extract video description 
1915                 mobj 
= re
.search(r
'<meta name="description" content="(.*)" />', webpage
) 
1917                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1919                 video_description 
= mobj
.group(1).decode('utf-8') 
1920                 if not video_description
: 
1921                         video_description 
= 'No description available.' 
1923                 # Extract video height and width 
1924                 mobj 
= re
.search(r
'<meta name="video_height" content="([0-9]+)" />', webpage
) 
1926                         self
._downloader
.trouble(u
'ERROR: unable to extract video height') 
1928                 yv_video_height 
= mobj
.group(1) 
1930                 mobj 
= re
.search(r
'<meta name="video_width" content="([0-9]+)" />', webpage
) 
1932                         self
._downloader
.trouble(u
'ERROR: unable to extract video width') 
1934                 yv_video_width 
= mobj
.group(1) 
1936                 # Retrieve video playlist to extract media URL 
1937                 # I'm not completely sure what all these options are, but we 
1938                 # seem to need most of them, otherwise the server sends a 401. 
1939                 yv_lg 
= 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents 
1940                 yv_bitrate 
= '700'  # according to Wikipedia this is hard-coded 
1941                 request 
= urllib2
.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id 
+ 
1942                                 '&tech=flash&mode=playlist&lg=' + yv_lg 
+ '&bitrate=' + yv_bitrate 
+ '&vidH=' + yv_video_height 
+ 
1943                                 '&vidW=' + yv_video_width 
+ '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') 
1945                         self
.report_download_webpage(video_id
) 
1946                         webpage 
= urllib2
.urlopen(request
).read() 
1947                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1948                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1951                 # Extract media URL from playlist XML 
1952                 mobj 
= re
.search(r
'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage
) 
1954                         self
._downloader
.trouble(u
'ERROR: Unable to extract media URL') 
1956                 video_url 
= urllib
.unquote(mobj
.group(1) + mobj
.group(2)).decode('utf-8') 
1957                 video_url 
= re
.sub(r
'(?u)&(.+?);', htmlentity_transform
, video_url
) 
1960                         # Process video information 
1961                         self
._downloader
.process_info({ 
1962                                 'id':           video_id
.decode('utf-8'), 
1964                                 'uploader':     video_uploader
, 
1965                                 'upload_date':  u
'NA', 
1966                                 'title':        video_title
, 
1967                                 'stitle':       simple_title
, 
1968                                 'ext':          video_extension
.decode('utf-8'), 
1969                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
1970                                 'description':  video_description
, 
1971                                 'thumbnail':    video_thumbnail
, 
1974                 except UnavailableVideoError
: 
1975                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1978 class VimeoIE(InfoExtractor
): 
1979         """Information extractor for vimeo.com.""" 
1981         # _VALID_URL matches Vimeo URLs 
1982         _VALID_URL 
= r
'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' 
1985         def __init__(self
, downloader
=None): 
1986                 InfoExtractor
.__init
__(self
, downloader
) 
1988         def report_download_webpage(self
, video_id
): 
1989                 """Report webpage download.""" 
1990                 self
._downloader
.to_screen(u
'[vimeo] %s: Downloading webpage' % video_id
) 
1992         def report_extraction(self
, video_id
): 
1993                 """Report information extraction.""" 
1994                 self
._downloader
.to_screen(u
'[vimeo] %s: Extracting information' % video_id
) 
1996         def _real_initialize(self
): 
1999         def _real_extract(self
, url
, new_video
=True): 
2000                 # Extract ID from URL 
2001                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
2003                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
2006                 # At this point we have a new video 
2007                 self
._downloader
.increment_downloads() 
2008                 video_id 
= mobj
.group(1) 
2010                 # Retrieve video webpage to extract further information 
2011                 request 
= urllib2
.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id
, None, std_headers
) 
2013                         self
.report_download_webpage(video_id
) 
2014                         webpage 
= urllib2
.urlopen(request
).read() 
2015                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
2016                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
2019                 # Now we begin extracting as much information as we can from what we 
2020                 # retrieved. First we extract the information common to all extractors, 
2021                 # and latter we extract those that are Vimeo specific. 
2022                 self
.report_extraction(video_id
) 
2025                 mobj 
= re
.search(r
'<caption>(.*?)</caption>', webpage
) 
2027                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
2029                 video_title 
= mobj
.group(1).decode('utf-8') 
2030                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
2033                 mobj 
= re
.search(r
'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage
) 
2035                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
2037                 video_uploader 
= mobj
.group(1).decode('utf-8') 
2039                 # Extract video thumbnail 
2040                 mobj 
= re
.search(r
'<thumbnail>(.*?)</thumbnail>', webpage
) 
2042                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
2044                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
2046                 # # Extract video description 
2047                 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage) 
2049                 #       self._downloader.trouble(u'ERROR: unable to extract video description') 
2051                 # video_description = mobj.group(1).decode('utf-8') 
2052                 # if not video_description: video_description = 'No description available.' 
2053                 video_description 
= 'Foo.' 
2055                 # Vimeo specific: extract request signature 
2056                 mobj 
= re
.search(r
'<request_signature>(.*?)</request_signature>', webpage
) 
2058                         self
._downloader
.trouble(u
'ERROR: unable to extract request signature') 
2060                 sig 
= mobj
.group(1).decode('utf-8') 
2062                 # Vimeo specific: extract video quality information 
2063                 mobj 
= re
.search(r
'<isHD>(\d+)</isHD>', webpage
) 
2065                         self
._downloader
.trouble(u
'ERROR: unable to extract video quality information') 
2067                 quality 
= mobj
.group(1).decode('utf-8') 
2069                 if int(quality
) == 1: 
2074                 # Vimeo specific: Extract request signature expiration 
2075                 mobj 
= re
.search(r
'<request_signature_expires>(.*?)</request_signature_expires>', webpage
) 
2077                         self
._downloader
.trouble(u
'ERROR: unable to extract request signature expiration') 
2079                 sig_exp 
= mobj
.group(1).decode('utf-8') 
2081                 video_url 
= "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id
, sig
, sig_exp
, quality
) 
2084                         # Process video information 
2085                         self
._downloader
.process_info({ 
2086                                 'id':           video_id
.decode('utf-8'), 
2088                                 'uploader':     video_uploader
, 
2089                                 'upload_date':  u
'NA', 
2090                                 'title':        video_title
, 
2091                                 'stitle':       simple_title
, 
2093                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
2094                                 'description':  video_description
, 
2095                                 'thumbnail':    video_thumbnail
, 
2096                                 'description':  video_description
, 
2099                 except UnavailableVideoError
: 
2100                         self
._downloader
.trouble(u
'ERROR: unable to download video') 
2103 class GenericIE(InfoExtractor
): 
2104         """Generic last-resort information extractor.""" 
2107         IE_NAME 
= u
'generic' 
2109         def __init__(self
, downloader
=None): 
2110                 InfoExtractor
.__init
__(self
, downloader
) 
2112         def report_download_webpage(self
, video_id
): 
2113                 """Report webpage download.""" 
2114                 self
._downloader
.to_screen(u
'WARNING: Falling back on generic information extractor.') 
2115                 self
._downloader
.to_screen(u
'[generic] %s: Downloading webpage' % video_id
) 
2117         def report_extraction(self
, video_id
): 
2118                 """Report information extraction.""" 
2119                 self
._downloader
.to_screen(u
'[generic] %s: Extracting information' % video_id
) 
2121         def _real_initialize(self
): 
2124         def _real_extract(self
, url
): 
2125                 # At this point we have a new video 
2126                 self
._downloader
.increment_downloads() 
2128                 video_id 
= url
.split('/')[-1] 
2129                 request 
= urllib2
.Request(url
) 
2131                         self
.report_download_webpage(video_id
) 
2132                         webpage 
= urllib2
.urlopen(request
).read() 
2133                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
2134                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
2136                 except ValueError, err
: 
2137                         # since this is the last-resort InfoExtractor, if 
2138                         # this error is thrown, it'll be thrown here 
2139                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
2142                 self
.report_extraction(video_id
) 
2143                 # Start with something easy: JW Player in SWFObject 
2144                 mobj 
= re
.search(r
'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) 
2146                         # Broaden the search a little bit 
2147                         mobj = re.search(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage) 
2149                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
2152                 # It's possible that one of the regexes 
2153                 # matched, but returned an empty group: 
2154                 if mobj.group(1) is None: 
2155                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
2158                 video_url = urllib.unquote(mobj.group(1)) 
2159                 video_id = os.path.basename(video_url) 
2161                 # here's a fun little line of code for you: 
2162                 video_extension = os.path.splitext(video_id)[1][1:] 
2163                 video_id = os.path.splitext(video_id)[0] 
2165                 # it's tempting to parse this further, but you would 
2166                 # have to take into account all the variations like 
2167                 #   Video Title - Site Name 
2168                 #   Site Name | Video Title 
2169                 #   Video Title - Tagline | Site Name 
2170                 # and so on and so forth; it's just not practical 
2171                 mobj = re.search(r'<title>(.*)</title>', webpage) 
2173                         self._downloader.trouble(u'ERROR: unable to extract title') 
2175                 video_title = mobj.group(1).decode('utf-8') 
2176                 video_title = sanitize_title(video_title) 
2177                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) 
2179                 # video uploader is domain name 
2180                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) 
2182                         self._downloader.trouble(u'ERROR: unable to extract title') 
2184                 video_uploader = mobj.group(1).decode('utf-8') 
2187                         # Process video information 
2188                         self._downloader.process_info({ 
2189                                 'id':           video_id.decode('utf-8'), 
2190                                 'url':          video_url.decode('utf-8'), 
2191                                 'uploader':     video_uploader, 
2192                                 'upload_date':  u'NA', 
2193                                 'title':        video_title, 
2194                                 'stitle':       simple_title, 
2195                                 'ext':          video_extension.decode('utf-8'), 
2199                 except UnavailableVideoError, err: 
2200                         self._downloader.trouble(u'\nERROR: unable to download video') 
2203 class YoutubeSearchIE(InfoExtractor): 
2204         """Information Extractor for YouTube search queries.""" 
2205         _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+' 
2206         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' 
2207         _VIDEO_INDICATOR = r'href="/watch
\?v
=.+?
"' 
2208         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
2210         _max_youtube_results = 1000 
2211         IE_NAME = u'youtube:search' 
2213         def __init__(self, youtube_ie, downloader=None): 
2214                 InfoExtractor.__init__(self, downloader) 
2215                 self._youtube_ie = youtube_ie 
2217         def report_download_page(self, query, pagenum): 
2218                 """Report attempt to download playlist page with given number.""" 
2219                 query = query.decode(preferredencoding()) 
2220                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) 
2222         def _real_initialize(self): 
2223                 self._youtube_ie.initialize() 
2225         def _real_extract(self, query): 
2226                 mobj = re.match(self._VALID_URL, query) 
2228                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2231                 prefix, query = query.split(':') 
2233                 query = query.encode('utf-8') 
2235                         self._download_n_results(query, 1) 
2237                 elif prefix == 'all': 
2238                         self._download_n_results(query, self._max_youtube_results) 
2244                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2246                                 elif n > self._max_youtube_results: 
2247                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) 
2248                                         n = self._max_youtube_results 
2249                                 self._download_n_results(query, n) 
2251                         except ValueError: # parsing prefix as integer fails 
2252                                 self._download_n_results(query, 1) 
2255         def _download_n_results(self, query, n): 
2256                 """Downloads a specified number of results for a query""" 
2259                 already_seen = set() 
2263                         self.report_download_page(query, pagenum) 
2264                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2265                         request = urllib2.Request(result_url) 
2267                                 page = urllib2.urlopen(request).read() 
2268                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2269                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2272                         # Extract video identifiers 
2273                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2274                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] 
2275                                 if video_id not in already_seen: 
2276                                         video_ids.append(video_id) 
2277                                         already_seen.add(video_id) 
2278                                         if len(video_ids) == n: 
2279                                                 # Specified n videos reached 
2280                                                 for id in video_ids: 
2281                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2284                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2285                                 for id in video_ids: 
2286                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2289                         pagenum = pagenum + 1 
2292 class GoogleSearchIE(InfoExtractor): 
2293         """Information Extractor for Google Video search queries.""" 
2294         _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+' 
2295         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' 
2296         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' 
2297         _MORE_PAGES_INDICATOR = r'<span>Next</span>' 
2299         _max_google_results = 1000 
2300         IE_NAME = u'video.google:search' 
2302         def __init__(self, google_ie, downloader=None): 
2303                 InfoExtractor.__init__(self, downloader) 
2304                 self._google_ie = google_ie 
2306         def report_download_page(self, query, pagenum): 
2307                 """Report attempt to download playlist page with given number.""" 
2308                 query = query.decode(preferredencoding()) 
2309                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) 
2311         def _real_initialize(self): 
2312                 self._google_ie.initialize() 
2314         def _real_extract(self, query): 
2315                 mobj = re.match(self._VALID_URL, query) 
2317                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2320                 prefix, query = query.split(':') 
2322                 query = query.encode('utf-8') 
2324                         self._download_n_results(query, 1) 
2326                 elif prefix == 'all': 
2327                         self._download_n_results(query, self._max_google_results) 
2333                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2335                                 elif n > self._max_google_results: 
2336                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) 
2337                                         n = self._max_google_results 
2338                                 self._download_n_results(query, n) 
2340                         except ValueError: # parsing prefix as integer fails 
2341                                 self._download_n_results(query, 1) 
2344         def _download_n_results(self, query, n): 
2345                 """Downloads a specified number of results for a query""" 
2348                 already_seen = set() 
2352                         self.report_download_page(query, pagenum) 
2353                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2354                         request = urllib2.Request(result_url) 
2356                                 page = urllib2.urlopen(request).read() 
2357                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2358                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2361                         # Extract video identifiers 
2362                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2363                                 video_id = mobj.group(1) 
2364                                 if video_id not in already_seen: 
2365                                         video_ids.append(video_id) 
2366                                         already_seen.add(video_id) 
2367                                         if len(video_ids) == n: 
2368                                                 # Specified n videos reached 
2369                                                 for id in video_ids: 
2370                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
2373                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2374                                 for id in video_ids: 
2375                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
2378                         pagenum = pagenum + 1 
2381 class YahooSearchIE(InfoExtractor): 
2382         """Information Extractor for Yahoo! Video search queries.""" 
2383         _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+' 
2384         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' 
2385         _VIDEO_INDICATOR = r'href="http
://video\
.yahoo\
.com
/watch
/([0-9]+/[0-9]+)"' 
2386         _MORE_PAGES_INDICATOR = r'\s*Next' 
2388         _max_yahoo_results = 1000 
2389         IE_NAME = u'video.yahoo:search' 
2391         def __init__(self, yahoo_ie, downloader=None): 
2392                 InfoExtractor.__init__(self, downloader) 
2393                 self._yahoo_ie = yahoo_ie 
2395         def report_download_page(self, query, pagenum): 
2396                 """Report attempt to download playlist page with given number.""" 
2397                 query = query.decode(preferredencoding()) 
2398                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) 
2400         def _real_initialize(self): 
2401                 self._yahoo_ie.initialize() 
2403         def _real_extract(self, query): 
2404                 mobj = re.match(self._VALID_URL, query) 
2406                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2409                 prefix, query = query.split(':') 
2411                 query = query.encode('utf-8') 
2413                         self._download_n_results(query, 1) 
2415                 elif prefix == 'all': 
2416                         self._download_n_results(query, self._max_yahoo_results) 
2422                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2424                                 elif n > self._max_yahoo_results: 
2425                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) 
2426                                         n = self._max_yahoo_results 
2427                                 self._download_n_results(query, n) 
2429                         except ValueError: # parsing prefix as integer fails 
2430                                 self._download_n_results(query, 1) 
2433         def _download_n_results(self, query, n): 
2434                 """Downloads a specified number of results for a query""" 
2437                 already_seen = set() 
2441                         self.report_download_page(query, pagenum) 
2442                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2443                         request = urllib2.Request(result_url) 
2445                                 page = urllib2.urlopen(request).read() 
2446                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2447                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2450                         # Extract video identifiers 
2451                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2452                                 video_id = mobj.group(1) 
2453                                 if video_id not in already_seen: 
2454                                         video_ids.append(video_id) 
2455                                         already_seen.add(video_id) 
2456                                         if len(video_ids) == n: 
2457                                                 # Specified n videos reached 
2458                                                 for id in video_ids: 
2459                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2462                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2463                                 for id in video_ids: 
2464                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2467                         pagenum = pagenum + 1 
2470 class YoutubePlaylistIE(InfoExtractor): 
2471         """Information Extractor for YouTube playlists.""" 
2473         _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' 
2474         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' 
2475         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
2476         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
2478         IE_NAME = u'youtube:playlist' 
2480         def __init__(self, youtube_ie, downloader=None): 
2481                 InfoExtractor.__init__(self, downloader) 
2482                 self._youtube_ie = youtube_ie 
2484         def report_download_page(self, playlist_id, pagenum): 
2485                 """Report attempt to download playlist page with given number.""" 
2486                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) 
2488         def _real_initialize(self): 
2489                 self._youtube_ie.initialize() 
2491         def _real_extract(self, url): 
2492                 # Extract playlist id 
2493                 mobj = re.match(self._VALID_URL, url) 
2495                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2499                 if mobj.group(3) is not None: 
2500                         self._youtube_ie.extract(mobj.group(3)) 
2503                 # Download playlist pages 
2504                 # prefix is 'p' as default for playlists but there are other types that need extra care 
2505                 playlist_prefix = mobj.group(1) 
2506                 if playlist_prefix == 'a': 
2507                         playlist_access = 'artist' 
2509                         playlist_prefix = 'p' 
2510                         playlist_access = 'view_play_list' 
2511                 playlist_id = mobj.group(2) 
2516                         self.report_download_page(playlist_id, pagenum) 
2517                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)) 
2519                                 page = urllib2.urlopen(request).read() 
2520                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2521                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2524                         # Extract video identifiers 
2526                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2527                                 if mobj.group(1) not in ids_in_page: 
2528                                         ids_in_page.append(mobj.group(1)) 
2529                         video_ids.extend(ids_in_page) 
2531                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2533                         pagenum = pagenum + 1 
2535                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2536                 playlistend = self._downloader.params.get('playlistend', -1) 
2537                 video_ids = video_ids[playliststart:playlistend] 
2539                 for id in video_ids: 
2540                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2544 class YoutubeUserIE(InfoExtractor): 
2545         """Information Extractor for YouTube users.""" 
2547         _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)' 
2548         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' 
2549         _GDATA_PAGE_SIZE = 50 
2550         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' 
2551         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
2553         IE_NAME = u'youtube:user' 
2555         def __init__(self, youtube_ie, downloader=None): 
2556                 InfoExtractor.__init__(self, downloader) 
2557                 self._youtube_ie = youtube_ie 
2559         def report_download_page(self, username, start_index): 
2560                 """Report attempt to download user page.""" 
2561                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % 
2562                                 (username, start_index, start_index + self._GDATA_PAGE_SIZE)) 
2564         def _real_initialize(self): 
2565                 self._youtube_ie.initialize() 
2567         def _real_extract(self, url): 
2569                 mobj = re.match(self._VALID_URL, url) 
2571                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2574                 username = mobj.group(1) 
2576                 # Download video ids using YouTube Data API. Result size per 
2577                 # query is limited (currently to 50 videos) so we need to query 
2578                 # page by page until there are no video ids - it means we got 
2585                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1 
2586                         self.report_download_page(username, start_index) 
2588                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)) 
2591                                 page = urllib2.urlopen(request).read() 
2592                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2593                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2596                         # Extract video identifiers 
2599                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2600                                 if mobj.group(1) not in ids_in_page: 
2601                                         ids_in_page.append(mobj.group(1)) 
2603                         video_ids.extend(ids_in_page) 
2605                         # A little optimization - if current page is not 
2606                         # "full
", ie. does not contain PAGE_SIZE video ids then 
2607                         # we can assume that this page is the last one - there 
2608                         # are no more ids on further pages - no need to query 
2611                         if len(ids_in_page) < self._GDATA_PAGE_SIZE: 
2616                 all_ids_count = len(video_ids) 
2617                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2618                 playlistend = self._downloader.params.get('playlistend', -1) 
2620                 if playlistend == -1: 
2621                         video_ids = video_ids[playliststart:] 
2623                         video_ids = video_ids[playliststart:playlistend] 
2625                 self._downloader.to_screen("[youtube
] user 
%s: Collected 
%d video 
ids (downloading 
%d of them
)" % 
2626                                 (username, all_ids_count, len(video_ids))) 
2628                 for video_id in video_ids: 
2629                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) 
2632 class DepositFilesIE(InfoExtractor): 
2633         """Information extractor for depositfiles.com""" 
2635         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' 
2636         IE_NAME = u'DepositFiles' 
2638         def __init__(self, downloader=None): 
2639                 InfoExtractor.__init__(self, downloader) 
2641         def report_download_webpage(self, file_id): 
2642                 """Report webpage download.""" 
2643                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) 
2645         def report_extraction(self, file_id): 
2646                 """Report information extraction.""" 
2647                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) 
2649         def _real_initialize(self): 
2652         def _real_extract(self, url): 
2653                 # At this point we have a new file 
2654                 self._downloader.increment_downloads() 
2656                 file_id = url.split('/')[-1] 
2657                 # Rebuild url in english locale 
2658                 url = 'http://depositfiles.com/en/files/' + file_id 
2660                 # Retrieve file webpage with 'Free download' button pressed 
2661                 free_download_indication = { 'gateway_result' : '1' } 
2662                 request = urllib2.Request(url, urllib.urlencode(free_download_indication)) 
2664                         self.report_download_webpage(file_id) 
2665                         webpage = urllib2.urlopen(request).read() 
2666                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2667                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) 
2670                 # Search for the real file URL 
2671                 mobj = re.search(r'<form action="(http
://fileshare
.+?
)"', webpage) 
2672                 if (mobj is None) or (mobj.group(1) is None): 
2673                         # Try to figure out reason of the error. 
2674                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) 
2675                         if (mobj is not None) and (mobj.group(1) is not None): 
2676                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() 
2677                                 self._downloader.trouble(u'ERROR: %s' % restriction_message) 
2679                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) 
2682                 file_url = mobj.group(1) 
2683                 file_extension = os.path.splitext(file_url)[1][1:] 
2685                 # Search for file title 
2686                 mobj = re.search(r'<b title="(.*?
)">', webpage) 
2688                         self._downloader.trouble(u'ERROR: unable to extract title') 
2690                 file_title = mobj.group(1).decode('utf-8') 
2693                         # Process file information 
2694                         self._downloader.process_info({ 
2695                                 'id':           file_id.decode('utf-8'), 
2696                                 'url':          file_url.decode('utf-8'), 
2698                                 'upload_date':  u'NA', 
2699                                 'title':        file_title, 
2700                                 'stitle':       file_title, 
2701                                 'ext':          file_extension.decode('utf-8'), 
2705                 except UnavailableVideoError, err: 
2706                         self._downloader.trouble(u'ERROR: unable to download file') 
2709 class FacebookIE(InfoExtractor): 
2710         """Information Extractor for Facebook""" 
2712         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/video/video\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' 
2713         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' 
2714         _NETRC_MACHINE = 'facebook' 
2715         _available_formats = ['highqual', 'lowqual'] 
2716         _video_extensions = { 
2720         IE_NAME = u'facebook' 
2722         def __init__(self, downloader=None): 
2723                 InfoExtractor.__init__(self, downloader) 
2725         def _reporter(self, message): 
2726                 """Add header and report message.""" 
2727                 self._downloader.to_screen(u'[facebook] %s' % message) 
2729         def report_login(self): 
2730                 """Report attempt to log in.""" 
2731                 self._reporter(u'Logging in') 
2733         def report_video_webpage_download(self, video_id): 
2734                 """Report attempt to download video webpage.""" 
2735                 self._reporter(u'%s: Downloading video webpage' % video_id) 
2737         def report_information_extraction(self, video_id): 
2738                 """Report attempt to extract video information.""" 
2739                 self._reporter(u'%s: Extracting video information' % video_id) 
2741         def _parse_page(self, video_webpage): 
2742                 """Extract video information from page""" 
2744                 data = {'title': r'class="video_title datawrap
">(.*?)</', 
2745                         'description': r'<div class="datawrap
">(.*?)</div>', 
2746                         'owner': r'\("video_owner_name
", "(.*?
)"\)', 
2747                         'upload_date': r'data-date="(.*?
)"', 
2748                         'thumbnail':  r'\("thumb_url
", "(?P
<THUMB
>.*?
)"\)', 
2751                 for piece in data.keys(): 
2752                         mobj = re.search(data[piece], video_webpage) 
2753                         if mobj is not None: 
2754                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape
")) 
2758                 for fmt in self._available_formats: 
2759                         mobj = re.search(r'\("%s_src
\", "(.+?)"\
)' % fmt, video_webpage) 
2760                         if mobj is not None: 
2761                                 # URL is in a Javascript segment inside an escaped Unicode format within 
2762                                 # the generally utf-8 page 
2763                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape")) 
2764                 video_info['video_urls
'] = video_urls 
2768         def _real_initialize(self): 
2769                 if self._downloader is None: 
2774                 downloader_params = self._downloader.params 
2776                 # Attempt to use provided username and password or .netrc data 
2777                 if downloader_params.get('username
', None) is not None: 
2778                         useremail = downloader_params['username
'] 
2779                         password = downloader_params['password
'] 
2780                 elif downloader_params.get('usenetrc
', False): 
2782                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE) 
2783                                 if info is not None: 
2787                                         raise netrc.NetrcParseError('No authenticators 
for %s' % self._NETRC_MACHINE) 
2788                         except (IOError, netrc.NetrcParseError), err: 
2789                                 self._downloader.to_stderr(u'WARNING
: parsing 
.netrc
: %s' % str(err)) 
2792                 if useremail is None: 
2801                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) 
2804                         login_results = urllib2.urlopen(request).read() 
2805                         if re.search(r'<form(.*)name
="login"(.*)</form
>', login_results) is not None: 
2806                                 self._downloader.to_stderr(u'WARNING
: unable to log 
in: bad username
/password
, or exceded login rate 
limit (~
3/min). Check credentials 
or wait
.') 
2808                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2809                         self._downloader.to_stderr(u'WARNING
: unable to log 
in: %s' % str(err)) 
2812         def _real_extract(self, url): 
2813                 mobj = re.match(self._VALID_URL, url) 
2815                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
2817                 video_id = mobj.group('ID
') 
2820                 self.report_video_webpage_download(video_id) 
2821                 request = urllib2.Request('https
://www
.facebook
.com
/video
/video
.php?v
=%s' % video_id) 
2823                         page = urllib2.urlopen(request) 
2824                         video_webpage = page.read() 
2825                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2826                         self._downloader.trouble(u'ERROR
: unable to download video webpage
: %s' % str(err)) 
2829                 # Start extracting information 
2830                 self.report_information_extraction(video_id) 
2832                 # Extract information 
2833                 video_info = self._parse_page(video_webpage) 
2836                 if 'owner
' not in video_info: 
2837                         self._downloader.trouble(u'ERROR
: unable to extract uploader nickname
') 
2839                 video_uploader = video_info['owner
'] 
2842                 if 'title
' not in video_info: 
2843                         self._downloader.trouble(u'ERROR
: unable to extract video title
') 
2845                 video_title = video_info['title
'] 
2846                 video_title = video_title.decode('utf
-8') 
2847                 video_title = sanitize_title(video_title) 
2850                 simple_title = re.sub(ur'(?u
)([^
%s]+)' % simple_title_chars, ur'_
', video_title) 
2851                 simple_title = simple_title.strip(ur'_
') 
2854                 if 'thumbnail
' not in video_info: 
2855                         self._downloader.trouble(u'WARNING
: unable to extract video thumbnail
') 
2856                         video_thumbnail = '' 
2858                         video_thumbnail = video_info['thumbnail
'] 
2862                 if 'upload_date
' in video_info: 
2863                         upload_time = video_info['upload_date
'] 
2864                         timetuple = email.utils.parsedate_tz(upload_time) 
2865                         if timetuple is not None: 
2867                                         upload_date = time.strftime('%Y
%m
%d', timetuple[0:9]) 
2872                 video_description = video_info.get('description
', 'No description available
.') 
2874                 url_map = video_info['video_urls
'] 
2875                 if len(url_map.keys()) > 0: 
2876                         # Decide which formats to download 
2877                         req_format = self._downloader.params.get('format
', None) 
2878                         format_limit = self._downloader.params.get('format_limit
', None) 
2880                         if format_limit is not None and format_limit in self._available_formats: 
2881                                 format_list = self._available_formats[self._available_formats.index(format_limit):] 
2883                                 format_list = self._available_formats 
2884                         existing_formats = [x for x in format_list if x in url_map] 
2885                         if len(existing_formats) == 0: 
2886                                 self._downloader.trouble(u'ERROR
: no known formats available 
for video
') 
2888                         if req_format is None: 
2889                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality 
2890                         elif req_format == 'worst
': 
2891                                 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality 
2892                         elif req_format == '-1': 
2893                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats 
2896                                 if req_format not in url_map: 
2897                                         self._downloader.trouble(u'ERROR
: requested format 
not available
') 
2899                                 video_url_list = [(req_format, url_map[req_format])] # Specific format 
2901                 for format_param, video_real_url in video_url_list: 
2903                         # At this point we have a new video 
2904                         self._downloader.increment_downloads() 
2907                         video_extension = self._video_extensions.get(format_param, 'mp4
') 
2910                                 # Process video information 
2911                                 self._downloader.process_info({ 
2912                                         'id':           video_id.decode('utf
-8'), 
2913                                         'url
':          video_real_url.decode('utf
-8'), 
2914                                         'uploader
':     video_uploader.decode('utf
-8'), 
2915                                         'upload_date
':  upload_date, 
2916                                         'title
':        video_title, 
2917                                         'stitle
':       simple_title, 
2918                                         'ext
':          video_extension.decode('utf
-8'), 
2919                                         'format
':       (format_param is None and u'NA
' or format_param.decode('utf
-8')), 
2920                                         'thumbnail
':    video_thumbnail.decode('utf
-8'), 
2921                                         'description
':  video_description.decode('utf
-8'), 
2924                         except UnavailableVideoError, err: 
2925                                 self._downloader.trouble(u'\nERROR
: unable to download video
') 
2927 class BlipTVIE(InfoExtractor): 
2928         """Information extractor for blip.tv""" 
2930         _VALID_URL = r'^
(?
:https?
://)?
(?
:\w
+\
.)?blip\
.tv(/.+)$
' 
2931         _URL_EXT = r'^
.*\
.([a
-z0
-9]+)$
' 
2932         IE_NAME = u'blip
.tv
' 
2934         def report_extraction(self, file_id): 
2935                 """Report information extraction.""" 
2936                 self._downloader.to_screen(u'[%s] %s: Extracting information
' % (self.IE_NAME, file_id)) 
2938         def report_direct_download(self, title): 
2939                 """Report information extraction.""" 
2940                 self._downloader.to_screen(u'[%s] %s: Direct download detected
' % (self.IE_NAME, title)) 
2942         def _simplify_title(self, title): 
2943                 res = re.sub(ur'(?u
)([^
%s]+)' % simple_title_chars, ur'_
', title) 
2944                 res = res.strip(ur'_
') 
2947         def _real_extract(self, url): 
2948                 mobj = re.match(self._VALID_URL, url) 
2950                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
2957                 json_url = url + cchar + 'skin
=json
&version
=2&no_wrap
=1' 
2958                 request = urllib2.Request(json_url) 
2959                 self.report_extraction(mobj.group(1)) 
2962                         urlh = urllib2.urlopen(request) 
2963                         if urlh.headers.get('Content
-Type
', '').startswith('video
/'): # Direct download 
2964                                 basename = url.split('/')[-1] 
2965                                 title,ext = os.path.splitext(basename) 
2966                                 ext = ext.replace('.', '') 
2967                                 self.report_direct_download(title) 
2972                                         'stitle
': self._simplify_title(title), 
2976                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2977                         self._downloader.trouble(u'ERROR
: unable to download video info webpage
: %s' % str(err)) 
2979                 if info is None: # Regular URL 
2981                                 json_code = urlh.read() 
2982                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2983                                 self._downloader.trouble(u'ERROR
: unable to read video info webpage
: %s' % str(err)) 
2987                                 json_data = json.loads(json_code) 
2988                                 if 'Post
' in json_data: 
2989                                         data = json_data['Post
'] 
2993                                 upload_date = datetime.datetime.strptime(data['datestamp
'], '%m
-%d-%y 
%H
:%M
%p
').strftime('%Y
%m
%d') 
2994                                 video_url = data['media
']['url
'] 
2995                                 umobj = re.match(self._URL_EXT, video_url) 
2997                                         raise ValueError('Can 
not determine filename extension
') 
2998                                 ext = umobj.group(1) 
3001                                         'id': data['item_id
'], 
3003                                         'uploader
': data['display_name
'], 
3004                                         'upload_date
': upload_date, 
3005                                         'title
': data['title
'], 
3006                                         'stitle
': self._simplify_title(data['title
']), 
3008                                         'format
': data['media
']['mimeType
'], 
3009                                         'thumbnail
': data['thumbnailUrl
'], 
3010                                         'description
': data['description
'], 
3011                                         'player_url
': data['embedUrl
'] 
3013                         except (ValueError,KeyError), err: 
3014                                 self._downloader.trouble(u'ERROR
: unable to parse video information
: %s' % repr(err)) 
3017                 self._downloader.increment_downloads() 
3020                         self._downloader.process_info(info) 
3021                 except UnavailableVideoError, err: 
3022                         self._downloader.trouble(u'\nERROR
: unable to download video
') 
3025 class MyVideoIE(InfoExtractor): 
3026         """Information Extractor for myvideo.de.""" 
3028         _VALID_URL = r'(?
:http
://)?
(?
:www\
.)?myvideo\
.de
/watch
/([0-9]+)/([^?
/]+).*' 
3029         IE_NAME = u'myvideo
' 
3031         def __init__(self, downloader=None): 
3032                 InfoExtractor.__init__(self, downloader) 
3034         def report_download_webpage(self, video_id): 
3035                 """Report webpage download.""" 
3036                 self._downloader.to_screen(u'[myvideo
] %s: Downloading webpage
' % video_id) 
3038         def report_extraction(self, video_id): 
3039                 """Report information extraction.""" 
3040                 self._downloader.to_screen(u'[myvideo
] %s: Extracting information
' % video_id) 
3042         def _real_initialize(self): 
3045         def _real_extract(self,url): 
3046                 mobj = re.match(self._VALID_URL, url) 
3048                         self._download.trouble(u'ERROR
: invalid URL
: %s' % url) 
3051                 video_id = mobj.group(1) 
3052                 simple_title = mobj.group(2).decode('utf
-8') 
3053                 # should actually not be necessary 
3054                 simple_title = sanitize_title(simple_title) 
3055                 simple_title = re.sub(ur'(?u
)([^
%s]+)' % simple_title_chars, ur'_
', simple_title) 
3058                 request = urllib2.Request('http
://www
.myvideo
.de
/watch
/%s' % video_id) 
3060                         self.report_download_webpage(video_id) 
3061                         webpage = urllib2.urlopen(request).read() 
3062                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3063                         self._downloader.trouble(u'ERROR
: Unable to retrieve video webpage
: %s' % str(err)) 
3066                 self.report_extraction(video_id) 
3067                 mobj = re.search(r'<link rel
=\'image_src
\' href
=\'(http
://is[0-9].myvideo\
.de
/de
/movie
[0-9]+/[a
-f0
-9]+)/thumbs
/[^
.]+\
.jpg
\' />', 
3070                         self._downloader.trouble(u'ERROR
: unable to extract media URL
') 
3072                 video_url = mobj.group(1) + ('/%s.flv
' % video_id) 
3074                 mobj = re.search('<title
>([^
<]+)</title
>', webpage) 
3076                         self._downloader.trouble(u'ERROR
: unable to extract title
') 
3079                 video_title = mobj.group(1) 
3080                 video_title = sanitize_title(video_title) 
3083                         self._downloader.process_info({ 
3087                                 'upload_date
':  u'NA
', 
3088                                 'title
':        video_title, 
3089                                 'stitle
':       simple_title, 
3094                 except UnavailableVideoError: 
3095                         self._downloader.trouble(u'\nERROR
: Unable to download video
') 
3097 class ComedyCentralIE(InfoExtractor): 
3098         """Information extractor for The Daily Show and Colbert Report """ 
3100         _VALID_URL = r'^
(:(?P
<shortname
>tds|thedailyshow|cr|colbert|colbertnation|colbertreport
))|
(https?
://)?
(www\
.)?
(?P
<showname
>thedailyshow|colbertnation
)\
.com
/full
-episodes
/(?P
<episode
>.*)$
' 
3101         IE_NAME = u'comedycentral
' 
3103         def report_extraction(self, episode_id): 
3104                 self._downloader.to_screen(u'[comedycentral
] %s: Extracting information
' % episode_id) 
3106         def report_config_download(self, episode_id): 
3107                 self._downloader.to_screen(u'[comedycentral
] %s: Downloading configuration
' % episode_id) 
3109         def report_index_download(self, episode_id): 
3110                 self._downloader.to_screen(u'[comedycentral
] %s: Downloading show index
' % episode_id) 
3112         def report_player_url(self, episode_id): 
3113                 self._downloader.to_screen(u'[comedycentral
] %s: Determining player URL
' % episode_id) 
3115         def _simplify_title(self, title): 
3116                 res = re.sub(ur'(?u
)([^
%s]+)' % simple_title_chars, ur'_
', title) 
3117                 res = res.strip(ur'_
') 
3120         def _real_extract(self, url): 
3121                 mobj = re.match(self._VALID_URL, url) 
3123                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
3126                 if mobj.group('shortname
'): 
3127                         if mobj.group('shortname
') in ('tds
', 'thedailyshow
'): 
3128                                 url = 'http
://www
.thedailyshow
.com
/full
-episodes
/' 
3130                                 url = 'http
://www
.colbertnation
.com
/full
-episodes
/' 
3131                         mobj = re.match(self._VALID_URL, url) 
3132                         assert mobj is not None 
3134                 dlNewest = not mobj.group('episode
') 
3136                         epTitle = mobj.group('showname
') 
3138                         epTitle = mobj.group('episode
') 
3140                 req = urllib2.Request(url) 
3141                 self.report_extraction(epTitle) 
3143                         htmlHandle = urllib2.urlopen(req) 
3144                         html = htmlHandle.read() 
3145                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3146                         self._downloader.trouble(u'ERROR
: unable to download webpage
: %s' % unicode(err)) 
3149                         url = htmlHandle.geturl() 
3150                         mobj = re.match(self._VALID_URL, url) 
3152                                 self._downloader.trouble(u'ERROR
: Invalid redirected URL
: ' + url) 
3154                         if mobj.group('episode
') == '': 
3155                                 self._downloader.trouble(u'ERROR
: Redirected URL 
is still 
not specific
: ' + url) 
3157                         epTitle = mobj.group('episode
') 
3159                 mMovieParams = re.findall('<param name
="movie" value
="(http://media.mtvnservices.com/([^"]*episode
.*?
:.*?
))"/>', html) 
3160                 if len(mMovieParams) == 0: 
3161                         self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) 
3164                 playerUrl_raw = mMovieParams[0][0] 
3165                 self.report_player_url(epTitle) 
3167                         urlHandle = urllib2.urlopen(playerUrl_raw) 
3168                         playerUrl = urlHandle.geturl() 
3169                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3170                         self._downloader.trouble(u'ERROR: unable to find out player URL: ' + unicode(err)) 
3173                 uri = mMovieParams[0][1] 
3174                 indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + urllib.urlencode({'uri': uri}) 
3175                 self.report_index_download(epTitle) 
3177                         indexXml = urllib2.urlopen(indexUrl).read() 
3178                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3179                         self._downloader.trouble(u'ERROR: unable to download episode index: ' + unicode(err)) 
3182                 idoc = xml.etree.ElementTree.fromstring(indexXml) 
3183                 itemEls = idoc.findall('.//item') 
3184                 for itemEl in itemEls: 
3185                         mediaId = itemEl.findall('./guid')[0].text 
3186                         shortMediaId = mediaId.split(':')[-1] 
3187                         showId = mediaId.split(':')[-2].replace('.com', '') 
3188                         officialTitle = itemEl.findall('./title')[0].text 
3189                         officialDate = itemEl.findall('./pubDate')[0].text 
3191                         configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + 
3192                                                 urllib.urlencode({'uri': mediaId})) 
3193                         configReq = urllib2.Request(configUrl) 
3194                         self.report_config_download(epTitle) 
3196                                 configXml = urllib2.urlopen(configReq).read() 
3197                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3198                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % unicode(err)) 
3201                         cdoc = xml.etree.ElementTree.fromstring(configXml) 
3203                         for rendition in cdoc.findall('.//rendition'): 
3204                                 finfo = (rendition.attrib['bitrate'], rendition.findall('./src')[0].text) 
3208                                 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found') 
3211                         # For now, just pick the highest bitrate 
3212                         format,video_url = turls[-1] 
3214                         self._downloader.increment_downloads() 
3216                         effTitle = showId + '-' + epTitle 
3221                                 'upload_date': officialDate, 
3223                                 'stitle': self._simplify_title(effTitle), 
3227                                 'description': officialTitle, 
3228                                 'player_url': playerUrl 
3232                                 self._downloader.process_info(info) 
3233                         except UnavailableVideoError, err: 
3234                                 self._downloader.trouble(u'\nERROR: unable to download ' + mediaId) 
3238 class EscapistIE(InfoExtractor): 
3239         """Information extractor for The Escapist """ 
3241         _VALID_URL = r'^(https?://)?(www\.)?escapistmagazine\.com/videos/view/(?P<showname>[^/]+)/(?P<episode>[^/?]+)[/?]?.*$' 
3242         IE_NAME = u'escapist' 
3244         def report_extraction(self, showName): 
3245                 self._downloader.to_screen(u'[escapist] %s: Extracting information' % showName) 
3247         def report_config_download(self, showName): 
3248                 self._downloader.to_screen(u'[escapist] %s: Downloading configuration' % showName) 
3250         def _simplify_title(self, title): 
3251                 res = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', title) 
3252                 res = res.strip(ur'_') 
3255         def _real_extract(self, url): 
3256                 htmlParser = HTMLParser.HTMLParser() 
3258                 mobj = re.match(self._VALID_URL, url) 
3260                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
3262                 showName = mobj.group('showname') 
3263                 videoId = mobj.group('episode') 
3265                 self.report_extraction(showName) 
3267                         webPage = urllib2.urlopen(url).read() 
3268                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3269                         self._downloader.trouble(u'ERROR: unable to download webpage: ' + unicode(err)) 
3272                 descMatch = re.search('<meta name="description
" content="([^
"]*)"', webPage) 
3273                 description = htmlParser.unescape(descMatch.group(1)) 
3274                 imgMatch = re.search('<meta 
property="og:image" content
="([^"]*)"', webPage) 
3275                 imgUrl = htmlParser.unescape(imgMatch.group(1)) 
3276                 playerUrlMatch = re.search('<meta property="og
:video
" content="([^
"]*)"', webPage) 
3277                 playerUrl = htmlParser.unescape(playerUrlMatch.group(1)) 
3278                 configUrlMatch = re.search('config
=(.*)$
', playerUrl) 
3279                 configUrl = urllib2.unquote(configUrlMatch.group(1)) 
3281                 self.report_config_download(showName) 
3283                         configJSON = urllib2.urlopen(configUrl).read() 
3284                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3285                         self._downloader.trouble(u'ERROR
: unable to download configuration
: ' + unicode(err)) 
3288                 # Technically, it's JavaScript
, not JSON
 
3289                 configJSON 
= configJSON
.replace("'", '"') 
3292                         config 
= json
.loads(configJSON
) 
3293                 except (ValueError,), err
: 
3294                         self
._downloader
.trouble(u
'ERROR: Invalid JSON in configuration file: ' + unicode(err
)) 
3297                 playlist 
= config
['playlist'] 
3298                 videoUrl 
= playlist
[1]['url'] 
3300                 self
._downloader
.increment_downloads() 
3304                         'uploader': showName
, 
3305                         'upload_date': None, 
3307                         'stitle': self
._simplify
_title
(showName
), 
3310                         'thumbnail': imgUrl
, 
3311                         'description': description
, 
3312                         'player_url': playerUrl
, 
3316                         self
._downloader
.process_info(info
) 
3317                 except UnavailableVideoError
, err
: 
3318                         self
._downloader
.trouble(u
'\nERROR: unable to download ' + videoId
) 
3321 class CollegeHumorIE(InfoExtractor
): 
3322         """Information extractor for collegehumor.com""" 
3324         _VALID_URL 
= r
'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$' 
3325         IE_NAME 
= u
'collegehumor' 
3327         def report_webpage(self
, video_id
): 
3328                 """Report information extraction.""" 
3329                 self
._downloader
.to_screen(u
'[%s] %s: Downloading webpage' % (self
.IE_NAME
, video_id
)) 
3331         def report_extraction(self
, video_id
): 
3332                 """Report information extraction.""" 
3333                 self
._downloader
.to_screen(u
'[%s] %s: Extracting information' % (self
.IE_NAME
, video_id
)) 
3335         def _simplify_title(self
, title
): 
3336                 res 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', title
) 
3337                 res 
= res
.strip(ur
'_') 
3340         def _real_extract(self
, url
): 
3341                 htmlParser 
= HTMLParser
.HTMLParser() 
3343                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
3345                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
3347                 video_id 
= mobj
.group('videoid') 
3349                 self
.report_webpage(video_id
) 
3350                 request 
= urllib2
.Request(url
) 
3352                         webpage 
= urllib2
.urlopen(request
).read() 
3353                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
3354                         self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
3357                 m 
= re
.search(r
'id="video:(?P<internalvideoid>[0-9]+)"', webpage
) 
3359                         self
._downloader
.trouble(u
'ERROR: Cannot extract internal video ID') 
3361                 internal_video_id 
= m
.group('internalvideoid') 
3365                         'internal_id': internal_video_id
, 
3368                 self
.report_extraction(video_id
) 
3369                 xmlUrl 
= 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id
 
3371                         metaXml 
= urllib2
.urlopen(xmlUrl
).read() 
3372                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
3373                         self
._downloader
.trouble(u
'ERROR: unable to download video info XML: %s' % str(err
)) 
3376                 mdoc 
= xml
.etree
.ElementTree
.fromstring(metaXml
) 
3378                         videoNode 
= mdoc
.findall('./video')[0] 
3379                         info
['description'] = videoNode
.findall('./description')[0].text
 
3380                         info
['title'] = videoNode
.findall('./caption')[0].text
 
3381                         info
['stitle'] = self
._simplify
_title
(info
['title']) 
3382                         info
['url'] = videoNode
.findall('./file')[0].text
 
3383                         info
['thumbnail'] = videoNode
.findall('./thumbnail')[0].text
 
3384                         info
['ext'] = info
['url'].rpartition('.')[2] 
3385                         info
['format'] = info
['ext'] 
3387                         self
._downloader
.trouble(u
'\nERROR: Invalid metadata XML file') 
3390                 self
._downloader
.increment_downloads() 
3393                         self
._downloader
.process_info(info
) 
3394                 except UnavailableVideoError
, err
: 
3395                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
3398 class XVideosIE(InfoExtractor
): 
3399         """Information extractor for xvideos.com""" 
3401         _VALID_URL 
= r
'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' 
3402         IE_NAME 
= u
'xvideos' 
3404         def report_webpage(self
, video_id
): 
3405                 """Report information extraction.""" 
3406                 self
._downloader
.to_screen(u
'[%s] %s: Downloading webpage' % (self
.IE_NAME
, video_id
)) 
3408         def report_extraction(self
, video_id
): 
3409                 """Report information extraction.""" 
3410                 self
._downloader
.to_screen(u
'[%s] %s: Extracting information' % (self
.IE_NAME
, video_id
)) 
3412         def _simplify_title(self
, title
): 
3413                 res 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', title
) 
3414                 res 
= res
.strip(ur
'_') 
3417         def _real_extract(self
, url
): 
3418                 htmlParser 
= HTMLParser
.HTMLParser() 
3420                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
3422                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
3424                 video_id 
= mobj
.group(1).decode('utf-8') 
3426                 self
.report_webpage(video_id
) 
3428                 request 
= urllib2
.Request(r
'http://www.xvideos.com/video' + video_id
) 
3430                         webpage 
= urllib2
.urlopen(request
).read() 
3431                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
3432                         self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
3435                 self
.report_extraction(video_id
) 
3439                 mobj 
= re
.search(r
'flv_url=(.+?)&', webpage
) 
3441                         self
._downloader
.trouble(u
'ERROR: unable to extract video url') 
3443                 video_url 
= urllib2
.unquote(mobj
.group(1).decode('utf-8')) 
3447                 mobj 
= re
.search(r
'<title>(.*?)\s+-\s+XVID', webpage
) 
3449                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
3451                 video_title 
= mobj
.group(1).decode('utf-8') 
3454                 # Extract video thumbnail 
3455                 mobj 
= re
.search(r
'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage
) 
3457                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
3459                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
3463                 self
._downloader
.increment_downloads() 
3468                         'upload_date': None, 
3469                         'title': video_title
, 
3470                         'stitle': self
._simplify
_title
(video_title
), 
3473                         'thumbnail': video_thumbnail
, 
3474                         'description': None, 
3479                         self
._downloader
.process_info(info
) 
3480                 except UnavailableVideoError
, err
: 
3481                         self
._downloader
.trouble(u
'\nERROR: unable to download ' + video_id
) 
class PostProcessor(object):
	"""Post Processor class.

	PostProcessor objects can be added to downloaders with their
	add_post_processor() method. When the downloader has finished a
	successful download, it will take its internal chain of PostProcessors
	and start calling the run() method on each one of them, first with
	an initial argument and then with the returned value of the previous
	PostProcessor in the chain.

	The chain will be stopped if one of them ever returns None or the end
	of the chain is reached.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		composed by InfoExtractors. The only difference is that this
		one has an extra field called "filepath" that points to the
		downloaded file.

		When this method returns None, the postprocessing chain is
		stopped. However, this method may return an information
		dictionary that will be passed to the next postprocessing
		object in the chain. It can be the one it received after
		changing some fields.

		In addition, this method may raise a PostProcessingError
		exception that will be taken into account by the downloader
		it was called from.
		"""
		return information # by default, do nothing
class FFmpegExtractAudioPP(PostProcessor):
	"""Post processor that extracts the audio track of a downloaded video
	with ffmpeg, optionally transcoding it to a preferred codec."""

	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec
		self._preferredquality = preferredquality
		self._keepvideo = keepvideo

	@staticmethod
	def get_audio_codec(path):
		"""Return the audio codec name of the file at path (via ffprobe),
		or None if it cannot be determined."""
		try:
			cmd = ['ffprobe', '-show_streams', '--', path]
			handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
			output = handle.communicate()[0]
			if handle.wait() != 0:
				return None
		except (IOError, OSError):
			return None
		audio_codec = None
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
				# codec_name precedes codec_type in ffprobe's stream dump.
				return audio_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		"""Run ffmpeg to extract/transcode audio; return True on success."""
		try:
			cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
			ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
			return (ret == 0)
		except (IOError, OSError):
			return False

	def run(self, information):
		path = information['filepath']

		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		more_opts = []
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
			if filecodec in ['aac', 'mp3', 'vorbis']:
				# Lossless if possible
				acodec = 'copy'
				extension = filecodec
				if filecodec == 'aac':
					more_opts = ['-f', 'adts']
				if filecodec == 'vorbis':
					extension = 'ogg'
			else:
				# MP3 otherwise.
				acodec = 'libmp3lame'
				extension = 'mp3'
				more_opts = []
				if self._preferredquality is not None:
					more_opts += ['-ab', self._preferredquality]
		else:
			# We convert the audio (lossy)
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'vorbis': 'libvorbis'}[self._preferredcodec]
			extension = self._preferredcodec
			more_opts = []
			if self._preferredquality is not None:
				more_opts += ['-ab', self._preferredquality]
			if self._preferredcodec == 'aac':
				more_opts += ['-f', 'adts']
			if self._preferredcodec == 'vorbis':
				extension = 'ogg'

		(prefix, ext) = os.path.splitext(path)
		new_path = prefix + '.' + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
		status = self.run_ffmpeg(path, new_path, acodec, more_opts)

		if not status:
			self._downloader.to_stderr(u'WARNING: error running ffmpeg')
			return None

		# Try to update the date time for extracted audio file.
		if information.get('filetime') is not None:
			try:
				os.utime(new_path, (time.time(), information['filetime']))
			except:
				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')

		if not self._keepvideo:
			try:
				os.remove(path)
			except (IOError, OSError):
				self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
				return None

		information['filepath'] = new_path
		return information
def updateSelf(downloader, filename):
	''' Update the program file with the latest version from the repository '''
	# Note: downloader only used for options
	if not os.access(filename, os.W_OK):
		sys.exit('ERROR: no write permissions on %s' % filename)

	downloader.to_screen('Updating to latest version...')

	try:
		# Open the URL *outside* the try/finally that closes it, so a
		# failed urlopen cannot reach the finally block with 'urlh'
		# unbound (which previously raised NameError instead of the
		# intended clean exit).
		urlh = urllib.urlopen(UPDATE_URL)
		try:
			newcontent = urlh.read()
			vmatch = re.search("__version__ = '([^']+)'", newcontent)
			if vmatch is not None and vmatch.group(1) == __version__:
				downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')')
				return
		finally:
			urlh.close()
	except (IOError, OSError):
		sys.exit('ERROR: unable to download latest version')

	try:
		outf = open(filename, 'wb')
		try:
			outf.write(newcontent)
		finally:
			outf.close()
	except (IOError, OSError):
		sys.exit('ERROR: unable to overwrite current version')

	downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.')
3670         def _format_option_string(option
): 
3671                 ''' ('-o', '--option') -> -o, --format METAVAR''' 
3675                 if option
._short
_opts
: opts
.append(option
._short
_opts
[0]) 
3676                 if option
._long
_opts
: opts
.append(option
._long
_opts
[0]) 
3677                 if len(opts
) > 1: opts
.insert(1, ', ') 
3679                 if option
.takes_value(): opts
.append(' %s' % option
.metavar
) 
3681                 return "".join(opts
) 
3683         def _find_term_columns(): 
3684                 columns 
= os
.environ
.get('COLUMNS', None) 
3689                         sp 
= subprocess
.Popen(['stty', 'size'], stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
) 
3690                         out
,err 
= sp
.communicate() 
3691                         return int(out
.split()[1]) 
3697         max_help_position 
= 80 
3699         # No need to wrap help messages if we're on a wide console 
3700         columns 
= _find_term_columns() 
3701         if columns
: max_width 
= columns
 
3703         fmt 
= optparse
.IndentedHelpFormatter(width
=max_width
, max_help_position
=max_help_position
) 
3704         fmt
.format_option_strings 
= _format_option_string
 
3707                 'version'   : __version__
, 
3709                 'usage' : '%prog [options] url [url...]', 
3710                 'conflict_handler' : 'resolve', 
3713         parser 
= optparse
.OptionParser(**kw
) 
3716         general        
= optparse
.OptionGroup(parser
, 'General Options') 
3717         selection      
= optparse
.OptionGroup(parser
, 'Video Selection') 
3718         authentication 
= optparse
.OptionGroup(parser
, 'Authentication Options') 
3719         video_format   
= optparse
.OptionGroup(parser
, 'Video Format Options') 
3720         postproc       
= optparse
.OptionGroup(parser
, 'Post-processing Options') 
3721         filesystem     
= optparse
.OptionGroup(parser
, 'Filesystem Options') 
3722         verbosity      
= optparse
.OptionGroup(parser
, 'Verbosity / Simulation Options') 
3724         general
.add_option('-h', '--help', 
3725                         action
='help', help='print this help text and exit') 
3726         general
.add_option('-v', '--version', 
3727                         action
='version', help='print program version and exit') 
3728         general
.add_option('-U', '--update', 
3729                         action
='store_true', dest
='update_self', help='update this program to latest version') 
3730         general
.add_option('-i', '--ignore-errors', 
3731                         action
='store_true', dest
='ignoreerrors', help='continue on download errors', default
=False) 
3732         general
.add_option('-r', '--rate-limit', 
3733                         dest
='ratelimit', metavar
='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') 
3734         general
.add_option('-R', '--retries', 
3735                         dest
='retries', metavar
='RETRIES', help='number of retries (default is 10)', default
=10) 
3736         general
.add_option('--dump-user-agent', 
3737                         action
='store_true', dest
='dump_user_agent', 
3738                         help='display the current browser identification', default
=False) 
3739         general
.add_option('--list-extractors', 
3740                         action
='store_true', dest
='list_extractors', 
3741                         help='List all supported extractors and the URLs they would handle', default
=False) 
3743         selection
.add_option('--playlist-start', 
3744                         dest
='playliststart', metavar
='NUMBER', help='playlist video to start at (default is 1)', default
=1) 
3745         selection
.add_option('--playlist-end', 
3746                         dest
='playlistend', metavar
='NUMBER', help='playlist video to end at (default is last)', default
=-1) 
3747         selection
.add_option('--match-title', dest
='matchtitle', metavar
='REGEX',help='download only matching titles (regex or caseless sub-string)') 
3748         selection
.add_option('--reject-title', dest
='rejecttitle', metavar
='REGEX',help='skip download for matching titles (regex or caseless sub-string)') 
3750         authentication
.add_option('-u', '--username', 
3751                         dest
='username', metavar
='USERNAME', help='account username') 
3752         authentication
.add_option('-p', '--password', 
3753                         dest
='password', metavar
='PASSWORD', help='account password') 
3754         authentication
.add_option('-n', '--netrc', 
3755                         action
='store_true', dest
='usenetrc', help='use .netrc authentication data', default
=False) 
3758         video_format
.add_option('-f', '--format', 
3759                         action
='store', dest
='format', metavar
='FORMAT', help='video format code') 
3760         video_format
.add_option('--all-formats', 
3761                         action
='store_const', dest
='format', help='download all available video formats', const
='all') 
3762         video_format
.add_option('--max-quality', 
3763                         action
='store', dest
='format_limit', metavar
='FORMAT', help='highest quality format to download') 
3764         video_format
.add_option('-F', '--list-formats', 
3765                         action
='store_true', dest
='listformats', help='list all available formats (currently youtube only)') 
3768         verbosity
.add_option('-q', '--quiet', 
3769                         action
='store_true', dest
='quiet', help='activates quiet mode', default
=False) 
3770         verbosity
.add_option('-s', '--simulate', 
3771                         action
='store_true', dest
='simulate', help='do not download the video and do not write anything to disk', default
=False) 
3772         verbosity
.add_option('--skip-download', 
3773                         action
='store_true', dest
='skip_download', help='do not download the video', default
=False) 
3774         verbosity
.add_option('-g', '--get-url', 
3775                         action
='store_true', dest
='geturl', help='simulate, quiet but print URL', default
=False) 
3776         verbosity
.add_option('-e', '--get-title', 
3777                         action
='store_true', dest
='gettitle', help='simulate, quiet but print title', default
=False) 
3778         verbosity
.add_option('--get-thumbnail', 
3779                         action
='store_true', dest
='getthumbnail', 
3780                         help='simulate, quiet but print thumbnail URL', default
=False) 
3781         verbosity
.add_option('--get-description', 
3782                         action
='store_true', dest
='getdescription', 
3783                         help='simulate, quiet but print video description', default
=False) 
3784         verbosity
.add_option('--get-filename', 
3785                         action
='store_true', dest
='getfilename', 
3786                         help='simulate, quiet but print output filename', default
=False) 
3787         verbosity
.add_option('--get-format', 
3788                         action
='store_true', dest
='getformat', 
3789                         help='simulate, quiet but print output format', default
=False) 
3790         verbosity
.add_option('--no-progress', 
3791                         action
='store_true', dest
='noprogress', help='do not print progress bar', default
=False) 
3792         verbosity
.add_option('--console-title', 
3793                         action
='store_true', dest
='consoletitle', 
3794                         help='display progress in console titlebar', default
=False) 
3797         filesystem
.add_option('-t', '--title', 
3798                         action
='store_true', dest
='usetitle', help='use title in file name', default
=False) 
3799         filesystem
.add_option('-l', '--literal', 
3800                         action
='store_true', dest
='useliteral', help='use literal title in file name', default
=False) 
3801         filesystem
.add_option('-A', '--auto-number', 
3802                         action
='store_true', dest
='autonumber', 
3803                         help='number downloaded files starting from 00000', default
=False) 
3804         filesystem
.add_option('-o', '--output', 
3805                         dest
='outtmpl', metavar
='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, and %% for a literal percent') 
3806         filesystem
.add_option('-a', '--batch-file', 
3807                         dest
='batchfile', metavar
='FILE', help='file containing URLs to download (\'-\' for stdin)') 
3808         filesystem
.add_option('-w', '--no-overwrites', 
3809                         action
='store_true', dest
='nooverwrites', help='do not overwrite files', default
=False) 
3810         filesystem
.add_option('-c', '--continue', 
3811                         action
='store_true', dest
='continue_dl', help='resume partially downloaded files', default
=False) 
3812         filesystem
.add_option('--no-continue', 
3813                         action
='store_false', dest
='continue_dl', 
3814                         help='do not resume partially downloaded files (restart from beginning)') 
3815         filesystem
.add_option('--cookies', 
3816                         dest
='cookiefile', metavar
='FILE', help='file to read cookies from and dump cookie jar in') 
3817         filesystem
.add_option('--no-part', 
3818                         action
='store_true', dest
='nopart', help='do not use .part files', default
=False) 
3819         filesystem
.add_option('--no-mtime', 
3820                         action
='store_false', dest
='updatetime', 
3821                         help='do not use the Last-modified header to set the file modification time', default
=True) 
3822         filesystem
.add_option('--write-description', 
3823                         action
='store_true', dest
='writedescription', 
3824                         help='write video description to a .description file', default
=False) 
3825         filesystem
.add_option('--write-info-json', 
3826                         action
='store_true', dest
='writeinfojson', 
3827                         help='write video metadata to a .info.json file', default
=False) 
3830         postproc
.add_option('--extract-audio', action
='store_true', dest
='extractaudio', default
=False, 
3831                         help='convert video files to audio-only files (requires ffmpeg and ffprobe)') 
3832         postproc
.add_option('--audio-format', metavar
='FORMAT', dest
='audioformat', default
='best', 
3833                         help='"best", "aac", "vorbis" or "mp3"; best by default') 
3834         postproc
.add_option('--audio-quality', metavar
='QUALITY', dest
='audioquality', default
='128K', 
3835                         help='ffmpeg audio bitrate specification, 128k by default') 
3836         postproc
.add_option('-k', '--keep-video', action
='store_true', dest
='keepvideo', default
=False, 
3837                         help='keeps the video file on disk after the post-processing; the video is erased by default') 
3840         parser
.add_option_group(general
) 
3841         parser
.add_option_group(selection
) 
3842         parser
.add_option_group(filesystem
) 
3843         parser
.add_option_group(verbosity
) 
3844         parser
.add_option_group(video_format
) 
3845         parser
.add_option_group(authentication
) 
3846         parser
.add_option_group(postproc
) 
3848         opts
, args 
= parser
.parse_args() 
3850         return parser
, opts
, args
 
def gen_extractors():
	""" Return a list of an instance of every supported extractor.
	The order does matter; the first extractor matched is the one handling the URL.
	"""
	# These three are shared: several derived extractors delegate to them.
	youtube_ie = YoutubeIE()
	google_ie = GoogleIE()
	yahoo_ie = YahooIE()

	# NOTE(review): parts of this list fall in an unreadable region of the
	# original; entries below the visible ones were reconstructed — confirm
	# against the repository before relying on the exact set/order.
	return [
		YoutubePlaylistIE(youtube_ie),
		YoutubeUserIE(youtube_ie),
		YoutubeSearchIE(youtube_ie),
		youtube_ie,
		MetacafeIE(youtube_ie),
		DailymotionIE(),
		google_ie,
		GoogleSearchIE(google_ie),
		PhotobucketIE(),
		yahoo_ie,
		YahooSearchIE(yahoo_ie),
		DepositFilesIE(),
		FacebookIE(),
		BlipTVUserIE(),
		BlipTVIE(),
		VimeoIE(),
		MyVideoIE(),
		ComedyCentralIE(),
		EscapistIE(),
		CollegeHumorIE(),
		XVideosIE(),
		SoundcloudIE(),
		InfoQIE(),
		MixcloudIE(),
		StanfordOpenClassroomIE(),
		MTVIE(),
		XNXXIE(),
		GenericIE()
	]
3885         parser
, opts
, args 
= parseOpts() 
3887         # Open appropriate CookieJar 
3888         if opts
.cookiefile 
is None: 
3889                 jar 
= cookielib
.CookieJar() 
3892                         jar 
= cookielib
.MozillaCookieJar(opts
.cookiefile
) 
3893                         if os
.path
.isfile(opts
.cookiefile
) and os
.access(opts
.cookiefile
, os
.R_OK
): 
3895                 except (IOError, OSError), err
: 
3896                         sys
.exit(u
'ERROR: unable to open cookie file') 
3899         if opts
.dump_user_agent
: 
3900                 print std_headers
['User-Agent'] 
3903         # Batch file verification 
3905         if opts
.batchfile 
is not None: 
3907                         if opts
.batchfile 
== '-': 
3910                                 batchfd 
= open(opts
.batchfile
, 'r') 
3911                         batchurls 
= batchfd
.readlines() 
3912                         batchurls 
= [x
.strip() for x 
in batchurls
] 
3913                         batchurls 
= [x 
for x 
in batchurls 
if len(x
) > 0 and not re
.search(r
'^[#/;]', x
)] 
3915                         sys
.exit(u
'ERROR: batch file could not be read') 
3916         all_urls 
= batchurls 
+ args
 
3918         # General configuration 
3919         cookie_processor 
= urllib2
.HTTPCookieProcessor(jar
) 
3920         opener 
= urllib2
.build_opener(urllib2
.ProxyHandler(), cookie_processor
, YoutubeDLHandler()) 
3921         urllib2
.install_opener(opener
) 
3922         socket
.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) 
3924         extractors 
= gen_extractors() 
3926         if opts
.list_extractors
: 
3927                 for ie 
in extractors
: 
3929                         matchedUrls 
= filter(lambda url
: ie
.suitable(url
), all_urls
) 
3930                         all_urls 
= filter(lambda url
: url 
not in matchedUrls
, all_urls
) 
3931                         for mu 
in matchedUrls
: 
3935         # Conflicting, missing and erroneous options 
3936         if opts
.usenetrc 
and (opts
.username 
is not None or opts
.password 
is not None): 
3937                 parser
.error(u
'using .netrc conflicts with giving username/password') 
3938         if opts
.password 
is not None and opts
.username 
is None: 
3939                 parser
.error(u
'account username missing') 
3940         if opts
.outtmpl 
is not None and (opts
.useliteral 
or opts
.usetitle 
or opts
.autonumber
): 
3941                 parser
.error(u
'using output template conflicts with using title, literal title or auto number') 
3942         if opts
.usetitle 
and opts
.useliteral
: 
3943                 parser
.error(u
'using title conflicts with using literal title') 
3944         if opts
.username 
is not None and opts
.password 
is None: 
3945                 opts
.password 
= getpass
.getpass(u
'Type account password and press return:') 
3946         if opts
.ratelimit 
is not None: 
3947                 numeric_limit 
= FileDownloader
.parse_bytes(opts
.ratelimit
) 
3948                 if numeric_limit 
is None: 
3949                         parser
.error(u
'invalid rate limit specified') 
3950                 opts
.ratelimit 
= numeric_limit
 
3951         if opts
.retries 
is not None: 
3953                         opts
.retries 
= long(opts
.retries
) 
3954                 except (TypeError, ValueError), err
: 
3955                         parser
.error(u
'invalid retry count specified') 
3957                 opts
.playliststart 
= int(opts
.playliststart
) 
3958                 if opts
.playliststart 
<= 0: 
3959                         raise ValueError(u
'Playlist start must be positive') 
3960         except (TypeError, ValueError), err
: 
3961                 parser
.error(u
'invalid playlist start number specified') 
3963                 opts
.playlistend 
= int(opts
.playlistend
) 
3964                 if opts
.playlistend 
!= -1 and (opts
.playlistend 
<= 0 or opts
.playlistend 
< opts
.playliststart
): 
3965                         raise ValueError(u
'Playlist end must be greater than playlist start') 
3966         except (TypeError, ValueError), err
: 
3967                 parser
.error(u
'invalid playlist end number specified') 
3968         if opts
.extractaudio
: 
3969                 if opts
.audioformat 
not in ['best', 'aac', 'mp3', 'vorbis']: 
3970                         parser
.error(u
'invalid audio format specified') 
3973         fd 
= FileDownloader({ 
3974                 'usenetrc': opts
.usenetrc
, 
3975                 'username': opts
.username
, 
3976                 'password': opts
.password
, 
3977                 'quiet': (opts
.quiet 
or opts
.geturl 
or opts
.gettitle 
or opts
.getthumbnail 
or opts
.getdescription 
or opts
.getfilename 
or opts
.getformat
), 
3978                 'forceurl': opts
.geturl
, 
3979                 'forcetitle': opts
.gettitle
, 
3980                 'forcethumbnail': opts
.getthumbnail
, 
3981                 'forcedescription': opts
.getdescription
, 
3982                 'forcefilename': opts
.getfilename
, 
3983                 'forceformat': opts
.getformat
, 
3984                 'simulate': opts
.simulate
, 
3985                 'skip_download': (opts
.skip_download 
or opts
.simulate 
or opts
.geturl 
or opts
.gettitle 
or opts
.getthumbnail 
or opts
.getdescription 
or opts
.getfilename 
or opts
.getformat
), 
3986                 'format': opts
.format
, 
3987                 'format_limit': opts
.format_limit
, 
3988                 'listformats': opts
.listformats
, 
3989                 'outtmpl': ((opts
.outtmpl 
is not None and opts
.outtmpl
.decode(preferredencoding())) 
3990                         or (opts
.format 
== '-1' and opts
.usetitle 
and u
'%(stitle)s-%(id)s-%(format)s.%(ext)s') 
3991                         or (opts
.format 
== '-1' and opts
.useliteral 
and u
'%(title)s-%(id)s-%(format)s.%(ext)s') 
3992                         or (opts
.format 
== '-1' and u
'%(id)s-%(format)s.%(ext)s') 
3993                         or (opts
.usetitle 
and opts
.autonumber 
and u
'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') 
3994                         or (opts
.useliteral 
and opts
.autonumber 
and u
'%(autonumber)s-%(title)s-%(id)s.%(ext)s') 
3995                         or (opts
.usetitle 
and u
'%(stitle)s-%(id)s.%(ext)s') 
3996                         or (opts
.useliteral 
and u
'%(title)s-%(id)s.%(ext)s') 
3997                         or (opts
.autonumber 
and u
'%(autonumber)s-%(id)s.%(ext)s') 
3998                         or u
'%(id)s.%(ext)s'), 
3999                 'ignoreerrors': opts
.ignoreerrors
, 
4000                 'ratelimit': opts
.ratelimit
, 
4001                 'nooverwrites': opts
.nooverwrites
, 
4002                 'retries': opts
.retries
, 
4003                 'continuedl': opts
.continue_dl
, 
4004                 'noprogress': opts
.noprogress
, 
4005                 'playliststart': opts
.playliststart
, 
4006                 'playlistend': opts
.playlistend
, 
4007                 'logtostderr': opts
.outtmpl 
== '-', 
4008                 'consoletitle': opts
.consoletitle
, 
4009                 'nopart': opts
.nopart
, 
4010                 'updatetime': opts
.updatetime
, 
4011                 'writedescription': opts
.writedescription
, 
4012                 'writeinfojson': opts
.writeinfojson
, 
4013                 'matchtitle': opts
.matchtitle
, 
4014                 'rejecttitle': opts
.rejecttitle
, 
4016         for extractor 
in extractors
: 
4017                 fd
.add_info_extractor(extractor
) 
4020         if opts
.extractaudio
: 
4021                 fd
.add_post_processor(FFmpegExtractAudioPP(preferredcodec
=opts
.audioformat
, preferredquality
=opts
.audioquality
, keepvideo
=opts
.keepvideo
)) 
4024         if opts
.update_self
: 
4025                 updateSelf(fd
, sys
.argv
[0]) 
4028         if len(all_urls
) < 1: 
4029                 if not opts
.update_self
: 
4030                         parser
.error(u
'you must provide at least one URL') 
4033         retcode 
= fd
.download(all_urls
) 
4035         # Dump cookie jar if requested 
4036         if opts
.cookiefile 
is not None: 
4039                 except (IOError, OSError), err
: 
4040                         sys
.exit(u
'ERROR: unable to save cookie jar') 
if __name__ == '__main__':
	try:
		main()
	except DownloadError:
		# The downloader already reported the error; just signal failure.
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')
4055 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: