2 # -*- coding: utf-8 -*- 
   5         'Ricardo Garcia Gonzalez', 
  13         'Philipp Hagemeister', 
  20 __license__ 
= 'Public Domain' 
  21 __version__ 
= '2012.01.05' 
  23 UPDATE_URL 
= 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' 
  52 except ImportError: # Python 2.4 
  55         import cStringIO 
as StringIO
 
  59 # parse_qs was moved from the cgi module to the urlparse module recently. 
  61         from urlparse 
import parse_qs
 
  63         from cgi 
import parse_qs
 
  71         import xml
.etree
.ElementTree
 
  72 except ImportError: # Python<2.5: Not officially supported, but let it slip 
  73         warnings
.warn('xml.etree.ElementTree support is missing. Consider upgrading to Python >= 2.5 if you get related errors.') 
  76         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 
  77         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  78         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  79         'Accept-Encoding': 'gzip, deflate', 
  80         'Accept-Language': 'en-us,en;q=0.5', 
  85 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): 
  91                         def raiseError(msg
, i
): 
  92                                 raise ValueError(msg 
+ ' at position ' + str(i
) + ' of ' + repr(s
) + ': ' + repr(s
[i
:])) 
  93                         def skipSpace(i
, expectMore
=True): 
  94                                 while i 
< len(s
) and s
[i
] in ' \t\r\n': 
  98                                                 raiseError('Premature end', i
) 
 100                         def decodeEscape(match
): 
 116                                                 return unichr(int(esc
[1:5], 16)) 
 117                                         if len(esc
) == 5+6 and esc
[5:7] == '\\u': 
 118                                                 hi 
= int(esc
[1:5], 16) 
 119                                                 low 
= int(esc
[7:11], 16) 
 120                                                 return unichr((hi 
- 0xd800) * 0x400 + low 
- 0xdc00 + 0x10000) 
 121                                 raise ValueError('Unknown escape ' + str(esc
)) 
 128                                         while s
[e
-bslashes
-1] == '\\': 
 130                                         if bslashes 
% 2 == 1: 
 134                                 rexp 
= re
.compile(r
'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') 
 135                                 stri 
= rexp
.sub(decodeEscape
, s
[i
:e
]) 
 141                                 if s
[i
] == '}': # Empty dictionary 
 145                                                 raiseError('Expected a string object key', i
) 
 146                                         i
,key 
= parseString(i
) 
 148                                         if i 
>= len(s
) or s
[i
] != ':': 
 149                                                 raiseError('Expected a colon', i
) 
 156                                                 raiseError('Expected comma or closing curly brace', i
) 
 161                                 if s
[i
] == ']': # Empty array 
 166                                         i 
= skipSpace(i
) # Raise exception if premature end 
 170                                                 raiseError('Expected a comma or closing bracket', i
) 
 172                         def parseDiscrete(i
): 
 173                                 for k
,v 
in {'true': True, 'false': False, 'null': None}.items(): 
 174                                         if s
.startswith(k
, i
): 
 176                                 raiseError('Not a boolean (or null)', i
) 
 178                                 mobj 
= re
.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s
[i
:]) 
 180                                         raiseError('Not a number', i
) 
 182                                 if '.' in nums 
or 'e' in nums 
or 'E' in nums
: 
 183                                         return (i
+len(nums
), float(nums
)) 
 184                                 return (i
+len(nums
), int(nums
)) 
 185                         CHARMAP 
= {'{': parseObj
, '[': parseArray
, '"': parseString
, 't': parseDiscrete
, 'f': parseDiscrete
, 'n': parseDiscrete
} 
 188                                 i
,res 
= CHARMAP
.get(s
[i
], parseNumber
)(i
) 
 189                                 i 
= skipSpace(i
, False) 
 193                                 raise ValueError('Extra data at end of input (index ' + str(i
) + ' of ' + repr(s
) + ': ' + repr(s
[i
:]) + ')') 
 196 def preferredencoding(): 
 197         """Get preferred encoding. 
 199         Returns the best encoding scheme for the system, based on 
 200         locale.getpreferredencoding() and some further tweaks. 
 202         def yield_preferredencoding(): 
 204                         pref 
= locale
.getpreferredencoding() 
 210         return yield_preferredencoding().next() 
 213 def htmlentity_transform(matchobj
): 
 214         """Transforms an HTML entity to a Unicode character. 
 216         This function receives a match object and is intended to be used with 
 217         the re.sub() function. 
 219         entity 
= matchobj
.group(1) 
 221         # Known non-numeric HTML entity 
 222         if entity 
in htmlentitydefs
.name2codepoint
: 
 223                 return unichr(htmlentitydefs
.name2codepoint
[entity
]) 
 226         mobj 
= re
.match(ur
'(?u)#(x?\d+)', entity
) 
 228                 numstr 
= mobj
.group(1) 
 229                 if numstr
.startswith(u
'x'): 
 231                         numstr 
= u
'0%s' % numstr
 
 234                 return unichr(long(numstr
, base
)) 
 236         # Unknown entity in name, return its literal representation 
 237         return (u
'&%s;' % entity
) 
 240 def sanitize_title(utitle
): 
 241         """Sanitizes a video title so it could be used as part of a filename.""" 
 242         utitle 
= re
.sub(ur
'(?u)&(.+?);', htmlentity_transform
, utitle
) 
 243         return utitle
.replace(unicode(os
.sep
), u
'%') 
 246 def sanitize_open(filename
, open_mode
): 
 247         """Try to open the given filename, and slightly tweak it if this fails. 
 249         Attempts to open the given filename. If this fails, it tries to change 
 250         the filename slightly, step by step, until it's either able to open it 
 251         or it fails and raises a final exception, like the standard open() 
 254         It returns the tuple (stream, definitive_file_name). 
 258                         if sys
.platform 
== 'win32': 
 260                                 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
) 
 261                         return (sys
.stdout
, filename
) 
 262                 stream 
= open(filename
, open_mode
) 
 263                 return (stream
, filename
) 
 264         except (IOError, OSError), err
: 
 265                 # In case of error, try to remove win32 forbidden chars 
 266                 filename 
= re
.sub(ur
'[/<>:"\|\?\*]', u
'#', filename
) 
 268                 # An exception here should be caught in the caller 
 269                 stream 
= open(filename
, open_mode
) 
 270                 return (stream
, filename
) 
 273 def timeconvert(timestr
): 
 274         """Convert RFC 2822 defined time string into system timestamp""" 
 276         timetuple 
= email
.utils
.parsedate_tz(timestr
) 
 277         if timetuple 
is not None: 
 278                 timestamp 
= email
.utils
.mktime_tz(timetuple
) 
 281 def _simplify_title(title
): 
 282         expr 
= re
.compile(ur
'[^\w\d_\-]+', flags
=re
.UNICODE
) 
 283         return expr
.sub(u
'_', title
).strip(u
'_') 
 285 def _orderedSet(iterable
): 
 286         """ Remove all duplicates from the input iterable """ 
 293 def _unescapeHTML(s
): 
 295     @param s a string (of type unicode) 
 297     assert type(s
) == type(u
'') 
 299     htmlParser 
= HTMLParser
.HTMLParser() 
 300     return htmlParser
.unescape(s
) 
 302 class DownloadError(Exception): 
 303         """Download Error exception. 
 305         This exception may be thrown by FileDownloader objects if they are not 
 306         configured to continue on errors. They will contain the appropriate 
 312 class SameFileError(Exception): 
 313         """Same File exception. 
 315         This exception will be thrown by FileDownloader objects if they detect 
 316         multiple files would have to be downloaded to the same file on disk. 
 321 class PostProcessingError(Exception): 
 322         """Post Processing exception. 
 324         This exception may be raised by PostProcessor's .run() method to 
 325         indicate an error in the postprocessing task. 
 329 class MaxDownloadsReached(Exception): 
 330         """ --max-downloads limit has been reached. """ 
 334 class UnavailableVideoError(Exception): 
 335         """Unavailable Format exception. 
 337         This exception will be thrown when a video is requested 
 338         in a format that is not available for that video. 
 343 class ContentTooShortError(Exception): 
 344         """Content Too Short exception. 
 346         This exception may be raised by FileDownloader objects when a file they 
 347         download is too small for what the server announced first, indicating 
 348         the connection was probably interrupted. 
	def __init__(self, downloaded, expected):
		"""Record the actual (downloaded) and announced (expected) byte counts
		so the caller can report the mismatch."""
		self.expected = expected
		self.downloaded = downloaded
 359 class YoutubeDLHandler(urllib2
.HTTPHandler
): 
 360         """Handler for HTTP requests and responses. 
 362         This class, when installed with an OpenerDirector, automatically adds 
 363         the standard headers to every HTTP request and handles gzipped and 
 364         deflated responses from web servers. If compression is to be avoided in 
 365         a particular request, the original request in the program code only has 
 366         to include the HTTP header "Youtubedl-No-Compression", which will be 
 367         removed before making the real request. 
 369         Part of this code was copied from: 
 371         http://techknack.net/python-urllib2-handlers/ 
 373         Andrew Rowls, the author of that code, agreed to release it to the 
 380                         return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 382                         return zlib
.decompress(data
) 
 385         def addinfourl_wrapper(stream
, headers
, url
, code
): 
 386                 if hasattr(urllib2
.addinfourl
, 'getcode'): 
 387                         return urllib2
.addinfourl(stream
, headers
, url
, code
) 
 388                 ret 
= urllib2
.addinfourl(stream
, headers
, url
) 
 392         def http_request(self
, req
): 
 393                 for h 
in std_headers
: 
 396                         req
.add_header(h
, std_headers
[h
]) 
 397                 if 'Youtubedl-no-compression' in req
.headers
: 
 398                         if 'Accept-encoding' in req
.headers
: 
 399                                 del req
.headers
['Accept-encoding'] 
 400                         del req
.headers
['Youtubedl-no-compression'] 
 403         def http_response(self
, req
, resp
): 
 406                 if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 407                         gz 
= gzip
.GzipFile(fileobj
=StringIO
.StringIO(resp
.read()), mode
='r') 
 408                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 409                         resp
.msg 
= old_resp
.msg
 
 411                 if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 412                         gz 
= StringIO
.StringIO(self
.deflate(resp
.read())) 
 413                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 414                         resp
.msg 
= old_resp
.msg
 
 418 class FileDownloader(object): 
 419         """File Downloader class. 
 421         File downloader objects are the ones responsible of downloading the 
 422         actual video file and writing it to disk if the user has requested 
 423         it, among some other tasks. In most cases there should be one per 
 424         program. As, given a video URL, the downloader doesn't know how to 
 425         extract all the needed information, task that InfoExtractors do, it 
 426         has to pass the URL to one of them. 
 428         For this, file downloader objects have a method that allows 
 429         InfoExtractors to be registered in a given order. When it is passed 
 430         a URL, the file downloader handles it to the first InfoExtractor it 
 431         finds that reports being able to handle it. The InfoExtractor extracts 
 432         all the information about the video or videos the URL refers to, and 
 433         asks the FileDownloader to process the video information, possibly 
 434         downloading the video. 
 436         File downloaders accept a lot of parameters. In order not to saturate 
 437         the object constructor with arguments, it receives a dictionary of 
 438         options instead. These options are available through the params 
 439         attribute for the InfoExtractors to use. The FileDownloader also 
 440         registers itself as the downloader in charge for the InfoExtractors 
 441         that are added to it, so this is a "mutual registration". 
 445         username:         Username for authentication purposes. 
 446         password:         Password for authentication purposes. 
 447         usenetrc:         Use netrc for authentication instead. 
 448         quiet:            Do not print messages to stdout. 
 449         forceurl:         Force printing final URL. 
 450         forcetitle:       Force printing title. 
 451         forcethumbnail:   Force printing thumbnail URL. 
 452         forcedescription: Force printing description. 
 453         forcefilename:    Force printing final filename. 
 454         simulate:         Do not download the video files. 
 455         format:           Video format code. 
 456         format_limit:     Highest quality format to try. 
 457         outtmpl:          Template for output names. 
 458         ignoreerrors:     Do not stop on download errors. 
 459         ratelimit:        Download speed limit, in bytes/sec. 
 460         nooverwrites:     Prevent overwriting files. 
 461         retries:          Number of times to retry for HTTP error 5xx 
 462         continuedl:       Try to continue downloads if possible. 
 463         noprogress:       Do not print the progress bar. 
 464         playliststart:    Playlist item to start at. 
 465         playlistend:      Playlist item to end at. 
 466         matchtitle:       Download only matching titles. 
 467         rejecttitle:      Reject downloads for matching titles. 
 468         logtostderr:      Log messages to stderr instead of stdout. 
 469         consoletitle:     Display progress in console window's titlebar. 
 470         nopart:           Do not use temporary .part files. 
 471         updatetime:       Use the Last-modified header to set output file timestamps. 
 472         writedescription: Write the video description to a .description file 
 473         writeinfojson:    Write the video description to a .info.json file 
 479         _download_retcode 
= None 
 480         _num_downloads 
= None 
 483         def __init__(self
, params
): 
 484                 """Create a FileDownloader object with the given options.""" 
 487                 self
._download
_retcode 
= 0 
 488                 self
._num
_downloads 
= 0 
 489                 self
._screen
_file 
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)] 
 493         def format_bytes(bytes): 
 496                 if type(bytes) is str: 
 501                         exponent 
= long(math
.log(bytes, 1024.0)) 
 502                 suffix 
= 'bkMGTPEZY'[exponent
] 
 503                 converted 
= float(bytes) / float(1024 ** exponent
) 
 504                 return '%.2f%s' % (converted
, suffix
) 
 507         def calc_percent(byte_counter
, data_len
): 
 510                 return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0)) 
 513         def calc_eta(start
, now
, total
, current
): 
 517                 if current 
== 0 or dif 
< 0.001: # One millisecond 
 519                 rate 
= float(current
) / dif
 
 520                 eta 
= long((float(total
) - float(current
)) / rate
) 
 521                 (eta_mins
, eta_secs
) = divmod(eta
, 60) 
 524                 return '%02d:%02d' % (eta_mins
, eta_secs
) 
 527         def calc_speed(start
, now
, bytes): 
 529                 if bytes == 0 or dif 
< 0.001: # One millisecond 
 530                         return '%10s' % '---b/s' 
 531                 return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
)) 
 534         def best_block_size(elapsed_time
, bytes): 
 535                 new_min 
= max(bytes / 2.0, 1.0) 
 536                 new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
 537                 if elapsed_time 
< 0.001: 
 539                 rate 
= bytes / elapsed_time
 
 547         def parse_bytes(bytestr
): 
 548                 """Parse a string indicating a byte quantity into a long integer.""" 
 549                 matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 552                 number 
= float(matchobj
.group(1)) 
 553                 multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 554                 return long(round(number 
* multiplier
)) 
 556         def add_info_extractor(self
, ie
): 
 557                 """Add an InfoExtractor object to the end of the list.""" 
 559                 ie
.set_downloader(self
) 
 561         def add_post_processor(self
, pp
): 
 562                 """Add a PostProcessor object to the end of the chain.""" 
 564                 pp
.set_downloader(self
) 
 566         def to_screen(self
, message
, skip_eol
=False, ignore_encoding_errors
=False): 
 567                 """Print message to stdout if not in quiet mode.""" 
 569                         if not self
.params
.get('quiet', False): 
 570                                 terminator 
= [u
'\n', u
''][skip_eol
] 
 571                                 print >>self
._screen
_file
, (u
'%s%s' % (message
, terminator
)).encode(preferredencoding()), 
 572                         self
._screen
_file
.flush() 
 573                 except (UnicodeEncodeError), err
: 
 574                         if not ignore_encoding_errors
: 
	def to_stderr(self, message):
		"""Write message, followed by a newline, to stderr in the
		system's preferred encoding."""
		sys.stderr.write(message.encode(preferredencoding()) + '\n')
 581         def to_cons_title(self
, message
): 
 582                 """Set console/terminal window title to message.""" 
 583                 if not self
.params
.get('consoletitle', False): 
 585                 if os
.name 
== 'nt' and ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 586                         # c_wchar_p() might not be necessary if `message` is 
 587                         # already of type unicode() 
 588                         ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 589                 elif 'TERM' in os
.environ
: 
 590                         sys
.stderr
.write('\033]0;%s\007' % message
.encode(preferredencoding())) 
 592         def fixed_template(self
): 
 593                 """Checks if the output template is fixed.""" 
 594                 return (re
.search(ur
'(?u)%\(.+?\)s', self
.params
['outtmpl']) is None) 
	def trouble(self, message=None):
		"""Handle a download problem.

		Prints the message, if any, to stderr. Unless the downloader was
		configured with 'ignoreerrors', a DownloadError is then raised;
		otherwise the failure is only recorded in the return code.
		"""
		if message is not None:
			self.to_stderr(message)
		ignore_errors = self.params.get('ignoreerrors', False)
		if not ignore_errors:
			raise DownloadError(message)
		self._download_retcode = 1
 609         def slow_down(self
, start_time
, byte_counter
): 
 610                 """Sleep if the download speed is over the rate limit.""" 
 611                 rate_limit 
= self
.params
.get('ratelimit', None) 
 612                 if rate_limit 
is None or byte_counter 
== 0: 
 615                 elapsed 
= now 
- start_time
 
 618                 speed 
= float(byte_counter
) / elapsed
 
 619                 if speed 
> rate_limit
: 
 620                         time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 622         def temp_name(self
, filename
): 
 623                 """Returns a temporary filename for the given filename.""" 
 624                 if self
.params
.get('nopart', False) or filename 
== u
'-' or \
 
 625                                 (os
.path
.exists(filename
) and not os
.path
.isfile(filename
)): 
 627                 return filename 
+ u
'.part' 
 629         def undo_temp_name(self
, filename
): 
 630                 if filename
.endswith(u
'.part'): 
 631                         return filename
[:-len(u
'.part')] 
 634         def try_rename(self
, old_filename
, new_filename
): 
 636                         if old_filename 
== new_filename
: 
 638                         os
.rename(old_filename
, new_filename
) 
 639                 except (IOError, OSError), err
: 
 640                         self
.trouble(u
'ERROR: unable to rename file') 
 642         def try_utime(self
, filename
, last_modified_hdr
): 
 643                 """Try to set the last-modified time of the given file.""" 
 644                 if last_modified_hdr 
is None: 
 646                 if not os
.path
.isfile(filename
): 
 648                 timestr 
= last_modified_hdr
 
 651                 filetime 
= timeconvert(timestr
) 
 655                         os
.utime(filename
, (time
.time(), filetime
)) 
	def report_writedescription(self, descfn):
		""" Report that the description file is being written """
		msg = u'[info] Writing video description to: %s' % descfn
		self.to_screen(msg, ignore_encoding_errors=True)
	def report_writeinfojson(self, infofn):
		""" Report that the metadata file has been written """
		msg = u'[info] Video description metadata as JSON to: %s' % infofn
		self.to_screen(msg, ignore_encoding_errors=True)
	def report_destination(self, filename):
		"""Report destination filename."""
		msg = u'[download] Destination: %s' % filename
		self.to_screen(msg, ignore_encoding_errors=True)
 672         def report_progress(self
, percent_str
, data_len_str
, speed_str
, eta_str
): 
 673                 """Report download progress.""" 
 674                 if self
.params
.get('noprogress', False): 
 676                 self
.to_screen(u
'\r[download] %s of %s at %s ETA %s' % 
 677                                 (percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 678                 self
.to_cons_title(u
'youtube-dl - %s of %s at %s ETA %s' % 
 679                                 (percent_str
.strip(), data_len_str
.strip(), speed_str
.strip(), eta_str
.strip())) 
	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		msg = u'[download] Resuming download at byte %s' % resume_len
		self.to_screen(msg)
	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx"""
		msg = u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)
		self.to_screen(msg)
 689         def report_file_already_downloaded(self
, file_name
): 
 690                 """Report file has already been fully downloaded.""" 
 692                         self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 693                 except (UnicodeEncodeError), err
: 
 694                         self
.to_screen(u
'[download] The file has already been downloaded') 
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		msg = u'[download] Unable to resume'
		self.to_screen(msg)
 700         def report_finish(self
): 
 701                 """Report download finished.""" 
 702                 if self
.params
.get('noprogress', False): 
 703                         self
.to_screen(u
'[download] Download completed') 
	def increment_downloads(self):
		"""Advance the ordinal that assigns a number to each file."""
		self._num_downloads = self._num_downloads + 1
 711         def prepare_filename(self
, info_dict
): 
 712                 """Generate the output filename.""" 
 714                         template_dict 
= dict(info_dict
) 
 715                         template_dict
['epoch'] = unicode(long(time
.time())) 
 716                         template_dict
['autonumber'] = unicode('%05d' % self
._num
_downloads
) 
 717                         filename 
= self
.params
['outtmpl'] % template_dict
 
 719                 except (ValueError, KeyError), err
: 
 720                         self
.trouble(u
'ERROR: invalid system charset or erroneous output template') 
 723         def _match_entry(self
, info_dict
): 
 724                 """ Returns None iff the file should be downloaded """ 
 726                 title 
= info_dict
['title'] 
 727                 matchtitle 
= self
.params
.get('matchtitle', False) 
 728                 if matchtitle 
and not re
.search(matchtitle
, title
, re
.IGNORECASE
): 
 729                         return u
'[download] "' + title 
+ '" title did not match pattern "' + matchtitle 
+ '"' 
 730                 rejecttitle 
= self
.params
.get('rejecttitle', False) 
 731                 if rejecttitle 
and re
.search(rejecttitle
, title
, re
.IGNORECASE
): 
 732                         return u
'"' + title 
+ '" title matched reject pattern "' + rejecttitle 
+ '"' 
 735         def process_info(self
, info_dict
): 
 736                 """Process a single dictionary returned by an InfoExtractor.""" 
 738                 reason 
= self
._match
_entry
(info_dict
) 
 739                 if reason 
is not None: 
 740                         self
.to_screen(u
'[download] ' + reason
) 
 743                 max_downloads 
= self
.params
.get('max_downloads') 
 744                 if max_downloads 
is not None: 
 745                         if self
._num
_downloads 
> int(max_downloads
): 
 746                                 raise MaxDownloadsReached() 
 748                 filename 
= self
.prepare_filename(info_dict
) 
 751                 if self
.params
.get('forcetitle', False): 
 752                         print info_dict
['title'].encode(preferredencoding(), 'xmlcharrefreplace') 
 753                 if self
.params
.get('forceurl', False): 
 754                         print info_dict
['url'].encode(preferredencoding(), 'xmlcharrefreplace') 
 755                 if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
: 
 756                         print info_dict
['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') 
 757                 if self
.params
.get('forcedescription', False) and 'description' in info_dict
: 
 758                         print info_dict
['description'].encode(preferredencoding(), 'xmlcharrefreplace') 
 759                 if self
.params
.get('forcefilename', False) and filename 
is not None: 
 760                         print filename
.encode(preferredencoding(), 'xmlcharrefreplace') 
 761                 if self
.params
.get('forceformat', False): 
 762                         print info_dict
['format'].encode(preferredencoding(), 'xmlcharrefreplace') 
 764                 # Do nothing else if in simulate mode 
 765                 if self
.params
.get('simulate', False): 
 772                         dn 
= os
.path
.dirname(filename
) 
 773                         if dn 
!= '' and not os
.path
.exists(dn
): 
 775                 except (OSError, IOError), err
: 
 776                         self
.trouble(u
'ERROR: unable to create directory ' + unicode(err
)) 
 779                 if self
.params
.get('writedescription', False): 
 781                                 descfn 
= filename 
+ '.description' 
 782                                 self
.report_writedescription(descfn
) 
 783                                 descfile 
= open(descfn
, 'wb') 
 785                                         descfile
.write(info_dict
['description'].encode('utf-8')) 
 788                         except (OSError, IOError): 
 789                                 self
.trouble(u
'ERROR: Cannot write description file ' + descfn
) 
 792                 if self
.params
.get('writeinfojson', False): 
 793                         infofn 
= filename 
+ '.info.json' 
 794                         self
.report_writeinfojson(infofn
) 
 797                         except (NameError,AttributeError): 
 798                                 self
.trouble(u
'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') 
 801                                 infof 
= open(infofn
, 'wb') 
 803                                         json_info_dict 
= dict((k
,v
) for k
,v 
in info_dict
.iteritems() if not k 
in ('urlhandle',)) 
 804                                         json
.dump(json_info_dict
, infof
) 
 807                         except (OSError, IOError): 
 808                                 self
.trouble(u
'ERROR: Cannot write metadata to JSON file ' + infofn
) 
 811                 if not self
.params
.get('skip_download', False): 
 812                         if self
.params
.get('nooverwrites', False) and os
.path
.exists(filename
): 
 816                                         success 
= self
._do
_download
(filename
, info_dict
) 
 817                                 except (OSError, IOError), err
: 
 818                                         raise UnavailableVideoError
 
 819                                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 820                                         self
.trouble(u
'ERROR: unable to download video data: %s' % str(err
)) 
 822                                 except (ContentTooShortError
, ), err
: 
 823                                         self
.trouble(u
'ERROR: content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
)) 
 828                                         self
.post_process(filename
, info_dict
) 
 829                                 except (PostProcessingError
), err
: 
 830                                         self
.trouble(u
'ERROR: postprocessing: %s' % str(err
)) 
 833         def download(self
, url_list
): 
 834                 """Download a given list of URLs.""" 
 835                 if len(url_list
) > 1 and self
.fixed_template(): 
 836                         raise SameFileError(self
.params
['outtmpl']) 
 839                         suitable_found 
= False 
 841                                 # Go to next InfoExtractor if not suitable 
 842                                 if not ie
.suitable(url
): 
 845                                 # Suitable InfoExtractor found 
 846                                 suitable_found 
= True 
 848                                 # Extract information from URL and process it 
 851                                 # Suitable InfoExtractor had been found; go to next URL 
 854                         if not suitable_found
: 
 855                                 self
.trouble(u
'ERROR: no suitable InfoExtractor: %s' % url
) 
 857                 return self
._download
_retcode
 
 859         def post_process(self
, filename
, ie_info
): 
 860                 """Run the postprocessing chain on the given file.""" 
 862                 info
['filepath'] = filename
 
 868         def _download_with_rtmpdump(self
, filename
, url
, player_url
): 
 869                 self
.report_destination(filename
) 
 870                 tmpfilename 
= self
.temp_name(filename
) 
 872                 # Check for rtmpdump first 
 874                         subprocess
.call(['rtmpdump', '-h'], stdout
=(file(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 875                 except (OSError, IOError): 
 876                         self
.trouble(u
'ERROR: RTMP download detected but "rtmpdump" could not be run') 
 879                 # Download using rtmpdump. rtmpdump returns exit code 2 when 
 880                 # the connection was interrumpted and resuming appears to be 
 881                 # possible. This is part of rtmpdump's normal usage, AFAIK. 
 882                 basic_args 
= ['rtmpdump', '-q'] + [[], ['-W', player_url
]][player_url 
is not None] + ['-r', url
, '-o', tmpfilename
] 
 883                 retval 
= subprocess
.call(basic_args 
+ [[], ['-e', '-k', '1']][self
.params
.get('continuedl', False)]) 
 884                 while retval 
== 2 or retval 
== 1: 
 885                         prevsize 
= os
.path
.getsize(tmpfilename
) 
 886                         self
.to_screen(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 887                         time
.sleep(5.0) # This seems to be needed 
 888                         retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 889                         cursize 
= os
.path
.getsize(tmpfilename
) 
 890                         if prevsize 
== cursize 
and retval 
== 1: 
 892                          # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those 
 893                         if prevsize 
== cursize 
and retval 
== 2 and cursize 
> 1024: 
 894                                 self
.to_screen(u
'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') 
 898                         self
.to_screen(u
'\r[rtmpdump] %s bytes' % os
.path
.getsize(tmpfilename
)) 
 899                         self
.try_rename(tmpfilename
, filename
) 
 902                         self
.trouble(u
'\nERROR: rtmpdump exited with code %d' % retval
) 
 905         def _do_download(self
, filename
, info_dict
): 
 906                 url 
= info_dict
['url'] 
 907                 player_url 
= info_dict
.get('player_url', None) 
 909                 # Check file already present 
 910                 if self
.params
.get('continuedl', False) and os
.path
.isfile(filename
) and not self
.params
.get('nopart', False): 
 911                         self
.report_file_already_downloaded(filename
) 
 914                 # Attempt to download using rtmpdump 
 915                 if url
.startswith('rtmp'): 
 916                         return self
._download
_with
_rtmpdump
(filename
, url
, player_url
) 
 918                 tmpfilename 
= self
.temp_name(filename
) 
 921                 # Do not include the Accept-Encoding header 
 922                 headers 
= {'Youtubedl-no-compression': 'True'} 
 923                 basic_request 
= urllib2
.Request(url
, None, headers
) 
 924                 request 
= urllib2
.Request(url
, None, headers
) 
 926                 # Establish possible resume length 
 927                 if os
.path
.isfile(tmpfilename
): 
 928                         resume_len 
= os
.path
.getsize(tmpfilename
) 
 934                         if self
.params
.get('continuedl', False): 
 935                                 self
.report_resuming_byte(resume_len
) 
 936                                 request
.add_header('Range','bytes=%d-' % resume_len
) 
 942                 retries 
= self
.params
.get('retries', 0) 
 943                 while count 
<= retries
: 
 944                         # Establish connection 
 946                                 if count 
== 0 and 'urlhandle' in info_dict
: 
 947                                         data 
= info_dict
['urlhandle'] 
 948                                 data 
= urllib2
.urlopen(request
) 
 950                         except (urllib2
.HTTPError
, ), err
: 
 951                                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 952                                         # Unexpected HTTP error 
 954                                 elif err
.code 
== 416: 
 955                                         # Unable to resume (requested range not satisfiable) 
 957                                                 # Open the connection again without the range header 
 958                                                 data 
= urllib2
.urlopen(basic_request
) 
 959                                                 content_length 
= data
.info()['Content-Length'] 
 960                                         except (urllib2
.HTTPError
, ), err
: 
 961                                                 if err
.code 
< 500 or err
.code 
>= 600: 
 964                                                 # Examine the reported length 
 965                                                 if (content_length 
is not None and 
 966                                                                 (resume_len 
- 100 < long(content_length
) < resume_len 
+ 100)): 
 967                                                         # The file had already been fully downloaded. 
 968                                                         # Explanation to the above condition: in issue #175 it was revealed that 
 969                                                         # YouTube sometimes adds or removes a few bytes from the end of the file, 
 970                                                         # changing the file size slightly and causing problems for some users. So 
 971                                                         # I decided to implement a suggested change and consider the file 
 972                                                         # completely downloaded if the file size differs less than 100 bytes from 
 973                                                         # the one in the hard drive. 
 974                                                         self
.report_file_already_downloaded(filename
) 
 975                                                         self
.try_rename(tmpfilename
, filename
) 
 978                                                         # The length does not match, we start the download over 
 979                                                         self
.report_unable_to_resume() 
 985                                 self
.report_retry(count
, retries
) 
 988                         self
.trouble(u
'ERROR: giving up after %s retries' % retries
) 
 991                 data_len 
= data
.info().get('Content-length', None) 
 992                 if data_len 
is not None: 
 993                         data_len 
= long(data_len
) + resume_len
 
 994                 data_len_str 
= self
.format_bytes(data_len
) 
 995                 byte_counter 
= 0 + resume_len
 
1000                         before 
= time
.time() 
1001                         data_block 
= data
.read(block_size
) 
1003                         if len(data_block
) == 0: 
1005                         byte_counter 
+= len(data_block
) 
1007                         # Open file just in time 
1010                                         (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
1011                                         assert stream 
is not None 
1012                                         filename 
= self
.undo_temp_name(tmpfilename
) 
1013                                         self
.report_destination(filename
) 
1014                                 except (OSError, IOError), err
: 
1015                                         self
.trouble(u
'ERROR: unable to open for writing: %s' % str(err
)) 
1018                                 stream
.write(data_block
) 
1019                         except (IOError, OSError), err
: 
1020                                 self
.trouble(u
'\nERROR: unable to write data: %s' % str(err
)) 
1022                         block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
1025                         speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
1026                         if data_len 
is None: 
1027                                 self
.report_progress('Unknown %', data_len_str
, speed_str
, 'Unknown ETA') 
1029                                 percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
1030                                 eta_str 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
1031                                 self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
1034                         self
.slow_down(start
, byte_counter 
- resume_len
) 
1037                         self
.trouble(u
'\nERROR: Did not get any data blocks') 
1040                 self
.report_finish() 
1041                 if data_len 
is not None and byte_counter 
!= data_len
: 
1042                         raise ContentTooShortError(byte_counter
, long(data_len
)) 
1043                 self
.try_rename(tmpfilename
, filename
) 
1045                 # Update file modification time 
1046                 if self
.params
.get('updatetime', True): 
1047                         info_dict
['filetime'] = self
.try_utime(filename
, data
.info().get('last-modified', None)) 
1052 class InfoExtractor(object): 
1053         """Information Extractor class. 
1055         Information extractors are the classes that, given a URL, extract 
1056         information from the video (or videos) the URL refers to. This 
1057         information includes the real video URL, the video title and simplified 
1058         title, author and others. The information is stored in a dictionary 
1059         which is then passed to the FileDownloader. The FileDownloader 
1060         processes this information possibly downloading the video to the file 
1061         system, among other possible outcomes. The dictionaries must include 
1062         the following fields: 
1064         id:             Video identifier. 
1065         url:            Final video URL. 
1066         uploader:       Nickname of the video uploader. 
1067         title:          Literal title. 
1068         stitle:         Simplified title. 
1069         ext:            Video filename extension. 
1070         format:         Video format. 
1071         player_url:     SWF Player URL (may be None). 
1073         The following fields are optional. Their primary purpose is to allow 
1074         youtube-dl to serve as the backend for a video search function, such 
1075         as the one in youtube2mp3.  They are only used when their respective 
1076         forced printing functions are called: 
1078         thumbnail:      Full URL to a video thumbnail image. 
1079         description:    One-line video description. 
1081         Subclasses of this one should re-define the _real_initialize() and 
1082         _real_extract() methods and define a _VALID_URL regexp. 
1083         Probably, they should also be added to the list of extractors. 
1089         def __init__(self
, downloader
=None): 
1090                 """Constructor. Receives an optional downloader.""" 
1092                 self
.set_downloader(downloader
) 
1094         def suitable(self
, url
): 
1095                 """Receives a URL and returns True if suitable for this IE.""" 
1096                 return re
.match(self
._VALID
_URL
, url
) is not None 
1098         def initialize(self
): 
1099                 """Initializes an instance (authentication, etc).""" 
1101                         self
._real
_initialize
() 
1104         def extract(self
, url
): 
1105                 """Extracts URL information and returns it in list of dicts.""" 
1107                 return self
._real
_extract
(url
) 
1109         def set_downloader(self
, downloader
): 
1110                 """Sets the downloader for this IE.""" 
1111                 self
._downloader 
= downloader
 
1113         def _real_initialize(self
): 
1114                 """Real initialization process. Redefine in subclasses.""" 
1117         def _real_extract(self
, url
): 
1118                 """Real extraction process. Redefine in subclasses.""" 
1122 class YoutubeIE(InfoExtractor
): 
1123         """Information extractor for youtube.com.""" 
1125         _VALID_URL 
= r
'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?!view_play_list|my_playlists|artist|playlist)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' 
1126         _LANG_URL 
= r
'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
1127         _LOGIN_URL 
= 'https://www.youtube.com/signup?next=/&gl=US&hl=en' 
1128         _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
1129         _NETRC_MACHINE 
= 'youtube' 
1130         # Listed in order of quality 
1131         _available_formats 
= ['38', '37', '22', '45', '35', '44', '34', '18', '43', '6', '5', '17', '13'] 
1132         _available_formats_prefer_free 
= ['38', '37', '45', '22', '44', '35', '43', '34', '18', '6', '5', '17', '13'] 
1133         _video_extensions 
= { 
1139                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever 
1144         _video_dimensions 
= { 
1159         IE_NAME 
= u
'youtube' 
1161         def report_lang(self
): 
1162                 """Report attempt to set language.""" 
1163                 self
._downloader
.to_screen(u
'[youtube] Setting language') 
1165         def report_login(self
): 
1166                 """Report attempt to log in.""" 
1167                 self
._downloader
.to_screen(u
'[youtube] Logging in') 
1169         def report_age_confirmation(self
): 
1170                 """Report attempt to confirm age.""" 
1171                 self
._downloader
.to_screen(u
'[youtube] Confirming age') 
1173         def report_video_webpage_download(self
, video_id
): 
1174                 """Report attempt to download video webpage.""" 
1175                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video webpage' % video_id
) 
1177         def report_video_info_webpage_download(self
, video_id
): 
1178                 """Report attempt to download video info webpage.""" 
1179                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video info webpage' % video_id
) 
1181         def report_information_extraction(self
, video_id
): 
1182                 """Report attempt to extract video information.""" 
1183                 self
._downloader
.to_screen(u
'[youtube] %s: Extracting video information' % video_id
) 
1185         def report_unavailable_format(self
, video_id
, format
): 
1186                 """Report extracted video URL.""" 
1187                 self
._downloader
.to_screen(u
'[youtube] %s: Format %s not available' % (video_id
, format
)) 
1189         def report_rtmp_download(self
): 
1190                 """Indicate the download will use the RTMP protocol.""" 
1191                 self
._downloader
.to_screen(u
'[youtube] RTMP download detected') 
1193         def _print_formats(self
, formats
): 
1194                 print 'Available formats:' 
1196                         print '%s\t:\t%s\t[%s]' %(x
, self
._video
_extensions
.get(x
, 'flv'), self
._video
_dimensions
.get(x
, '???')) 
1198         def _real_initialize(self
): 
1199                 if self
._downloader 
is None: 
1204                 downloader_params 
= self
._downloader
.params
 
1206                 # Attempt to use provided username and password or .netrc data 
1207                 if downloader_params
.get('username', None) is not None: 
1208                         username 
= downloader_params
['username'] 
1209                         password 
= downloader_params
['password'] 
1210                 elif downloader_params
.get('usenetrc', False): 
1212                                 info 
= netrc
.netrc().authenticators(self
._NETRC
_MACHINE
) 
1213                                 if info 
is not None: 
1217                                         raise netrc
.NetrcParseError('No authenticators for %s' % self
._NETRC
_MACHINE
) 
1218                         except (IOError, netrc
.NetrcParseError
), err
: 
1219                                 self
._downloader
.to_stderr(u
'WARNING: parsing .netrc: %s' % str(err
)) 
1223                 request 
= urllib2
.Request(self
._LANG
_URL
) 
1226                         urllib2
.urlopen(request
).read() 
1227                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1228                         self
._downloader
.to_stderr(u
'WARNING: unable to set language: %s' % str(err
)) 
1231                 # No authentication to be performed 
1232                 if username 
is None: 
1237                                 'current_form': 'loginForm', 
1239                                 'action_login': 'Log In', 
1240                                 'username':     username
, 
1241                                 'password':     password
, 
1243                 request 
= urllib2
.Request(self
._LOGIN
_URL
, urllib
.urlencode(login_form
)) 
1246                         login_results 
= urllib2
.urlopen(request
).read() 
1247                         if re
.search(r
'(?i)<form[^>]* name="loginForm"', login_results
) is not None: 
1248                                 self
._downloader
.to_stderr(u
'WARNING: unable to log in: bad username or password') 
1250                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1251                         self
._downloader
.to_stderr(u
'WARNING: unable to log in: %s' % str(err
)) 
1257                                 'action_confirm':       'Confirm', 
1259                 request 
= urllib2
.Request(self
._AGE
_URL
, urllib
.urlencode(age_form
)) 
1261                         self
.report_age_confirmation() 
1262                         age_results 
= urllib2
.urlopen(request
).read() 
1263                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1264                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1267         def _real_extract(self
, url
): 
1268                 # Extract video id from URL 
1269                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1271                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1273                 video_id 
= mobj
.group(2) 
1276                 self
.report_video_webpage_download(video_id
) 
1277                 request 
= urllib2
.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
) 
1279                         video_webpage 
= urllib2
.urlopen(request
).read() 
1280                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1281                         self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
1284                 # Attempt to extract SWF player URL 
1285                 mobj 
= re
.search(r
'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
) 
1286                 if mobj 
is not None: 
1287                         player_url 
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1)) 
1292                 self
.report_video_info_webpage_download(video_id
) 
1293                 for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
1294                         video_info_url 
= ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
1295                                         % (video_id
, el_type
)) 
1296                         request 
= urllib2
.Request(video_info_url
) 
1298                                 video_info_webpage 
= urllib2
.urlopen(request
).read() 
1299                                 video_info 
= parse_qs(video_info_webpage
) 
1300                                 if 'token' in video_info
: 
1302                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1303                                 self
._downloader
.trouble(u
'ERROR: unable to download video info webpage: %s' % str(err
)) 
1305                 if 'token' not in video_info
: 
1306                         if 'reason' in video_info
: 
1307                                 self
._downloader
.trouble(u
'ERROR: YouTube said: %s' % video_info
['reason'][0].decode('utf-8')) 
1309                                 self
._downloader
.trouble(u
'ERROR: "token" parameter not in video info for unknown reason') 
1312                 # Start extracting information 
1313                 self
.report_information_extraction(video_id
) 
1316                 if 'author' not in video_info
: 
1317                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1319                 video_uploader 
= urllib
.unquote_plus(video_info
['author'][0]) 
1322                 if 'title' not in video_info
: 
1323                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1325                 video_title 
= urllib
.unquote_plus(video_info
['title'][0]) 
1326                 video_title 
= video_title
.decode('utf-8') 
1327                 video_title 
= sanitize_title(video_title
) 
1330                 simple_title 
= _simplify_title(video_title
) 
1333                 if 'thumbnail_url' not in video_info
: 
1334                         self
._downloader
.trouble(u
'WARNING: unable to extract video thumbnail') 
1335                         video_thumbnail 
= '' 
1336                 else:   # don't panic if we can't find it 
1337                         video_thumbnail 
= urllib
.unquote_plus(video_info
['thumbnail_url'][0]) 
1341                 mobj 
= re
.search(r
'id="eow-date.*?>(.*?)</span>', video_webpage
, re
.DOTALL
) 
1342                 if mobj 
is not None: 
1343                         upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
1344                         format_expressions 
= ['%d %B %Y', '%B %d %Y', '%b %d %Y'] 
1345                         for expression 
in format_expressions
: 
1347                                         upload_date 
= datetime
.datetime
.strptime(upload_date
, expression
).strftime('%Y%m%d') 
1355                         video_description 
= u
'No description available.' 
1356                         if self
._downloader
.params
.get('forcedescription', False) or self
._downloader
.params
.get('writedescription', False): 
1357                                 mobj 
= re
.search(r
'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage
) 
1358                                 if mobj 
is not None: 
1359                                         video_description 
= mobj
.group(1).decode('utf-8') 
1361                         html_parser 
= lxml
.etree
.HTMLParser(encoding
='utf-8') 
1362                         vwebpage_doc 
= lxml
.etree
.parse(StringIO
.StringIO(video_webpage
), html_parser
) 
1363                         video_description 
= u
''.join(vwebpage_doc
.xpath('id("eow-description")//text()')) 
1364                         # TODO use another parser 
1367                 video_token 
= urllib
.unquote_plus(video_info
['token'][0]) 
1369                 # Decide which formats to download 
1370                 req_format 
= self
._downloader
.params
.get('format', None) 
1372                 if 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
1373                         self
.report_rtmp_download() 
1374                         video_url_list 
= [(None, video_info
['conn'][0])] 
1375                 elif 'url_encoded_fmt_stream_map' in video_info 
and len(video_info
['url_encoded_fmt_stream_map']) >= 1: 
1376                         url_data_strs 
= video_info
['url_encoded_fmt_stream_map'][0].split(',') 
1377                         url_data 
= [parse_qs(uds
) for uds 
in url_data_strs
] 
1378                         url_data 
= filter(lambda ud
: 'itag' in ud 
and 'url' in ud
, url_data
) 
1379                         url_map 
= dict((ud
['itag'][0], ud
['url'][0]) for ud 
in url_data
) 
1381                         format_limit 
= self
._downloader
.params
.get('format_limit', None) 
1382                         available_formats 
= self
._available
_formats
_prefer
_free 
if self
._downloader
.params
.get('prefer_free_formats', False) else self
._available
_formats
 
1383                         if format_limit 
is not None and format_limit 
in available_formats
: 
1384                                 format_list 
= available_formats
[available_formats
.index(format_limit
):] 
1386                                 format_list 
= available_formats
 
1387                         existing_formats 
= [x 
for x 
in format_list 
if x 
in url_map
] 
1388                         if len(existing_formats
) == 0: 
1389                                 self
._downloader
.trouble(u
'ERROR: no known formats available for video') 
1391                         if self
._downloader
.params
.get('listformats', None): 
1392                                 self
._print
_formats
(existing_formats
) 
1394                         if req_format 
is None or req_format 
== 'best': 
1395                                 video_url_list 
= [(existing_formats
[0], url_map
[existing_formats
[0]])] # Best quality 
1396                         elif req_format 
== 'worst': 
1397                                 video_url_list 
= [(existing_formats
[len(existing_formats
)-1], url_map
[existing_formats
[len(existing_formats
)-1]])] # worst quality 
1398                         elif req_format 
in ('-1', 'all'): 
1399                                 video_url_list 
= [(f
, url_map
[f
]) for f 
in existing_formats
] # All formats 
1401                                 # Specific formats. We pick the first in a slash-delimeted sequence. 
1402                                 # For example, if '1/2/3/4' is requested and '2' and '4' are available, we pick '2'. 
1403                                 req_formats 
= req_format
.split('/') 
1404                                 video_url_list 
= None 
1405                                 for rf 
in req_formats
: 
1407                                                 video_url_list 
= [(rf
, url_map
[rf
])] 
1409                                 if video_url_list 
is None: 
1410                                         self
._downloader
.trouble(u
'ERROR: requested format not available') 
1413                         self
._downloader
.trouble(u
'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') 
1416                 for format_param
, video_real_url 
in video_url_list
: 
1417                         # At this point we have a new video 
1418                         self
._downloader
.increment_downloads() 
1421                         video_extension 
= self
._video
_extensions
.get(format_param
, 'flv') 
1424                                 # Process video information 
1425                                 self
._downloader
.process_info({ 
1426                                         'id':           video_id
.decode('utf-8'), 
1427                                         'url':          video_real_url
.decode('utf-8'), 
1428                                         'uploader':     video_uploader
.decode('utf-8'), 
1429                                         'upload_date':  upload_date
, 
1430                                         'title':        video_title
, 
1431                                         'stitle':       simple_title
, 
1432                                         'ext':          video_extension
.decode('utf-8'), 
1433                                         'format':       (format_param 
is None and u
'NA' or format_param
.decode('utf-8')), 
1434                                         'thumbnail':    video_thumbnail
.decode('utf-8'), 
1435                                         'description':  video_description
, 
1436                                         'player_url':   player_url
, 
1438                         except UnavailableVideoError
, err
: 
1439                                 self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1442 class MetacafeIE(InfoExtractor
): 
1443         """Information Extractor for metacafe.com.""" 
1445         _VALID_URL 
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' 
1446         _DISCLAIMER 
= 'http://www.metacafe.com/family_filter/' 
1447         _FILTER_POST 
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' 
1449         IE_NAME 
= u
'metacafe' 
1451         def __init__(self
, youtube_ie
, downloader
=None): 
1452                 InfoExtractor
.__init
__(self
, downloader
) 
1453                 self
._youtube
_ie 
= youtube_ie
 
1455         def report_disclaimer(self
): 
1456                 """Report disclaimer retrieval.""" 
1457                 self
._downloader
.to_screen(u
'[metacafe] Retrieving disclaimer') 
1459         def report_age_confirmation(self
): 
1460                 """Report attempt to confirm age.""" 
1461                 self
._downloader
.to_screen(u
'[metacafe] Confirming age') 
1463         def report_download_webpage(self
, video_id
): 
1464                 """Report webpage download.""" 
1465                 self
._downloader
.to_screen(u
'[metacafe] %s: Downloading webpage' % video_id
) 
1467         def report_extraction(self
, video_id
): 
1468                 """Report information extraction.""" 
1469                 self
._downloader
.to_screen(u
'[metacafe] %s: Extracting information' % video_id
) 
1471         def _real_initialize(self
): 
1472                 # Retrieve disclaimer 
1473                 request 
= urllib2
.Request(self
._DISCLAIMER
) 
1475                         self
.report_disclaimer() 
1476                         disclaimer 
= urllib2
.urlopen(request
).read() 
1477                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1478                         self
._downloader
.trouble(u
'ERROR: unable to retrieve disclaimer: %s' % str(err
)) 
1484                         'submit': "Continue - I'm over 18", 
1486                 request 
= urllib2
.Request(self
._FILTER
_POST
, urllib
.urlencode(disclaimer_form
)) 
1488                         self
.report_age_confirmation() 
1489                         disclaimer 
= urllib2
.urlopen(request
).read() 
1490                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1491                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1494         def _real_extract(self
, url
): 
1495                 # Extract id and simplified title from URL 
1496                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1498                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1501                 video_id 
= mobj
.group(1) 
1503                 # Check if video comes from YouTube 
1504                 mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
1505                 if mobj2 
is not None: 
1506                         self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % mobj2
.group(1)) 
1509                 # At this point we have a new video 
1510                 self
._downloader
.increment_downloads() 
1512                 simple_title 
= mobj
.group(2).decode('utf-8') 
1514                 # Retrieve video webpage to extract further information 
1515                 request 
= urllib2
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
1517                         self
.report_download_webpage(video_id
) 
1518                         webpage 
= urllib2
.urlopen(request
).read() 
1519                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1520                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1523                 # Extract URL, uploader and title from webpage 
1524                 self
.report_extraction(video_id
) 
1525                 mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
1526                 if mobj 
is not None: 
1527                         mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1528                         video_extension 
= mediaURL
[-3:] 
1530                         # Extract gdaKey if available 
1531                         mobj 
= re
.search(r
'(?m)&gdaKey=(.*?)&', webpage
) 
1533                                 video_url 
= mediaURL
 
1535                                 gdaKey 
= mobj
.group(1) 
1536                                 video_url 
= '%s?__gda__=%s' % (mediaURL
, gdaKey
) 
1538                         mobj 
= re
.search(r
' name="flashvars" value="(.*?)"', webpage
) 
1540                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1542                         vardict 
= parse_qs(mobj
.group(1)) 
1543                         if 'mediaData' not in vardict
: 
1544                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1546                         mobj 
= re
.search(r
'"mediaURL":"(http.*?)","key":"(.*?)"', vardict
['mediaData'][0]) 
1548                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1550                         mediaURL 
= mobj
.group(1).replace('\\/', '/') 
1551                         video_extension 
= mediaURL
[-3:] 
1552                         video_url 
= '%s?__gda__=%s' % (mediaURL
, mobj
.group(2)) 
1554                 mobj 
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
) 
1556                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1558                 video_title 
= mobj
.group(1).decode('utf-8') 
1559                 video_title 
= sanitize_title(video_title
) 
1561                 mobj 
= re
.search(r
'(?ms)By:\s*<a .*?>(.+?)<', webpage
) 
1563                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1565                 video_uploader 
= mobj
.group(1) 
1568                         # Process video information 
1569                         self
._downloader
.process_info({ 
1570                                 'id':           video_id
.decode('utf-8'), 
1571                                 'url':          video_url
.decode('utf-8'), 
1572                                 'uploader':     video_uploader
.decode('utf-8'), 
1573                                 'upload_date':  u
'NA', 
1574                                 'title':        video_title
, 
1575                                 'stitle':       simple_title
, 
1576                                 'ext':          video_extension
.decode('utf-8'), 
1580                 except UnavailableVideoError
: 
1581                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1584 class DailymotionIE(InfoExtractor
): 
1585         """Information Extractor for Dailymotion""" 
1587         _VALID_URL 
= r
'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' 
1588         IE_NAME 
= u
'dailymotion' 
1590         def __init__(self
, downloader
=None): 
1591                 InfoExtractor
.__init
__(self
, downloader
) 
1593         def report_download_webpage(self
, video_id
): 
1594                 """Report webpage download.""" 
1595                 self
._downloader
.to_screen(u
'[dailymotion] %s: Downloading webpage' % video_id
) 
1597         def report_extraction(self
, video_id
): 
1598                 """Report information extraction.""" 
1599                 self
._downloader
.to_screen(u
'[dailymotion] %s: Extracting information' % video_id
) 
1601         def _real_extract(self
, url
): 
1602                 # Extract id and simplified title from URL 
1603                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1605                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1608                 # At this point we have a new video 
1609                 self
._downloader
.increment_downloads() 
1610                 video_id 
= mobj
.group(1) 
1612                 video_extension 
= 'flv' 
1614                 # Retrieve video webpage to extract further information 
1615                 request 
= urllib2
.Request(url
) 
1616                 request
.add_header('Cookie', 'family_filter=off') 
1618                         self
.report_download_webpage(video_id
) 
1619                         webpage 
= urllib2
.urlopen(request
).read() 
1620                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1621                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1624                 # Extract URL, uploader and title from webpage 
1625                 self
.report_extraction(video_id
) 
1626                 mobj 
= re
.search(r
'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage
) 
1628                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1630                 sequence 
= urllib
.unquote(mobj
.group(1)) 
1631                 mobj 
= re
.search(r
',\"sdURL\"\:\"([^\"]+?)\",', sequence
) 
1633                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1635                 mediaURL 
= urllib
.unquote(mobj
.group(1)).replace('\\', '') 
1637                 # if needed add http://www.dailymotion.com/ if relative URL 
1639                 video_url 
= mediaURL
 
1641                 mobj 
= re
.search(r
'<meta property="og:title" content="(?P<title>[^"]*)" />', webpage
) 
1643                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1645                 video_title 
= _unescapeHTML(mobj
.group('title').decode('utf-8')) 
1646                 video_title 
= sanitize_title(video_title
) 
1647                 simple_title 
= _simplify_title(video_title
) 
1649                 mobj 
= re
.search(r
'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage
) 
1651                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1653                 video_uploader 
= mobj
.group(1) 
1656                         # Process video information 
1657                         self
._downloader
.process_info({ 
1658                                 'id':           video_id
.decode('utf-8'), 
1659                                 'url':          video_url
.decode('utf-8'), 
1660                                 'uploader':     video_uploader
.decode('utf-8'), 
1661                                 'upload_date':  u
'NA', 
1662                                 'title':        video_title
, 
1663                                 'stitle':       simple_title
, 
1664                                 'ext':          video_extension
.decode('utf-8'), 
1668                 except UnavailableVideoError
: 
1669                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1672 class GoogleIE(InfoExtractor
): 
1673         """Information extractor for video.google.com.""" 
1675         _VALID_URL 
= r
'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' 
1676         IE_NAME 
= u
'video.google' 
1678         def __init__(self
, downloader
=None): 
1679                 InfoExtractor
.__init
__(self
, downloader
) 
1681         def report_download_webpage(self
, video_id
): 
1682                 """Report webpage download.""" 
1683                 self
._downloader
.to_screen(u
'[video.google] %s: Downloading webpage' % video_id
) 
1685         def report_extraction(self
, video_id
): 
1686                 """Report information extraction.""" 
1687                 self
._downloader
.to_screen(u
'[video.google] %s: Extracting information' % video_id
) 
1689         def _real_extract(self
, url
): 
1690                 # Extract id from URL 
1691                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1693                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1696                 # At this point we have a new video 
1697                 self
._downloader
.increment_downloads() 
1698                 video_id 
= mobj
.group(1) 
1700                 video_extension 
= 'mp4' 
1702                 # Retrieve video webpage to extract further information 
1703                 request 
= urllib2
.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id
) 
1705                         self
.report_download_webpage(video_id
) 
1706                         webpage 
= urllib2
.urlopen(request
).read() 
1707                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1708                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1711                 # Extract URL, uploader, and title from webpage 
1712                 self
.report_extraction(video_id
) 
1713                 mobj 
= re
.search(r
"download_url:'([^']+)'", webpage
) 
1715                         video_extension 
= 'flv' 
1716                         mobj 
= re
.search(r
"(?i)videoUrl\\x3d(.+?)\\x26", webpage
) 
1718                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1720                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1721                 mediaURL 
= mediaURL
.replace('\\x3d', '\x3d') 
1722                 mediaURL 
= mediaURL
.replace('\\x26', '\x26') 
1724                 video_url 
= mediaURL
 
1726                 mobj 
= re
.search(r
'<title>(.*)</title>', webpage
) 
1728                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1730                 video_title 
= mobj
.group(1).decode('utf-8') 
1731                 video_title 
= sanitize_title(video_title
) 
1732                 simple_title 
= _simplify_title(video_title
) 
1734                 # Extract video description 
1735                 mobj 
= re
.search(r
'<span id=short-desc-content>([^<]*)</span>', webpage
) 
1737                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1739                 video_description 
= mobj
.group(1).decode('utf-8') 
1740                 if not video_description
: 
1741                         video_description 
= 'No description available.' 
1743                 # Extract video thumbnail 
1744                 if self
._downloader
.params
.get('forcethumbnail', False): 
1745                         request 
= urllib2
.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id
))) 
1747                                 webpage 
= urllib2
.urlopen(request
).read() 
1748                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1749                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1751                         mobj 
= re
.search(r
'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage
) 
1753                                 self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1755                         video_thumbnail 
= mobj
.group(1) 
1756                 else:   # we need something to pass to process_info 
1757                         video_thumbnail 
= '' 
1760                         # Process video information 
1761                         self
._downloader
.process_info({ 
1762                                 'id':           video_id
.decode('utf-8'), 
1763                                 'url':          video_url
.decode('utf-8'), 
1765                                 'upload_date':  u
'NA', 
1766                                 'title':        video_title
, 
1767                                 'stitle':       simple_title
, 
1768                                 'ext':          video_extension
.decode('utf-8'), 
1772                 except UnavailableVideoError
: 
1773                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1776 class PhotobucketIE(InfoExtractor
): 
1777         """Information extractor for photobucket.com.""" 
1779         _VALID_URL 
= r
'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' 
1780         IE_NAME 
= u
'photobucket' 
1782         def __init__(self
, downloader
=None): 
1783                 InfoExtractor
.__init
__(self
, downloader
) 
1785         def report_download_webpage(self
, video_id
): 
1786                 """Report webpage download.""" 
1787                 self
._downloader
.to_screen(u
'[photobucket] %s: Downloading webpage' % video_id
) 
1789         def report_extraction(self
, video_id
): 
1790                 """Report information extraction.""" 
1791                 self
._downloader
.to_screen(u
'[photobucket] %s: Extracting information' % video_id
) 
1793         def _real_extract(self
, url
): 
1794                 # Extract id from URL 
1795                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1797                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1800                 # At this point we have a new video 
1801                 self
._downloader
.increment_downloads() 
1802                 video_id 
= mobj
.group(1) 
1804                 video_extension 
= 'flv' 
1806                 # Retrieve video webpage to extract further information 
1807                 request 
= urllib2
.Request(url
) 
1809                         self
.report_download_webpage(video_id
) 
1810                         webpage 
= urllib2
.urlopen(request
).read() 
1811                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1812                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1815                 # Extract URL, uploader, and title from webpage 
1816                 self
.report_extraction(video_id
) 
1817                 mobj 
= re
.search(r
'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage
) 
1819                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1821                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1823                 video_url 
= mediaURL
 
1825                 mobj 
= re
.search(r
'<title>(.*) video by (.*) - Photobucket</title>', webpage
) 
1827                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1829                 video_title 
= mobj
.group(1).decode('utf-8') 
1830                 video_title 
= sanitize_title(video_title
) 
1831                 simple_title 
= _simplify_title(vide_title
) 
1833                 video_uploader 
= mobj
.group(2).decode('utf-8') 
1836                         # Process video information 
1837                         self
._downloader
.process_info({ 
1838                                 'id':           video_id
.decode('utf-8'), 
1839                                 'url':          video_url
.decode('utf-8'), 
1840                                 'uploader':     video_uploader
, 
1841                                 'upload_date':  u
'NA', 
1842                                 'title':        video_title
, 
1843                                 'stitle':       simple_title
, 
1844                                 'ext':          video_extension
.decode('utf-8'), 
1848                 except UnavailableVideoError
: 
1849                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1852 class YahooIE(InfoExtractor
): 
1853         """Information extractor for video.yahoo.com.""" 
1855         # _VALID_URL matches all Yahoo! Video URLs 
1856         # _VPAGE_URL matches only the extractable '/watch/' URLs 
1857         _VALID_URL 
= r
'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' 
1858         _VPAGE_URL 
= r
'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
1859         IE_NAME 
= u
'video.yahoo' 
1861         def __init__(self
, downloader
=None): 
1862                 InfoExtractor
.__init
__(self
, downloader
) 
1864         def report_download_webpage(self
, video_id
): 
1865                 """Report webpage download.""" 
1866                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Downloading webpage' % video_id
) 
1868         def report_extraction(self
, video_id
): 
1869                 """Report information extraction.""" 
1870                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Extracting information' % video_id
) 
1872         def _real_extract(self
, url
, new_video
=True): 
1873                 # Extract ID from URL 
1874                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1876                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1879                 # At this point we have a new video 
1880                 self
._downloader
.increment_downloads() 
1881                 video_id 
= mobj
.group(2) 
1882                 video_extension 
= 'flv' 
1884                 # Rewrite valid but non-extractable URLs as 
1885                 # extractable English language /watch/ URLs 
1886                 if re
.match(self
._VPAGE
_URL
, url
) is None: 
1887                         request 
= urllib2
.Request(url
) 
1889                                 webpage 
= urllib2
.urlopen(request
).read() 
1890                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1891                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1894                         mobj 
= re
.search(r
'\("id", "([0-9]+)"\);', webpage
) 
1896                                 self
._downloader
.trouble(u
'ERROR: Unable to extract id field') 
1898                         yahoo_id 
= mobj
.group(1) 
1900                         mobj 
= re
.search(r
'\("vid", "([0-9]+)"\);', webpage
) 
1902                                 self
._downloader
.trouble(u
'ERROR: Unable to extract vid field') 
1904                         yahoo_vid 
= mobj
.group(1) 
1906                         url 
= 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid
, yahoo_id
) 
1907                         return self
._real
_extract
(url
, new_video
=False) 
1909                 # Retrieve video webpage to extract further information 
1910                 request 
= urllib2
.Request(url
) 
1912                         self
.report_download_webpage(video_id
) 
1913                         webpage 
= urllib2
.urlopen(request
).read() 
1914                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1915                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1918                 # Extract uploader and title from webpage 
1919                 self
.report_extraction(video_id
) 
1920                 mobj 
= re
.search(r
'<meta name="title" content="(.*)" />', webpage
) 
1922                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1924                 video_title 
= mobj
.group(1).decode('utf-8') 
1925                 simple_title 
= _simplify_title(video_title
) 
1927                 mobj 
= re
.search(r
'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage
) 
1929                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
1931                 video_uploader 
= mobj
.group(1).decode('utf-8') 
1933                 # Extract video thumbnail 
1934                 mobj 
= re
.search(r
'<link rel="image_src" href="(.*)" />', webpage
) 
1936                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1938                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
1940                 # Extract video description 
1941                 mobj 
= re
.search(r
'<meta name="description" content="(.*)" />', webpage
) 
1943                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1945                 video_description 
= mobj
.group(1).decode('utf-8') 
1946                 if not video_description
: 
1947                         video_description 
= 'No description available.' 
1949                 # Extract video height and width 
1950                 mobj 
= re
.search(r
'<meta name="video_height" content="([0-9]+)" />', webpage
) 
1952                         self
._downloader
.trouble(u
'ERROR: unable to extract video height') 
1954                 yv_video_height 
= mobj
.group(1) 
1956                 mobj 
= re
.search(r
'<meta name="video_width" content="([0-9]+)" />', webpage
) 
1958                         self
._downloader
.trouble(u
'ERROR: unable to extract video width') 
1960                 yv_video_width 
= mobj
.group(1) 
1962                 # Retrieve video playlist to extract media URL 
1963                 # I'm not completely sure what all these options are, but we 
1964                 # seem to need most of them, otherwise the server sends a 401. 
1965                 yv_lg 
= 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents 
1966                 yv_bitrate 
= '700'  # according to Wikipedia this is hard-coded 
1967                 request 
= urllib2
.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id 
+ 
1968                                 '&tech=flash&mode=playlist&lg=' + yv_lg 
+ '&bitrate=' + yv_bitrate 
+ '&vidH=' + yv_video_height 
+ 
1969                                 '&vidW=' + yv_video_width 
+ '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') 
1971                         self
.report_download_webpage(video_id
) 
1972                         webpage 
= urllib2
.urlopen(request
).read() 
1973                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1974                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1977                 # Extract media URL from playlist XML 
1978                 mobj 
= re
.search(r
'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage
) 
1980                         self
._downloader
.trouble(u
'ERROR: Unable to extract media URL') 
1982                 video_url 
= urllib
.unquote(mobj
.group(1) + mobj
.group(2)).decode('utf-8') 
1983                 video_url 
= re
.sub(r
'(?u)&(.+?);', htmlentity_transform
, video_url
) 
1986                         # Process video information 
1987                         self
._downloader
.process_info({ 
1988                                 'id':           video_id
.decode('utf-8'), 
1990                                 'uploader':     video_uploader
, 
1991                                 'upload_date':  u
'NA', 
1992                                 'title':        video_title
, 
1993                                 'stitle':       simple_title
, 
1994                                 'ext':          video_extension
.decode('utf-8'), 
1995                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
1996                                 'description':  video_description
, 
1997                                 'thumbnail':    video_thumbnail
, 
2000                 except UnavailableVideoError
: 
2001                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
2004 class VimeoIE(InfoExtractor
): 
2005         """Information extractor for vimeo.com.""" 
2007         # _VALID_URL matches Vimeo URLs 
2008         _VALID_URL 
= r
'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' 
2011         def __init__(self
, downloader
=None): 
2012                 InfoExtractor
.__init
__(self
, downloader
) 
2014         def report_download_webpage(self
, video_id
): 
2015                 """Report webpage download.""" 
2016                 self
._downloader
.to_screen(u
'[vimeo] %s: Downloading webpage' % video_id
) 
2018         def report_extraction(self
, video_id
): 
2019                 """Report information extraction.""" 
2020                 self
._downloader
.to_screen(u
'[vimeo] %s: Extracting information' % video_id
) 
2022         def _real_extract(self
, url
, new_video
=True): 
2023                 # Extract ID from URL 
2024                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
2026                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
2029                 # At this point we have a new video 
2030                 self
._downloader
.increment_downloads() 
2031                 video_id 
= mobj
.group(1) 
2033                 # Retrieve video webpage to extract further information 
2034                 request 
= urllib2
.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id
, None, std_headers
) 
2036                         self
.report_download_webpage(video_id
) 
2037                         webpage 
= urllib2
.urlopen(request
).read() 
2038                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
2039                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
2042                 # Now we begin extracting as much information as we can from what we 
2043                 # retrieved. First we extract the information common to all extractors, 
2044                 # and latter we extract those that are Vimeo specific. 
2045                 self
.report_extraction(video_id
) 
2048                 mobj 
= re
.search(r
'<caption>(.*?)</caption>', webpage
) 
2050                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
2052                 video_title 
= mobj
.group(1).decode('utf-8') 
2053                 simple_title 
= _simplify_title(video_title
) 
2056                 mobj 
= re
.search(r
'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage
) 
2058                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
2060                 video_uploader 
= mobj
.group(1).decode('utf-8') 
2062                 # Extract video thumbnail 
2063                 mobj 
= re
.search(r
'<thumbnail>(.*?)</thumbnail>', webpage
) 
2065                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
2067                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
2069                 # # Extract video description 
2070                 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage) 
2072                 #       self._downloader.trouble(u'ERROR: unable to extract video description') 
2074                 # video_description = mobj.group(1).decode('utf-8') 
2075                 # if not video_description: video_description = 'No description available.' 
2076                 video_description 
= 'Foo.' 
2078                 # Vimeo specific: extract request signature 
2079                 mobj 
= re
.search(r
'<request_signature>(.*?)</request_signature>', webpage
) 
2081                         self
._downloader
.trouble(u
'ERROR: unable to extract request signature') 
2083                 sig 
= mobj
.group(1).decode('utf-8') 
2085                 # Vimeo specific: extract video quality information 
2086                 mobj 
= re
.search(r
'<isHD>(\d+)</isHD>', webpage
) 
2088                         self
._downloader
.trouble(u
'ERROR: unable to extract video quality information') 
2090                 quality 
= mobj
.group(1).decode('utf-8') 
2092                 if int(quality
) == 1: 
2097                 # Vimeo specific: Extract request signature expiration 
2098                 mobj 
= re
.search(r
'<request_signature_expires>(.*?)</request_signature_expires>', webpage
) 
2100                         self
._downloader
.trouble(u
'ERROR: unable to extract request signature expiration') 
2102                 sig_exp 
= mobj
.group(1).decode('utf-8') 
2104                 video_url 
= "http://vimeo.com/moogaloop/play/clip:%s/%s/%s/?q=%s" % (video_id
, sig
, sig_exp
, quality
) 
2107                         # Process video information 
2108                         self
._downloader
.process_info({ 
2109                                 'id':           video_id
.decode('utf-8'), 
2111                                 'uploader':     video_uploader
, 
2112                                 'upload_date':  u
'NA', 
2113                                 'title':        video_title
, 
2114                                 'stitle':       simple_title
, 
2116                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
2117                                 'description':  video_description
, 
2118                                 'thumbnail':    video_thumbnail
, 
2119                                 'description':  video_description
, 
2122                 except UnavailableVideoError
: 
2123                         self
._downloader
.trouble(u
'ERROR: unable to download video') 
2126 class GenericIE(InfoExtractor
): 
2127         """Generic last-resort information extractor.""" 
2130         IE_NAME 
= u
'generic' 
    def __init__(self, downloader=None):
        # Delegate to the shared InfoExtractor initializer; this extractor
        # keeps no state of its own.
        InfoExtractor.__init__(self, downloader)
    def report_download_webpage(self, video_id):
        """Report webpage download.

        Warns loudly first: this generic extractor only runs when no
        site-specific extractor claimed the URL.
        """
        self._downloader.to_screen(u'WARNING: Falling back on generic information extractor.')
        self._downloader.to_screen(u'[generic] %s: Downloading webpage' % video_id)
    def report_extraction(self, video_id):
        """Report information extraction.

        Emits a '[generic] <id>: Extracting information' status line through
        the attached FileDownloader before parsing begins.
        """
        self._downloader.to_screen(u'[generic] %s: Extracting information' % video_id)
2144         def _real_extract(self
, url
): 
2145                 # At this point we have a new video 
2146                 self
._downloader
.increment_downloads() 
2148                 video_id 
= url
.split('/')[-1] 
2149                 request 
= urllib2
.Request(url
) 
2151                         self
.report_download_webpage(video_id
) 
2152                         webpage 
= urllib2
.urlopen(request
).read() 
2153                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
2154                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
2156                 except ValueError, err
: 
2157                         # since this is the last-resort InfoExtractor, if 
2158                         # this error is thrown, it'll be thrown here 
2159                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
2162                 self
.report_extraction(video_id
) 
2163                 # Start with something easy: JW Player in SWFObject 
2164                 mobj 
= re
.search(r
'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) 
2166                         # Broaden the search a little bit 
2167                         mobj = re.search(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage) 
2169                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
2172                 # It's possible that one of the regexes 
2173                 # matched, but returned an empty group: 
2174                 if mobj.group(1) is None: 
2175                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
2178                 video_url = urllib.unquote(mobj.group(1)) 
2179                 video_id = os.path.basename(video_url) 
2181                 # here's a fun little line of code for you: 
2182                 video_extension = os.path.splitext(video_id)[1][1:] 
2183                 video_id = os.path.splitext(video_id)[0] 
2185                 # it's tempting to parse this further, but you would 
2186                 # have to take into account all the variations like 
2187                 #   Video Title - Site Name 
2188                 #   Site Name | Video Title 
2189                 #   Video Title - Tagline | Site Name 
2190                 # and so on and so forth; it's just not practical 
2191                 mobj = re.search(r'<title>(.*)</title>', webpage) 
2193                         self._downloader.trouble(u'ERROR: unable to extract title') 
2195                 video_title = mobj.group(1).decode('utf-8') 
2196                 video_title = sanitize_title(video_title) 
2197                 simple_title = _simplify_title(video_title) 
2199                 # video uploader is domain name 
2200                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) 
2202                         self._downloader.trouble(u'ERROR: unable to extract title') 
2204                 video_uploader = mobj.group(1).decode('utf-8') 
2207                         # Process video information 
2208                         self._downloader.process_info({ 
2209                                 'id':           video_id.decode('utf-8'), 
2210                                 'url':          video_url.decode('utf-8'), 
2211                                 'uploader':     video_uploader, 
2212                                 'upload_date':  u'NA', 
2213                                 'title':        video_title, 
2214                                 'stitle':       simple_title, 
2215                                 'ext':          video_extension.decode('utf-8'), 
2219                 except UnavailableVideoError, err: 
2220                         self._downloader.trouble(u'\nERROR: unable to download video') 
2223 class YoutubeSearchIE(InfoExtractor): 
2224         """Information Extractor for YouTube search queries.""" 
2225         _VALID_URL = r'ytsearch(\d+|all)?:[\s\S]+' 
2226         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' 
2227         _VIDEO_INDICATOR = r'href="/watch
\?v
=.+?
"' 
2228         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
2230         _max_youtube_results = 1000 
2231         IE_NAME = u'youtube:search' 
2233         def __init__(self, youtube_ie, downloader=None): 
2234                 InfoExtractor.__init__(self, downloader) 
2235                 self._youtube_ie = youtube_ie 
2237         def report_download_page(self, query, pagenum): 
2238                 """Report attempt to download playlist page with given number.""" 
2239                 query = query.decode(preferredencoding()) 
2240                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) 
2242         def _real_initialize(self): 
2243                 self._youtube_ie.initialize() 
2245         def _real_extract(self, query): 
2246                 mobj = re.match(self._VALID_URL, query) 
2248                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2251                 prefix, query = query.split(':') 
2253                 query = query.encode('utf-8') 
2255                         self._download_n_results(query, 1) 
2257                 elif prefix == 'all': 
2258                         self._download_n_results(query, self._max_youtube_results) 
2264                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2266                                 elif n > self._max_youtube_results: 
2267                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) 
2268                                         n = self._max_youtube_results 
2269                                 self._download_n_results(query, n) 
2271                         except ValueError: # parsing prefix as integer fails 
2272                                 self._download_n_results(query, 1) 
2275         def _download_n_results(self, query, n): 
2276                 """Downloads a specified number of results for a query""" 
2279                 already_seen = set() 
2283                         self.report_download_page(query, pagenum) 
2284                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2285                         request = urllib2.Request(result_url) 
2287                                 page = urllib2.urlopen(request).read() 
2288                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2289                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2292                         # Extract video identifiers 
2293                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2294                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] 
2295                                 if video_id not in already_seen: 
2296                                         video_ids.append(video_id) 
2297                                         already_seen.add(video_id) 
2298                                         if len(video_ids) == n: 
2299                                                 # Specified n videos reached 
2300                                                 for id in video_ids: 
2301                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2304                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2305                                 for id in video_ids: 
2306                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2309                         pagenum = pagenum + 1 
2312 class GoogleSearchIE(InfoExtractor): 
2313         """Information Extractor for Google Video search queries.""" 
2314         _VALID_URL = r'gvsearch(\d+|all)?:[\s\S]+' 
2315         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' 
2316         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' 
2317         _MORE_PAGES_INDICATOR = r'<span>Next</span>' 
2319         _max_google_results = 1000 
2320         IE_NAME = u'video.google:search' 
2322         def __init__(self, google_ie, downloader=None): 
2323                 InfoExtractor.__init__(self, downloader) 
2324                 self._google_ie = google_ie 
2326         def report_download_page(self, query, pagenum): 
2327                 """Report attempt to download playlist page with given number.""" 
2328                 query = query.decode(preferredencoding()) 
2329                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) 
2331         def _real_initialize(self): 
2332                 self._google_ie.initialize() 
2334         def _real_extract(self, query): 
2335                 mobj = re.match(self._VALID_URL, query) 
2337                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2340                 prefix, query = query.split(':') 
2342                 query = query.encode('utf-8') 
2344                         self._download_n_results(query, 1) 
2346                 elif prefix == 'all': 
2347                         self._download_n_results(query, self._max_google_results) 
2353                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2355                                 elif n > self._max_google_results: 
2356                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) 
2357                                         n = self._max_google_results 
2358                                 self._download_n_results(query, n) 
2360                         except ValueError: # parsing prefix as integer fails 
2361                                 self._download_n_results(query, 1) 
2364         def _download_n_results(self, query, n): 
2365                 """Downloads a specified number of results for a query""" 
2368                 already_seen = set() 
2372                         self.report_download_page(query, pagenum) 
2373                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2374                         request = urllib2.Request(result_url) 
2376                                 page = urllib2.urlopen(request).read() 
2377                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2378                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2381                         # Extract video identifiers 
2382                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2383                                 video_id = mobj.group(1) 
2384                                 if video_id not in already_seen: 
2385                                         video_ids.append(video_id) 
2386                                         already_seen.add(video_id) 
2387                                         if len(video_ids) == n: 
2388                                                 # Specified n videos reached 
2389                                                 for id in video_ids: 
2390                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
2393                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2394                                 for id in video_ids: 
2395                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
2398                         pagenum = pagenum + 1 
2401 class YahooSearchIE(InfoExtractor): 
2402         """Information Extractor for Yahoo! Video search queries.""" 
2403         _VALID_URL = r'yvsearch(\d+|all)?:[\s\S]+' 
2404         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' 
2405         _VIDEO_INDICATOR = r'href="http
://video\
.yahoo\
.com
/watch
/([0-9]+/[0-9]+)"' 
2406         _MORE_PAGES_INDICATOR = r'\s*Next' 
2408         _max_yahoo_results = 1000 
2409         IE_NAME = u'video.yahoo:search' 
2411         def __init__(self, yahoo_ie, downloader=None): 
2412                 InfoExtractor.__init__(self, downloader) 
2413                 self._yahoo_ie = yahoo_ie 
2415         def report_download_page(self, query, pagenum): 
2416                 """Report attempt to download playlist page with given number.""" 
2417                 query = query.decode(preferredencoding()) 
2418                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) 
2420         def _real_initialize(self): 
2421                 self._yahoo_ie.initialize() 
2423         def _real_extract(self, query): 
2424                 mobj = re.match(self._VALID_URL, query) 
2426                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2429                 prefix, query = query.split(':') 
2431                 query = query.encode('utf-8') 
2433                         self._download_n_results(query, 1) 
2435                 elif prefix == 'all': 
2436                         self._download_n_results(query, self._max_yahoo_results) 
2442                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2444                                 elif n > self._max_yahoo_results: 
2445                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) 
2446                                         n = self._max_yahoo_results 
2447                                 self._download_n_results(query, n) 
2449                         except ValueError: # parsing prefix as integer fails 
2450                                 self._download_n_results(query, 1) 
2453         def _download_n_results(self, query, n): 
2454                 """Downloads a specified number of results for a query""" 
2457                 already_seen = set() 
2461                         self.report_download_page(query, pagenum) 
2462                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2463                         request = urllib2.Request(result_url) 
2465                                 page = urllib2.urlopen(request).read() 
2466                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2467                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2470                         # Extract video identifiers 
2471                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2472                                 video_id = mobj.group(1) 
2473                                 if video_id not in already_seen: 
2474                                         video_ids.append(video_id) 
2475                                         already_seen.add(video_id) 
2476                                         if len(video_ids) == n: 
2477                                                 # Specified n videos reached 
2478                                                 for id in video_ids: 
2479                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2482                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2483                                 for id in video_ids: 
2484                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2487                         pagenum = pagenum + 1 
2490 class YoutubePlaylistIE(InfoExtractor): 
2491         """Information Extractor for YouTube playlists.""" 
2493         _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' 
2494         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' 
2495         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
2496         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
2498         IE_NAME = u'youtube:playlist' 
2500         def __init__(self, youtube_ie, downloader=None): 
2501                 InfoExtractor.__init__(self, downloader) 
2502                 self._youtube_ie = youtube_ie 
2504         def report_download_page(self, playlist_id, pagenum): 
2505                 """Report attempt to download playlist page with given number.""" 
2506                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) 
2508         def _real_initialize(self): 
2509                 self._youtube_ie.initialize() 
2511         def _real_extract(self, url): 
2512                 # Extract playlist id 
2513                 mobj = re.match(self._VALID_URL, url) 
2515                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2519                 if mobj.group(3) is not None: 
2520                         self._youtube_ie.extract(mobj.group(3)) 
2523                 # Download playlist pages 
2524                 # prefix is 'p' as default for playlists but there are other types that need extra care 
2525                 playlist_prefix = mobj.group(1) 
2526                 if playlist_prefix == 'a': 
2527                         playlist_access = 'artist' 
2529                         playlist_prefix = 'p' 
2530                         playlist_access = 'view_play_list' 
2531                 playlist_id = mobj.group(2) 
2536                         self.report_download_page(playlist_id, pagenum) 
2537                         url = self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum) 
2538                         request = urllib2.Request(url) 
2540                                 page = urllib2.urlopen(request).read() 
2541                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2542                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2545                         # Extract video identifiers 
2547                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2548                                 if mobj.group(1) not in ids_in_page: 
2549                                         ids_in_page.append(mobj.group(1)) 
2550                         video_ids.extend(ids_in_page) 
2552                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2554                         pagenum = pagenum + 1 
2556                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2557                 playlistend = self._downloader.params.get('playlistend', -1) 
2558                 video_ids = video_ids[playliststart:playlistend] 
2560                 for id in video_ids: 
2561                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2565 class YoutubeUserIE(InfoExtractor): 
2566         """Information Extractor for YouTube users.""" 
2568         _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/user/)|ytuser:)([A-Za-z0-9_-]+)' 
2569         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' 
2570         _GDATA_PAGE_SIZE = 50 
2571         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' 
2572         _VIDEO_INDICATOR = r'/watch\?v=(.+?)[\<&]' 
2574         IE_NAME = u'youtube:user' 
2576         def __init__(self, youtube_ie, downloader=None): 
2577                 InfoExtractor.__init__(self, downloader) 
2578                 self._youtube_ie = youtube_ie 
2580         def report_download_page(self, username, start_index): 
2581                 """Report attempt to download user page.""" 
2582                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % 
2583                                 (username, start_index, start_index + self._GDATA_PAGE_SIZE)) 
2585         def _real_initialize(self): 
2586                 self._youtube_ie.initialize() 
2588         def _real_extract(self, url): 
2590                 mobj = re.match(self._VALID_URL, url) 
2592                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2595                 username = mobj.group(1) 
2597                 # Download video ids using YouTube Data API. Result size per 
2598                 # query is limited (currently to 50 videos) so we need to query 
2599                 # page by page until there are no video ids - it means we got 
2606                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1 
2607                         self.report_download_page(username, start_index) 
2609                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)) 
2612                                 page = urllib2.urlopen(request).read() 
2613                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2614                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2617                         # Extract video identifiers 
2620                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2621                                 if mobj.group(1) not in ids_in_page: 
2622                                         ids_in_page.append(mobj.group(1)) 
2624                         video_ids.extend(ids_in_page) 
2626                         # A little optimization - if current page is not 
2627                         # "full
", ie. does not contain PAGE_SIZE video ids then 
2628                         # we can assume that this page is the last one - there 
2629                         # are no more ids on further pages - no need to query 
2632                         if len(ids_in_page) < self._GDATA_PAGE_SIZE: 
2637                 all_ids_count = len(video_ids) 
2638                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2639                 playlistend = self._downloader.params.get('playlistend', -1) 
2641                 if playlistend == -1: 
2642                         video_ids = video_ids[playliststart:] 
2644                         video_ids = video_ids[playliststart:playlistend] 
2646                 self._downloader.to_screen("[youtube
] user 
%s: Collected 
%d video 
ids (downloading 
%d of them
)" % 
2647                                 (username, all_ids_count, len(video_ids))) 
2649                 for video_id in video_ids: 
2650                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) 
2653 class DepositFilesIE(InfoExtractor): 
2654         """Information extractor for depositfiles.com""" 
2656         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' 
2657         IE_NAME = u'DepositFiles' 
2659         def __init__(self, downloader=None): 
2660                 InfoExtractor.__init__(self, downloader) 
2662         def report_download_webpage(self, file_id): 
2663                 """Report webpage download.""" 
2664                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) 
2666         def report_extraction(self, file_id): 
2667                 """Report information extraction.""" 
2668                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) 
2670         def _real_extract(self, url): 
2671                 # At this point we have a new file 
2672                 self._downloader.increment_downloads() 
2674                 file_id = url.split('/')[-1] 
2675                 # Rebuild url in english locale 
2676                 url = 'http://depositfiles.com/en/files/' + file_id 
2678                 # Retrieve file webpage with 'Free download' button pressed 
2679                 free_download_indication = { 'gateway_result' : '1' } 
2680                 request = urllib2.Request(url, urllib.urlencode(free_download_indication)) 
2682                         self.report_download_webpage(file_id) 
2683                         webpage = urllib2.urlopen(request).read() 
2684                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2685                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) 
2688                 # Search for the real file URL 
2689                 mobj = re.search(r'<form action="(http
://fileshare
.+?
)"', webpage) 
2690                 if (mobj is None) or (mobj.group(1) is None): 
2691                         # Try to figure out reason of the error. 
2692                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) 
2693                         if (mobj is not None) and (mobj.group(1) is not None): 
2694                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() 
2695                                 self._downloader.trouble(u'ERROR: %s' % restriction_message) 
2697                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) 
2700                 file_url = mobj.group(1) 
2701                 file_extension = os.path.splitext(file_url)[1][1:] 
2703                 # Search for file title 
2704                 mobj = re.search(r'<b title="(.*?
)">', webpage) 
2706                         self._downloader.trouble(u'ERROR: unable to extract title') 
2708                 file_title = mobj.group(1).decode('utf-8') 
2711                         # Process file information 
2712                         self._downloader.process_info({ 
2713                                 'id':           file_id.decode('utf-8'), 
2714                                 'url':          file_url.decode('utf-8'), 
2716                                 'upload_date':  u'NA', 
2717                                 'title':        file_title, 
2718                                 'stitle':       file_title, 
2719                                 'ext':          file_extension.decode('utf-8'), 
2723                 except UnavailableVideoError, err: 
2724                         self._downloader.trouble(u'ERROR: unable to download file') 
2727 class FacebookIE(InfoExtractor): 
2728         """Information Extractor for Facebook""" 
2730         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook\.com/(?:video/video|photo)\.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' 
2731         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' 
2732         _NETRC_MACHINE = 'facebook' 
2733         _available_formats = ['video', 'highqual', 'lowqual'] 
2734         _video_extensions = { 
2739         IE_NAME = u'facebook' 
2741         def __init__(self, downloader=None): 
2742                 InfoExtractor.__init__(self, downloader) 
2744         def _reporter(self, message): 
2745                 """Add header and report message.""" 
2746                 self._downloader.to_screen(u'[facebook] %s' % message) 
2748         def report_login(self): 
2749                 """Report attempt to log in.""" 
2750                 self._reporter(u'Logging in') 
2752         def report_video_webpage_download(self, video_id): 
2753                 """Report attempt to download video webpage.""" 
2754                 self._reporter(u'%s: Downloading video webpage' % video_id) 
2756         def report_information_extraction(self, video_id): 
2757                 """Report attempt to extract video information.""" 
2758                 self._reporter(u'%s: Extracting video information' % video_id) 
2760         def _parse_page(self, video_webpage): 
2761                 """Extract video information from page""" 
2763                 data = {'title': r'\("video_title
", "(.*?
)"\)', 
2764                         'description': r'<div class="datawrap
">(.*?)</div>', 
2765                         'owner': r'\("video_owner_name
", "(.*?
)"\)', 
2766                         'thumbnail':  r'\("thumb_url
", "(?P
<THUMB
>.*?
)"\)', 
2769                 for piece in data.keys(): 
2770                         mobj = re.search(data[piece], video_webpage) 
2771                         if mobj is not None: 
2772                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape
")) 
2776                 for fmt in self._available_formats: 
2777                         mobj = re.search(r'\("%s_src
\", "(.+?)"\
)' % fmt, video_webpage) 
2778                         if mobj is not None: 
2779                                 # URL is in a Javascript segment inside an escaped Unicode format within 
2780                                 # the generally utf-8 page 
2781                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape")) 
2782                 video_info['video_urls
'] = video_urls 
2786         def _real_initialize(self): 
2787                 if self._downloader is None: 
2792                 downloader_params = self._downloader.params 
2794                 # Attempt to use provided username and password or .netrc data 
2795                 if downloader_params.get('username
', None) is not None: 
2796                         useremail = downloader_params['username
'] 
2797                         password = downloader_params['password
'] 
2798                 elif downloader_params.get('usenetrc
', False): 
2800                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE) 
2801                                 if info is not None: 
2805                                         raise netrc.NetrcParseError('No authenticators 
for %s' % self._NETRC_MACHINE) 
2806                         except (IOError, netrc.NetrcParseError), err: 
2807                                 self._downloader.to_stderr(u'WARNING
: parsing 
.netrc
: %s' % str(err)) 
2810                 if useremail is None: 
2819                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) 
2822                         login_results = urllib2.urlopen(request).read() 
2823                         if re.search(r'<form(.*)name
="login"(.*)</form
>', login_results) is not None: 
2824                                 self._downloader.to_stderr(u'WARNING
: unable to log 
in: bad username
/password
, or exceded login rate 
limit (~
3/min). Check credentials 
or wait
.') 
2826                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2827                         self._downloader.to_stderr(u'WARNING
: unable to log 
in: %s' % str(err)) 
2830         def _real_extract(self, url): 
2831                 mobj = re.match(self._VALID_URL, url) 
2833                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
2835                 video_id = mobj.group('ID
') 
2838                 self.report_video_webpage_download(video_id) 
2839                 request = urllib2.Request('https
://www
.facebook
.com
/video
/video
.php?v
=%s' % video_id) 
2841                         page = urllib2.urlopen(request) 
2842                         video_webpage = page.read() 
2843                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2844                         self._downloader.trouble(u'ERROR
: unable to download video webpage
: %s' % str(err)) 
2847                 # Start extracting information 
2848                 self.report_information_extraction(video_id) 
2850                 # Extract information 
2851                 video_info = self._parse_page(video_webpage) 
2854                 if 'owner
' not in video_info: 
2855                         self._downloader.trouble(u'ERROR
: unable to extract uploader nickname
') 
2857                 video_uploader = video_info['owner
'] 
2860                 if 'title
' not in video_info: 
2861                         self._downloader.trouble(u'ERROR
: unable to extract video title
') 
2863                 video_title = video_info['title
'] 
2864                 video_title = video_title.decode('utf
-8') 
2865                 video_title = sanitize_title(video_title) 
2867                 simple_title = _simplify_title(video_title) 
2870                 if 'thumbnail
' not in video_info: 
2871                         self._downloader.trouble(u'WARNING
: unable to extract video thumbnail
') 
2872                         video_thumbnail = '' 
2874                         video_thumbnail = video_info['thumbnail
'] 
2878                 if 'upload_date
' in video_info: 
2879                         upload_time = video_info['upload_date
'] 
2880                         timetuple = email.utils.parsedate_tz(upload_time) 
2881                         if timetuple is not None: 
2883                                         upload_date = time.strftime('%Y
%m
%d', timetuple[0:9]) 
2888                 video_description = video_info.get('description
', 'No description available
.') 
2890                 url_map = video_info['video_urls
'] 
2891                 if len(url_map.keys()) > 0: 
2892                         # Decide which formats to download 
2893                         req_format = self._downloader.params.get('format
', None) 
2894                         format_limit = self._downloader.params.get('format_limit
', None) 
2896                         if format_limit is not None and format_limit in self._available_formats: 
2897                                 format_list = self._available_formats[self._available_formats.index(format_limit):] 
2899                                 format_list = self._available_formats 
2900                         existing_formats = [x for x in format_list if x in url_map] 
2901                         if len(existing_formats) == 0: 
2902                                 self._downloader.trouble(u'ERROR
: no known formats available 
for video
') 
2904                         if req_format is None: 
2905                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality 
2906                         elif req_format == 'worst
': 
2907                                 video_url_list = [(existing_formats[len(existing_formats)-1], url_map[existing_formats[len(existing_formats)-1]])] # worst quality 
2908                         elif req_format == '-1': 
2909                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats 
2912                                 if req_format not in url_map: 
2913                                         self._downloader.trouble(u'ERROR
: requested format 
not available
') 
2915                                 video_url_list = [(req_format, url_map[req_format])] # Specific format 
2917                 for format_param, video_real_url in video_url_list: 
2919                         # At this point we have a new video 
2920                         self._downloader.increment_downloads() 
2923                         video_extension = self._video_extensions.get(format_param, 'mp4
') 
2926                                 # Process video information 
2927                                 self._downloader.process_info({ 
2928                                         'id':           video_id.decode('utf
-8'), 
2929                                         'url
':          video_real_url.decode('utf
-8'), 
2930                                         'uploader
':     video_uploader.decode('utf
-8'), 
2931                                         'upload_date
':  upload_date, 
2932                                         'title
':        video_title, 
2933                                         'stitle
':       simple_title, 
2934                                         'ext
':          video_extension.decode('utf
-8'), 
2935                                         'format
':       (format_param is None and u'NA
' or format_param.decode('utf
-8')), 
2936                                         'thumbnail
':    video_thumbnail.decode('utf
-8'), 
2937                                         'description
':  video_description.decode('utf
-8'), 
2940                         except UnavailableVideoError, err: 
2941                                 self._downloader.trouble(u'\nERROR
: unable to download video
') 
2943 class BlipTVIE(InfoExtractor): 
2944         """Information extractor for blip.tv""" 
2946         _VALID_URL = r'^
(?
:https?
://)?
(?
:\w
+\
.)?blip\
.tv(/.+)$
' 
2947         _URL_EXT = r'^
.*\
.([a
-z0
-9]+)$
' 
2948         IE_NAME = u'blip
.tv
' 
2950         def report_extraction(self, file_id): 
2951                 """Report information extraction.""" 
2952                 self._downloader.to_screen(u'[%s] %s: Extracting information
' % (self.IE_NAME, file_id)) 
2954         def report_direct_download(self, title): 
2955                 """Report information extraction.""" 
2956                 self._downloader.to_screen(u'[%s] %s: Direct download detected
' % (self.IE_NAME, title)) 
2958         def _real_extract(self, url): 
2959                 mobj = re.match(self._VALID_URL, url) 
2961                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
2968                 json_url = url + cchar + 'skin
=json
&version
=2&no_wrap
=1' 
2969                 request = urllib2.Request(json_url) 
2970                 self.report_extraction(mobj.group(1)) 
2973                         urlh = urllib2.urlopen(request) 
2974                         if urlh.headers.get('Content
-Type
', '').startswith('video
/'): # Direct download 
2975                                 basename = url.split('/')[-1] 
2976                                 title,ext = os.path.splitext(basename) 
2977                                 title = title.decode('UTF
-8') 
2978                                 ext = ext.replace('.', '') 
2979                                 self.report_direct_download(title) 
2984                                         'stitle
': _simplify_title(title), 
2988                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2989                         self._downloader.trouble(u'ERROR
: unable to download video info webpage
: %s' % str(err)) 
2991                 if info is None: # Regular URL 
2993                                 json_code = urlh.read() 
2994                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2995                                 self._downloader.trouble(u'ERROR
: unable to read video info webpage
: %s' % str(err)) 
2999                                 json_data = json.loads(json_code) 
3000                                 if 'Post
' in json_data: 
3001                                         data = json_data['Post
'] 
3005                                 upload_date = datetime.datetime.strptime(data['datestamp
'], '%m
-%d-%y 
%H
:%M
%p
').strftime('%Y
%m
%d') 
3006                                 video_url = data['media
']['url
'] 
3007                                 umobj = re.match(self._URL_EXT, video_url) 
3009                                         raise ValueError('Can 
not determine filename extension
') 
3010                                 ext = umobj.group(1) 
3013                                         'id': data['item_id
'], 
3015                                         'uploader
': data['display_name
'], 
3016                                         'upload_date
': upload_date, 
3017                                         'title
': data['title
'], 
3018                                         'stitle
': _simplify_title(data['title
']), 
3020                                         'format
': data['media
']['mimeType
'], 
3021                                         'thumbnail
': data['thumbnailUrl
'], 
3022                                         'description
': data['description
'], 
3023                                         'player_url
': data['embedUrl
'] 
3025                         except (ValueError,KeyError), err: 
3026                                 self._downloader.trouble(u'ERROR
: unable to parse video information
: %s' % repr(err)) 
3029                 self._downloader.increment_downloads() 
3032                         self._downloader.process_info(info) 
3033                 except UnavailableVideoError, err: 
3034                         self._downloader.trouble(u'\nERROR
: unable to download video
') 
3037 class MyVideoIE(InfoExtractor): 
3038         """Information Extractor for myvideo.de.""" 
3040         _VALID_URL = r'(?
:http
://)?
(?
:www\
.)?myvideo\
.de
/watch
/([0-9]+)/([^?
/]+).*' 
3041         IE_NAME = u'myvideo
' 
3043         def __init__(self, downloader=None): 
3044                 InfoExtractor.__init__(self, downloader) 
3046         def report_download_webpage(self, video_id): 
3047                 """Report webpage download.""" 
3048                 self._downloader.to_screen(u'[myvideo
] %s: Downloading webpage
' % video_id) 
3050         def report_extraction(self, video_id): 
3051                 """Report information extraction.""" 
3052                 self._downloader.to_screen(u'[myvideo
] %s: Extracting information
' % video_id) 
3054         def _real_extract(self,url): 
3055                 mobj = re.match(self._VALID_URL, url) 
3057                         self._download.trouble(u'ERROR
: invalid URL
: %s' % url) 
3060                 video_id = mobj.group(1) 
3063                 request = urllib2.Request('http
://www
.myvideo
.de
/watch
/%s' % video_id) 
3065                         self.report_download_webpage(video_id) 
3066                         webpage = urllib2.urlopen(request).read() 
3067                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3068                         self._downloader.trouble(u'ERROR
: Unable to retrieve video webpage
: %s' % str(err)) 
3071                 self.report_extraction(video_id) 
3072                 mobj = re.search(r'<link rel
=\'image_src
\' href
=\'(http
://is[0-9].myvideo\
.de
/de
/movie
[0-9]+/[a
-f0
-9]+)/thumbs
/[^
.]+\
.jpg
\' />', 
3075                         self._downloader.trouble(u'ERROR
: unable to extract media URL
') 
3077                 video_url = mobj.group(1) + ('/%s.flv
' % video_id) 
3079                 mobj = re.search('<title
>([^
<]+)</title
>', webpage) 
3081                         self._downloader.trouble(u'ERROR
: unable to extract title
') 
3084                 video_title = mobj.group(1) 
3085                 video_title = sanitize_title(video_title) 
3087                 simple_title = _simplify_title(video_title) 
3090                         self._downloader.process_info({ 
3094                                 'upload_date
':  u'NA
', 
3095                                 'title
':        video_title, 
3096                                 'stitle
':       simple_title, 
3101                 except UnavailableVideoError: 
3102                         self._downloader.trouble(u'\nERROR
: Unable to download video
') 
3104 class ComedyCentralIE(InfoExtractor): 
3105         """Information extractor for The Daily Show and Colbert Report """ 
3107         _VALID_URL = r'^
(:(?P
<shortname
>tds|thedailyshow|cr|colbert|colbertnation|colbertreport
))|
(https?
://)?
(www\
.)?
(?P
<showname
>thedailyshow|colbertnation
)\
.com
/full
-episodes
/(?P
<episode
>.*)$
' 
3108         IE_NAME = u'comedycentral
' 
3110         def report_extraction(self, episode_id): 
3111                 self._downloader.to_screen(u'[comedycentral
] %s: Extracting information
' % episode_id) 
3113         def report_config_download(self, episode_id): 
3114                 self._downloader.to_screen(u'[comedycentral
] %s: Downloading configuration
' % episode_id) 
3116         def report_index_download(self, episode_id): 
3117                 self._downloader.to_screen(u'[comedycentral
] %s: Downloading show index
' % episode_id) 
3119         def report_player_url(self, episode_id): 
3120                 self._downloader.to_screen(u'[comedycentral
] %s: Determining player URL
' % episode_id) 
3122         def _real_extract(self, url): 
3123                 mobj = re.match(self._VALID_URL, url) 
3125                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
3128                 if mobj.group('shortname
'): 
3129                         if mobj.group('shortname
') in ('tds
', 'thedailyshow
'): 
3130                                 url = u'http
://www
.thedailyshow
.com
/full
-episodes
/' 
3132                                 url = u'http
://www
.colbertnation
.com
/full
-episodes
/' 
3133                         mobj = re.match(self._VALID_URL, url) 
3134                         assert mobj is not None 
3136                 dlNewest = not mobj.group('episode
') 
3138                         epTitle = mobj.group('showname
') 
3140                         epTitle = mobj.group('episode
') 
3142                 req = urllib2.Request(url) 
3143                 self.report_extraction(epTitle) 
3145                         htmlHandle = urllib2.urlopen(req) 
3146                         html = htmlHandle.read() 
3147                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3148                         self._downloader.trouble(u'ERROR
: unable to download webpage
: %s' % unicode(err)) 
3151                         url = htmlHandle.geturl() 
3152                         mobj = re.match(self._VALID_URL, url) 
3154                                 self._downloader.trouble(u'ERROR
: Invalid redirected URL
: ' + url) 
3156                         if mobj.group('episode
') == '': 
3157                                 self._downloader.trouble(u'ERROR
: Redirected URL 
is still 
not specific
: ' + url) 
3159                         epTitle = mobj.group('episode
') 
3161                 mMovieParams = re.findall('(?
:<param name
="movie" value
="|var url = ")(http
://media
.mtvnservices
.com
/([^
"]*episode.*?:.*?))"', html) 
3162                 if len(mMovieParams) == 0: 
3163                         self._downloader.trouble(u'ERROR
: unable to find Flash URL 
in webpage 
' + url) 
3166                 playerUrl_raw = mMovieParams[0][0] 
3167                 self.report_player_url(epTitle) 
3169                         urlHandle = urllib2.urlopen(playerUrl_raw) 
3170                         playerUrl = urlHandle.geturl() 
3171                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3172                         self._downloader.trouble(u'ERROR
: unable to find out player URL
: ' + unicode(err)) 
3175                 uri = mMovieParams[0][1] 
3176                 indexUrl = 'http
://shadow
.comedycentral
.com
/feeds
/video_player
/mrss
/?
' + urllib.urlencode({'uri
': uri}) 
3177                 self.report_index_download(epTitle) 
3179                         indexXml = urllib2.urlopen(indexUrl).read() 
3180                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3181                         self._downloader.trouble(u'ERROR
: unable to download episode index
: ' + unicode(err)) 
3184                 idoc = xml.etree.ElementTree.fromstring(indexXml) 
3185                 itemEls = idoc.findall('.//item
') 
3186                 for itemEl in itemEls: 
3187                         mediaId = itemEl.findall('./guid
')[0].text 
3188                         shortMediaId = mediaId.split(':')[-1] 
3189                         showId = mediaId.split(':')[-2].replace('.com
', '') 
3190                         officialTitle = itemEl.findall('./title
')[0].text 
3191                         officialDate = itemEl.findall('./pubDate
')[0].text 
3193                         configUrl = ('http
://www
.comedycentral
.com
/global/feeds
/entertainment
/media
/mediaGenEntertainment
.jhtml?
' + 
3194                                                 urllib.urlencode({'uri
': mediaId})) 
3195                         configReq = urllib2.Request(configUrl) 
3196                         self.report_config_download(epTitle) 
3198                                 configXml = urllib2.urlopen(configReq).read() 
3199                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3200                                 self._downloader.trouble(u'ERROR
: unable to download webpage
: %s' % unicode(err)) 
3203                         cdoc = xml.etree.ElementTree.fromstring(configXml) 
3205                         for rendition in cdoc.findall('.//rendition
'): 
3206                                 finfo = (rendition.attrib['bitrate
'], rendition.findall('./src
')[0].text) 
3210                                 self._downloader.trouble(u'\nERROR
: unable to download 
' + mediaId + ': No videos found
') 
3213                         # For now, just pick the highest bitrate 
3214                         format,video_url = turls[-1] 
3216                         self._downloader.increment_downloads() 
3218                         effTitle = showId + u'-' + epTitle 
3223                                 'upload_date
': officialDate, 
3225                                 'stitle
': _simplify_title(effTitle), 
3229                                 'description
': officialTitle, 
3230                                 'player_url
': playerUrl 
3234                                 self._downloader.process_info(info) 
3235                         except UnavailableVideoError, err: 
3236                                 self._downloader.trouble(u'\nERROR
: unable to download 
' + mediaId) 
3240 class EscapistIE(InfoExtractor): 
3241         """Information extractor for The Escapist """ 
3243         _VALID_URL = r'^
(https?
://)?
(www\
.)?escapistmagazine\
.com
/videos
/view
/(?P
<showname
>[^
/]+)/(?P
<episode
>[^
/?
]+)[/?
]?
.*$
' 
3244         IE_NAME = u'escapist
' 
3246         def report_extraction(self, showName): 
3247                 self._downloader.to_screen(u'[escapist
] %s: Extracting information
' % showName) 
3249         def report_config_download(self, showName): 
3250                 self._downloader.to_screen(u'[escapist
] %s: Downloading configuration
' % showName) 
3252         def _real_extract(self, url): 
3253                 htmlParser = HTMLParser.HTMLParser() 
3255                 mobj = re.match(self._VALID_URL, url) 
3257                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
3259                 showName = mobj.group('showname
') 
3260                 videoId = mobj.group('episode
') 
3262                 self.report_extraction(showName) 
3264                         webPage = urllib2.urlopen(url).read() 
3265                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3266                         self._downloader.trouble(u'ERROR
: unable to download webpage
: ' + unicode(err)) 
3269                 descMatch = re.search('<meta name
="description" content
="([^"]*)"', webPage) 
3270                 description = htmlParser.unescape(descMatch.group(1)) 
3271                 imgMatch = re.search('<meta property="og
:image
" content="([^
"]*)"', webPage) 
3272                 imgUrl = htmlParser.unescape(imgMatch.group(1)) 
3273                 playerUrlMatch = re.search('<meta 
property="og:video" content
="([^"]*)"', webPage) 
3274                 playerUrl = htmlParser.unescape(playerUrlMatch.group(1)) 
3275                 configUrlMatch = re.search('config=(.*)$', playerUrl) 
3276                 configUrl = urllib2.unquote(configUrlMatch.group(1)) 
3278                 self.report_config_download(showName) 
3280                         configJSON = urllib2.urlopen(configUrl).read() 
3281                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3282                         self._downloader.trouble(u'ERROR: unable to download configuration: ' + unicode(err)) 
3285                 # Technically, it's JavaScript, not JSON 
3286                 configJSON = configJSON.replace("'", '"') 
3289                         config = json.loads(configJSON) 
3290                 except (ValueError,), err: 
3291                         self._downloader.trouble(u'ERROR: Invalid JSON in configuration file: ' + unicode(err)) 
3294                 playlist = config['playlist'] 
3295                 videoUrl = playlist[1]['url'] 
3297                 self._downloader.increment_downloads() 
3301                         'uploader': showName, 
3302                         'upload_date': None, 
3304                         'stitle': _simplify_title(showName), 
3307                         'thumbnail': imgUrl, 
3308                         'description': description, 
3309                         'player_url': playerUrl, 
3313                         self._downloader.process_info(info) 
3314                 except UnavailableVideoError, err: 
3315                         self._downloader.trouble(u'\nERROR: unable to download ' + videoId) 
3318 class CollegeHumorIE(InfoExtractor): 
3319         """Information extractor for collegehumor.com""" 
3321         _VALID_URL = r'^(?:https?://)?(?:www\.)?collegehumor\.com/video/(?P<videoid>[0-9]+)/(?P<shorttitle>.*)$' 
3322         IE_NAME = u'collegehumor' 
3324         def report_webpage(self, video_id): 
3325                 """Report information extraction.""" 
3326                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) 
3328         def report_extraction(self, video_id): 
3329                 """Report information extraction.""" 
3330                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) 
3332         def _real_extract(self, url): 
3333                 htmlParser = HTMLParser.HTMLParser() 
3335                 mobj = re.match(self._VALID_URL, url) 
3337                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
3339                 video_id = mobj.group('videoid') 
3341                 self.report_webpage(video_id) 
3342                 request = urllib2.Request(url) 
3344                         webpage = urllib2.urlopen(request).read() 
3345                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3346                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) 
3349                 m = re.search(r'id="video
:(?P
<internalvideoid
>[0-9]+)"', webpage) 
3351                         self._downloader.trouble(u'ERROR: Cannot extract internal video ID') 
3353                 internal_video_id = m.group('internalvideoid') 
3357                         'internal_id': internal_video_id, 
3360                 self.report_extraction(video_id) 
3361                 xmlUrl = 'http://www.collegehumor.com/moogaloop/video:' + internal_video_id 
3363                         metaXml = urllib2.urlopen(xmlUrl).read() 
3364                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3365                         self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err)) 
3368                 mdoc = xml.etree.ElementTree.fromstring(metaXml) 
3370                         videoNode = mdoc.findall('./video')[0] 
3371                         info['description'] = videoNode.findall('./description')[0].text 
3372                         info['title'] = videoNode.findall('./caption')[0].text 
3373                         info['stitle'] = _simplify_title(info['title']) 
3374                         info['url'] = videoNode.findall('./file')[0].text 
3375                         info['thumbnail'] = videoNode.findall('./thumbnail')[0].text 
3376                         info['ext'] = info['url'].rpartition('.')[2] 
3377                         info['format'] = info['ext'] 
3379                         self._downloader.trouble(u'\nERROR: Invalid metadata XML file') 
3382                 self._downloader.increment_downloads() 
3385                         self._downloader.process_info(info) 
3386                 except UnavailableVideoError, err: 
3387                         self._downloader.trouble(u'\nERROR: unable to download video') 
3390 class XVideosIE(InfoExtractor): 
3391         """Information extractor for xvideos.com""" 
3393         _VALID_URL = r'^(?:https?://)?(?:www\.)?xvideos\.com/video([0-9]+)(?:.*)' 
3394         IE_NAME = u'xvideos' 
3396         def report_webpage(self, video_id): 
3397                 """Report information extraction.""" 
3398                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) 
3400         def report_extraction(self, video_id): 
3401                 """Report information extraction.""" 
3402                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) 
3404         def _real_extract(self, url): 
3405                 htmlParser = HTMLParser.HTMLParser() 
3407                 mobj = re.match(self._VALID_URL, url) 
3409                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
3411                 video_id = mobj.group(1).decode('utf-8') 
3413                 self.report_webpage(video_id) 
3415                 request = urllib2.Request(r'http://www.xvideos.com/video' + video_id) 
3417                         webpage = urllib2.urlopen(request).read() 
3418                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3419                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) 
3422                 self.report_extraction(video_id) 
3426                 mobj = re.search(r'flv_url=(.+?)&', webpage) 
3428                         self._downloader.trouble(u'ERROR: unable to extract video url') 
3430                 video_url = urllib2.unquote(mobj.group(1).decode('utf-8')) 
3434                 mobj = re.search(r'<title>(.*?)\s+-\s+XVID', webpage) 
3436                         self._downloader.trouble(u'ERROR: unable to extract video title') 
3438                 video_title = mobj.group(1).decode('utf-8') 
3441                 # Extract video thumbnail 
3442                 mobj = re.search(r'http://(?:img.*?\.)xvideos.com/videos/thumbs/[a-fA-F0-9]/[a-fA-F0-9]/[a-fA-F0-9]/([a-fA-F0-9.]+jpg)', webpage) 
3444                         self._downloader.trouble(u'ERROR: unable to extract video thumbnail') 
3446                 video_thumbnail = mobj.group(1).decode('utf-8') 
3450                 self._downloader.increment_downloads() 
3455                         'upload_date': None, 
3456                         'title': video_title, 
3457                         'stitle': _simplify_title(video_title), 
3460                         'thumbnail': video_thumbnail, 
3461                         'description': None, 
3466                         self._downloader.process_info(info) 
3467                 except UnavailableVideoError, err: 
3468                         self._downloader.trouble(u'\nERROR: unable to download ' + video_id) 
3471 class SoundcloudIE(InfoExtractor): 
3472         """Information extractor for soundcloud.com 
3473            To access the media, the uid of the song and a stream token 
3474            must be extracted from the page source and the script must make 
3475            a request to media.soundcloud.com/crossdomain.xml. Then 
3476            the media can be grabbed by requesting from an url composed 
3477            of the stream token and uid 
3480         _VALID_URL = r'^(?:https?://)?(?:www\.)?soundcloud\.com/([\w\d-]+)/([\w\d-]+)' 
3481         IE_NAME = u'soundcloud' 
3483         def __init__(self, downloader=None): 
3484                 InfoExtractor.__init__(self, downloader) 
3486         def report_webpage(self, video_id): 
3487                 """Report information extraction.""" 
3488                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) 
3490         def report_extraction(self, video_id): 
3491                 """Report information extraction.""" 
3492                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) 
3494         def _real_extract(self, url): 
3495                 htmlParser = HTMLParser.HTMLParser() 
3497                 mobj = re.match(self._VALID_URL, url) 
3499                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
3502                 # extract uploader (which is in the url) 
3503                 uploader = mobj.group(1).decode('utf-8') 
3504                 # extract simple title (uploader + slug of song title) 
3505                 slug_title =  mobj.group(2).decode('utf-8') 
3506                 simple_title = uploader + '-' + slug_title 
3508                 self.report_webpage('%s/%s' % (uploader, slug_title)) 
3510                 request = urllib2.Request('http://soundcloud.com/%s/%s' % (uploader, slug_title)) 
3512                         webpage = urllib2.urlopen(request).read() 
3513                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3514                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) 
3517                 self.report_extraction('%s/%s' % (uploader, slug_title)) 
3519                 # extract uid and stream token that soundcloud hands out for access 
3520                 mobj = re.search('"uid
":"([\w\d
]+?
)".*?stream_token=([\w\d]+)', webpage) 
3522                         video_id = mobj.group(1) 
3523                         stream_token = mobj.group(2) 
3525                 # extract unsimplified title 
3526                 mobj = re.search('"title
":"(.*?
)",', webpage) 
3528                         title = mobj.group(1) 
3530                 # construct media url (with uid/token) 
3531                 mediaURL = "http
://media
.soundcloud
.com
/stream
/%s?stream_token
=%s" 
3532                 mediaURL = mediaURL % (video_id, stream_token) 
3535                 description = u'No description available' 
3536                 mobj = re.search('track-description-value"><p
>(.*?
)</p
>', webpage) 
3538                         description = mobj.group(1) 
3542                 mobj = re.search("pretty-date'>on ([\w
]+ [\d
]+, [\d
]+ \d
+:\d
+)</abbr
></h2
>", webpage) 
3545                                 upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d') 
3546                         except Exception, e: 
3549                 # for soundcloud, a request to a cross domain is required for cookies 
3550                 request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) 
3553                         self._downloader.process_info({ 
3554                                 'id':           video_id.decode('utf-8'), 
3556                                 'uploader':     uploader.decode('utf-8'), 
3557                                 'upload_date':  upload_date, 
3558                                 'title':        simple_title.decode('utf-8'), 
3559                                 'stitle':       simple_title.decode('utf-8'), 
3563                                 'description': description.decode('utf-8') 
3565                 except UnavailableVideoError: 
3566                         self._downloader.trouble(u'\nERROR: unable to download video') 
3569 class InfoQIE(InfoExtractor): 
3570         """Information extractor for infoq.com""" 
3572         _VALID_URL = r'^(?:https?://)?(?:www\.)?infoq\.com/[^/]+/[^/]+$' 
3575         def report_webpage(self, video_id): 
3576                 """Report information extraction.""" 
3577                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, video_id)) 
3579         def report_extraction(self, video_id): 
3580                 """Report information extraction.""" 
3581                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) 
3583         def _real_extract(self, url): 
3584                 htmlParser = HTMLParser.HTMLParser() 
3586                 mobj = re.match(self._VALID_URL, url) 
3588                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
3591                 self.report_webpage(url) 
3593                 request = urllib2.Request(url) 
3595                         webpage = urllib2.urlopen(request).read() 
3596                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3597                         self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) 
3600                 self.report_extraction(url) 
3604                 mobj = re.search(r"jsclassref
='([^']*)'", webpage) 
3606                         self._downloader.trouble(u'ERROR
: unable to extract video url
') 
3608                 video_url = 'rtmpe
://video
.infoq
.com
/cfx
/st
/' + urllib2.unquote(mobj.group(1).decode('base64
')) 
3612                 mobj = re.search(r'contentTitle 
= "(.*?)";', webpage) 
3614                         self._downloader.trouble(u'ERROR
: unable to extract video title
') 
3616                 video_title = mobj.group(1).decode('utf
-8') 
3618                 # Extract description 
3619                 video_description = u'No description available
.' 
3620                 mobj = re.search(r'<meta name
="description" content
="(.*)"(?
:\s
*/)?
>', webpage) 
3621                 if mobj is not None: 
3622                         video_description = mobj.group(1).decode('utf
-8') 
3624                 video_filename = video_url.split('/')[-1] 
3625                 video_id, extension = video_filename.split('.') 
3627                 self._downloader.increment_downloads() 
3632                         'upload_date
': None, 
3633                         'title
': video_title, 
3634                         'stitle
': _simplify_title(video_title), 
3636                         'format
': extension, # Extension is always(?) mp4, but seems to be flv 
3638                         'description
': video_description, 
3643                         self._downloader.process_info(info) 
3644                 except UnavailableVideoError, err: 
3645                         self._downloader.trouble(u'\nERROR
: unable to download 
' + video_url) 
3647 class MixcloudIE(InfoExtractor): 
3648         """Information extractor for www.mixcloud.com""" 
3649         _VALID_URL = r'^
(?
:https?
://)?
(?
:www\
.)?mixcloud\
.com
/([\w\d
-]+)/([\w\d
-]+)' 
3650         IE_NAME = u'mixcloud
' 
3652         def __init__(self, downloader=None): 
3653                 InfoExtractor.__init__(self, downloader) 
3655         def report_download_json(self, file_id): 
3656                 """Report JSON download.""" 
3657                 self._downloader.to_screen(u'[%s] Downloading json
' % self.IE_NAME) 
3659         def report_extraction(self, file_id): 
3660                 """Report information extraction.""" 
3661                 self._downloader.to_screen(u'[%s] %s: Extracting information
' % (self.IE_NAME, file_id)) 
3663         def get_urls(self, jsonData, fmt, bitrate='best
'): 
3664                 """Get urls from 'audio_formats
' section in json""" 
3667                         bitrate_list = jsonData[fmt] 
3668                         if bitrate is None or bitrate == 'best
' or bitrate not in bitrate_list: 
3669                                 bitrate = max(bitrate_list) # select highest 
3671                         url_list = jsonData[fmt][bitrate] 
3672                 except TypeError: # we have no bitrate info. 
3673                         url_list = jsonData[fmt] 
3677         def check_urls(self, url_list): 
3678                 """Returns 1st active url from list""" 
3679                 for url in url_list: 
3681                                 urllib2.urlopen(url) 
3683                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3688         def _print_formats(self, formats): 
3689                 print 'Available formats
:' 
3690                 for fmt in formats.keys(): 
3691                         for b in formats[fmt]: 
3693                                         ext = formats[fmt][b][0] 
3694                                         print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]) 
3695                                 except TypeError: # we have no bitrate info 
3696                                         ext = formats[fmt][0] 
3697                                         print '%s\t%s\t[%s]' % (fmt, '??
', ext.split('.')[-1]) 
3700         def _real_extract(self, url): 
3701                 mobj = re.match(self._VALID_URL, url) 
3703                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
3705                 # extract uploader & filename from url 
3706                 uploader = mobj.group(1).decode('utf
-8') 
3707                 file_id = uploader + "-" + mobj.group(2).decode('utf
-8') 
3709                 # construct API request 
3710                 file_url = 'http
://www
.mixcloud
.com
/api
/1/cloudcast
/' + '/'.join(url.split('/')[-3:-1]) + '.json
' 
3711                 # retrieve .json file with links to files 
3712                 request = urllib2.Request(file_url) 
3714                         self.report_download_json(file_url) 
3715                         jsonData = urllib2.urlopen(request).read() 
3716                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3717                         self._downloader.trouble(u'ERROR
: Unable to retrieve 
file: %s' % str(err)) 
3721                 json_data = json.loads(jsonData) 
3722                 player_url = json_data['player_swf_url
'] 
3723                 formats = dict(json_data['audio_formats
']) 
3725                 req_format = self._downloader.params.get('format
', None) 
3728                 if self._downloader.params.get('listformats
', None): 
3729                         self._print_formats(formats) 
3732                 if req_format is None or req_format == 'best
': 
3733                         for format_param in formats.keys(): 
3734                                 url_list = self.get_urls(formats, format_param) 
3736                                 file_url = self.check_urls(url_list) 
3737                                 if file_url is not None: 
3740                         if req_format not in formats.keys(): 
3741                                 self._downloader.trouble(u'ERROR
: format 
is not available
') 
3744                         url_list = self.get_urls(formats, req_format) 
3745                         file_url = self.check_urls(url_list) 
3746                         format_param = req_format 
3749                 self._downloader.increment_downloads() 
3751                         # Process file information 
3752                         self._downloader.process_info({ 
3753                                 'id':           file_id.decode('utf
-8'), 
3754                                 'url
':          file_url.decode('utf
-8'), 
3755                                 'uploader
':     uploader.decode('utf
-8'), 
3756                                 'upload_date
':  u'NA
', 
3757                                 'title
':        json_data['name
'], 
3758                                 'stitle
':       _simplify_title(json_data['name
']), 
3759                                 'ext
':          file_url.split('.')[-1].decode('utf
-8'), 
3760                                 'format
':       (format_param is None and u'NA
' or format_param.decode('utf
-8')), 
3761                                 'thumbnail
':    json_data['thumbnail_url
'], 
3762                                 'description
':  json_data['description
'], 
3763                                 'player_url
':   player_url.decode('utf
-8'), 
3765                 except UnavailableVideoError, err: 
3766                         self._downloader.trouble(u'ERROR
: unable to download 
file') 
3768 class StanfordOpenClassroomIE(InfoExtractor): 
3769         """Information extractor for Stanford's Open ClassRoom
""" 
3771         _VALID_URL = r'^(?:https?://)?openclassroom.stanford.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$' 
3772         IE_NAME = u'stanfordoc' 
3774         def report_download_webpage(self, objid): 
3775                 """Report information extraction
.""" 
3776                 self._downloader.to_screen(u'[%s] %s: Downloading webpage' % (self.IE_NAME, objid)) 
3778         def report_extraction(self, video_id): 
3779                 """Report information extraction
.""" 
3780                 self._downloader.to_screen(u'[%s] %s: Extracting information' % (self.IE_NAME, video_id)) 
3782         def _real_extract(self, url): 
3783                 mobj = re.match(self._VALID_URL, url) 
3785                         self._downloader.trouble(u'ERROR: invalid URL: %s' % url) 
3788                 if mobj.group('course') and mobj.group('video'): # A specific video 
3789                         course = mobj.group('course') 
3790                         video = mobj.group('video') 
3792                                 'id': _simplify_title(course + '_' + video), 
3795                         self.report_extraction(info['id']) 
3796                         baseUrl = 'http://openclassroom.stanford.edu/MainFolder/courses/' + course + '/videos/' 
3797                         xmlUrl = baseUrl + video + '.xml' 
3799                                 metaXml = urllib2.urlopen(xmlUrl).read() 
3800                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3801                                 self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % unicode(err)) 
3803                         mdoc = xml.etree.ElementTree.fromstring(metaXml) 
3805                                 info['title'] = mdoc.findall('./title')[0].text 
3806                                 info['url'] = baseUrl + mdoc.findall('./videoFile')[0].text 
3808                                 self._downloader.trouble(u'\nERROR: Invalid metadata XML file') 
3810                         info['stitle'] = _simplify_title(info['title']) 
3811                         info['ext'] = info['url'].rpartition('.')[2] 
3812                         info['format'] = info['ext'] 
3813                         self._downloader.increment_downloads() 
3815                                 self._downloader.process_info(info) 
3816                         except UnavailableVideoError, err: 
3817                                 self._downloader.trouble(u'\nERROR: unable to download video') 
3818                 elif mobj.group('course'): # A course page 
3819                         unescapeHTML = HTMLParser.HTMLParser().unescape 
3821                         course = mobj.group('course') 
3823                                 'id': _simplify_title(course), 
3827                         self.report_download_webpage(info['id']) 
3829                                 coursepage = urllib2.urlopen(url).read() 
3830                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3831                                 self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err)) 
3834                         m = re.search('<h1>([^<]+)</h1>', coursepage) 
3836                                 info['title'] = unescapeHTML(m.group(1)) 
3838                                 info['title'] = info['id'] 
3839                         info['stitle'] = _simplify_title(info['title']) 
3841                         m = re.search('<description>([^<]+)</description>', coursepage) 
3843                                 info['description'] = unescapeHTML(m.group(1)) 
3845                         links = _orderedSet(re.findall('<a href="(VideoPage.php\?[^"]+)">', coursepage)) 
3848                                         'type': 'reference', 
3849                                         'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(vpage), 
3853                         for entry in info['list']: 
3854                                 assert entry['type'] == 'reference' 
3855                                 self.extract(entry['url']) 
3857                         unescapeHTML = HTMLParser.HTMLParser().unescape 
3860                                 'id': 'Stanford OpenClassroom', 
3864                         self.report_download_webpage(info['id']) 
3865                         rootURL = 'http://openclassroom.stanford.edu/MainFolder/HomePage.php' 
3867                                 rootpage = urllib2.urlopen(rootURL).read() 
3868                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3869                                 self._downloader.trouble(u'ERROR: unable to download course info page: ' + unicode(err)) 
3872                         info['title'] = info['id'] 
3873                         info['stitle'] = _simplify_title(info['title']) 
3875                         links = _orderedSet(re.findall('<a href="(CoursePage.php\?[^"]+)">', rootpage)) 
3878                                         'type': 'reference', 
3879                                         'url': 'http://openclassroom.stanford.edu/MainFolder/' + unescapeHTML(cpage), 
3883                         for entry in info['list']: 
3884                                 assert entry['type'] == 'reference' 
3885                                 self.extract(entry['url']) 
class PostProcessor(object):
	"""Post Processor class.

	PostProcessor objects can be added to downloaders with their
	add_post_processor() method. When the downloader has finished a
	successful download, it will take its internal chain of PostProcessors
	and start calling the run() method on each one of them, first with
	an initial argument and then with the returned value of the previous
	PostProcessor in the chain.

	The chain will be stopped if one of them ever returns None or the end
	of the chain is reached.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		composed by InfoExtractors. The only difference is that this
		one has an extra field called "filepath" that points to the
		downloaded file.

		When this method returns None, the postprocessing chain is
		stopped. However, this method may return an information
		dictionary that will be passed to the next postprocessing
		object in the chain. It can be the one it received after
		changing some fields.

		In addition, this method may raise a PostProcessingError
		exception that will be taken into account by the downloader.
		"""
		return information # by default, do nothing
class AudioConversionError(BaseException):
	"""Raised when ffmpeg/ffprobe fails during audio extraction.

	NOTE(review): this derives from BaseException rather than Exception,
	so a plain `except Exception:` will NOT catch it; callers rely on a
	bare except plus an isinstance() check.
	"""
	def __init__(self, message):
		# human-readable reason, shown to the user by the post-processor
		self.message = message
class FFmpegExtractAudioPP(PostProcessor):
	"""Post-processor that converts a downloaded video into an audio file
	using ffmpeg/ffprobe.

	preferredcodec: 'best' (default), 'aac', 'mp3', 'm4a', 'vorbis' or 'wav'.
	preferredquality: ffmpeg '-ab' bitrate string, or None.
	keepvideo: when False (default) the source video is removed afterwards.
	"""

	def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False):
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec
		self._preferredquality = preferredquality
		self._keepvideo = keepvideo

	@staticmethod
	def get_audio_codec(path):
		"""Return the audio codec name of `path` via ffprobe, or None."""
		# FIX: the devnull handle used for ffprobe's stderr was opened
		# inline and never closed; close it explicitly.
		try:
			devnull = file(os.path.devnull, 'w')
			try:
				cmd = ['ffprobe', '-show_streams', '--', path]
				handle = subprocess.Popen(cmd, stderr=devnull, stdout=subprocess.PIPE)
				output = handle.communicate()[0]
				if handle.wait() != 0:
					return None
			finally:
				devnull.close()
		except (IOError, OSError):
			return None
		audio_codec = None
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
				return audio_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		"""Run ffmpeg to convert `path` to `out_path`.

		Raises AudioConversionError if ffmpeg is missing or exits non-zero.
		"""
		if codec is None:
			acodec_opts = []
		else:
			acodec_opts = ['-acodec', codec]
		cmd = ['ffmpeg', '-y', '-i', path, '-vn'] + acodec_opts + more_opts + ['--', out_path]
		try:
			p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			stdout,stderr = p.communicate()
		except (IOError, OSError):
			e = sys.exc_info()[1]
			if isinstance(e, OSError) and e.errno == 2:
				raise AudioConversionError('ffmpeg not found. Please install ffmpeg.')
			else:
				raise e
		if p.returncode != 0:
			# report the last line of ffmpeg's stderr as the failure reason
			msg = stderr.strip().split('\n')[-1]
			raise AudioConversionError(msg)

	def run(self, information):
		path = information['filepath']

		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		more_opts = []
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'):
			if self._preferredcodec == 'm4a' and filecodec == 'aac':
				# Lossless, but in another container
				acodec = 'copy'
				extension = self._preferredcodec
				more_opts = ['-absf', 'aac_adtstoasc']
			elif filecodec in ['aac', 'mp3', 'vorbis']:
				# Lossless if possible
				acodec = 'copy'
				extension = filecodec
				if filecodec == 'aac':
					more_opts = ['-f', 'adts']
				if filecodec == 'vorbis':
					extension = 'ogg'
			else:
				# MP3 otherwise.
				acodec = 'libmp3lame'
				extension = 'mp3'
				if self._preferredquality is not None:
					more_opts += ['-ab', self._preferredquality]
		else:
			# We convert the audio (lossy)
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec]
			extension = self._preferredcodec
			if self._preferredquality is not None:
				more_opts += ['-ab', self._preferredquality]
			if self._preferredcodec == 'aac':
				more_opts += ['-f', 'adts']
			if self._preferredcodec == 'm4a':
				more_opts += ['-absf', 'aac_adtstoasc']
			if self._preferredcodec == 'vorbis':
				extension = 'ogg'
			if self._preferredcodec == 'wav':
				extension = 'wav'
				more_opts += ['-f', 'wav']

		(prefix, ext) = os.path.splitext(path)
		new_path = prefix + '.' + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
		# bare except is deliberate here: AudioConversionError derives from
		# BaseException, so `except Exception` would not catch it
		try:
			self.run_ffmpeg(path, new_path, acodec, more_opts)
		except:
			etype,e,tb = sys.exc_info()
			if isinstance(e, AudioConversionError):
				self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message)
			else:
				self._downloader.to_stderr(u'ERROR: error running ffmpeg')
			return None

		# Try to update the date time for extracted audio file.
		if information.get('filetime') is not None:
			try:
				os.utime(new_path, (time.time(), information['filetime']))
			except:
				self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file')

		if not self._keepvideo:
			try:
				os.remove(path)
			except (IOError, OSError):
				self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
				return None

		information['filepath'] = new_path
		return information
4064 def updateSelf(downloader, filename): 
4065         ''' Update the program file with the latest version from the repository ''' 
4066         # Note: downloader only used for options 
4067         if not os.access(filename, os.W_OK): 
4068                 sys.exit('ERROR: no write permissions on %s' % filename) 
4070         downloader.to_screen('Updating to latest version...') 
4074                         urlh = urllib.urlopen(UPDATE_URL) 
4075                         newcontent = urlh.read() 
4077                         vmatch = re.search("__version__ = '([^']+)'", newcontent) 
4078                         if vmatch is not None and vmatch.group(1) == __version__: 
4079                                 downloader.to_screen('youtube-dl is up-to-date (' + __version__ + ')') 
4083         except (IOError, OSError), err: 
4084                 sys.exit('ERROR: unable to download latest version') 
4087                 outf = open(filename, 'wb') 
4089                         outf.write(newcontent) 
4092         except (IOError, OSError), err: 
4093                 sys.exit('ERROR: unable to overwrite current version') 
4095         downloader.to_screen('Updated youtube-dl. Restart youtube-dl to use the new version.') 
4103         def _readOptions(filename): 
4105                         optionf = open(filename) 
4107                         return [] # silently skip if file is not present 
4111                                 res += shlex.split(l, comments=True) 
4116         def _format_option_string(option): 
4117                 ''' ('-o', '--option') -> -o, --format METAVAR''' 
4121                 if option._short_opts: opts.append(option._short_opts[0]) 
4122                 if option._long_opts: opts.append(option._long_opts[0]) 
4123                 if len(opts) > 1: opts.insert(1, ', ') 
4125                 if option.takes_value(): opts.append(' %s' % option.metavar) 
4127                 return "".join(opts) 
4129         def _find_term_columns(): 
4130                 columns = os.environ.get('COLUMNS', None) 
4135                         sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE) 
4136                         out,err = sp.communicate() 
4137                         return int(out.split()[1]) 
4143         max_help_position = 80 
4145         # No need to wrap help messages if we're on a wide console 
4146         columns = _find_term_columns() 
4147         if columns: max_width = columns 
4149         fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position) 
4150         fmt.format_option_strings = _format_option_string 
4153                 'version'   : __version__, 
4155                 'usage' : '%prog [options] url [url...]', 
4156                 'conflict_handler' : 'resolve', 
4159         parser = optparse.OptionParser(**kw) 
4162         general        = optparse.OptionGroup(parser, 'General Options') 
4163         selection      = optparse.OptionGroup(parser, 'Video Selection') 
4164         authentication = optparse.OptionGroup(parser, 'Authentication Options') 
4165         video_format   = optparse.OptionGroup(parser, 'Video Format Options') 
4166         postproc       = optparse.OptionGroup(parser, 'Post-processing Options') 
4167         filesystem     = optparse.OptionGroup(parser, 'Filesystem Options') 
4168         verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') 
4170         general.add_option('-h', '--help', 
4171                         action='help', help='print this help text and exit') 
4172         general.add_option('-v', '--version', 
4173                         action='version', help='print program version and exit') 
4174         general.add_option('-U', '--update', 
4175                         action='store_true', dest='update_self', help='update this program to latest version') 
4176         general.add_option('-i', '--ignore-errors', 
4177                         action='store_true', dest='ignoreerrors', help='continue on download errors', default=False) 
4178         general.add_option('-r', '--rate-limit', 
4179                         dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') 
4180         general.add_option('-R', '--retries', 
4181                         dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) 
4182         general.add_option('--dump-user-agent', 
4183                         action='store_true', dest='dump_user_agent', 
4184                         help='display the current browser identification', default=False) 
4185         general.add_option('--list-extractors', 
4186                         action='store_true', dest='list_extractors', 
4187                         help='List all supported extractors and the URLs they would handle', default=False) 
4189         selection.add_option('--playlist-start', 
4190                         dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) 
4191         selection.add_option('--playlist-end', 
4192                         dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1) 
4193         selection.add_option('--match-title', dest='matchtitle', metavar='REGEX',help='download only matching titles (regex or caseless sub-string)') 
4194         selection.add_option('--reject-title', dest='rejecttitle', metavar='REGEX',help='skip download for matching titles (regex or caseless sub-string)') 
4195         selection.add_option('--max-downloads', metavar='NUMBER', dest='max_downloads', help='Abort after downloading NUMBER files', default=None) 
4197         authentication.add_option('-u', '--username', 
4198                         dest='username', metavar='USERNAME', help='account username') 
4199         authentication.add_option('-p', '--password', 
4200                         dest='password', metavar='PASSWORD', help='account password') 
4201         authentication.add_option('-n', '--netrc', 
4202                         action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False) 
4205         video_format.add_option('-f', '--format', 
4206                         action='store', dest='format', metavar='FORMAT', help='video format code') 
4207         video_format.add_option('--all-formats', 
4208                         action='store_const', dest='format', help='download all available video formats', const='all') 
4209         video_format.add_option('--prefer-free-formats', 
4210                         action='store_true', dest='prefer_free_formats', default=False, help='prefer free video formats unless a specific one is requested') 
4211         video_format.add_option('--max-quality', 
4212                         action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') 
4213         video_format.add_option('-F', '--list-formats', 
4214                         action='store_true', dest='listformats', help='list all available formats (currently youtube only)') 
4217         verbosity.add_option('-q', '--quiet', 
4218                         action='store_true', dest='quiet', help='activates quiet mode', default=False) 
4219         verbosity.add_option('-s', '--simulate', 
4220                         action='store_true', dest='simulate', help='do not download the video and do not write anything to disk', default=False) 
4221         verbosity.add_option('--skip-download', 
4222                         action='store_true', dest='skip_download', help='do not download the video', default=False) 
4223         verbosity.add_option('-g', '--get-url', 
4224                         action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False) 
4225         verbosity.add_option('-e', '--get-title', 
4226                         action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False) 
4227         verbosity.add_option('--get-thumbnail', 
4228                         action='store_true', dest='getthumbnail', 
4229                         help='simulate, quiet but print thumbnail URL', default=False) 
4230         verbosity.add_option('--get-description', 
4231                         action='store_true', dest='getdescription', 
4232                         help='simulate, quiet but print video description', default=False) 
4233         verbosity.add_option('--get-filename', 
4234                         action='store_true', dest='getfilename', 
4235                         help='simulate, quiet but print output filename', default=False) 
4236         verbosity.add_option('--get-format', 
4237                         action='store_true', dest='getformat', 
4238                         help='simulate, quiet but print output format', default=False) 
4239         verbosity.add_option('--no-progress', 
4240                         action='store_true', dest='noprogress', help='do not print progress bar', default=False) 
4241         verbosity.add_option('--console-title', 
4242                         action='store_true', dest='consoletitle', 
4243                         help='display progress in console titlebar', default=False) 
4246         filesystem.add_option('-t', '--title', 
4247                         action='store_true', dest='usetitle', help='use title in file name', default=False) 
4248         filesystem.add_option('-l', '--literal', 
4249                         action='store_true', dest='useliteral', help='use literal title in file name', default=False) 
4250         filesystem.add_option('-A', '--auto-number', 
4251                         action='store_true', dest='autonumber', 
4252                         help='number downloaded files starting from 00000', default=False) 
4253         filesystem.add_option('-o', '--output', 
4254                         dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), and %% for a literal percent. Use - to output to stdout.') 
4255         filesystem.add_option('-a', '--batch-file', 
4256                         dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') 
4257         filesystem.add_option('-w', '--no-overwrites', 
4258                         action='store_true', dest='nooverwrites', help='do not overwrite files', default=False) 
4259         filesystem.add_option('-c', '--continue', 
4260                         action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False) 
4261         filesystem.add_option('--no-continue', 
4262                         action='store_false', dest='continue_dl', 
4263                         help='do not resume partially downloaded files (restart from beginning)') 
4264         filesystem.add_option('--cookies', 
4265                         dest='cookiefile', metavar='FILE', help='file to read cookies from and dump cookie jar in') 
4266         filesystem.add_option('--no-part', 
4267                         action='store_true', dest='nopart', help='do not use .part files', default=False) 
4268         filesystem.add_option('--no-mtime', 
4269                         action='store_false', dest='updatetime', 
4270                         help='do not use the Last-modified header to set the file modification time', default=True) 
4271         filesystem.add_option('--write-description', 
4272                         action='store_true', dest='writedescription', 
4273                         help='write video description to a .description file', default=False) 
4274         filesystem.add_option('--write-info-json', 
4275                         action='store_true', dest='writeinfojson', 
4276                         help='write video metadata to a .info.json file', default=False) 
4279         postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, 
4280                         help='convert video files to audio-only files (requires ffmpeg and ffprobe)') 
4281         postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', 
4282                         help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default') 
4283         postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='128K', 
4284                         help='ffmpeg audio bitrate specification, 128k by default') 
4285         postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, 
4286                         help='keeps the video file on disk after the post-processing; the video is erased by default') 
4289         parser.add_option_group(general) 
4290         parser.add_option_group(selection) 
4291         parser.add_option_group(filesystem) 
4292         parser.add_option_group(verbosity) 
4293         parser.add_option_group(video_format) 
4294         parser.add_option_group(authentication) 
4295         parser.add_option_group(postproc) 
4297         xdg_config_home = os.environ.get('XDG_CONFIG_HOME') 
4299                 userConf = os.path.join(xdg_config_home, 'youtube-dl.conf') 
4301                 userConf = os.path.join(os.path.expanduser('~'), '.config', 'youtube-dl.conf') 
4302         argv = _readOptions('/etc/youtube-dl.conf') + _readOptions(userConf) + sys.argv[1:] 
4303         opts, args = parser.parse_args(argv) 
4305         return parser, opts, args 
def gen_extractors():
	"""Return a list of an instance of every supported extractor.

	The order does matter; the first extractor matched is the one handling
	the URL.
	"""
	# Shared instances: the playlist/user/search extractors delegate to the
	# plain extractor they wrap, so one instance is built and reused.
	youtube_ie = YoutubeIE()
	google_ie = GoogleIE()
	yahoo_ie = YahooIE()
	# NOTE(review): part of this list was lost in a garbled extraction; only
	# the surviving entries are listed below -- restore the full upstream
	# list before shipping. The more specific extractors (playlist, user,
	# search) must come before the plain ones they share URL space with.
	return [
		YoutubePlaylistIE(youtube_ie),
		YoutubeUserIE(youtube_ie),
		YoutubeSearchIE(youtube_ie),
		youtube_ie,
		MetacafeIE(youtube_ie),
		google_ie,
		GoogleSearchIE(google_ie),
		yahoo_ie,
		YahooSearchIE(yahoo_ie),
		StanfordOpenClassroomIE(),
	]
4344         parser, opts, args = parseOpts() 
4346         # Open appropriate CookieJar 
4347         if opts.cookiefile is None: 
4348                 jar = cookielib.CookieJar() 
4351                         jar = cookielib.MozillaCookieJar(opts.cookiefile) 
4352                         if os.path.isfile(opts.cookiefile) and os.access(opts.cookiefile, os.R_OK): 
4354                 except (IOError, OSError), err: 
4355                         sys.exit(u'ERROR: unable to open cookie file') 
4358         if opts.dump_user_agent: 
4359                 print std_headers['User-Agent'] 
4362         # Batch file verification 
4364         if opts.batchfile is not None: 
4366                         if opts.batchfile == '-': 
4369                                 batchfd = open(opts.batchfile, 'r') 
4370                         batchurls = batchfd.readlines() 
4371                         batchurls = [x.strip() for x in batchurls] 
4372                         batchurls = [x for x in batchurls if len(x) > 0 and not re.search(r'^[#/;]', x)] 
4374                         sys.exit(u'ERROR: batch file could not be read') 
4375         all_urls = batchurls + args 
4377         # General configuration 
4378         cookie_processor = urllib2.HTTPCookieProcessor(jar) 
4379         opener = urllib2.build_opener(urllib2.ProxyHandler(), cookie_processor, YoutubeDLHandler()) 
4380         urllib2.install_opener(opener) 
4381         socket.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) 
4383         extractors = gen_extractors() 
4385         if opts.list_extractors: 
4386                 for ie in extractors: 
4388                         matchedUrls = filter(lambda url: ie.suitable(url), all_urls) 
4389                         all_urls = filter(lambda url: url not in matchedUrls, all_urls) 
4390                         for mu in matchedUrls: 
4394         # Conflicting, missing and erroneous options 
4395         if opts.usenetrc and (opts.username is not None or opts.password is not None): 
4396                 parser.error(u'using .netrc conflicts with giving username/password') 
4397         if opts.password is not None and opts.username is None: 
4398                 parser.error(u'account username missing') 
4399         if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): 
4400                 parser.error(u'using output template conflicts with using title, literal title or auto number') 
4401         if opts.usetitle and opts.useliteral: 
4402                 parser.error(u'using title conflicts with using literal title') 
4403         if opts.username is not None and opts.password is None: 
4404                 opts.password = getpass.getpass(u'Type account password and press return:') 
4405         if opts.ratelimit is not None: 
4406                 numeric_limit = FileDownloader.parse_bytes(opts.ratelimit) 
4407                 if numeric_limit is None: 
4408                         parser.error(u'invalid rate limit specified') 
4409                 opts.ratelimit = numeric_limit 
4410         if opts.retries is not None: 
4412                         opts.retries = long(opts.retries) 
4413                 except (TypeError, ValueError), err: 
4414                         parser.error(u'invalid retry count specified') 
4416                 opts.playliststart = int(opts.playliststart) 
4417                 if opts.playliststart <= 0: 
4418                         raise ValueError(u'Playlist start must be positive') 
4419         except (TypeError, ValueError), err: 
4420                 parser.error(u'invalid playlist start number specified') 
4422                 opts.playlistend = int(opts.playlistend) 
4423                 if opts.playlistend != -1 and (opts.playlistend <= 0 or opts.playlistend < opts.playliststart): 
4424                         raise ValueError(u'Playlist end must be greater than playlist start') 
4425         except (TypeError, ValueError), err: 
4426                 parser.error(u'invalid playlist end number specified') 
4427         if opts.extractaudio: 
4428                 if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']: 
4429                         parser.error(u'invalid audio format specified') 
4432         fd = FileDownloader({ 
4433                 'usenetrc': opts.usenetrc, 
4434                 'username': opts.username, 
4435                 'password': opts.password, 
4436                 'quiet': (opts.quiet or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 
4437                 'forceurl': opts.geturl, 
4438                 'forcetitle': opts.gettitle, 
4439                 'forcethumbnail': opts.getthumbnail, 
4440                 'forcedescription': opts.getdescription, 
4441                 'forcefilename': opts.getfilename, 
4442                 'forceformat': opts.getformat, 
4443                 'simulate': opts.simulate, 
4444                 'skip_download': (opts.skip_download or opts.simulate or opts.geturl or opts.gettitle or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat), 
4445                 'format': opts.format, 
4446                 'format_limit': opts.format_limit, 
4447                 'listformats': opts.listformats, 
4448                 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) 
4449                         or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') 
4450                         or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') 
4451                         or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') 
4452                         or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') 
4453                         or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') 
4454                         or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') 
4455                         or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') 
4456                         or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') 
4457                         or u'%(id)s.%(ext)s'), 
4458                 'ignoreerrors': opts.ignoreerrors, 
4459                 'ratelimit': opts.ratelimit, 
4460                 'nooverwrites': opts.nooverwrites, 
4461                 'retries': opts.retries, 
4462                 'continuedl': opts.continue_dl, 
4463                 'noprogress': opts.noprogress, 
4464                 'playliststart': opts.playliststart, 
4465                 'playlistend': opts.playlistend, 
4466                 'logtostderr': opts.outtmpl == '-', 
4467                 'consoletitle': opts.consoletitle, 
4468                 'nopart': opts.nopart, 
4469                 'updatetime': opts.updatetime, 
4470                 'writedescription': opts.writedescription, 
4471                 'writeinfojson': opts.writeinfojson, 
4472                 'matchtitle': opts.matchtitle, 
4473                 'rejecttitle': opts.rejecttitle, 
4474                 'max_downloads': opts.max_downloads, 
4475                 'prefer_free_formats': opts.prefer_free_formats, 
4477         for extractor in extractors: 
4478                 fd.add_info_extractor(extractor) 
4481         if opts.extractaudio: 
4482                 fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo)) 
4485         if opts.update_self: 
4486                 updateSelf(fd, sys.argv[0]) 
4489         if len(all_urls) < 1: 
4490                 if not opts.update_self: 
4491                         parser.error(u'you must provide at least one URL') 
4496                 retcode = fd.download(all_urls) 
4497         except MaxDownloadsReached: 
4498                 fd.to_screen(u'--max-download limit reached, aborting.') 
4501         # Dump cookie jar if requested 
4502         if opts.cookiefile is not None: 
4505                 except (IOError, OSError), err: 
4506                         sys.exit(u'ERROR: unable to save cookie jar') 
def main():
	"""Entry point: run _real_main and translate the known exceptions into
	process exit codes / messages.

	NOTE(review): the `def`/`try:` lines were lost in a garbled extraction and
	were reconstructed from the orphaned `except` clauses -- verify against
	the canonical source.
	"""
	try:
		_real_main()
	except DownloadError:
		# The downloader already reported the error; just signal failure.
		sys.exit(1)
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')

if __name__ == '__main__':
	main()

# vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: