2 # -*- coding: utf-8 -*- 
   5         'Ricardo Garcia Gonzalez', 
  13         'Philipp Hagemeister', 
  17 __license__ 
= 'Public Domain' 
  18 __version__ 
= '2011.09.14' 
  20 UPDATE_URL 
= 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' 
  48 except ImportError: # Python 2.4 
  51         import cStringIO 
as StringIO
 
  55 # parse_qs was moved from the cgi module to the urlparse module recently. 
  57         from urlparse 
import parse_qs
 
  59         from cgi 
import parse_qs
 
  67         import xml
.etree
.ElementTree
 
  68 except ImportError: # Python<2.5 
  69         pass # Not officially supported, but let it slip 
  72         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:5.0.1) Gecko/20100101 Firefox/5.0.1', 
  73         'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 
  74         'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 
  75         'Accept-Encoding': 'gzip, deflate', 
  76         'Accept-Language': 'en-us,en;q=0.5', 
  79 simple_title_chars 
= string
.ascii_letters
.decode('ascii') + string
.digits
.decode('ascii') 
  83 except ImportError: # Python <2.6, use trivialjson (https://github.com/phihag/trivialjson): 
  89                         def raiseError(msg
, i
): 
  90                                 raise ValueError(msg 
+ ' at position ' + str(i
) + ' of ' + repr(s
) + ': ' + repr(s
[i
:])) 
  91                         def skipSpace(i
, expectMore
=True): 
  92                                 while i 
< len(s
) and s
[i
] in ' \t\r\n': 
  96                                                 raiseError('Premature end', i
) 
  98                         def decodeEscape(match
): 
 114                                                 return unichr(int(esc
[1:5], 16)) 
 115                                         if len(esc
) == 5+6 and esc
[5:7] == '\\u': 
 116                                                 hi 
= int(esc
[1:5], 16) 
 117                                                 low 
= int(esc
[7:11], 16) 
 118                                                 return unichr((hi 
- 0xd800) * 0x400 + low 
- 0xdc00 + 0x10000) 
 119                                 raise ValueError('Unknown escape ' + str(esc
)) 
 126                                         while s
[e
-bslashes
-1] == '\\': 
 128                                         if bslashes 
% 2 == 1: 
 132                                 rexp 
= re
.compile(r
'\\(u[dD][89aAbB][0-9a-fA-F]{2}\\u[0-9a-fA-F]{4}|u[0-9a-fA-F]{4}|.|$)') 
 133                                 stri 
= rexp
.sub(decodeEscape
, s
[i
:e
]) 
 139                                 if s
[i
] == '}': # Empty dictionary 
 143                                                 raiseError('Expected a string object key', i
) 
 144                                         i
,key 
= parseString(i
) 
 146                                         if i 
>= len(s
) or s
[i
] != ':': 
 147                                                 raiseError('Expected a colon', i
) 
 154                                                 raiseError('Expected comma or closing curly brace', i
) 
 159                                 if s
[i
] == ']': # Empty array 
 164                                         i 
= skipSpace(i
) # Raise exception if premature end 
 168                                                 raiseError('Expected a comma or closing bracket', i
) 
 170                         def parseDiscrete(i
): 
 171                                 for k
,v 
in {'true': True, 'false': False, 'null': None}.items(): 
 172                                         if s
.startswith(k
, i
): 
 174                                 raiseError('Not a boolean (or null)', i
) 
 176                                 mobj 
= re
.match('^(-?(0|[1-9][0-9]*)(\.[0-9]*)?([eE][+-]?[0-9]+)?)', s
[i
:]) 
 178                                         raiseError('Not a number', i
) 
 180                                 if '.' in nums 
or 'e' in nums 
or 'E' in nums
: 
 181                                         return (i
+len(nums
), float(nums
)) 
 182                                 return (i
+len(nums
), int(nums
)) 
 183                         CHARMAP 
= {'{': parseObj
, '[': parseArray
, '"': parseString
, 't': parseDiscrete
, 'f': parseDiscrete
, 'n': parseDiscrete
} 
 186                                 i
,res 
= CHARMAP
.get(s
[i
], parseNumber
)(i
) 
 187                                 i 
= skipSpace(i
, False) 
 191                                 raise ValueError('Extra data at end of input (index ' + str(i
) + ' of ' + repr(s
) + ': ' + repr(s
[i
:]) + ')') 
 194 def preferredencoding(): 
 195         """Get preferred encoding. 
 197         Returns the best encoding scheme for the system, based on 
 198         locale.getpreferredencoding() and some further tweaks. 
 200         def yield_preferredencoding(): 
 202                         pref 
= locale
.getpreferredencoding() 
 208         return yield_preferredencoding().next() 
 211 def htmlentity_transform(matchobj
): 
 212         """Transforms an HTML entity to a Unicode character. 
 214         This function receives a match object and is intended to be used with 
 215         the re.sub() function. 
 217         entity 
= matchobj
.group(1) 
 219         # Known non-numeric HTML entity 
 220         if entity 
in htmlentitydefs
.name2codepoint
: 
 221                 return unichr(htmlentitydefs
.name2codepoint
[entity
]) 
 224         mobj 
= re
.match(ur
'(?u)#(x?\d+)', entity
) 
 226                 numstr 
= mobj
.group(1) 
 227                 if numstr
.startswith(u
'x'): 
 229                         numstr 
= u
'0%s' % numstr
 
 232                 return unichr(long(numstr
, base
)) 
 234         # Unknown entity in name, return its literal representation 
 235         return (u
'&%s;' % entity
) 
 238 def sanitize_title(utitle
): 
 239         """Sanitizes a video title so it could be used as part of a filename.""" 
 240         utitle 
= re
.sub(ur
'(?u)&(.+?);', htmlentity_transform
, utitle
) 
 241         return utitle
.replace(unicode(os
.sep
), u
'%') 
 244 def sanitize_open(filename
, open_mode
): 
 245         """Try to open the given filename, and slightly tweak it if this fails. 
 247         Attempts to open the given filename. If this fails, it tries to change 
 248         the filename slightly, step by step, until it's either able to open it 
 249         or it fails and raises a final exception, like the standard open() 
 252         It returns the tuple (stream, definitive_file_name). 
 256                         if sys
.platform 
== 'win32': 
 258                                 msvcrt
.setmode(sys
.stdout
.fileno(), os
.O_BINARY
) 
 259                         return (sys
.stdout
, filename
) 
 260                 stream 
= open(filename
, open_mode
) 
 261                 return (stream
, filename
) 
 262         except (IOError, OSError), err
: 
 263                 # In case of error, try to remove win32 forbidden chars 
 264                 filename 
= re
.sub(ur
'[/<>:"\|\?\*]', u
'#', filename
) 
 266                 # An exception here should be caught in the caller 
 267                 stream 
= open(filename
, open_mode
) 
 268                 return (stream
, filename
) 
 271 def timeconvert(timestr
): 
 272         """Convert RFC 2822 defined time string into system timestamp""" 
 274         timetuple 
= email
.utils
.parsedate_tz(timestr
) 
 275         if timetuple 
is not None: 
 276                 timestamp 
= email
.utils
.mktime_tz(timetuple
) 
 280 class DownloadError(Exception): 
 281         """Download Error exception. 
 283         This exception may be thrown by FileDownloader objects if they are not 
 284         configured to continue on errors. They will contain the appropriate 
 290 class SameFileError(Exception): 
 291         """Same File exception. 
 293         This exception will be thrown by FileDownloader objects if they detect 
 294         multiple files would have to be downloaded to the same file on disk. 
 299 class PostProcessingError(Exception): 
 300         """Post Processing exception. 
 302         This exception may be raised by PostProcessor's .run() method to 
 303         indicate an error in the postprocessing task. 
 308 class UnavailableVideoError(Exception): 
 309         """Unavailable Format exception. 
 311         This exception will be thrown when a video is requested 
 312         in a format that is not available for that video. 
 317 class ContentTooShortError(Exception): 
 318         """Content Too Short exception. 
 320         This exception may be raised by FileDownloader objects when a file they 
 321         download is too small for what the server announced first, indicating 
 322         the connection was probably interrupted. 
	def __init__(self, downloaded, expected):
		"""Record the served vs. announced byte counts for later reporting."""
		self.expected = expected
		self.downloaded = downloaded
 333 class YoutubeDLHandler(urllib2
.HTTPHandler
): 
 334         """Handler for HTTP requests and responses. 
 336         This class, when installed with an OpenerDirector, automatically adds 
 337         the standard headers to every HTTP request and handles gzipped and 
 338         deflated responses from web servers. If compression is to be avoided in 
 339         a particular request, the original request in the program code only has 
 340         to include the HTTP header "Youtubedl-No-Compression", which will be 
 341         removed before making the real request. 
 343         Part of this code was copied from: 
 345         http://techknack.net/python-urllib2-handlers/ 
 347         Andrew Rowls, the author of that code, agreed to release it to the 
 354                         return zlib
.decompress(data
, -zlib
.MAX_WBITS
) 
 356                         return zlib
.decompress(data
) 
 359         def addinfourl_wrapper(stream
, headers
, url
, code
): 
 360                 if hasattr(urllib2
.addinfourl
, 'getcode'): 
 361                         return urllib2
.addinfourl(stream
, headers
, url
, code
) 
 362                 ret 
= urllib2
.addinfourl(stream
, headers
, url
) 
 366         def http_request(self
, req
): 
 367                 for h 
in std_headers
: 
 370                         req
.add_header(h
, std_headers
[h
]) 
 371                 if 'Youtubedl-no-compression' in req
.headers
: 
 372                         if 'Accept-encoding' in req
.headers
: 
 373                                 del req
.headers
['Accept-encoding'] 
 374                         del req
.headers
['Youtubedl-no-compression'] 
 377         def http_response(self
, req
, resp
): 
 380                 if resp
.headers
.get('Content-encoding', '') == 'gzip': 
 381                         gz 
= gzip
.GzipFile(fileobj
=StringIO
.StringIO(resp
.read()), mode
='r') 
 382                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 383                         resp
.msg 
= old_resp
.msg
 
 385                 if resp
.headers
.get('Content-encoding', '') == 'deflate': 
 386                         gz 
= StringIO
.StringIO(self
.deflate(resp
.read())) 
 387                         resp 
= self
.addinfourl_wrapper(gz
, old_resp
.headers
, old_resp
.url
, old_resp
.code
) 
 388                         resp
.msg 
= old_resp
.msg
 
 392 class FileDownloader(object): 
 393         """File Downloader class. 
 395         File downloader objects are the ones responsible of downloading the 
 396         actual video file and writing it to disk if the user has requested 
 397         it, among some other tasks. In most cases there should be one per 
 398         program. As, given a video URL, the downloader doesn't know how to 
 399         extract all the needed information, task that InfoExtractors do, it 
 400         has to pass the URL to one of them. 
 402         For this, file downloader objects have a method that allows 
 403         InfoExtractors to be registered in a given order. When it is passed 
 404         a URL, the file downloader handles it to the first InfoExtractor it 
 405         finds that reports being able to handle it. The InfoExtractor extracts 
 406         all the information about the video or videos the URL refers to, and 
 407         asks the FileDownloader to process the video information, possibly 
 408         downloading the video. 
 410         File downloaders accept a lot of parameters. In order not to saturate 
 411         the object constructor with arguments, it receives a dictionary of 
 412         options instead. These options are available through the params 
 413         attribute for the InfoExtractors to use. The FileDownloader also 
 414         registers itself as the downloader in charge for the InfoExtractors 
 415         that are added to it, so this is a "mutual registration". 
 419         username:         Username for authentication purposes. 
 420         password:         Password for authentication purposes. 
 421         usenetrc:         Use netrc for authentication instead. 
 422         quiet:            Do not print messages to stdout. 
 423         forceurl:         Force printing final URL. 
 424         forcetitle:       Force printing title. 
 425         forcethumbnail:   Force printing thumbnail URL. 
 426         forcedescription: Force printing description. 
 427         forcefilename:    Force printing final filename. 
 428         simulate:         Do not download the video files. 
 429         format:           Video format code. 
 430         format_limit:     Highest quality format to try. 
 431         outtmpl:          Template for output names. 
 432         ignoreerrors:     Do not stop on download errors. 
 433         ratelimit:        Download speed limit, in bytes/sec. 
 434         nooverwrites:     Prevent overwriting files. 
 435         retries:          Number of times to retry for HTTP error 5xx 
 436         continuedl:       Try to continue downloads if possible. 
 437         noprogress:       Do not print the progress bar. 
 438         playliststart:    Playlist item to start at. 
 439         playlistend:      Playlist item to end at. 
 440         logtostderr:      Log messages to stderr instead of stdout. 
 441         consoletitle:     Display progress in console window's titlebar. 
 442         nopart:           Do not use temporary .part files. 
 443         updatetime:       Use the Last-modified header to set output file timestamps. 
 444         writedescription: Write the video description to a .description file 
 445         writeinfojson:    Write the video description to a .info.json file 
 451         _download_retcode 
= None 
 452         _num_downloads 
= None 
 455         def __init__(self
, params
): 
 456                 """Create a FileDownloader object with the given options.""" 
 459                 self
._download
_retcode 
= 0 
 460                 self
._num
_downloads 
= 0 
 461                 self
._screen
_file 
= [sys
.stdout
, sys
.stderr
][params
.get('logtostderr', False)] 
 465         def format_bytes(bytes): 
 468                 if type(bytes) is str: 
 473                         exponent 
= long(math
.log(bytes, 1024.0)) 
 474                 suffix 
= 'bkMGTPEZY'[exponent
] 
 475                 converted 
= float(bytes) / float(1024 ** exponent
) 
 476                 return '%.2f%s' % (converted
, suffix
) 
 479         def calc_percent(byte_counter
, data_len
): 
 482                 return '%6s' % ('%3.1f%%' % (float(byte_counter
) / float(data_len
) * 100.0)) 
 485         def calc_eta(start
, now
, total
, current
): 
 489                 if current 
== 0 or dif 
< 0.001: # One millisecond 
 491                 rate 
= float(current
) / dif
 
 492                 eta 
= long((float(total
) - float(current
)) / rate
) 
 493                 (eta_mins
, eta_secs
) = divmod(eta
, 60) 
 496                 return '%02d:%02d' % (eta_mins
, eta_secs
) 
 499         def calc_speed(start
, now
, bytes): 
 501                 if bytes == 0 or dif 
< 0.001: # One millisecond 
 502                         return '%10s' % '---b/s' 
 503                 return '%10s' % ('%s/s' % FileDownloader
.format_bytes(float(bytes) / dif
)) 
 506         def best_block_size(elapsed_time
, bytes): 
 507                 new_min 
= max(bytes / 2.0, 1.0) 
 508                 new_max 
= min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB 
 509                 if elapsed_time 
< 0.001: 
 511                 rate 
= bytes / elapsed_time
 
 519         def parse_bytes(bytestr
): 
 520                 """Parse a string indicating a byte quantity into a long integer.""" 
 521                 matchobj 
= re
.match(r
'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr
) 
 524                 number 
= float(matchobj
.group(1)) 
 525                 multiplier 
= 1024.0 ** 'bkmgtpezy'.index(matchobj
.group(2).lower()) 
 526                 return long(round(number 
* multiplier
)) 
 528         def add_info_extractor(self
, ie
): 
 529                 """Add an InfoExtractor object to the end of the list.""" 
 531                 ie
.set_downloader(self
) 
 533         def add_post_processor(self
, pp
): 
 534                 """Add a PostProcessor object to the end of the chain.""" 
 536                 pp
.set_downloader(self
) 
 538         def to_screen(self
, message
, skip_eol
=False, ignore_encoding_errors
=False): 
 539                 """Print message to stdout if not in quiet mode.""" 
 541                         if not self
.params
.get('quiet', False): 
 542                                 terminator 
= [u
'\n', u
''][skip_eol
] 
 543                                 print >>self
._screen
_file
, (u
'%s%s' % (message
, terminator
)).encode(preferredencoding()), 
 544                         self
._screen
_file
.flush() 
 545                 except (UnicodeEncodeError), err
: 
 546                         if not ignore_encoding_errors
: 
	def to_stderr(self, message):
		"""Print message to stderr."""
		# Encode with the locale's preferred encoding before writing.
		encoded = message.encode(preferredencoding())
		print >>sys.stderr, encoded
 553         def to_cons_title(self
, message
): 
 554                 """Set console/terminal window title to message.""" 
 555                 if not self
.params
.get('consoletitle', False): 
 557                 if os
.name 
== 'nt' and ctypes
.windll
.kernel32
.GetConsoleWindow(): 
 558                         # c_wchar_p() might not be necessary if `message` is 
 559                         # already of type unicode() 
 560                         ctypes
.windll
.kernel32
.SetConsoleTitleW(ctypes
.c_wchar_p(message
)) 
 561                 elif 'TERM' in os
.environ
: 
 562                         sys
.stderr
.write('\033]0;%s\007' % message
.encode(preferredencoding())) 
 564         def fixed_template(self
): 
 565                 """Checks if the output template is fixed.""" 
 566                 return (re
.search(ur
'(?u)%\(.+?\)s', self
.params
['outtmpl']) is None) 
	def trouble(self, message=None):
		"""Determine action to take when a download problem appears.

		Depending on if the downloader has been configured to ignore
		download errors or not, this method may throw an exception or
		not when errors are found, after printing the message.
		"""
		if message is not None:
			self.to_stderr(message)
		ignoring = self.params.get('ignoreerrors', False)
		if not ignoring:
			raise DownloadError(message)
		# Errors are tolerated: remember a non-zero exit status instead.
		self._download_retcode = 1
 581         def slow_down(self
, start_time
, byte_counter
): 
 582                 """Sleep if the download speed is over the rate limit.""" 
 583                 rate_limit 
= self
.params
.get('ratelimit', None) 
 584                 if rate_limit 
is None or byte_counter 
== 0: 
 587                 elapsed 
= now 
- start_time
 
 590                 speed 
= float(byte_counter
) / elapsed
 
 591                 if speed 
> rate_limit
: 
 592                         time
.sleep((byte_counter 
- rate_limit 
* (now 
- start_time
)) / rate_limit
) 
 594         def temp_name(self
, filename
): 
 595                 """Returns a temporary filename for the given filename.""" 
 596                 if self
.params
.get('nopart', False) or filename 
== u
'-' or \
 
 597                                 (os
.path
.exists(filename
) and not os
.path
.isfile(filename
)): 
 599                 return filename 
+ u
'.part' 
 601         def undo_temp_name(self
, filename
): 
 602                 if filename
.endswith(u
'.part'): 
 603                         return filename
[:-len(u
'.part')] 
 606         def try_rename(self
, old_filename
, new_filename
): 
 608                         if old_filename 
== new_filename
: 
 610                         os
.rename(old_filename
, new_filename
) 
 611                 except (IOError, OSError), err
: 
 612                         self
.trouble(u
'ERROR: unable to rename file') 
 614         def try_utime(self
, filename
, last_modified_hdr
): 
 615                 """Try to set the last-modified time of the given file.""" 
 616                 if last_modified_hdr 
is None: 
 618                 if not os
.path
.isfile(filename
): 
 620                 timestr 
= last_modified_hdr
 
 623                 filetime 
= timeconvert(timestr
) 
 627                         os
.utime(filename
, (time
.time(), filetime
)) 
	def report_writedescription(self, descfn):
		""" Report that the description file is being written """
		notice = u'[info] Writing video description to: %s' % descfn
		self.to_screen(notice, ignore_encoding_errors=True)
	def report_writeinfojson(self, infofn):
		""" Report that the metadata file has been written """
		notice = u'[info] Video description metadata as JSON to: %s' % infofn
		self.to_screen(notice, ignore_encoding_errors=True)
	def report_destination(self, filename):
		"""Report destination filename."""
		notice = u'[download] Destination: %s' % filename
		self.to_screen(notice, ignore_encoding_errors=True)
 643         def report_progress(self
, percent_str
, data_len_str
, speed_str
, eta_str
): 
 644                 """Report download progress.""" 
 645                 if self
.params
.get('noprogress', False): 
 647                 self
.to_screen(u
'\r[download] %s of %s at %s ETA %s' % 
 648                                 (percent_str
, data_len_str
, speed_str
, eta_str
), skip_eol
=True) 
 649                 self
.to_cons_title(u
'youtube-dl - %s of %s at %s ETA %s' % 
 650                                 (percent_str
.strip(), data_len_str
.strip(), speed_str
.strip(), eta_str
.strip())) 
	def report_resuming_byte(self, resume_len):
		"""Report attempt to resume at given byte."""
		notice = u'[download] Resuming download at byte %s' % resume_len
		self.to_screen(notice)
	def report_retry(self, count, retries):
		"""Report retry in case of HTTP error 5xx"""
		notice = u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries)
		self.to_screen(notice)
 660         def report_file_already_downloaded(self
, file_name
): 
 661                 """Report file has already been fully downloaded.""" 
 663                         self
.to_screen(u
'[download] %s has already been downloaded' % file_name
) 
 664                 except (UnicodeEncodeError), err
: 
 665                         self
.to_screen(u
'[download] The file has already been downloaded') 
	def report_unable_to_resume(self):
		"""Report it was impossible to resume download."""
		# Fixed informational message; no formatting arguments needed.
		self.to_screen(u'[download] Unable to resume')
 671         def report_finish(self
): 
 672                 """Report download finished.""" 
 673                 if self
.params
.get('noprogress', False): 
 674                         self
.to_screen(u
'[download] Download completed') 
	def increment_downloads(self):
		"""Increment the ordinal that assigns a number to each file."""
		# Consumed by prepare_filename() for the %(autonumber)s template key.
		self._num_downloads += 1
 682         def prepare_filename(self
, info_dict
): 
 683                 """Generate the output filename.""" 
 685                         template_dict 
= dict(info_dict
) 
 686                         template_dict
['epoch'] = unicode(long(time
.time())) 
 687                         template_dict
['autonumber'] = unicode('%05d' % self
._num
_downloads
) 
 688                         filename 
= self
.params
['outtmpl'] % template_dict
 
 690                 except (ValueError, KeyError), err
: 
 691                         self
.trouble(u
'ERROR: invalid system charset or erroneous output template') 
 694         def process_info(self
, info_dict
): 
 695                 """Process a single dictionary returned by an InfoExtractor.""" 
 696                 filename 
= self
.prepare_filename(info_dict
) 
 697                 # Do nothing else if in simulate mode 
 698                 if self
.params
.get('simulate', False): 
 700                         if self
.params
.get('forcetitle', False): 
 701                                 print info_dict
['title'].encode(preferredencoding(), 'xmlcharrefreplace') 
 702                         if self
.params
.get('forceurl', False): 
 703                                 print info_dict
['url'].encode(preferredencoding(), 'xmlcharrefreplace') 
 704                         if self
.params
.get('forcethumbnail', False) and 'thumbnail' in info_dict
: 
 705                                 print info_dict
['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') 
 706                         if self
.params
.get('forcedescription', False) and 'description' in info_dict
: 
 707                                 print info_dict
['description'].encode(preferredencoding(), 'xmlcharrefreplace') 
 708                         if self
.params
.get('forcefilename', False) and filename 
is not None: 
 709                                 print filename
.encode(preferredencoding(), 'xmlcharrefreplace') 
 715                 if self
.params
.get('nooverwrites', False) and os
.path
.exists(filename
): 
 716                         self
.to_stderr(u
'WARNING: file exists and will be skipped') 
 720                         dn 
= os
.path
.dirname(filename
) 
 721                         if dn 
!= '' and not os
.path
.exists(dn
): 
 723                 except (OSError, IOError), err
: 
 724                         self
.trouble(u
'ERROR: unable to create directory ' + unicode(err
)) 
 727                 if self
.params
.get('writedescription', False): 
 729                                 descfn 
= filename 
+ '.description' 
 730                                 self
.report_writedescription(descfn
) 
 731                                 descfile 
= open(descfn
, 'wb') 
 733                                         descfile
.write(info_dict
['description'].encode('utf-8')) 
 736                         except (OSError, IOError): 
 737                                 self
.trouble(u
'ERROR: Cannot write description file ' + descfn
) 
 740                 if self
.params
.get('writeinfojson', False): 
 741                         infofn 
= filename 
+ '.info.json' 
 742                         self
.report_writeinfojson(infofn
) 
 745                         except (NameError,AttributeError): 
 746                                 self
.trouble(u
'ERROR: No JSON encoder found. Update to Python 2.6+, setup a json module, or leave out --write-info-json.') 
 749                                 infof 
= open(infofn
, 'wb') 
 751                                         json
.dump(info_dict
, infof
) 
 754                         except (OSError, IOError): 
 755                                 self
.trouble(u
'ERROR: Cannot write metadata to JSON file ' + infofn
) 
 759                         success 
= self
._do
_download
(filename
, info_dict
['url'].encode('utf-8'), info_dict
.get('player_url', None)) 
 760                 except (OSError, IOError), err
: 
 761                         raise UnavailableVideoError
 
 762                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
 763                         self
.trouble(u
'ERROR: unable to download video data: %s' % str(err
)) 
 765                 except (ContentTooShortError
, ), err
: 
 766                         self
.trouble(u
'ERROR: content too short (expected %s bytes and served %s)' % (err
.expected
, err
.downloaded
)) 
 771                                 self
.post_process(filename
, info_dict
) 
 772                         except (PostProcessingError
), err
: 
 773                                 self
.trouble(u
'ERROR: postprocessing: %s' % str(err
)) 
 776         def download(self
, url_list
): 
 777                 """Download a given list of URLs.""" 
 778                 if len(url_list
) > 1 and self
.fixed_template(): 
 779                         raise SameFileError(self
.params
['outtmpl']) 
 782                         suitable_found 
= False 
 784                                 # Go to next InfoExtractor if not suitable 
 785                                 if not ie
.suitable(url
): 
 788                                 # Suitable InfoExtractor found 
 789                                 suitable_found 
= True 
 791                                 # Extract information from URL and process it 
 794                                 # Suitable InfoExtractor had been found; go to next URL 
 797                         if not suitable_found
: 
 798                                 self
.trouble(u
'ERROR: no suitable InfoExtractor: %s' % url
) 
 800                 return self
._download
_retcode
 
 802         def post_process(self
, filename
, ie_info
): 
 803                 """Run the postprocessing chain on the given file.""" 
 805                 info
['filepath'] = filename
 
 811         def _download_with_rtmpdump(self
, filename
, url
, player_url
): 
 812                 self
.report_destination(filename
) 
 813                 tmpfilename 
= self
.temp_name(filename
) 
 815                 # Check for rtmpdump first 
 817                         subprocess
.call(['rtmpdump', '-h'], stdout
=(file(os
.path
.devnull
, 'w')), stderr
=subprocess
.STDOUT
) 
 818                 except (OSError, IOError): 
 819                         self
.trouble(u
'ERROR: RTMP download detected but "rtmpdump" could not be run') 
 822                 # Download using rtmpdump. rtmpdump returns exit code 2 when 
 823                 # the connection was interrumpted and resuming appears to be 
 824                 # possible. This is part of rtmpdump's normal usage, AFAIK. 
 825                 basic_args 
= ['rtmpdump'] + [[], ['-W', player_url
]][player_url 
is not None] + ['-r', url
, '-o', tmpfilename
] 
 826                 retval 
= subprocess
.call(basic_args 
+ [[], ['-e', '-k', '1']][self
.params
.get('continuedl', False)]) 
 827                 while retval 
== 2 or retval 
== 1: 
 828                         prevsize 
= os
.path
.getsize(tmpfilename
) 
 829                         self
.to_screen(u
'\r[rtmpdump] %s bytes' % prevsize
, skip_eol
=True) 
 830                         time
.sleep(5.0) # This seems to be needed 
 831                         retval 
= subprocess
.call(basic_args 
+ ['-e'] + [[], ['-k', '1']][retval 
== 1]) 
 832                         cursize 
= os
.path
.getsize(tmpfilename
) 
 833                         if prevsize 
== cursize 
and retval 
== 1: 
 836                         self
.to_screen(u
'\r[rtmpdump] %s bytes' % os
.path
.getsize(tmpfilename
)) 
 837                         self
.try_rename(tmpfilename
, filename
) 
 840                         self
.trouble(u
'\nERROR: rtmpdump exited with code %d' % retval
) 
 843         def _do_download(self
, filename
, url
, player_url
): 
 844                 # Check file already present 
 845                 if self
.params
.get('continuedl', False) and os
.path
.isfile(filename
) and not self
.params
.get('nopart', False): 
 846                         self
.report_file_already_downloaded(filename
) 
 849                 # Attempt to download using rtmpdump 
 850                 if url
.startswith('rtmp'): 
 851                         return self
._download
_with
_rtmpdump
(filename
, url
, player_url
) 
 853                 tmpfilename 
= self
.temp_name(filename
) 
 857                 # Do not include the Accept-Encoding header 
 858                 headers 
= {'Youtubedl-no-compression': 'True'} 
 859                 basic_request 
= urllib2
.Request(url
, None, headers
) 
 860                 request 
= urllib2
.Request(url
, None, headers
) 
 862                 # Establish possible resume length 
 863                 if os
.path
.isfile(tmpfilename
): 
 864                         resume_len 
= os
.path
.getsize(tmpfilename
) 
 868                 # Request parameters in case of being able to resume 
 869                 if self
.params
.get('continuedl', False) and resume_len 
!= 0: 
 870                         self
.report_resuming_byte(resume_len
) 
 871                         request
.add_header('Range', 'bytes=%d-' % resume_len
) 
 875                 retries 
= self
.params
.get('retries', 0) 
 876                 while count 
<= retries
: 
 877                         # Establish connection 
 879                                 data 
= urllib2
.urlopen(request
) 
 881                         except (urllib2
.HTTPError
, ), err
: 
 882                                 if (err
.code 
< 500 or err
.code 
>= 600) and err
.code 
!= 416: 
 883                                         # Unexpected HTTP error 
 885                                 elif err
.code 
== 416: 
 886                                         # Unable to resume (requested range not satisfiable) 
 888                                                 # Open the connection again without the range header 
 889                                                 data 
= urllib2
.urlopen(basic_request
) 
 890                                                 content_length 
= data
.info()['Content-Length'] 
 891                                         except (urllib2
.HTTPError
, ), err
: 
 892                                                 if err
.code 
< 500 or err
.code 
>= 600: 
 895                                                 # Examine the reported length 
 896                                                 if (content_length 
is not None and 
 897                                                                 (resume_len 
- 100 < long(content_length
) < resume_len 
+ 100)): 
 898                                                         # The file had already been fully downloaded. 
 899                                                         # Explanation to the above condition: in issue #175 it was revealed that 
 900                                                         # YouTube sometimes adds or removes a few bytes from the end of the file, 
 901                                                         # changing the file size slightly and causing problems for some users. So 
 902                                                         # I decided to implement a suggested change and consider the file 
 903                                                         # completely downloaded if the file size differs less than 100 bytes from 
 904                                                         # the one in the hard drive. 
 905                                                         self
.report_file_already_downloaded(filename
) 
 906                                                         self
.try_rename(tmpfilename
, filename
) 
 909                                                         # The length does not match, we start the download over 
 910                                                         self
.report_unable_to_resume() 
 916                                 self
.report_retry(count
, retries
) 
 919                         self
.trouble(u
'ERROR: giving up after %s retries' % retries
) 
 922                 data_len 
= data
.info().get('Content-length', None) 
 923                 if data_len 
is not None: 
 924                         data_len 
= long(data_len
) + resume_len
 
 925                 data_len_str 
= self
.format_bytes(data_len
) 
 926                 byte_counter 
= 0 + resume_len
 
 932                         data_block 
= data
.read(block_size
) 
 934                         if len(data_block
) == 0: 
 936                         byte_counter 
+= len(data_block
) 
 938                         # Open file just in time 
 941                                         (stream
, tmpfilename
) = sanitize_open(tmpfilename
, open_mode
) 
 942                                         assert stream 
is not None 
 943                                         filename 
= self
.undo_temp_name(tmpfilename
) 
 944                                         self
.report_destination(filename
) 
 945                                 except (OSError, IOError), err
: 
 946                                         self
.trouble(u
'ERROR: unable to open for writing: %s' % str(err
)) 
 949                                 stream
.write(data_block
) 
 950                         except (IOError, OSError), err
: 
 951                                 self
.trouble(u
'\nERROR: unable to write data: %s' % str(err
)) 
 953                         block_size 
= self
.best_block_size(after 
- before
, len(data_block
)) 
 956                         percent_str 
= self
.calc_percent(byte_counter
, data_len
) 
 957                         eta_str 
= self
.calc_eta(start
, time
.time(), data_len 
- resume_len
, byte_counter 
- resume_len
) 
 958                         speed_str 
= self
.calc_speed(start
, time
.time(), byte_counter 
- resume_len
) 
 959                         self
.report_progress(percent_str
, data_len_str
, speed_str
, eta_str
) 
 962                         self
.slow_down(start
, byte_counter 
- resume_len
) 
 965                         self
.trouble(u
'\nERROR: Did not get any data blocks') 
 969                 if data_len 
is not None and byte_counter 
!= data_len
: 
 970                         raise ContentTooShortError(byte_counter
, long(data_len
)) 
 971                 self
.try_rename(tmpfilename
, filename
) 
 973                 # Update file modification time 
 974                 if self
.params
.get('updatetime', True): 
 975                         self
.try_utime(filename
, data
.info().get('last-modified', None)) 
 980 class InfoExtractor(object): 
 981         """Information Extractor class. 
 983         Information extractors are the classes that, given a URL, extract 
 984         information from the video (or videos) the URL refers to. This 
 985         information includes the real video URL, the video title and simplified 
 986         title, author and others. The information is stored in a dictionary 
 987         which is then passed to the FileDownloader. The FileDownloader 
 988         processes this information possibly downloading the video to the file 
 989         system, among other possible outcomes. The dictionaries must include 
 990         the following fields: 
 992         id:             Video identifier. 
 993         url:            Final video URL. 
 994         uploader:       Nickname of the video uploader. 
 995         title:          Literal title. 
 996         stitle:         Simplified title. 
 997         ext:            Video filename extension. 
 998         format:         Video format. 
 999         player_url:     SWF Player URL (may be None). 
1001         The following fields are optional. Their primary purpose is to allow 
1002         youtube-dl to serve as the backend for a video search function, such 
1003         as the one in youtube2mp3.  They are only used when their respective 
1004         forced printing functions are called: 
1006         thumbnail:      Full URL to a video thumbnail image. 
1007         description:    One-line video description. 
1009         Subclasses of this one should re-define the _real_initialize() and 
1010         _real_extract() methods, as well as the suitable() static method. 
1011         Probably, they should also be instantiated and added to the main 
1018         def __init__(self
, downloader
=None): 
1019                 """Constructor. Receives an optional downloader.""" 
1021                 self
.set_downloader(downloader
) 
1025                 """Receives a URL and returns True if suitable for this IE.""" 
1028         def initialize(self
): 
1029                 """Initializes an instance (authentication, etc).""" 
1031                         self
._real
_initialize
() 
1034         def extract(self
, url
): 
1035                 """Extracts URL information and returns it in list of dicts.""" 
1037                 return self
._real
_extract
(url
) 
1039         def set_downloader(self
, downloader
): 
1040                 """Sets the downloader for this IE.""" 
1041                 self
._downloader 
= downloader
 
1043         def _real_initialize(self
): 
1044                 """Real initialization process. Redefine in subclasses.""" 
1047         def _real_extract(self
, url
): 
1048                 """Real extraction process. Redefine in subclasses.""" 
1052 class YoutubeIE(InfoExtractor
): 
1053         """Information extractor for youtube.com.""" 
1055         _VALID_URL 
= r
'^((?:https?://)?(?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/)(?:(?:(?:v|embed|e)/)|(?:(?:watch(?:_popup)?(?:\.php)?)?(?:\?|#!?)(?:.+&)?v=))?)?([0-9A-Za-z_-]+)(?(1).+)?$' 
1056         _LANG_URL 
= r
'http://www.youtube.com/?hl=en&persist_hl=1&gl=US&persist_gl=1&opt_out_ackd=1' 
1057         _LOGIN_URL 
= 'https://www.youtube.com/signup?next=/&gl=US&hl=en' 
1058         _AGE_URL 
= 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en' 
1059         _NETRC_MACHINE 
= 'youtube' 
1060         # Listed in order of quality 
1061         _available_formats 
= ['38', '37', '45', '22', '43', '35', '34', '18', '6', '5', '17', '13'] 
1062         _video_extensions 
= { 
1068                 '38': 'video', # You actually don't know if this will be MOV, AVI or whatever 
1075                 return (re
.match(YoutubeIE
._VALID
_URL
, url
) is not None) 
1077         def report_lang(self
): 
1078                 """Report attempt to set language.""" 
1079                 self
._downloader
.to_screen(u
'[youtube] Setting language') 
1081         def report_login(self
): 
1082                 """Report attempt to log in.""" 
1083                 self
._downloader
.to_screen(u
'[youtube] Logging in') 
1085         def report_age_confirmation(self
): 
1086                 """Report attempt to confirm age.""" 
1087                 self
._downloader
.to_screen(u
'[youtube] Confirming age') 
1089         def report_video_webpage_download(self
, video_id
): 
1090                 """Report attempt to download video webpage.""" 
1091                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video webpage' % video_id
) 
1093         def report_video_info_webpage_download(self
, video_id
): 
1094                 """Report attempt to download video info webpage.""" 
1095                 self
._downloader
.to_screen(u
'[youtube] %s: Downloading video info webpage' % video_id
) 
1097         def report_information_extraction(self
, video_id
): 
1098                 """Report attempt to extract video information.""" 
1099                 self
._downloader
.to_screen(u
'[youtube] %s: Extracting video information' % video_id
) 
1101         def report_unavailable_format(self
, video_id
, format
): 
1102                 """Report extracted video URL.""" 
1103                 self
._downloader
.to_screen(u
'[youtube] %s: Format %s not available' % (video_id
, format
)) 
1105         def report_rtmp_download(self
): 
1106                 """Indicate the download will use the RTMP protocol.""" 
1107                 self
._downloader
.to_screen(u
'[youtube] RTMP download detected') 
1109         def _real_initialize(self
): 
1110                 if self
._downloader 
is None: 
1115                 downloader_params 
= self
._downloader
.params
 
1117                 # Attempt to use provided username and password or .netrc data 
1118                 if downloader_params
.get('username', None) is not None: 
1119                         username 
= downloader_params
['username'] 
1120                         password 
= downloader_params
['password'] 
1121                 elif downloader_params
.get('usenetrc', False): 
1123                                 info 
= netrc
.netrc().authenticators(self
._NETRC
_MACHINE
) 
1124                                 if info 
is not None: 
1128                                         raise netrc
.NetrcParseError('No authenticators for %s' % self
._NETRC
_MACHINE
) 
1129                         except (IOError, netrc
.NetrcParseError
), err
: 
1130                                 self
._downloader
.to_stderr(u
'WARNING: parsing .netrc: %s' % str(err
)) 
1134                 request 
= urllib2
.Request(self
._LANG
_URL
) 
1137                         urllib2
.urlopen(request
).read() 
1138                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1139                         self
._downloader
.to_stderr(u
'WARNING: unable to set language: %s' % str(err
)) 
1142                 # No authentication to be performed 
1143                 if username 
is None: 
1148                                 'current_form': 'loginForm', 
1150                                 'action_login': 'Log In', 
1151                                 'username':     username
, 
1152                                 'password':     password
, 
1154                 request 
= urllib2
.Request(self
._LOGIN
_URL
, urllib
.urlencode(login_form
)) 
1157                         login_results 
= urllib2
.urlopen(request
).read() 
1158                         if re
.search(r
'(?i)<form[^>]* name="loginForm"', login_results
) is not None: 
1159                                 self
._downloader
.to_stderr(u
'WARNING: unable to log in: bad username or password') 
1161                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1162                         self
._downloader
.to_stderr(u
'WARNING: unable to log in: %s' % str(err
)) 
1168                                 'action_confirm':       'Confirm', 
1170                 request 
= urllib2
.Request(self
._AGE
_URL
, urllib
.urlencode(age_form
)) 
1172                         self
.report_age_confirmation() 
1173                         age_results 
= urllib2
.urlopen(request
).read() 
1174                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1175                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1178         def _real_extract(self
, url
): 
1179                 # Extract video id from URL 
1180                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1182                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1184                 video_id 
= mobj
.group(2) 
1187                 self
.report_video_webpage_download(video_id
) 
1188                 request 
= urllib2
.Request('http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
) 
1190                         video_webpage 
= urllib2
.urlopen(request
).read() 
1191                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1192                         self
._downloader
.trouble(u
'ERROR: unable to download video webpage: %s' % str(err
)) 
1195                 # Attempt to extract SWF player URL 
1196                 mobj 
= re
.search(r
'swfConfig.*?"(http:\\/\\/.*?watch.*?-.*?\.swf)"', video_webpage
) 
1197                 if mobj 
is not None: 
1198                         player_url 
= re
.sub(r
'\\(.)', r
'\1', mobj
.group(1)) 
1203                 self
.report_video_info_webpage_download(video_id
) 
1204                 for el_type 
in ['&el=embedded', '&el=detailpage', '&el=vevo', '']: 
1205                         video_info_url 
= ('http://www.youtube.com/get_video_info?&video_id=%s%s&ps=default&eurl=&gl=US&hl=en' 
1206                                         % (video_id
, el_type
)) 
1207                         request 
= urllib2
.Request(video_info_url
) 
1209                                 video_info_webpage 
= urllib2
.urlopen(request
).read() 
1210                                 video_info 
= parse_qs(video_info_webpage
) 
1211                                 if 'token' in video_info
: 
1213                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1214                                 self
._downloader
.trouble(u
'ERROR: unable to download video info webpage: %s' % str(err
)) 
1216                 if 'token' not in video_info
: 
1217                         if 'reason' in video_info
: 
1218                                 self
._downloader
.trouble(u
'ERROR: YouTube said: %s' % video_info
['reason'][0].decode('utf-8')) 
1220                                 self
._downloader
.trouble(u
'ERROR: "token" parameter not in video info for unknown reason') 
1223                 # Start extracting information 
1224                 self
.report_information_extraction(video_id
) 
1227                 if 'author' not in video_info
: 
1228                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1230                 video_uploader 
= urllib
.unquote_plus(video_info
['author'][0]) 
1233                 if 'title' not in video_info
: 
1234                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1236                 video_title 
= urllib
.unquote_plus(video_info
['title'][0]) 
1237                 video_title 
= video_title
.decode('utf-8') 
1238                 video_title 
= sanitize_title(video_title
) 
1241                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1242                 simple_title 
= simple_title
.strip(ur
'_') 
1245                 if 'thumbnail_url' not in video_info
: 
1246                         self
._downloader
.trouble(u
'WARNING: unable to extract video thumbnail') 
1247                         video_thumbnail 
= '' 
1248                 else:   # don't panic if we can't find it 
1249                         video_thumbnail 
= urllib
.unquote_plus(video_info
['thumbnail_url'][0]) 
1253                 mobj 
= re
.search(r
'id="eow-date.*?>(.*?)</span>', video_webpage
, re
.DOTALL
) 
1254                 if mobj 
is not None: 
1255                         upload_date 
= ' '.join(re
.sub(r
'[/,-]', r
' ', mobj
.group(1)).split()) 
1256                         format_expressions 
= ['%d %B %Y', '%B %d %Y', '%b %d %Y'] 
1257                         for expression 
in format_expressions
: 
1259                                         upload_date 
= datetime
.datetime
.strptime(upload_date
, expression
).strftime('%Y%m%d') 
1267                         video_description 
= u
'No description available.' 
1268                         if self
._downloader
.params
.get('forcedescription', False) or self
._downloader
.params
.get('writedescription', False): 
1269                                 mobj 
= re
.search(r
'<meta name="description" content="(.*)"(?:\s*/)?>', video_webpage
) 
1270                                 if mobj 
is not None: 
1271                                         video_description 
= mobj
.group(1).decode('utf-8') 
1273                         html_parser 
= lxml
.etree
.HTMLParser(encoding
='utf-8') 
1274                         vwebpage_doc 
= lxml
.etree
.parse(StringIO
.StringIO(video_webpage
), html_parser
) 
1275                         video_description 
= u
''.join(vwebpage_doc
.xpath('id("eow-description")//text()')) 
1276                         # TODO use another parser 
1279                 video_token 
= urllib
.unquote_plus(video_info
['token'][0]) 
1281                 # Decide which formats to download 
1282                 req_format 
= self
._downloader
.params
.get('format', None) 
1284                 if 'conn' in video_info 
and video_info
['conn'][0].startswith('rtmp'): 
1285                         self
.report_rtmp_download() 
1286                         video_url_list 
= [(None, video_info
['conn'][0])] 
1287                 elif 'url_encoded_fmt_stream_map' in video_info 
and len(video_info
['url_encoded_fmt_stream_map']) >= 1: 
1288                         url_data_strs 
= video_info
['url_encoded_fmt_stream_map'][0].split(',') 
1289                         url_data 
= [parse_qs(uds
) for uds 
in url_data_strs
] 
1290                         url_data 
= filter(lambda ud
: 'itag' in ud 
and 'url' in ud
, url_data
) 
1291                         url_map 
= dict((ud
['itag'][0], ud
['url'][0]) for ud 
in url_data
) 
1293                         format_limit 
= self
._downloader
.params
.get('format_limit', None) 
1294                         if format_limit 
is not None and format_limit 
in self
._available
_formats
: 
1295                                 format_list 
= self
._available
_formats
[self
._available
_formats
.index(format_limit
):] 
1297                                 format_list 
= self
._available
_formats
 
1298                         existing_formats 
= [x 
for x 
in format_list 
if x 
in url_map
] 
1299                         if len(existing_formats
) == 0: 
1300                                 self
._downloader
.trouble(u
'ERROR: no known formats available for video') 
1302                         if req_format 
is None: 
1303                                 video_url_list 
= [(existing_formats
[0], url_map
[existing_formats
[0]])] # Best quality 
1304                         elif req_format 
== '-1': 
1305                                 video_url_list 
= [(f
, url_map
[f
]) for f 
in existing_formats
] # All formats 
1308                                 if req_format 
not in url_map
: 
1309                                         self
._downloader
.trouble(u
'ERROR: requested format not available') 
1311                                 video_url_list 
= [(req_format
, url_map
[req_format
])] # Specific format 
1313                         self
._downloader
.trouble(u
'ERROR: no conn or url_encoded_fmt_stream_map information found in video info') 
1316                 for format_param
, video_real_url 
in video_url_list
: 
1317                         # At this point we have a new video 
1318                         self
._downloader
.increment_downloads() 
1321                         video_extension 
= self
._video
_extensions
.get(format_param
, 'flv') 
1324                                 # Process video information 
1325                                 self
._downloader
.process_info({ 
1326                                         'id':           video_id
.decode('utf-8'), 
1327                                         'url':          video_real_url
.decode('utf-8'), 
1328                                         'uploader':     video_uploader
.decode('utf-8'), 
1329                                         'upload_date':  upload_date
, 
1330                                         'title':        video_title
, 
1331                                         'stitle':       simple_title
, 
1332                                         'ext':          video_extension
.decode('utf-8'), 
1333                                         'format':       (format_param 
is None and u
'NA' or format_param
.decode('utf-8')), 
1334                                         'thumbnail':    video_thumbnail
.decode('utf-8'), 
1335                                         'description':  video_description
, 
1336                                         'player_url':   player_url
, 
1338                         except UnavailableVideoError
, err
: 
1339                                 self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1342 class MetacafeIE(InfoExtractor
): 
1343         """Information Extractor for metacafe.com.""" 
1345         _VALID_URL 
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' 
1346         _DISCLAIMER 
= 'http://www.metacafe.com/family_filter/' 
1347         _FILTER_POST 
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' 
1350         def __init__(self
, youtube_ie
, downloader
=None): 
1351                 InfoExtractor
.__init
__(self
, downloader
) 
1352                 self
._youtube
_ie 
= youtube_ie
 
1356                 return (re
.match(MetacafeIE
._VALID
_URL
, url
) is not None) 
1358         def report_disclaimer(self
): 
1359                 """Report disclaimer retrieval.""" 
1360                 self
._downloader
.to_screen(u
'[metacafe] Retrieving disclaimer') 
1362         def report_age_confirmation(self
): 
1363                 """Report attempt to confirm age.""" 
1364                 self
._downloader
.to_screen(u
'[metacafe] Confirming age') 
1366         def report_download_webpage(self
, video_id
): 
1367                 """Report webpage download.""" 
1368                 self
._downloader
.to_screen(u
'[metacafe] %s: Downloading webpage' % video_id
) 
1370         def report_extraction(self
, video_id
): 
1371                 """Report information extraction.""" 
1372                 self
._downloader
.to_screen(u
'[metacafe] %s: Extracting information' % video_id
) 
1374         def _real_initialize(self
): 
1375                 # Retrieve disclaimer 
1376                 request 
= urllib2
.Request(self
._DISCLAIMER
) 
1378                         self
.report_disclaimer() 
1379                         disclaimer 
= urllib2
.urlopen(request
).read() 
1380                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1381                         self
._downloader
.trouble(u
'ERROR: unable to retrieve disclaimer: %s' % str(err
)) 
1387                         'submit': "Continue - I'm over 18", 
1389                 request 
= urllib2
.Request(self
._FILTER
_POST
, urllib
.urlencode(disclaimer_form
)) 
1391                         self
.report_age_confirmation() 
1392                         disclaimer 
= urllib2
.urlopen(request
).read() 
1393                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1394                         self
._downloader
.trouble(u
'ERROR: unable to confirm age: %s' % str(err
)) 
1397         def _real_extract(self
, url
): 
1398                 # Extract id and simplified title from URL 
1399                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1401                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1404                 video_id 
= mobj
.group(1) 
1406                 # Check if video comes from YouTube 
1407                 mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
1408                 if mobj2 
is not None: 
1409                         self
._youtube
_ie
.extract('http://www.youtube.com/watch?v=%s' % mobj2
.group(1)) 
1412                 # At this point we have a new video 
1413                 self
._downloader
.increment_downloads() 
1415                 simple_title 
= mobj
.group(2).decode('utf-8') 
1417                 # Retrieve video webpage to extract further information 
1418                 request 
= urllib2
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
1420                         self
.report_download_webpage(video_id
) 
1421                         webpage 
= urllib2
.urlopen(request
).read() 
1422                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1423                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1426                 # Extract URL, uploader and title from webpage 
1427                 self
.report_extraction(video_id
) 
1428                 mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
1429                 if mobj 
is not None: 
1430                         mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1431                         video_extension 
= mediaURL
[-3:] 
1433                         # Extract gdaKey if available 
1434                         mobj 
= re
.search(r
'(?m)&gdaKey=(.*?)&', webpage
) 
1436                                 video_url 
= mediaURL
 
1438                                 gdaKey 
= mobj
.group(1) 
1439                                 video_url 
= '%s?__gda__=%s' % (mediaURL
, gdaKey
) 
1441                         mobj 
= re
.search(r
' name="flashvars" value="(.*?)"', webpage
) 
1443                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1445                         vardict 
= parse_qs(mobj
.group(1)) 
1446                         if 'mediaData' not in vardict
: 
1447                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1449                         mobj 
= re
.search(r
'"mediaURL":"(http.*?)","key":"(.*?)"', vardict
['mediaData'][0]) 
1451                                 self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1453                         mediaURL 
= mobj
.group(1).replace('\\/', '/') 
1454                         video_extension 
= mediaURL
[-3:] 
1455                         video_url 
= '%s?__gda__=%s' % (mediaURL
, mobj
.group(2)) 
1457                 mobj 
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
) 
1459                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1461                 video_title 
= mobj
.group(1).decode('utf-8') 
1462                 video_title 
= sanitize_title(video_title
) 
1464                 mobj 
= re
.search(r
'(?ms)By:\s*<a .*?>(.+?)<', webpage
) 
1466                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1468                 video_uploader 
= mobj
.group(1) 
1471                         # Process video information 
1472                         self
._downloader
.process_info({ 
1473                                 'id':           video_id
.decode('utf-8'), 
1474                                 'url':          video_url
.decode('utf-8'), 
1475                                 'uploader':     video_uploader
.decode('utf-8'), 
1476                                 'upload_date':  u
'NA', 
1477                                 'title':        video_title
, 
1478                                 'stitle':       simple_title
, 
1479                                 'ext':          video_extension
.decode('utf-8'), 
1483                 except UnavailableVideoError
: 
1484                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1487 class DailymotionIE(InfoExtractor
): 
1488         """Information Extractor for Dailymotion""" 
1490         _VALID_URL 
= r
'(?i)(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/video/([^_/]+)_([^/]+)' 
1492         def __init__(self
, downloader
=None): 
1493                 InfoExtractor
.__init
__(self
, downloader
) 
1497                 return (re
.match(DailymotionIE
._VALID
_URL
, url
) is not None) 
1499         def report_download_webpage(self
, video_id
): 
1500                 """Report webpage download.""" 
1501                 self
._downloader
.to_screen(u
'[dailymotion] %s: Downloading webpage' % video_id
) 
1503         def report_extraction(self
, video_id
): 
1504                 """Report information extraction.""" 
1505                 self
._downloader
.to_screen(u
'[dailymotion] %s: Extracting information' % video_id
) 
1507         def _real_initialize(self
): 
1510         def _real_extract(self
, url
): 
1511                 # Extract id and simplified title from URL 
1512                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1514                         self
._downloader
.trouble(u
'ERROR: invalid URL: %s' % url
) 
1517                 # At this point we have a new video 
1518                 self
._downloader
.increment_downloads() 
1519                 video_id 
= mobj
.group(1) 
1521                 simple_title 
= mobj
.group(2).decode('utf-8') 
1522                 video_extension 
= 'flv' 
1524                 # Retrieve video webpage to extract further information 
1525                 request 
= urllib2
.Request(url
) 
1526                 request
.add_header('Cookie', 'family_filter=off') 
1528                         self
.report_download_webpage(video_id
) 
1529                         webpage 
= urllib2
.urlopen(request
).read() 
1530                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1531                         self
._downloader
.trouble(u
'ERROR: unable retrieve video webpage: %s' % str(err
)) 
1534                 # Extract URL, uploader and title from webpage 
1535                 self
.report_extraction(video_id
) 
1536                 mobj 
= re
.search(r
'(?i)addVariable\(\"sequence\"\s*,\s*\"([^\"]+?)\"\)', webpage
) 
1538                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1540                 sequence 
= urllib
.unquote(mobj
.group(1)) 
1541                 mobj 
= re
.search(r
',\"sdURL\"\:\"([^\"]+?)\",', sequence
) 
1543                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1545                 mediaURL 
= urllib
.unquote(mobj
.group(1)).replace('\\', '') 
1547                 # if needed add http://www.dailymotion.com/ if relative URL 
1549                 video_url 
= mediaURL
 
1551                 mobj 
= re
.search(r
'(?im)<title>Dailymotion\s*-\s*(.+)\s*-\s*[^<]+?</title>', webpage
) 
1553                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1555                 video_title 
= mobj
.group(1).decode('utf-8') 
1556                 video_title 
= sanitize_title(video_title
) 
1558                 mobj 
= re
.search(r
'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage
) 
1560                         self
._downloader
.trouble(u
'ERROR: unable to extract uploader nickname') 
1562                 video_uploader 
= mobj
.group(1) 
1565                         # Process video information 
1566                         self
._downloader
.process_info({ 
1567                                 'id':           video_id
.decode('utf-8'), 
1568                                 'url':          video_url
.decode('utf-8'), 
1569                                 'uploader':     video_uploader
.decode('utf-8'), 
1570                                 'upload_date':  u
'NA', 
1571                                 'title':        video_title
, 
1572                                 'stitle':       simple_title
, 
1573                                 'ext':          video_extension
.decode('utf-8'), 
1577                 except UnavailableVideoError
: 
1578                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1581 class GoogleIE(InfoExtractor
): 
1582         """Information extractor for video.google.com.""" 
1584         _VALID_URL 
= r
'(?:http://)?video\.google\.(?:com(?:\.au)?|co\.(?:uk|jp|kr|cr)|ca|de|es|fr|it|nl|pl)/videoplay\?docid=([^\&]+).*' 
1586         def __init__(self
, downloader
=None): 
1587                 InfoExtractor
.__init
__(self
, downloader
) 
1591                 return (re
.match(GoogleIE
._VALID
_URL
, url
) is not None) 
1593         def report_download_webpage(self
, video_id
): 
1594                 """Report webpage download.""" 
1595                 self
._downloader
.to_screen(u
'[video.google] %s: Downloading webpage' % video_id
) 
1597         def report_extraction(self
, video_id
): 
1598                 """Report information extraction.""" 
1599                 self
._downloader
.to_screen(u
'[video.google] %s: Extracting information' % video_id
) 
1601         def _real_initialize(self
): 
1604         def _real_extract(self
, url
): 
1605                 # Extract id from URL 
1606                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1608                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1611                 # At this point we have a new video 
1612                 self
._downloader
.increment_downloads() 
1613                 video_id 
= mobj
.group(1) 
1615                 video_extension 
= 'mp4' 
1617                 # Retrieve video webpage to extract further information 
1618                 request 
= urllib2
.Request('http://video.google.com/videoplay?docid=%s&hl=en&oe=utf-8' % video_id
) 
1620                         self
.report_download_webpage(video_id
) 
1621                         webpage 
= urllib2
.urlopen(request
).read() 
1622                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1623                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1626                 # Extract URL, uploader, and title from webpage 
1627                 self
.report_extraction(video_id
) 
1628                 mobj 
= re
.search(r
"download_url:'([^']+)'", webpage
) 
1630                         video_extension 
= 'flv' 
1631                         mobj 
= re
.search(r
"(?i)videoUrl\\x3d(.+?)\\x26", webpage
) 
1633                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1635                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1636                 mediaURL 
= mediaURL
.replace('\\x3d', '\x3d') 
1637                 mediaURL 
= mediaURL
.replace('\\x26', '\x26') 
1639                 video_url 
= mediaURL
 
1641                 mobj 
= re
.search(r
'<title>(.*)</title>', webpage
) 
1643                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1645                 video_title 
= mobj
.group(1).decode('utf-8') 
1646                 video_title 
= sanitize_title(video_title
) 
1647                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1649                 # Extract video description 
1650                 mobj 
= re
.search(r
'<span id=short-desc-content>([^<]*)</span>', webpage
) 
1652                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1654                 video_description 
= mobj
.group(1).decode('utf-8') 
1655                 if not video_description
: 
1656                         video_description 
= 'No description available.' 
1658                 # Extract video thumbnail 
1659                 if self
._downloader
.params
.get('forcethumbnail', False): 
1660                         request 
= urllib2
.Request('http://video.google.com/videosearch?q=%s+site:video.google.com&hl=en' % abs(int(video_id
))) 
1662                                 webpage 
= urllib2
.urlopen(request
).read() 
1663                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1664                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1666                         mobj 
= re
.search(r
'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage
) 
1668                                 self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1670                         video_thumbnail 
= mobj
.group(1) 
1671                 else:   # we need something to pass to process_info 
1672                         video_thumbnail 
= '' 
1675                         # Process video information 
1676                         self
._downloader
.process_info({ 
1677                                 'id':           video_id
.decode('utf-8'), 
1678                                 'url':          video_url
.decode('utf-8'), 
1680                                 'upload_date':  u
'NA', 
1681                                 'title':        video_title
, 
1682                                 'stitle':       simple_title
, 
1683                                 'ext':          video_extension
.decode('utf-8'), 
1687                 except UnavailableVideoError
: 
1688                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1691 class PhotobucketIE(InfoExtractor
): 
1692         """Information extractor for photobucket.com.""" 
1694         _VALID_URL 
= r
'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' 
1696         def __init__(self
, downloader
=None): 
1697                 InfoExtractor
.__init
__(self
, downloader
) 
1701                 return (re
.match(PhotobucketIE
._VALID
_URL
, url
) is not None) 
1703         def report_download_webpage(self
, video_id
): 
1704                 """Report webpage download.""" 
1705                 self
._downloader
.to_screen(u
'[photobucket] %s: Downloading webpage' % video_id
) 
1707         def report_extraction(self
, video_id
): 
1708                 """Report information extraction.""" 
1709                 self
._downloader
.to_screen(u
'[photobucket] %s: Extracting information' % video_id
) 
1711         def _real_initialize(self
): 
1714         def _real_extract(self
, url
): 
1715                 # Extract id from URL 
1716                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1718                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1721                 # At this point we have a new video 
1722                 self
._downloader
.increment_downloads() 
1723                 video_id 
= mobj
.group(1) 
1725                 video_extension 
= 'flv' 
1727                 # Retrieve video webpage to extract further information 
1728                 request 
= urllib2
.Request(url
) 
1730                         self
.report_download_webpage(video_id
) 
1731                         webpage 
= urllib2
.urlopen(request
).read() 
1732                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1733                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1736                 # Extract URL, uploader, and title from webpage 
1737                 self
.report_extraction(video_id
) 
1738                 mobj 
= re
.search(r
'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage
) 
1740                         self
._downloader
.trouble(u
'ERROR: unable to extract media URL') 
1742                 mediaURL 
= urllib
.unquote(mobj
.group(1)) 
1744                 video_url 
= mediaURL
 
1746                 mobj 
= re
.search(r
'<title>(.*) video by (.*) - Photobucket</title>', webpage
) 
1748                         self
._downloader
.trouble(u
'ERROR: unable to extract title') 
1750                 video_title 
= mobj
.group(1).decode('utf-8') 
1751                 video_title 
= sanitize_title(video_title
) 
1752                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1754                 video_uploader 
= mobj
.group(2).decode('utf-8') 
1757                         # Process video information 
1758                         self
._downloader
.process_info({ 
1759                                 'id':           video_id
.decode('utf-8'), 
1760                                 'url':          video_url
.decode('utf-8'), 
1761                                 'uploader':     video_uploader
, 
1762                                 'upload_date':  u
'NA', 
1763                                 'title':        video_title
, 
1764                                 'stitle':       simple_title
, 
1765                                 'ext':          video_extension
.decode('utf-8'), 
1769                 except UnavailableVideoError
: 
1770                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1773 class YahooIE(InfoExtractor
): 
1774         """Information extractor for video.yahoo.com.""" 
1776         # _VALID_URL matches all Yahoo! Video URLs 
1777         # _VPAGE_URL matches only the extractable '/watch/' URLs 
1778         _VALID_URL 
= r
'(?:http://)?(?:[a-z]+\.)?video\.yahoo\.com/(?:watch|network)/([0-9]+)(?:/|\?v=)([0-9]+)(?:[#\?].*)?' 
1779         _VPAGE_URL 
= r
'(?:http://)?video\.yahoo\.com/watch/([0-9]+)/([0-9]+)(?:[#\?].*)?' 
1781         def __init__(self
, downloader
=None): 
1782                 InfoExtractor
.__init
__(self
, downloader
) 
1786                 return (re
.match(YahooIE
._VALID
_URL
, url
) is not None) 
1788         def report_download_webpage(self
, video_id
): 
1789                 """Report webpage download.""" 
1790                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Downloading webpage' % video_id
) 
1792         def report_extraction(self
, video_id
): 
1793                 """Report information extraction.""" 
1794                 self
._downloader
.to_screen(u
'[video.yahoo] %s: Extracting information' % video_id
) 
1796         def _real_initialize(self
): 
1799         def _real_extract(self
, url
, new_video
=True): 
1800                 # Extract ID from URL 
1801                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1803                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1806                 # At this point we have a new video 
1807                 self
._downloader
.increment_downloads() 
1808                 video_id 
= mobj
.group(2) 
1809                 video_extension 
= 'flv' 
1811                 # Rewrite valid but non-extractable URLs as 
1812                 # extractable English language /watch/ URLs 
1813                 if re
.match(self
._VPAGE
_URL
, url
) is None: 
1814                         request 
= urllib2
.Request(url
) 
1816                                 webpage 
= urllib2
.urlopen(request
).read() 
1817                         except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1818                                 self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1821                         mobj 
= re
.search(r
'\("id", "([0-9]+)"\);', webpage
) 
1823                                 self
._downloader
.trouble(u
'ERROR: Unable to extract id field') 
1825                         yahoo_id 
= mobj
.group(1) 
1827                         mobj 
= re
.search(r
'\("vid", "([0-9]+)"\);', webpage
) 
1829                                 self
._downloader
.trouble(u
'ERROR: Unable to extract vid field') 
1831                         yahoo_vid 
= mobj
.group(1) 
1833                         url 
= 'http://video.yahoo.com/watch/%s/%s' % (yahoo_vid
, yahoo_id
) 
1834                         return self
._real
_extract
(url
, new_video
=False) 
1836                 # Retrieve video webpage to extract further information 
1837                 request 
= urllib2
.Request(url
) 
1839                         self
.report_download_webpage(video_id
) 
1840                         webpage 
= urllib2
.urlopen(request
).read() 
1841                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1842                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1845                 # Extract uploader and title from webpage 
1846                 self
.report_extraction(video_id
) 
1847                 mobj 
= re
.search(r
'<meta name="title" content="(.*)" />', webpage
) 
1849                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1851                 video_title 
= mobj
.group(1).decode('utf-8') 
1852                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1854                 mobj 
= re
.search(r
'<h2 class="ti-5"><a href="http://video\.yahoo\.com/(people|profile)/[0-9]+" beacon=".*">(.*)</a></h2>', webpage
) 
1856                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
1858                 video_uploader 
= mobj
.group(1).decode('utf-8') 
1860                 # Extract video thumbnail 
1861                 mobj 
= re
.search(r
'<link rel="image_src" href="(.*)" />', webpage
) 
1863                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
1865                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
1867                 # Extract video description 
1868                 mobj 
= re
.search(r
'<meta name="description" content="(.*)" />', webpage
) 
1870                         self
._downloader
.trouble(u
'ERROR: unable to extract video description') 
1872                 video_description 
= mobj
.group(1).decode('utf-8') 
1873                 if not video_description
: 
1874                         video_description 
= 'No description available.' 
1876                 # Extract video height and width 
1877                 mobj 
= re
.search(r
'<meta name="video_height" content="([0-9]+)" />', webpage
) 
1879                         self
._downloader
.trouble(u
'ERROR: unable to extract video height') 
1881                 yv_video_height 
= mobj
.group(1) 
1883                 mobj 
= re
.search(r
'<meta name="video_width" content="([0-9]+)" />', webpage
) 
1885                         self
._downloader
.trouble(u
'ERROR: unable to extract video width') 
1887                 yv_video_width 
= mobj
.group(1) 
1889                 # Retrieve video playlist to extract media URL 
1890                 # I'm not completely sure what all these options are, but we 
1891                 # seem to need most of them, otherwise the server sends a 401. 
1892                 yv_lg 
= 'R0xx6idZnW2zlrKP8xxAIR'  # not sure what this represents 
1893                 yv_bitrate 
= '700'  # according to Wikipedia this is hard-coded 
1894                 request 
= urllib2
.Request('http://cosmos.bcst.yahoo.com/up/yep/process/getPlaylistFOP.php?node_id=' + video_id 
+ 
1895                                 '&tech=flash&mode=playlist&lg=' + yv_lg 
+ '&bitrate=' + yv_bitrate 
+ '&vidH=' + yv_video_height 
+ 
1896                                 '&vidW=' + yv_video_width 
+ '&swf=as3&rd=video.yahoo.com&tk=null&adsupported=v1,v2,&eventid=1301797') 
1898                         self
.report_download_webpage(video_id
) 
1899                         webpage 
= urllib2
.urlopen(request
).read() 
1900                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1901                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1904                 # Extract media URL from playlist XML 
1905                 mobj 
= re
.search(r
'<STREAM APP="(http://.*)" FULLPATH="/?(/.*\.flv\?[^"]*)"', webpage
) 
1907                         self
._downloader
.trouble(u
'ERROR: Unable to extract media URL') 
1909                 video_url 
= urllib
.unquote(mobj
.group(1) + mobj
.group(2)).decode('utf-8') 
1910                 video_url 
= re
.sub(r
'(?u)&(.+?);', htmlentity_transform
, video_url
) 
1913                         # Process video information 
1914                         self
._downloader
.process_info({ 
1915                                 'id':           video_id
.decode('utf-8'), 
1917                                 'uploader':     video_uploader
, 
1918                                 'upload_date':  u
'NA', 
1919                                 'title':        video_title
, 
1920                                 'stitle':       simple_title
, 
1921                                 'ext':          video_extension
.decode('utf-8'), 
1922                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
1923                                 'description':  video_description
, 
1924                                 'thumbnail':    video_thumbnail
, 
1927                 except UnavailableVideoError
: 
1928                         self
._downloader
.trouble(u
'\nERROR: unable to download video') 
1931 class VimeoIE(InfoExtractor
): 
1932         """Information extractor for vimeo.com.""" 
1934         # _VALID_URL matches Vimeo URLs 
1935         _VALID_URL 
= r
'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' 
1937         def __init__(self
, downloader
=None): 
1938                 InfoExtractor
.__init
__(self
, downloader
) 
1942                 return (re
.match(VimeoIE
._VALID
_URL
, url
) is not None) 
1944         def report_download_webpage(self
, video_id
): 
1945                 """Report webpage download.""" 
1946                 self
._downloader
.to_screen(u
'[vimeo] %s: Downloading webpage' % video_id
) 
1948         def report_extraction(self
, video_id
): 
1949                 """Report information extraction.""" 
1950                 self
._downloader
.to_screen(u
'[vimeo] %s: Extracting information' % video_id
) 
1952         def _real_initialize(self
): 
1955         def _real_extract(self
, url
, new_video
=True): 
1956                 # Extract ID from URL 
1957                 mobj 
= re
.match(self
._VALID
_URL
, url
) 
1959                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
1962                 # At this point we have a new video 
1963                 self
._downloader
.increment_downloads() 
1964                 video_id 
= mobj
.group(1) 
1966                 # Retrieve video webpage to extract further information 
1967                 request 
= urllib2
.Request("http://vimeo.com/moogaloop/load/clip:%s" % video_id
, None, std_headers
) 
1969                         self
.report_download_webpage(video_id
) 
1970                         webpage 
= urllib2
.urlopen(request
).read() 
1971                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
1972                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
1975                 # Now we begin extracting as much information as we can from what we 
1976                 # retrieved. First we extract the information common to all extractors, 
1977                 # and latter we extract those that are Vimeo specific. 
1978                 self
.report_extraction(video_id
) 
1981                 mobj 
= re
.search(r
'<caption>(.*?)</caption>', webpage
) 
1983                         self
._downloader
.trouble(u
'ERROR: unable to extract video title') 
1985                 video_title 
= mobj
.group(1).decode('utf-8') 
1986                 simple_title 
= re
.sub(ur
'(?u)([^%s]+)' % simple_title_chars
, ur
'_', video_title
) 
1989                 mobj 
= re
.search(r
'<uploader_url>http://vimeo.com/(.*?)</uploader_url>', webpage
) 
1991                         self
._downloader
.trouble(u
'ERROR: unable to extract video uploader') 
1993                 video_uploader 
= mobj
.group(1).decode('utf-8') 
1995                 # Extract video thumbnail 
1996                 mobj 
= re
.search(r
'<thumbnail>(.*?)</thumbnail>', webpage
) 
1998                         self
._downloader
.trouble(u
'ERROR: unable to extract video thumbnail') 
2000                 video_thumbnail 
= mobj
.group(1).decode('utf-8') 
2002                 # # Extract video description 
2003                 # mobj = re.search(r'<meta property="og:description" content="(.*)" />', webpage) 
2005                 #       self._downloader.trouble(u'ERROR: unable to extract video description') 
2007                 # video_description = mobj.group(1).decode('utf-8') 
2008                 # if not video_description: video_description = 'No description available.' 
2009                 video_description 
= 'Foo.' 
2011                 # Vimeo specific: extract request signature 
2012                 mobj 
= re
.search(r
'<request_signature>(.*?)</request_signature>', webpage
) 
2014                         self
._downloader
.trouble(u
'ERROR: unable to extract request signature') 
2016                 sig 
= mobj
.group(1).decode('utf-8') 
2018                 # Vimeo specific: Extract request signature expiration 
2019                 mobj 
= re
.search(r
'<request_signature_expires>(.*?)</request_signature_expires>', webpage
) 
2021                         self
._downloader
.trouble(u
'ERROR: unable to extract request signature expiration') 
2023                 sig_exp 
= mobj
.group(1).decode('utf-8') 
2025                 video_url 
= "http://vimeo.com/moogaloop/play/clip:%s/%s/%s" % (video_id
, sig
, sig_exp
) 
2028                         # Process video information 
2029                         self
._downloader
.process_info({ 
2030                                 'id':           video_id
.decode('utf-8'), 
2032                                 'uploader':     video_uploader
, 
2033                                 'upload_date':  u
'NA', 
2034                                 'title':        video_title
, 
2035                                 'stitle':       simple_title
, 
2037                                 'thumbnail':    video_thumbnail
.decode('utf-8'), 
2038                                 'description':  video_description
, 
2039                                 'thumbnail':    video_thumbnail
, 
2040                                 'description':  video_description
, 
2043                 except UnavailableVideoError
: 
2044                         self
._downloader
.trouble(u
'ERROR: unable to download video') 
2047 class GenericIE(InfoExtractor
): 
2048         """Generic last-resort information extractor.""" 
2050         def __init__(self
, downloader
=None): 
2051                 InfoExtractor
.__init
__(self
, downloader
) 
2057         def report_download_webpage(self
, video_id
): 
2058                 """Report webpage download.""" 
2059                 self
._downloader
.to_screen(u
'WARNING: Falling back on generic information extractor.') 
2060                 self
._downloader
.to_screen(u
'[generic] %s: Downloading webpage' % video_id
) 
2062         def report_extraction(self
, video_id
): 
2063                 """Report information extraction.""" 
2064                 self
._downloader
.to_screen(u
'[generic] %s: Extracting information' % video_id
) 
2066         def _real_initialize(self
): 
2069         def _real_extract(self
, url
): 
2070                 # At this point we have a new video 
2071                 self
._downloader
.increment_downloads() 
2073                 video_id 
= url
.split('/')[-1] 
2074                 request 
= urllib2
.Request(url
) 
2076                         self
.report_download_webpage(video_id
) 
2077                         webpage 
= urllib2
.urlopen(request
).read() 
2078                 except (urllib2
.URLError
, httplib
.HTTPException
, socket
.error
), err
: 
2079                         self
._downloader
.trouble(u
'ERROR: Unable to retrieve video webpage: %s' % str(err
)) 
2081                 except ValueError, err
: 
2082                         # since this is the last-resort InfoExtractor, if 
2083                         # this error is thrown, it'll be thrown here 
2084                         self
._downloader
.trouble(u
'ERROR: Invalid URL: %s' % url
) 
2087                 self
.report_extraction(video_id
) 
2088                 # Start with something easy: JW Player in SWFObject 
2089                 mobj 
= re
.search(r
'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) 
2091                         # Broaden the search a little bit 
2092                         mobj = re.search(r'[^A
-Za
-z0
-9]?
(?
:file|source
)=(http
[^
\'"&]*)', webpage) 
2094                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
2097                 # It's possible that one of the regexes 
2098                 # matched, but returned an empty group: 
2099                 if mobj.group(1) is None: 
2100                         self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) 
2103                 video_url = urllib.unquote(mobj.group(1)) 
2104                 video_id = os.path.basename(video_url) 
2106                 # here's a fun little line of code for you: 
2107                 video_extension = os.path.splitext(video_id)[1][1:] 
2108                 video_id = os.path.splitext(video_id)[0] 
2110                 # it's tempting to parse this further, but you would 
2111                 # have to take into account all the variations like 
2112                 #   Video Title - Site Name 
2113                 #   Site Name | Video Title 
2114                 #   Video Title - Tagline | Site Name 
2115                 # and so on and so forth; it's just not practical 
2116                 mobj = re.search(r'<title>(.*)</title>', webpage) 
2118                         self._downloader.trouble(u'ERROR: unable to extract title') 
2120                 video_title = mobj.group(1).decode('utf-8') 
2121                 video_title = sanitize_title(video_title) 
2122                 simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title) 
2124                 # video uploader is domain name 
2125                 mobj = re.match(r'(?:https?://)?([^/]*)/.*', url) 
2127                         self._downloader.trouble(u'ERROR: unable to extract title') 
2129                 video_uploader = mobj.group(1).decode('utf-8') 
2132                         # Process video information 
2133                         self._downloader.process_info({ 
2134                                 'id':           video_id.decode('utf-8'), 
2135                                 'url':          video_url.decode('utf-8'), 
2136                                 'uploader':     video_uploader, 
2137                                 'upload_date':  u'NA', 
2138                                 'title':        video_title, 
2139                                 'stitle':       simple_title, 
2140                                 'ext':          video_extension.decode('utf-8'), 
2144                 except UnavailableVideoError, err: 
2145                         self._downloader.trouble(u'\nERROR: unable to download video') 
2148 class YoutubeSearchIE(InfoExtractor): 
2149         """Information Extractor for YouTube search queries.""" 
2150         _VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' 
2151         _TEMPLATE_URL = 'http://www.youtube.com/results?search_query=%s&page=%s&gl=US&hl=en' 
2152         _VIDEO_INDICATOR = r'href="/watch
\?v
=.+?
"' 
2153         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
2155         _max_youtube_results = 1000 
2157         def __init__(self, youtube_ie, downloader=None): 
2158                 InfoExtractor.__init__(self, downloader) 
2159                 self._youtube_ie = youtube_ie 
2163                 return (re.match(YoutubeSearchIE._VALID_QUERY, url) is not None) 
2165         def report_download_page(self, query, pagenum): 
2166                 """Report attempt to download playlist page with given number.""" 
2167                 query = query.decode(preferredencoding()) 
2168                 self._downloader.to_screen(u'[youtube] query "%s": Downloading page %s' % (query, pagenum)) 
2170         def _real_initialize(self): 
2171                 self._youtube_ie.initialize() 
2173         def _real_extract(self, query): 
2174                 mobj = re.match(self._VALID_QUERY, query) 
2176                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2179                 prefix, query = query.split(':') 
2181                 query = query.encode('utf-8') 
2183                         self._download_n_results(query, 1) 
2185                 elif prefix == 'all': 
2186                         self._download_n_results(query, self._max_youtube_results) 
2192                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2194                                 elif n > self._max_youtube_results: 
2195                                         self._downloader.to_stderr(u'WARNING: ytsearch returns max %i results (you requested %i)' % (self._max_youtube_results, n)) 
2196                                         n = self._max_youtube_results 
2197                                 self._download_n_results(query, n) 
2199                         except ValueError: # parsing prefix as integer fails 
2200                                 self._download_n_results(query, 1) 
2203         def _download_n_results(self, query, n): 
2204                 """Downloads a specified number of results for a query""" 
2207                 already_seen = set() 
2211                         self.report_download_page(query, pagenum) 
2212                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2213                         request = urllib2.Request(result_url) 
2215                                 page = urllib2.urlopen(request).read() 
2216                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2217                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2220                         # Extract video identifiers 
2221                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2222                                 video_id = page[mobj.span()[0]:mobj.span()[1]].split('=')[2][:-1] 
2223                                 if video_id not in already_seen: 
2224                                         video_ids.append(video_id) 
2225                                         already_seen.add(video_id) 
2226                                         if len(video_ids) == n: 
2227                                                 # Specified n videos reached 
2228                                                 for id in video_ids: 
2229                                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2232                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2233                                 for id in video_ids: 
2234                                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2237                         pagenum = pagenum + 1 
2240 class GoogleSearchIE(InfoExtractor): 
2241         """Information Extractor for Google Video search queries.""" 
2242         _VALID_QUERY = r'gvsearch(\d+|all)?:[\s\S]+' 
2243         _TEMPLATE_URL = 'http://video.google.com/videosearch?q=%s+site:video.google.com&start=%s&hl=en' 
2244         _VIDEO_INDICATOR = r'videoplay\?docid=([^\&>]+)\&' 
2245         _MORE_PAGES_INDICATOR = r'<span>Next</span>' 
2247         _max_google_results = 1000 
2249         def __init__(self, google_ie, downloader=None): 
2250                 InfoExtractor.__init__(self, downloader) 
2251                 self._google_ie = google_ie 
2255                 return (re.match(GoogleSearchIE._VALID_QUERY, url) is not None) 
2257         def report_download_page(self, query, pagenum): 
2258                 """Report attempt to download playlist page with given number.""" 
2259                 query = query.decode(preferredencoding()) 
2260                 self._downloader.to_screen(u'[video.google] query "%s": Downloading page %s' % (query, pagenum)) 
2262         def _real_initialize(self): 
2263                 self._google_ie.initialize() 
2265         def _real_extract(self, query): 
2266                 mobj = re.match(self._VALID_QUERY, query) 
2268                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2271                 prefix, query = query.split(':') 
2273                 query = query.encode('utf-8') 
2275                         self._download_n_results(query, 1) 
2277                 elif prefix == 'all': 
2278                         self._download_n_results(query, self._max_google_results) 
2284                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2286                                 elif n > self._max_google_results: 
2287                                         self._downloader.to_stderr(u'WARNING: gvsearch returns max %i results (you requested %i)' % (self._max_google_results, n)) 
2288                                         n = self._max_google_results 
2289                                 self._download_n_results(query, n) 
2291                         except ValueError: # parsing prefix as integer fails 
2292                                 self._download_n_results(query, 1) 
2295         def _download_n_results(self, query, n): 
2296                 """Downloads a specified number of results for a query""" 
2299                 already_seen = set() 
2303                         self.report_download_page(query, pagenum) 
2304                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2305                         request = urllib2.Request(result_url) 
2307                                 page = urllib2.urlopen(request).read() 
2308                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2309                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2312                         # Extract video identifiers 
2313                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2314                                 video_id = mobj.group(1) 
2315                                 if video_id not in already_seen: 
2316                                         video_ids.append(video_id) 
2317                                         already_seen.add(video_id) 
2318                                         if len(video_ids) == n: 
2319                                                 # Specified n videos reached 
2320                                                 for id in video_ids: 
2321                                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
2324                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2325                                 for id in video_ids: 
2326                                         self._google_ie.extract('http://video.google.com/videoplay?docid=%s' % id) 
2329                         pagenum = pagenum + 1 
2332 class YahooSearchIE(InfoExtractor): 
2333         """Information Extractor for Yahoo! Video search queries.""" 
2334         _VALID_QUERY = r'yvsearch(\d+|all)?:[\s\S]+' 
2335         _TEMPLATE_URL = 'http://video.yahoo.com/search/?p=%s&o=%s' 
2336         _VIDEO_INDICATOR = r'href="http
://video\
.yahoo\
.com
/watch
/([0-9]+/[0-9]+)"' 
2337         _MORE_PAGES_INDICATOR = r'\s*Next' 
2339         _max_yahoo_results = 1000 
2341         def __init__(self, yahoo_ie, downloader=None): 
2342                 InfoExtractor.__init__(self, downloader) 
2343                 self._yahoo_ie = yahoo_ie 
2347                 return (re.match(YahooSearchIE._VALID_QUERY, url) is not None) 
2349         def report_download_page(self, query, pagenum): 
2350                 """Report attempt to download playlist page with given number.""" 
2351                 query = query.decode(preferredencoding()) 
2352                 self._downloader.to_screen(u'[video.yahoo] query "%s": Downloading page %s' % (query, pagenum)) 
2354         def _real_initialize(self): 
2355                 self._yahoo_ie.initialize() 
2357         def _real_extract(self, query): 
2358                 mobj = re.match(self._VALID_QUERY, query) 
2360                         self._downloader.trouble(u'ERROR: invalid search query "%s"' % query) 
2363                 prefix, query = query.split(':') 
2365                 query = query.encode('utf-8') 
2367                         self._download_n_results(query, 1) 
2369                 elif prefix == 'all': 
2370                         self._download_n_results(query, self._max_yahoo_results) 
2376                                         self._downloader.trouble(u'ERROR: invalid download number %s for query "%s"' % (n, query)) 
2378                                 elif n > self._max_yahoo_results: 
2379                                         self._downloader.to_stderr(u'WARNING: yvsearch returns max %i results (you requested %i)' % (self._max_yahoo_results, n)) 
2380                                         n = self._max_yahoo_results 
2381                                 self._download_n_results(query, n) 
2383                         except ValueError: # parsing prefix as integer fails 
2384                                 self._download_n_results(query, 1) 
2387         def _download_n_results(self, query, n): 
2388                 """Downloads a specified number of results for a query""" 
2391                 already_seen = set() 
2395                         self.report_download_page(query, pagenum) 
2396                         result_url = self._TEMPLATE_URL % (urllib.quote_plus(query), pagenum) 
2397                         request = urllib2.Request(result_url) 
2399                                 page = urllib2.urlopen(request).read() 
2400                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2401                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2404                         # Extract video identifiers 
2405                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2406                                 video_id = mobj.group(1) 
2407                                 if video_id not in already_seen: 
2408                                         video_ids.append(video_id) 
2409                                         already_seen.add(video_id) 
2410                                         if len(video_ids) == n: 
2411                                                 # Specified n videos reached 
2412                                                 for id in video_ids: 
2413                                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2416                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2417                                 for id in video_ids: 
2418                                         self._yahoo_ie.extract('http://video.yahoo.com/watch/%s' % id) 
2421                         pagenum = pagenum + 1 
2424 class YoutubePlaylistIE(InfoExtractor): 
2425         """Information Extractor for YouTube playlists.""" 
2427         _VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' 
2428         _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' 
2429         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
2430         _MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>' 
2433         def __init__(self, youtube_ie, downloader=None): 
2434                 InfoExtractor.__init__(self, downloader) 
2435                 self._youtube_ie = youtube_ie 
2439                 return (re.match(YoutubePlaylistIE._VALID_URL, url) is not None) 
2441         def report_download_page(self, playlist_id, pagenum): 
2442                 """Report attempt to download playlist page with given number.""" 
2443                 self._downloader.to_screen(u'[youtube] PL %s: Downloading page #%s' % (playlist_id, pagenum)) 
2445         def _real_initialize(self): 
2446                 self._youtube_ie.initialize() 
2448         def _real_extract(self, url): 
2449                 # Extract playlist id 
2450                 mobj = re.match(self._VALID_URL, url) 
2452                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2456                 if mobj.group(3) is not None: 
2457                         self._youtube_ie.extract(mobj.group(3)) 
2460                 # Download playlist pages 
2461                 # prefix is 'p' as default for playlists but there are other types that need extra care 
2462                 playlist_prefix = mobj.group(1) 
2463                 if playlist_prefix == 'a': 
2464                         playlist_access = 'artist' 
2466                         playlist_prefix = 'p' 
2467                         playlist_access = 'view_play_list' 
2468                 playlist_id = mobj.group(2) 
2473                         self.report_download_page(playlist_id, pagenum) 
2474                         request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum)) 
2476                                 page = urllib2.urlopen(request).read() 
2477                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2478                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2481                         # Extract video identifiers 
2483                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2484                                 if mobj.group(1) not in ids_in_page: 
2485                                         ids_in_page.append(mobj.group(1)) 
2486                         video_ids.extend(ids_in_page) 
2488                         if re.search(self._MORE_PAGES_INDICATOR, page) is None: 
2490                         pagenum = pagenum + 1 
2492                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2493                 playlistend = self._downloader.params.get('playlistend', -1) 
2494                 video_ids = video_ids[playliststart:playlistend] 
2496                 for id in video_ids: 
2497                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) 
2501 class YoutubeUserIE(InfoExtractor): 
2502         """Information Extractor for YouTube users.""" 
2504         _VALID_URL = r'(?:(?:(?:http://)?(?:\w+\.)?youtube.com/user/)|ytuser:)([A-Za-z0-9_-]+)' 
2505         _TEMPLATE_URL = 'http://gdata.youtube.com/feeds/api/users/%s' 
2506         _GDATA_PAGE_SIZE = 50 
2507         _GDATA_URL = 'http://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d' 
2508         _VIDEO_INDICATOR = r'/watch\?v=(.+?)&' 
2511         def __init__(self, youtube_ie, downloader=None): 
2512                 InfoExtractor.__init__(self, downloader) 
2513                 self._youtube_ie = youtube_ie 
2517                 return (re.match(YoutubeUserIE._VALID_URL, url) is not None) 
2519         def report_download_page(self, username, start_index): 
2520                 """Report attempt to download user page.""" 
2521                 self._downloader.to_screen(u'[youtube] user %s: Downloading video ids from %d to %d' % 
2522                                 (username, start_index, start_index + self._GDATA_PAGE_SIZE)) 
2524         def _real_initialize(self): 
2525                 self._youtube_ie.initialize() 
2527         def _real_extract(self, url): 
2529                 mobj = re.match(self._VALID_URL, url) 
2531                         self._downloader.trouble(u'ERROR: invalid url: %s' % url) 
2534                 username = mobj.group(1) 
2536                 # Download video ids using YouTube Data API. Result size per 
2537                 # query is limited (currently to 50 videos) so we need to query 
2538                 # page by page until there are no video ids - it means we got 
2545                         start_index = pagenum * self._GDATA_PAGE_SIZE + 1 
2546                         self.report_download_page(username, start_index) 
2548                         request = urllib2.Request(self._GDATA_URL % (username, self._GDATA_PAGE_SIZE, start_index)) 
2551                                 page = urllib2.urlopen(request).read() 
2552                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2553                                 self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) 
2556                         # Extract video identifiers 
2559                         for mobj in re.finditer(self._VIDEO_INDICATOR, page): 
2560                                 if mobj.group(1) not in ids_in_page: 
2561                                         ids_in_page.append(mobj.group(1)) 
2563                         video_ids.extend(ids_in_page) 
2565                         # A little optimization - if current page is not 
2566                         # "full
", ie. does not contain PAGE_SIZE video ids then 
2567                         # we can assume that this page is the last one - there 
2568                         # are no more ids on further pages - no need to query 
2571                         if len(ids_in_page) < self._GDATA_PAGE_SIZE: 
2576                 all_ids_count = len(video_ids) 
2577                 playliststart = self._downloader.params.get('playliststart', 1) - 1 
2578                 playlistend = self._downloader.params.get('playlistend', -1) 
2580                 if playlistend == -1: 
2581                         video_ids = video_ids[playliststart:] 
2583                         video_ids = video_ids[playliststart:playlistend] 
2585                 self._downloader.to_screen("[youtube
] user 
%s: Collected 
%d video 
ids (downloading 
%d of them
)" % 
2586                                 (username, all_ids_count, len(video_ids))) 
2588                 for video_id in video_ids: 
2589                         self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % video_id) 
2592 class DepositFilesIE(InfoExtractor): 
2593         """Information extractor for depositfiles.com""" 
2595         _VALID_URL = r'(?:http://)?(?:\w+\.)?depositfiles.com/(?:../(?#locale))?files/(.+)' 
2597         def __init__(self, downloader=None): 
2598                 InfoExtractor.__init__(self, downloader) 
2602                 return (re.match(DepositFilesIE._VALID_URL, url) is not None) 
2604         def report_download_webpage(self, file_id): 
2605                 """Report webpage download.""" 
2606                 self._downloader.to_screen(u'[DepositFiles] %s: Downloading webpage' % file_id) 
2608         def report_extraction(self, file_id): 
2609                 """Report information extraction.""" 
2610                 self._downloader.to_screen(u'[DepositFiles] %s: Extracting information' % file_id) 
2612         def _real_initialize(self): 
2615         def _real_extract(self, url): 
2616                 # At this point we have a new file 
2617                 self._downloader.increment_downloads() 
2619                 file_id = url.split('/')[-1] 
2620                 # Rebuild url in english locale 
2621                 url = 'http://depositfiles.com/en/files/' + file_id 
2623                 # Retrieve file webpage with 'Free download' button pressed 
2624                 free_download_indication = { 'gateway_result' : '1' } 
2625                 request = urllib2.Request(url, urllib.urlencode(free_download_indication)) 
2627                         self.report_download_webpage(file_id) 
2628                         webpage = urllib2.urlopen(request).read() 
2629                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2630                         self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) 
2633                 # Search for the real file URL 
2634                 mobj = re.search(r'<form action="(http
://fileshare
.+?
)"', webpage) 
2635                 if (mobj is None) or (mobj.group(1) is None): 
2636                         # Try to figure out reason of the error. 
2637                         mobj = re.search(r'<strong>(Attention.*?)</strong>', webpage, re.DOTALL) 
2638                         if (mobj is not None) and (mobj.group(1) is not None): 
2639                                 restriction_message = re.sub('\s+', ' ', mobj.group(1)).strip() 
2640                                 self._downloader.trouble(u'ERROR: %s' % restriction_message) 
2642                                 self._downloader.trouble(u'ERROR: unable to extract download URL from: %s' % url) 
2645                 file_url = mobj.group(1) 
2646                 file_extension = os.path.splitext(file_url)[1][1:] 
2648                 # Search for file title 
2649                 mobj = re.search(r'<b title="(.*?
)">', webpage) 
2651                         self._downloader.trouble(u'ERROR: unable to extract title') 
2653                 file_title = mobj.group(1).decode('utf-8') 
2656                         # Process file information 
2657                         self._downloader.process_info({ 
2658                                 'id':           file_id.decode('utf-8'), 
2659                                 'url':          file_url.decode('utf-8'), 
2661                                 'upload_date':  u'NA', 
2662                                 'title':        file_title, 
2663                                 'stitle':       file_title, 
2664                                 'ext':          file_extension.decode('utf-8'), 
2668                 except UnavailableVideoError, err: 
2669                         self._downloader.trouble(u'ERROR: unable to download file') 
2672 class FacebookIE(InfoExtractor): 
2673         """Information Extractor for Facebook""" 
2675         _VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)' 
2676         _LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&' 
2677         _NETRC_MACHINE = 'facebook' 
2678         _available_formats = ['highqual', 'lowqual'] 
2679         _video_extensions = { 
2684         def __init__(self, downloader=None): 
2685                 InfoExtractor.__init__(self, downloader) 
2689                 return (re.match(FacebookIE._VALID_URL, url) is not None) 
2691         def _reporter(self, message): 
2692                 """Add header and report message.""" 
2693                 self._downloader.to_screen(u'[facebook] %s' % message) 
2695         def report_login(self): 
2696                 """Report attempt to log in.""" 
2697                 self._reporter(u'Logging in') 
2699         def report_video_webpage_download(self, video_id): 
2700                 """Report attempt to download video webpage.""" 
2701                 self._reporter(u'%s: Downloading video webpage' % video_id) 
2703         def report_information_extraction(self, video_id): 
2704                 """Report attempt to extract video information.""" 
2705                 self._reporter(u'%s: Extracting video information' % video_id) 
2707         def _parse_page(self, video_webpage): 
2708                 """Extract video information from page""" 
2710                 data = {'title': r'class="video_title datawrap
">(.*?)</', 
2711                         'description': r'<div class="datawrap
">(.*?)</div>', 
2712                         'owner': r'\("video_owner_name
", "(.*?
)"\)', 
2713                         'upload_date': r'data-date="(.*?
)"', 
2714                         'thumbnail':  r'\("thumb_url
", "(?P
<THUMB
>.*?
)"\)', 
2717                 for piece in data.keys(): 
2718                         mobj = re.search(data[piece], video_webpage) 
2719                         if mobj is not None: 
2720                                 video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape
")) 
2724                 for fmt in self._available_formats: 
2725                         mobj = re.search(r'\("%s_src
\", "(.+?)"\
)' % fmt, video_webpage) 
2726                         if mobj is not None: 
2727                                 # URL is in a Javascript segment inside an escaped Unicode format within 
2728                                 # the generally utf-8 page 
2729                                 video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape")) 
2730                 video_info['video_urls
'] = video_urls 
2734         def _real_initialize(self): 
2735                 if self._downloader is None: 
2740                 downloader_params = self._downloader.params 
2742                 # Attempt to use provided username and password or .netrc data 
2743                 if downloader_params.get('username
', None) is not None: 
2744                         useremail = downloader_params['username
'] 
2745                         password = downloader_params['password
'] 
2746                 elif downloader_params.get('usenetrc
', False): 
2748                                 info = netrc.netrc().authenticators(self._NETRC_MACHINE) 
2749                                 if info is not None: 
2753                                         raise netrc.NetrcParseError('No authenticators 
for %s' % self._NETRC_MACHINE) 
2754                         except (IOError, netrc.NetrcParseError), err: 
2755                                 self._downloader.to_stderr(u'WARNING
: parsing 
.netrc
: %s' % str(err)) 
2758                 if useremail is None: 
2767                 request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form)) 
2770                         login_results = urllib2.urlopen(request).read() 
2771                         if re.search(r'<form(.*)name
="login"(.*)</form
>', login_results) is not None: 
2772                                 self._downloader.to_stderr(u'WARNING
: unable to log 
in: bad username
/password
, or exceded login rate 
limit (~
3/min). Check credentials 
or wait
.') 
2774                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2775                         self._downloader.to_stderr(u'WARNING
: unable to log 
in: %s' % str(err)) 
2778         def _real_extract(self, url): 
2779                 mobj = re.match(self._VALID_URL, url) 
2781                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
2783                 video_id = mobj.group('ID
') 
2786                 self.report_video_webpage_download(video_id) 
2787                 request = urllib2.Request('https
://www
.facebook
.com
/video
/video
.php?v
=%s' % video_id) 
2789                         page = urllib2.urlopen(request) 
2790                         video_webpage = page.read() 
2791                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2792                         self._downloader.trouble(u'ERROR
: unable to download video webpage
: %s' % str(err)) 
2795                 # Start extracting information 
2796                 self.report_information_extraction(video_id) 
2798                 # Extract information 
2799                 video_info = self._parse_page(video_webpage) 
2802                 if 'owner
' not in video_info: 
2803                         self._downloader.trouble(u'ERROR
: unable to extract uploader nickname
') 
2805                 video_uploader = video_info['owner
'] 
2808                 if 'title
' not in video_info: 
2809                         self._downloader.trouble(u'ERROR
: unable to extract video title
') 
2811                 video_title = video_info['title
'] 
2812                 video_title = video_title.decode('utf
-8') 
2813                 video_title = sanitize_title(video_title) 
2816                 simple_title = re.sub(ur'(?u
)([^
%s]+)' % simple_title_chars, ur'_
', video_title) 
2817                 simple_title = simple_title.strip(ur'_
') 
2820                 if 'thumbnail
' not in video_info: 
2821                         self._downloader.trouble(u'WARNING
: unable to extract video thumbnail
') 
2822                         video_thumbnail = '' 
2824                         video_thumbnail = video_info['thumbnail
'] 
2828                 if 'upload_date
' in video_info: 
2829                         upload_time = video_info['upload_date
'] 
2830                         timetuple = email.utils.parsedate_tz(upload_time) 
2831                         if timetuple is not None: 
2833                                         upload_date = time.strftime('%Y
%m
%d', timetuple[0:9]) 
2838                 video_description = video_info.get('description
', 'No description available
.') 
2840                 url_map = video_info['video_urls
'] 
2841                 if len(url_map.keys()) > 0: 
2842                         # Decide which formats to download 
2843                         req_format = self._downloader.params.get('format
', None) 
2844                         format_limit = self._downloader.params.get('format_limit
', None) 
2846                         if format_limit is not None and format_limit in self._available_formats: 
2847                                 format_list = self._available_formats[self._available_formats.index(format_limit):] 
2849                                 format_list = self._available_formats 
2850                         existing_formats = [x for x in format_list if x in url_map] 
2851                         if len(existing_formats) == 0: 
2852                                 self._downloader.trouble(u'ERROR
: no known formats available 
for video
') 
2854                         if req_format is None: 
2855                                 video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality 
2856                         elif req_format == '-1': 
2857                                 video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats 
2860                                 if req_format not in url_map: 
2861                                         self._downloader.trouble(u'ERROR
: requested format 
not available
') 
2863                                 video_url_list = [(req_format, url_map[req_format])] # Specific format 
2865                 for format_param, video_real_url in video_url_list: 
2867                         # At this point we have a new video 
2868                         self._downloader.increment_downloads() 
2871                         video_extension = self._video_extensions.get(format_param, 'mp4
') 
2874                                 # Process video information 
2875                                 self._downloader.process_info({ 
2876                                         'id':           video_id.decode('utf
-8'), 
2877                                         'url
':          video_real_url.decode('utf
-8'), 
2878                                         'uploader
':     video_uploader.decode('utf
-8'), 
2879                                         'upload_date
':  upload_date, 
2880                                         'title
':        video_title, 
2881                                         'stitle
':       simple_title, 
2882                                         'ext
':          video_extension.decode('utf
-8'), 
2883                                         'format
':       (format_param is None and u'NA
' or format_param.decode('utf
-8')), 
2884                                         'thumbnail
':    video_thumbnail.decode('utf
-8'), 
2885                                         'description
':  video_description.decode('utf
-8'), 
2888                         except UnavailableVideoError, err: 
2889                                 self._downloader.trouble(u'\nERROR
: unable to download video
') 
2891 class BlipTVIE(InfoExtractor): 
2892         """Information extractor for blip.tv""" 
2894         _VALID_URL = r'^
(?
:https?
://)?
(?
:\w
+\
.)?blip\
.tv(/.+)$
' 
2895         _URL_EXT = r'^
.*\
.([a
-z0
-9]+)$
' 
2899                 return (re.match(BlipTVIE._VALID_URL, url) is not None) 
2901         def report_extraction(self, file_id): 
2902                 """Report information extraction.""" 
2903                 self._downloader.to_screen(u'[blip
.tv
] %s: Extracting information
' % file_id) 
2905         def _simplify_title(self, title): 
2906                 res = re.sub(ur'(?u
)([^
%s]+)' % simple_title_chars, ur'_
', title) 
2907                 res = res.strip(ur'_
') 
2910         def _real_extract(self, url): 
2911                 mobj = re.match(self._VALID_URL, url) 
2913                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
2920                 json_url = url + cchar + 'skin
=json
&version
=2&no_wrap
=1' 
2921                 request = urllib2.Request(json_url) 
2922                 self.report_extraction(mobj.group(1)) 
2924                         json_code = urllib2.urlopen(request).read() 
2925                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
2926                         self._downloader.trouble(u'ERROR
: unable to download video info webpage
: %s' % str(err)) 
2929                         json_data = json.loads(json_code) 
2930                         if 'Post
' in json_data: 
2931                                 data = json_data['Post
'] 
2935                         upload_date = datetime.datetime.strptime(data['datestamp
'], '%m
-%d-%y 
%H
:%M
%p
').strftime('%Y
%m
%d') 
2936                         video_url = data['media
']['url
'] 
2937                         umobj = re.match(self._URL_EXT, video_url) 
2939                                 raise ValueError('Can 
not determine filename extension
') 
2940                         ext = umobj.group(1) 
2942                         self._downloader.increment_downloads() 
2945                                 'id': data['item_id
'], 
2947                                 'uploader
': data['display_name
'], 
2948                                 'upload_date
': upload_date, 
2949                                 'title
': data['title
'], 
2950                                 'stitle
': self._simplify_title(data['title
']), 
2952                                 'format
': data['media
']['mimeType
'], 
2953                                 'thumbnail
': data['thumbnailUrl
'], 
2954                                 'description
': data['description
'], 
2955                                 'player_url
': data['embedUrl
'] 
2957                 except (ValueError,KeyError), err: 
2958                         self._downloader.trouble(u'ERROR
: unable to parse video information
: %s' % repr(err)) 
2962                         self._downloader.process_info(info) 
2963                 except UnavailableVideoError, err: 
2964                         self._downloader.trouble(u'\nERROR
: unable to download video
') 
2967 class MyVideoIE(InfoExtractor): 
2968         """Information Extractor for myvideo.de.""" 
2970         _VALID_URL = r'(?
:http
://)?
(?
:www\
.)?myvideo\
.de
/watch
/([0-9]+)/([^?
/]+).*' 
2972         def __init__(self, downloader=None): 
2973                 InfoExtractor.__init__(self, downloader) 
2977                 return (re.match(MyVideoIE._VALID_URL, url) is not None) 
2979         def report_download_webpage(self, video_id): 
2980                 """Report webpage download.""" 
2981                 self._downloader.to_screen(u'[myvideo
] %s: Downloading webpage
' % video_id) 
2983         def report_extraction(self, video_id): 
2984                 """Report information extraction.""" 
2985                 self._downloader.to_screen(u'[myvideo
] %s: Extracting information
' % video_id) 
2987         def _real_initialize(self): 
2990         def _real_extract(self,url): 
2991                 mobj = re.match(self._VALID_URL, url) 
2993                         self._download.trouble(u'ERROR
: invalid URL
: %s' % url) 
2996                 video_id = mobj.group(1) 
2997                 simple_title = mobj.group(2).decode('utf
-8') 
2998                 # should actually not be necessary 
2999                 simple_title = sanitize_title(simple_title) 
3000                 simple_title = re.sub(ur'(?u
)([^
%s]+)' % simple_title_chars, ur'_
', simple_title) 
3003                 request = urllib2.Request('http
://www
.myvideo
.de
/watch
/%s' % video_id) 
3005                         self.report_download_webpage(video_id) 
3006                         webpage = urllib2.urlopen(request).read() 
3007                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3008                         self._downloader.trouble(u'ERROR
: Unable to retrieve video webpage
: %s' % str(err)) 
3011                 self.report_extraction(video_id) 
3012                 mobj = re.search(r'<link rel
=\'image_src
\' href
=\'(http
://is[0-9].myvideo\
.de
/de
/movie
[0-9]+/[a
-f0
-9]+)/thumbs
/[^
.]+\
.jpg
\' />', 
3015                         self._downloader.trouble(u'ERROR
: unable to extract media URL
') 
3017                 video_url = mobj.group(1) + ('/%s.flv
' % video_id) 
3019                 mobj = re.search('<title
>([^
<]+)</title
>', webpage) 
3021                         self._downloader.trouble(u'ERROR
: unable to extract title
') 
3024                 video_title = mobj.group(1) 
3025                 video_title = sanitize_title(video_title) 
3029                         self._downloader.process_info({ 
3033                                 'upload_date
':  u'NA
', 
3034                                 'title
':        video_title, 
3035                                 'stitle
':       simple_title, 
3040                 except UnavailableVideoError: 
3041                         self._downloader.trouble(u'\nERROR
: Unable to download video
') 
3043 class ComedyCentralIE(InfoExtractor): 
3044         """Information extractor for The Daily Show and Colbert Report """ 
3046         _VALID_URL = r'^
(:(?P
<shortname
>tds|thedailyshow|cr|colbert|colbertnation|colbertreport
))|
(https?
://)?
(www\
.)(?P
<showname
>thedailyshow|colbertnation
)\
.com
/full
-episodes
/(?P
<episode
>.*)$
' 
3050                 return (re.match(ComedyCentralIE._VALID_URL, url) is not None) 
3052         def report_extraction(self, episode_id): 
3053                 self._downloader.to_screen(u'[comedycentral
] %s: Extracting information
' % episode_id) 
3055         def report_config_download(self, episode_id): 
3056                 self._downloader.to_screen(u'[comedycentral
] %s: Downloading configuration
' % episode_id) 
3058         def report_player_url(self, episode_id): 
3059                 self._downloader.to_screen(u'[comedycentral
] %s: Determining player URL
' % episode_id) 
3061         def _simplify_title(self, title): 
3062                 res = re.sub(ur'(?u
)([^
%s]+)' % simple_title_chars, ur'_
', title) 
3063                 res = res.strip(ur'_
') 
3066         def _real_extract(self, url): 
3067                 mobj = re.match(self._VALID_URL, url) 
3069                         self._downloader.trouble(u'ERROR
: invalid URL
: %s' % url) 
3072                 if mobj.group('shortname
'): 
3073                         if mobj.group('shortname
') in ('tds
', 'thedailyshow
'): 
3074                                 url = 'http
://www
.thedailyshow
.com
/full
-episodes
/' 
3076                                 url = 'http
://www
.colbertnation
.com
/full
-episodes
/' 
3077                         mobj = re.match(self._VALID_URL, url) 
3078                         assert mobj is not None 
3080                 dlNewest = not mobj.group('episode
') 
3082                         epTitle = mobj.group('showname
') 
3084                         epTitle = mobj.group('episode
') 
3086                 req = urllib2.Request(url) 
3087                 self.report_extraction(epTitle) 
3089                         htmlHandle = urllib2.urlopen(req) 
3090                         html = htmlHandle.read() 
3091                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3092                         self._downloader.trouble(u'ERROR
: unable to download webpage
: %s' % unicode(err)) 
3095                         url = htmlHandle.geturl() 
3096                         mobj = re.match(self._VALID_URL, url) 
3098                                 self._downloader.trouble(u'ERROR
: Invalid redirected URL
: ' + url) 
3100                         if mobj.group('episode
') == '': 
3101                                 self._downloader.trouble(u'ERROR
: Redirected URL 
is still 
not specific
: ' + url) 
3103                         epTitle = mobj.group('episode
') 
3105                 mMovieParams = re.findall('<param name
="movie" value
="(http://media.mtvnservices.com/(.*?:episode:([^:]*):)(.*?))"/>', html) 
3106                 if len(mMovieParams) == 0: 
3107                         self._downloader.trouble(u'ERROR
: unable to find Flash URL 
in webpage 
' + url) 
3109                 show_id = mMovieParams[0][2] 
3110                 ACT_COUNT = { # TODO: Detect this dynamically 
3111                         'thedailyshow
.com
': 4, 
3112                         'colbertnation
.com
': 3, 
3115                         'thedailyshow
.com
': 1, 
3116                         'colbertnation
.com
': 1, 
3119                 first_player_url = mMovieParams[0][0] 
3120                 startMediaNum = int(mMovieParams[0][3]) + OFFSET 
3121                 movieId = mMovieParams[0][1] 
3123                 playerReq = urllib2.Request(first_player_url) 
3124                 self.report_player_url(epTitle) 
3126                         playerResponse = urllib2.urlopen(playerReq) 
3127                 except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3128                         self._downloader.trouble(u'ERROR
: unable to download player
: %s' % unicode(err)) 
3130                 player_url = playerResponse.geturl() 
3132                 for actNum in range(ACT_COUNT): 
3133                         mediaNum = startMediaNum + actNum 
3134                         mediaId = movieId + str(mediaNum) 
3135                         configUrl = ('http
://www
.comedycentral
.com
/global/feeds
/entertainment
/media
/mediaGenEntertainment
.jhtml?
' + 
3136                                                 urllib.urlencode({'uri
': mediaId})) 
3137                         configReq = urllib2.Request(configUrl) 
3138                         self.report_config_download(epTitle) 
3140                                 configXml = urllib2.urlopen(configReq).read() 
3141                         except (urllib2.URLError, httplib.HTTPException, socket.error), err: 
3142                                 self._downloader.trouble(u'ERROR
: unable to download webpage
: %s' % unicode(err)) 
3145                         cdoc = xml.etree.ElementTree.fromstring(configXml) 
3147                         for rendition in cdoc.findall('.//rendition
'): 
3148                                 finfo = (rendition.attrib['bitrate
'], rendition.findall('./src
')[0].text) 
3152                                 self._downloader.trouble(u'\nERROR
: unable to download 
' + str(mediaNum) + ': No videos found
') 
3155                         # For now, just pick the highest bitrate 
3156                         format,video_url = turls[-1] 
3158                         self._downloader.increment_downloads() 
3160                         effTitle = show_id.replace('.com
', '') + '-' + epTitle 
3162                                 'id': str(mediaNum), 
3164                                 'uploader
': show_id, 
3165                                 'upload_date
': 'NA
', 
3167                                 'stitle
': self._simplify_title(effTitle), 
3171                                 'description
': 'TODO
: Not yet supported
', 
3172                                 'player_url
': player_url 
3176                                 self._downloader.process_info(info) 
3177                         except UnavailableVideoError, err: 
3178                                 self._downloader.trouble(u'\nERROR
: unable to download 
' + str(mediaNum)) 
class PostProcessor(object):
	"""Post Processor class.

	PostProcessor objects can be added to downloaders with their
	add_post_processor() method. When the downloader has finished a
	successful download, it will take its internal chain of PostProcessors
	and start calling the run() method on each one of them, first with
	an initial argument and then with the returned value of the previous
	PostProcessor.

	The chain will be stopped if one of them ever returns None or the end
	of the chain is reached.

	PostProcessor objects follow a "mutual registration" process similar
	to InfoExtractor objects.
	"""

	_downloader = None

	def __init__(self, downloader=None):
		self._downloader = downloader

	def set_downloader(self, downloader):
		"""Sets the downloader for this PP."""
		self._downloader = downloader

	def run(self, information):
		"""Run the PostProcessor.

		The "information" argument is a dictionary like the ones
		composed by InfoExtractors. The only difference is that this
		one has an extra field called "filepath" that points to the
		downloaded file.

		When this method returns None, the postprocessing chain is
		stopped. However, this method may return an information
		dictionary that will be passed to the next postprocessing
		object in the chain. It can be the one it received after
		changing some fields.

		In addition, this method may raise a PostProcessingError
		exception that will be taken into account by the downloader
		it was called from.
		"""
		return information # by default, do nothing
class FFmpegExtractAudioPP(PostProcessor):
	"""Post-processor that converts a downloaded video into an
	audio-only file using the external ffmpeg/ffprobe binaries."""

	def __init__(self, downloader=None, preferredcodec=None):
		PostProcessor.__init__(self, downloader)
		if preferredcodec is None:
			preferredcodec = 'best'
		self._preferredcodec = preferredcodec

	@staticmethod
	def get_audio_codec(path):
		# Ask ffprobe for the stream list and pull the codec_name of the
		# audio stream; None signals "could not determine".
		try:
			cmd = ['ffprobe', '-show_streams', '--', path]
			handle = subprocess.Popen(cmd, stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
			output = handle.communicate()[0]
			if handle.wait() != 0:
				return None
		except (IOError, OSError):
			return None
		audio_codec = None
		for line in output.split('\n'):
			if line.startswith('codec_name='):
				audio_codec = line.split('=')[1].strip()
			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
				return audio_codec
		return None

	@staticmethod
	def run_ffmpeg(path, out_path, codec, more_opts):
		# Returns True on success, False on any failure to run ffmpeg.
		try:
			cmd = ['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + ['--', out_path]
			ret = subprocess.call(cmd, stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
			return (ret == 0)
		except (IOError, OSError):
			return False

	def run(self, information):
		path = information['filepath']

		filecodec = self.get_audio_codec(path)
		if filecodec is None:
			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
			return None

		more_opts = []
		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
			if filecodec == 'aac' or filecodec == 'mp3':
				# Lossless if possible
				acodec = 'copy'
				extension = filecodec
				if filecodec == 'aac':
					more_opts = ['-f', 'adts']
			else:
				# MP3 otherwise.
				acodec = 'libmp3lame'
				extension = 'mp3'
				more_opts = ['-ab', '128k']
		else:
			# We convert the audio (lossy)
			acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
			extension = self._preferredcodec
			more_opts = ['-ab', '128k']
			if self._preferredcodec == 'aac':
				more_opts += ['-f', 'adts']

		(prefix, ext) = os.path.splitext(path)
		new_path = prefix + '.' + extension
		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
		status = self.run_ffmpeg(path, new_path, acodec, more_opts)

		if not status:
			self._downloader.to_stderr(u'WARNING: error running ffmpeg')
			return None

		try:
			os.remove(path)
		except (IOError, OSError):
			self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
			return None

		information['filepath'] = new_path
		return information
3312 def updateSelf(downloader, filename): 
3313         ''' Update the program file with the latest version from the repository ''' 
3314         # Note: downloader only used for options 
3315         if not os.access(filename, os.W_OK): 
3316                 sys.exit('ERROR
: no write permissions on 
%s' % filename) 
3318         downloader.to_screen('Updating to latest version
...') 
3322                         urlh = urllib.urlopen(UPDATE_URL) 
3323                         newcontent = urlh.read() 
3326         except (IOError, OSError), err: 
3327                 sys.exit('ERROR
: unable to download latest version
') 
3330                 outf = open(filename, 'wb
') 
3332                         outf.write(newcontent) 
3335         except (IOError, OSError), err: 
3336                 sys.exit('ERROR
: unable to overwrite current version
') 
3338         downloader.to_screen('Updated youtube
-dl
. Restart to use the new version
.') 
def parseOpts():
	# Build the optparse parser for the command line.  Name and zero-arg
	# signature are fixed by the caller: parser, opts, args = parseOpts().
	def _format_option_string(option):
		''' ('-o', '--option') -> -o, --format METAVAR'''

		opts = []

		if option._short_opts: opts.append(option._short_opts[0])
		if option._long_opts: opts.append(option._long_opts[0])
		if len(opts) > 1: opts.insert(1, ', ')

		if option.takes_value(): opts.append(' %s' % option.metavar)

		return "".join(opts)

	def _find_term_columns():
		# Prefer the COLUMNS environment variable, then fall back to
		# asking the terminal itself via `stty size`.
		columns = os.environ.get('COLUMNS', None)
		if columns:
			return int(columns)

		try:
			sp = subprocess.Popen(['stty', 'size'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			out,err = sp.communicate()
			return int(out.split()[1])
		except:
			pass
		return None

	max_width = 80
	max_help_position = 80

	# No need to wrap help messages if we're on a wide console
	columns = _find_term_columns()
	if columns: max_width = columns

	fmt = optparse.IndentedHelpFormatter(width=max_width, max_help_position=max_help_position)
	fmt.format_option_strings = _format_option_string

	kw = {
		'version'   : __version__,
		'formatter' : fmt,
		'usage' : '%prog [options] url...',
		'conflict_handler' : 'resolve',
	}

	parser = optparse.OptionParser(**kw)

	# option groups
	general        = optparse.OptionGroup(parser, 'General Options')
	authentication = optparse.OptionGroup(parser, 'Authentication Options')
	video_format   = optparse.OptionGroup(parser, 'Video Format Options')
	postproc       = optparse.OptionGroup(parser, 'Post-processing Options')
	filesystem     = optparse.OptionGroup(parser, 'Filesystem Options')
	verbosity      = optparse.OptionGroup(parser, 'Verbosity / Simulation Options')

	general.add_option('-h', '--help',
			action='help', help='print this help text and exit')
	general.add_option('-v', '--version',
			action='version', help='print program version and exit')
	general.add_option('-U', '--update',
			action='store_true', dest='update_self', help='update this program to latest version')
	general.add_option('-i', '--ignore-errors',
			action='store_true', dest='ignoreerrors', help='continue on download errors', default=False)
	general.add_option('-r', '--rate-limit',
			dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)')
	general.add_option('-R', '--retries',
			dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10)
	general.add_option('--playlist-start',
			dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1)
	general.add_option('--playlist-end',
			dest='playlistend', metavar='NUMBER', help='playlist video to end at (default is last)', default=-1)
	general.add_option('--dump-user-agent',
			action='store_true', dest='dump_user_agent',
			help='display the current browser identification', default=False)

	authentication.add_option('-u', '--username',
			dest='username', metavar='USERNAME', help='account username')
	authentication.add_option('-p', '--password',
			dest='password', metavar='PASSWORD', help='account password')
	authentication.add_option('-n', '--netrc',
			action='store_true', dest='usenetrc', help='use .netrc authentication data', default=False)

	video_format.add_option('-f', '--format',
			action='store', dest='format', metavar='FORMAT', help='video format code')
	video_format.add_option('--all-formats',
			action='store_const', dest='format', help='download all available video formats', const='-1')
	video_format.add_option('--max-quality',
			action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download')

	verbosity.add_option('-q', '--quiet',
			action='store_true', dest='quiet', help='activates quiet mode', default=False)
	verbosity.add_option('-s', '--simulate',
			action='store_true', dest='simulate', help='do not download video', default=False)
	verbosity.add_option('-g', '--get-url',
			action='store_true', dest='geturl', help='simulate, quiet but print URL', default=False)
	verbosity.add_option('-e', '--get-title',
			action='store_true', dest='gettitle', help='simulate, quiet but print title', default=False)
	verbosity.add_option('--get-thumbnail',
			action='store_true', dest='getthumbnail',
			help='simulate, quiet but print thumbnail URL', default=False)
	verbosity.add_option('--get-description',
			action='store_true', dest='getdescription',
			help='simulate, quiet but print video description', default=False)
	verbosity.add_option('--get-filename',
			action='store_true', dest='getfilename',
			help='simulate, quiet but print output filename', default=False)
	verbosity.add_option('--no-progress',
			action='store_true', dest='noprogress', help='do not print progress bar', default=False)
	verbosity.add_option('--console-title',
			action='store_true', dest='consoletitle',
			help='display progress in console titlebar', default=False)

	filesystem.add_option('-t', '--title',
			action='store_true', dest='usetitle', help='use title in file name', default=False)
	filesystem.add_option('-l', '--literal',
			action='store_true', dest='useliteral', help='use literal title in file name', default=False)
	filesystem.add_option('-A', '--auto-number',
			action='store_true', dest='autonumber',
			help='number downloaded files starting from 00000', default=False)
	filesystem.add_option('-o', '--output',
			dest='outtmpl', metavar='TEMPLATE', help='output filename template')
	filesystem.add_option('-a', '--batch-file',
			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')
	filesystem.add_option('-w', '--no-overwrites',
			action='store_true', dest='nooverwrites', help='do not overwrite files', default=False)
	filesystem.add_option('-c', '--continue',
			action='store_true', dest='continue_dl', help='resume partially downloaded files', default=False)
	filesystem.add_option('--cookies',
			dest='cookiefile', metavar='FILE', help='file to dump cookie jar to')
	filesystem.add_option('--no-part',
			action='store_true', dest='nopart', help='do not use .part files', default=False)
	filesystem.add_option('--no-mtime',
			action='store_false', dest='updatetime',
			help='do not use the Last-modified header to set the file modification time', default=True)
	filesystem.add_option('--write-description',
			action='store_true', dest='writedescription',
			help='write video description to a .description file', default=False)
	filesystem.add_option('--write-info-json',
			action='store_true', dest='writeinfojson',
			help='write video metadata to a .info.json file', default=False)

	postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
			help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
	postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
			help='"best", "aac" or "mp3"; best by default')

	parser.add_option_group(general)
	parser.add_option_group(filesystem)
	parser.add_option_group(verbosity)
	parser.add_option_group(video_format)
	parser.add_option_group(authentication)
	parser.add_option_group(postproc)

	opts, args = parser.parse_args()

	return parser, opts, args
 
3506         parser
, opts
, args 
= parseOpts() 
3508         # Open appropriate CookieJar 
3509         if opts
.cookiefile 
is None: 
3510                 jar 
= cookielib
.CookieJar() 
3513                         jar 
= cookielib
.MozillaCookieJar(opts
.cookiefile
) 
3514                         if os
.path
.isfile(opts
.cookiefile
) and os
.access(opts
.cookiefile
, os
.R_OK
): 
3516                 except (IOError, OSError), err
: 
3517                         sys
.exit(u
'ERROR: unable to open cookie file') 
3520         if opts
.dump_user_agent
: 
3521                 print std_headers
['User-Agent'] 
3524         # General configuration 
3525         cookie_processor 
= urllib2
.HTTPCookieProcessor(jar
) 
3526         opener 
= urllib2
.build_opener(urllib2
.ProxyHandler(), cookie_processor
, YoutubeDLHandler()) 
3527         urllib2
.install_opener(opener
) 
3528         socket
.setdefaulttimeout(300) # 5 minutes should be enough (famous last words) 
3530         # Batch file verification 
3532         if opts
.batchfile 
is not None: 
3534                         if opts
.batchfile 
== '-': 
3537                                 batchfd 
= open(opts
.batchfile
, 'r') 
3538                         batchurls 
= batchfd
.readlines() 
3539                         batchurls 
= [x
.strip() for x 
in batchurls
] 
3540                         batchurls 
= [x 
for x 
in batchurls 
if len(x
) > 0 and not re
.search(r
'^[#/;]', x
)] 
3542                         sys
.exit(u
'ERROR: batch file could not be read') 
3543         all_urls 
= batchurls 
+ args
 
3545         # Conflicting, missing and erroneous options 
3546         if opts
.usenetrc 
and (opts
.username 
is not None or opts
.password 
is not None): 
3547                 parser
.error(u
'using .netrc conflicts with giving username/password') 
3548         if opts
.password 
is not None and opts
.username 
is None: 
3549                 parser
.error(u
'account username missing') 
3550         if opts
.outtmpl 
is not None and (opts
.useliteral 
or opts
.usetitle 
or opts
.autonumber
): 
3551                 parser
.error(u
'using output template conflicts with using title, literal title or auto number') 
3552         if opts
.usetitle 
and opts
.useliteral
: 
3553                 parser
.error(u
'using title conflicts with using literal title') 
3554         if opts
.username 
is not None and opts
.password 
is None: 
3555                 opts
.password 
= getpass
.getpass(u
'Type account password and press return:') 
3556         if opts
.ratelimit 
is not None: 
3557                 numeric_limit 
= FileDownloader
.parse_bytes(opts
.ratelimit
) 
3558                 if numeric_limit 
is None: 
3559                         parser
.error(u
'invalid rate limit specified') 
3560                 opts
.ratelimit 
= numeric_limit
 
3561         if opts
.retries 
is not None: 
3563                         opts
.retries 
= long(opts
.retries
) 
3564                 except (TypeError, ValueError), err
: 
3565                         parser
.error(u
'invalid retry count specified') 
3567                 opts
.playliststart 
= int(opts
.playliststart
) 
3568                 if opts
.playliststart 
<= 0: 
3569                         raise ValueError(u
'Playlist start must be positive') 
3570         except (TypeError, ValueError), err
: 
3571                 parser
.error(u
'invalid playlist start number specified') 
3573                 opts
.playlistend 
= int(opts
.playlistend
) 
3574                 if opts
.playlistend 
!= -1 and (opts
.playlistend 
<= 0 or opts
.playlistend 
< opts
.playliststart
): 
3575                         raise ValueError(u
'Playlist end must be greater than playlist start') 
3576         except (TypeError, ValueError), err
: 
3577                 parser
.error(u
'invalid playlist end number specified') 
3578         if opts
.extractaudio
: 
3579                 if opts
.audioformat 
not in ['best', 'aac', 'mp3']: 
3580                         parser
.error(u
'invalid audio format specified') 
3582         # Information extractors 
3583         youtube_ie 
= YoutubeIE() 
3584         metacafe_ie 
= MetacafeIE(youtube_ie
) 
3585         dailymotion_ie 
= DailymotionIE() 
3586         youtube_pl_ie 
= YoutubePlaylistIE(youtube_ie
) 
3587         youtube_user_ie 
= YoutubeUserIE(youtube_ie
) 
3588         youtube_search_ie 
= YoutubeSearchIE(youtube_ie
) 
3589         google_ie 
= GoogleIE() 
3590         google_search_ie 
= GoogleSearchIE(google_ie
) 
3591         photobucket_ie 
= PhotobucketIE() 
3592         yahoo_ie 
= YahooIE() 
3593         yahoo_search_ie 
= YahooSearchIE(yahoo_ie
) 
3594         deposit_files_ie 
= DepositFilesIE() 
3595         facebook_ie 
= FacebookIE() 
3596         bliptv_ie 
= BlipTVIE() 
3597         vimeo_ie 
= VimeoIE() 
3598         myvideo_ie 
= MyVideoIE() 
3599         comedycentral_ie 
= ComedyCentralIE() 
3601         generic_ie 
= GenericIE() 
3604         fd 
= FileDownloader({ 
3605                 'usenetrc': opts
.usenetrc
, 
3606                 'username': opts
.username
, 
3607                 'password': opts
.password
, 
3608                 'quiet': (opts
.quiet 
or opts
.geturl 
or opts
.gettitle 
or opts
.getthumbnail 
or opts
.getdescription 
or opts
.getfilename
), 
3609                 'forceurl': opts
.geturl
, 
3610                 'forcetitle': opts
.gettitle
, 
3611                 'forcethumbnail': opts
.getthumbnail
, 
3612                 'forcedescription': opts
.getdescription
, 
3613                 'forcefilename': opts
.getfilename
, 
3614                 'simulate': (opts
.simulate 
or opts
.geturl 
or opts
.gettitle 
or opts
.getthumbnail 
or opts
.getdescription 
or opts
.getfilename
), 
3615                 'format': opts
.format
, 
3616                 'format_limit': opts
.format_limit
, 
3617                 'outtmpl': ((opts
.outtmpl 
is not None and opts
.outtmpl
.decode(preferredencoding())) 
3618                         or (opts
.format 
== '-1' and opts
.usetitle 
and u
'%(stitle)s-%(id)s-%(format)s.%(ext)s') 
3619                         or (opts
.format 
== '-1' and opts
.useliteral 
and u
'%(title)s-%(id)s-%(format)s.%(ext)s') 
3620                         or (opts
.format 
== '-1' and u
'%(id)s-%(format)s.%(ext)s') 
3621                         or (opts
.usetitle 
and opts
.autonumber 
and u
'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') 
3622                         or (opts
.useliteral 
and opts
.autonumber 
and u
'%(autonumber)s-%(title)s-%(id)s.%(ext)s') 
3623                         or (opts
.usetitle 
and u
'%(stitle)s-%(id)s.%(ext)s') 
3624                         or (opts
.useliteral 
and u
'%(title)s-%(id)s.%(ext)s') 
3625                         or (opts
.autonumber 
and u
'%(autonumber)s-%(id)s.%(ext)s') 
3626                         or u
'%(id)s.%(ext)s'), 
3627                 'ignoreerrors': opts
.ignoreerrors
, 
3628                 'ratelimit': opts
.ratelimit
, 
3629                 'nooverwrites': opts
.nooverwrites
, 
3630                 'retries': opts
.retries
, 
3631                 'continuedl': opts
.continue_dl
, 
3632                 'noprogress': opts
.noprogress
, 
3633                 'playliststart': opts
.playliststart
, 
3634                 'playlistend': opts
.playlistend
, 
3635                 'logtostderr': opts
.outtmpl 
== '-', 
3636                 'consoletitle': opts
.consoletitle
, 
3637                 'nopart': opts
.nopart
, 
3638                 'updatetime': opts
.updatetime
, 
3639                 'writedescription': opts
.writedescription
, 
3640                 'writeinfojson': opts
.writeinfojson
, 
3642         fd
.add_info_extractor(youtube_search_ie
) 
3643         fd
.add_info_extractor(youtube_pl_ie
) 
3644         fd
.add_info_extractor(youtube_user_ie
) 
3645         fd
.add_info_extractor(metacafe_ie
) 
3646         fd
.add_info_extractor(dailymotion_ie
) 
3647         fd
.add_info_extractor(youtube_ie
) 
3648         fd
.add_info_extractor(google_ie
) 
3649         fd
.add_info_extractor(google_search_ie
) 
3650         fd
.add_info_extractor(photobucket_ie
) 
3651         fd
.add_info_extractor(yahoo_ie
) 
3652         fd
.add_info_extractor(yahoo_search_ie
) 
3653         fd
.add_info_extractor(deposit_files_ie
) 
3654         fd
.add_info_extractor(facebook_ie
) 
3655         fd
.add_info_extractor(bliptv_ie
) 
3656         fd
.add_info_extractor(vimeo_ie
) 
3657         fd
.add_info_extractor(myvideo_ie
) 
3658         fd
.add_info_extractor(comedycentral_ie
) 
3660         # This must come last since it's the 
3661         # fallback if none of the others work 
3662         fd
.add_info_extractor(generic_ie
) 
3665         if opts
.extractaudio
: 
3666                 fd
.add_post_processor(FFmpegExtractAudioPP(preferredcodec
=opts
.audioformat
)) 
3669         if opts
.update_self
: 
3670                 updateSelf(fd
, sys
.argv
[0]) 
3673         if len(all_urls
) < 1: 
3674                 if not opts
.update_self
: 
3675                         parser
.error(u
'you must provide at least one URL') 
3678         retcode 
= fd
.download(all_urls
) 
3680         # Dump cookie jar if requested 
3681         if opts
.cookiefile 
is not None: 
3684                 except (IOError, OSError), err
: 
3685                         sys
.exit(u
'ERROR: unable to save cookie jar') 
# Script entry point.  Translate the uncaught exceptions that signal
# expected failure modes into clean process exit codes/messages instead
# of a traceback.
if __name__ == '__main__':
	try:  # reconstructed: the try/main() lines were lost in the dump
		main()
	except DownloadError:
		# The downloader already reported the error; just exit non-zero.
		sys.exit(1)  # reconstructed: handler body lost in the dump
	except SameFileError:
		sys.exit(u'ERROR: fixed output name but more than one file to download')
	except KeyboardInterrupt:
		sys.exit(u'\nERROR: Interrupted by user')
3700 # vim: set ts=4 sw=4 sts=4 noet ai si filetype=python: