# Copyright (c) 2006-2008 Ricardo Garcia Gonzalez
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name(s) of the above copyright
# holders shall not be used in advertising or otherwise to promote the
# sale, use or other dealings in this Software without prior written
# authorization.
  43 const_initial_block_size 
= 10 * const_1k
 
  44 const_epsilon 
= 0.0001 
  47 const_video_url_str 
= 'http://www.youtube.com/watch?v=%s' 
  48 const_video_url_re 
= re
.compile(r
'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$') 
  49 const_video_url_best_quality_suffix
='&fmt=18' 
  50 const_login_url_str 
= 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s' 
  51 const_login_post_str 
= 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In' 
  52 const_age_url_str 
= 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s' 
  53 const_age_post_str 
= 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm' 
  54 const_url_t_param_re 
= re
.compile(r
', "t": "([^"]+)"') 
  55 const_video_url_real_str 
= 'http://www.youtube.com/get_video?video_id=%s&t=%s' 
  56 const_video_title_re 
= re
.compile(r
'<title>YouTube - ([^<]*)</title>', re
.M | re
.I
) 
  58 # Print error message, followed by standard advice information, and then exit 
  59 def error_advice_exit(error_text
): 
  60         sys
.stderr
.write('Error: %s.\n' % error_text
) 
  61         sys
.stderr
.write('Try again several times. It may be a temporary problem.\n') 
  62         sys
.stderr
.write('Other typical problems:\n\n') 
  63         sys
.stderr
.write('* Video no longer exists.\n') 
  64         sys
.stderr
.write('* Video requires age confirmation but you did not provide an account.\n') 
  65         sys
.stderr
.write('* You provided the account data, but it is not valid.\n') 
  66         sys
.stderr
.write('* The connection was cut suddenly for some reason.\n') 
  67         sys
.stderr
.write('* YouTube changed their system, and the program no longer works.\n') 
  68         sys
.stderr
.write('\nTry to confirm you are able to view the video using a web browser.\n') 
  69         sys
.stderr
.write('Use the same video URL and account information, if needed, with this program.\n') 
  70         sys
.stderr
.write('When using a proxy, make sure http_proxy has http://host:port format.\n') 
  71         sys
.stderr
.write('Try again several times and contact me if the problem persists.\n') 
  74 # Wrapper to create custom requests with typical headers 
  75 def request_create(url
, extra_headers
, post_data
): 
  76         retval 
= urllib2
.Request(url
) 
  77         if post_data 
is not None: 
  78                 retval
.add_data(post_data
) 
  79         # Try to mimic Firefox, at least a little bit 
  80         retval
.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11') 
  81         retval
.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7') 
  82         retval
.add_header('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5') 
  83         retval
.add_header('Accept-Language', 'en-us,en;q=0.5') 
  84         if extra_headers 
is not None: 
  85                 for header 
in extra_headers
: 
  86                         retval
.add_header(header
[0], header
[1]) 
  89 # Perform a request, process headers and return response 
  90 def perform_request(url
, headers
=None, data
=None): 
  91         request 
= request_create(url
, headers
, data
) 
  92         response 
= urllib2
.urlopen(request
) 
# NOTE(review): orphaned fragment. This condition presumably belongs to the
# cond_print() helper that the rest of the file calls, but neither its 'def'
# line nor the statement guarded by this 'if' is visible here -- confirm
# against the complete file before relying on it.
# What is visible: the guarded action is skipped when either the 'quiet' or
# the 'get_url' command line option is set.
  98         if not (cmdl_opts
.quiet 
or cmdl_opts
.get_url
): 
 102 # Title string normalization 
 103 def title_string_norm(title
): 
 104         title 
= ''.join((x 
in string
.ascii_letters 
or x 
in string
.digits
) and x 
or ' ' for x 
in title
) 
 105         title 
= '_'.join(title
.split()) 
 106         title 
= title
.lower() 
 109 # Generic download step 
# Announces step_title through cond_print(), fetches url with
# perform_request() (post_data is passed as the request data) and reads the
# whole response body into 'data', then prints 'done.'.
# If one of the listed network/parsing exceptions is raised, 'failed.' is
# printed and error_advice_exit(step_error) is called.
# NOTE(review): the 'try:' opener, the statements between the 'done.' message
# and the first 'except' (presumably where return_data_flag decides whether
# 'data' is returned -- confirm), and the body of the KeyboardInterrupt
# handler are not visible in this view.
 110 def download_step(return_data_flag
, step_title
, step_error
, url
, post_data
=None): 
 112                 cond_print('%s... ' % step_title
) 
 113                 data 
= perform_request(url
, data
=post_data
).read() 
 114                 cond_print('done.\n') 
 119         except (urllib2
.URLError
, ValueError, httplib
.HTTPException
, TypeError, socket
.error
): 
 120                 cond_print('failed.\n') 
 121                 error_advice_exit(step_error
) 
 123         except KeyboardInterrupt: 
 126 # Generic extract step 
# Announces step_title through cond_print(), searches 'data' with the
# compiled pattern 'regexp' and returns the text of its first capture group
# after printing 'done.'.
# NOTE(review): the 'try:' opener, the condition guarding the 'failed.' /
# error_advice_exit(step_error) pair (presumably a check that the search
# found nothing -- confirm), and the body of the KeyboardInterrupt handler
# are not visible in this view.
 127 def extract_step(step_title
, step_error
, regexp
, data
): 
 129                 cond_print('%s... ' % step_title
) 
 130                 match 
= regexp
.search(data
) 
 133                         cond_print('failed.\n') 
 134                         error_advice_exit(step_error
) 
 136                 extracted_data 
= match
.group(1) 
 137                 cond_print('done.\n') 
 138                 return extracted_data
 
 140         except KeyboardInterrupt: 
 143 # Calculate new block size based on previous block size 
# Visible part: derives bounds of half and double the 'bytes' argument, each
# floored at 1.0, and then tests 'dif' against const_epsilon.
# NOTE(review): the assignment of 'dif' (presumably after - before -- confirm)
# and everything following the 'if' line, including the return value, are
# not visible in this view.
 144 def new_block_size(before
, after
, bytes): 
 145         new_min 
= max(bytes / 2.0, 1.0) 
 146         new_max 
= max(bytes * 2.0, 1.0) 
 148         if dif 
< const_epsilon
: 
 157 # Get optimum 1k exponent to represent a number of bytes 
# Returns the base-const_1k logarithm of num_bytes truncated to a long.
# NOTE(review): the listing skips lines between the 'def' and the 'return';
# math.log() rejects 0, so a guard for that case may live there -- confirm.
 158 def optimum_k_exp(num_bytes
): 
 162         return long(math
.log(num_bytes
, const_1k
)) 
 164 # Get optimum representation of number of bytes 
# Picks a unit suffix from 'bkMGTPEZY' using the exponent returned by
# optimum_k_exp() and returns either the raw number plus suffix or the number
# divided by const_1k**exp, formatted with two decimals, plus suffix.
# NOTE(review): the 'try:' opener, the condition selecting the first 'return'
# (presumably exp == 0 -- confirm) and the 'except' clause that leads to the
# sys.exit() call are not visible in this view.
 165 def format_bytes(num_bytes
): 
 168                 exp 
= optimum_k_exp(num_bytes
) 
 169                 suffix 
= 'bkMGTPEZY'[exp
] 
 171                         return '%s%s' % (num_bytes
, suffix
) 
 172                 converted 
= float(num_bytes
) / float(const_1k
**exp
) 
 173                 return '%.2f%s' % (converted
, suffix
) 
 175                 sys
.exit('Error: internal error formatting number of bytes.') 
 177 # Calculate ETA and return it in string format as MM:SS 
# Computes the transfer rate as current / dif, estimates the remaining time
# as (total - current) / rate truncated to whole seconds, and formats it as
# zero-padded minutes and seconds.
# NOTE(review): the assignment of 'dif' (presumably now - start -- confirm),
# the statement executed when 'current' is 0 or 'dif' is below const_epsilon,
# and the lines between divmod() and the final 'return' are not visible in
# this view.
 178 def calc_eta(start
, now
, total
, current
): 
 180         if current 
== 0 or dif 
< const_epsilon
: 
 182         rate 
= float(current
) / dif
 
 183         eta 
= long((total 
- current
) / rate
) 
 184         (eta_mins
, eta_secs
) = divmod(eta
, 60) 
 187         return '%02d:%02d' % (eta_mins
, eta_secs
) 
 189 # Calculate speed and return it in string format 
# Returns format_bytes() applied to bytes / dif.
# NOTE(review): the assignment of 'dif' (presumably now - start -- confirm)
# and the statement executed when 'bytes' is 0 or 'dif' is below
# const_epsilon are not visible in this view.
 190 def calc_speed(start
, now
, bytes): 
 192         if bytes == 0 or dif 
< const_epsilon
: 
 194         return format_bytes(float(bytes) / dif
) 
 197 # Title string minimal transformation 
 198 def title_string_touch(title
): 
 199         return title
.replace(os
.sep
, '%') 
 201 # Create the command line options parser and parse command line 
 202 cmdl_usage 
= 'usage: %prog [options] video_url' 
 203 cmdl_version 
= '2008.03.08' 
 204 cmdl_parser 
= optparse
.OptionParser(usage
=cmdl_usage
, version
=cmdl_version
, conflict_handler
='resolve') 
 205 cmdl_parser
.add_option('-h', '--help', action
='help', help='print this help text and exit') 
 206 cmdl_parser
.add_option('-v', '--version', action
='version', help='print program version and exit') 
 207 cmdl_parser
.add_option('-u', '--username', dest
='username', metavar
='USERNAME', help='account username') 
 208 cmdl_parser
.add_option('-p', '--password', dest
='password', metavar
='PASSWORD', help='account password') 
 209 cmdl_parser
.add_option('-o', '--output', dest
='outfile', metavar
='FILE', help='output video file name') 
 210 cmdl_parser
.add_option('-q', '--quiet', action
='store_true', dest
='quiet', help='activates quiet mode') 
 211 cmdl_parser
.add_option('-s', '--simulate', action
='store_true', dest
='simulate', help='do not download video') 
 212 cmdl_parser
.add_option('-t', '--title', action
='store_true', dest
='use_title', help='use title in file name') 
 213 cmdl_parser
.add_option('-l', '--literal', action
='store_true', dest
='use_literal', help='use literal title in file name') 
 214 cmdl_parser
.add_option('-n', '--netrc', action
='store_true', dest
='use_netrc', help='use .netrc authentication data') 
 215 cmdl_parser
.add_option('-g', '--get-url', action
='store_true', dest
='get_url', help='print final video URL only') 
 216 cmdl_parser
.add_option('-2', '--title-too', action
='store_true', dest
='get_title', help='used with -g, print title too') 
 217 cmdl_parser
.add_option('-b', '--best-quality', action
='store_true', dest
='best_quality', help='try to download the best quality version') 
 218 (cmdl_opts
, cmdl_args
) = cmdl_parser
.parse_args() 
 221 socket
.setdefaulttimeout(const_timeout
) 
 224 if len(cmdl_args
) != 1: 
 225         cmdl_parser
.print_help() 
 227 video_url_cmdl 
= cmdl_args
[0] 
 229 # Verify video URL format and convert to "standard" format 
 230 video_url_mo 
= const_video_url_re
.match(video_url_cmdl
) 
 231 if video_url_mo 
is None: 
 232         sys
.exit('Error: URL does not seem to be a youtube video URL. If it is, report a bug.') 
 233 video_url_id 
= video_url_mo
.group(2) 
 234 video_url 
= const_video_url_str 
% video_url_id
 
 235 if cmdl_opts
.best_quality
: 
 236         video_url 
= '%s%s' % (video_url
, const_video_url_best_quality_suffix
) 
 237         video_extension 
= '.mp4' 
 239         video_extension 
= '.flv' 
 241 # Check conflicting options 
 242 if cmdl_opts
.outfile 
is not None and (cmdl_opts
.simulate 
or cmdl_opts
.get_url
): 
 243         sys
.stderr
.write('Warning: video file name given but will not be used.\n') 
 245 if cmdl_opts
.outfile 
is not None and (cmdl_opts
.use_title 
or cmdl_opts
.use_literal
): 
 246         sys
.exit('Error: using the video title conflicts with using a given file name.') 
 248 if cmdl_opts
.use_title 
and cmdl_opts
.use_literal
: 
 249         sys
.exit('Error: cannot use title and literal title at the same time.') 
 251 if cmdl_opts
.quiet 
and cmdl_opts
.get_url
: 
 252         sys
.exit('Error: cannot be quiet and print final URL at the same time.') 
 254 # Incorrect option formatting 
 255 if cmdl_opts
.username 
is None and cmdl_opts
.password 
is not None: 
 256         sys
.exit('Error: password give but username is missing.') 
 258 if cmdl_opts
.use_netrc 
and (cmdl_opts
.username 
is not None or cmdl_opts
.password 
is not None): 
 259         sys
.exit('Error: cannot use netrc and username/password at the same time.') 
 261 if cmdl_opts
.get_url 
is None and cmdl_opts
.get_title 
is not None: 
 262         sys
.exit('Error: getting title requires getting URL.') 
 264 # Get account information if any 
# Both values start as None. With the netrc option they are taken from
# fields 0 and 2 of the 'youtube' entry returned by
# netrc.netrc().authenticators(); otherwise the username comes from the
# command line and the password is either the one given on the command line
# or typed interactively through getpass.
# NOTE(review): the 'try:' opener, the condition guarding the
# 'no authenticators' exit (presumably info being None -- confirm), the
# 'except' clause in front of the 'unable to read .netrc file' exit, and the
# 'else:' lines that separate the netrc branch from the command line branch
# and the two password assignments are not visible in this view.
 265 account_username 
= None 
 266 account_password 
= None 
 268 if cmdl_opts
.use_netrc
: 
 270                 info 
= netrc
.netrc().authenticators('youtube') 
 272                         sys
.exit('Error: no authenticators for machine youtube.') 
 273                 account_username 
= info
[0] 
 274                 account_password 
= info
[2] 
 276                 sys
.exit('Error: unable to read .netrc file.') 
 277         except netrc
.NetrcParseError
: 
 278                 sys
.exit('Error: unable to parse .netrc file.') 
 280         account_username 
= cmdl_opts
.username
 
 281         if account_username 
is not None: 
 282                 if cmdl_opts
.password 
is None: 
 283                         account_password 
= getpass
.getpass('Type YouTube password and press return: ') 
 285                         account_password 
= cmdl_opts
.password
 
 287 # Get output file name  
 288 if cmdl_opts
.outfile 
is None: 
 289         video_filename 
= '%s%s' % (video_url_id
, video_extension
) 
 291         video_filename 
= cmdl_opts
.outfile
 
 293 # Install cookie and proxy handlers 
 294 urllib2
.install_opener(urllib2
.build_opener(urllib2
.ProxyHandler())) 
 295 urllib2
.install_opener(urllib2
.build_opener(urllib2
.HTTPCookieProcessor())) 
 297 # Log in and confirm age if needed 
 298 if account_username 
is not None: 
 299         url 
= const_login_url_str 
% video_url_id
 
 300         post 
= const_login_post_str 
% (video_url_id
, account_username
, account_password
) 
 301         download_step(False, 'Logging in', 'unable to log in', url
, post
) 
 303         url 
= const_age_url_str 
% video_url_id
 
 304         post 
= const_age_post_str 
% video_url_id
 
 305         download_step(False, 'Confirming age', 'unable to confirm age', url
, post
) 
 307 # Retrieve video webpage 
 308 video_webpage 
= download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url
) 
 310 # Extract video title if needed 
 311 if cmdl_opts
.use_title 
or cmdl_opts
.use_literal 
or cmdl_opts
.get_title
: 
 312         video_title 
= extract_step('Extracting video title', 'unable to extract video title', const_video_title_re
, video_webpage
) 
 314 # Extract needed video URL parameters 
 315 video_url_t_param 
= extract_step('Extracting URL "t" parameter', 'unable to extract URL "t" parameter', const_url_t_param_re
, video_webpage
) 
 316 video_url_real 
= const_video_url_real_str 
% (video_url_id
, video_url_t_param
) 
 317 if cmdl_opts
.best_quality
: 
 318         video_url_real 
= '%s%s' % (video_url_real
, const_video_url_best_quality_suffix
) 
 320 # Rebuild filename if needed 
 321 if cmdl_opts
.use_title 
or cmdl_opts
.use_literal
: 
 322         if cmdl_opts
.use_title
: 
 323                 prefix 
= title_string_norm(video_title
) 
 325                 prefix 
= title_string_touch(video_title
) 
 326         video_filename 
= '%s-%s%s' % (prefix
, video_url_id
, video_extension
) 
 329 if not video_filename
.lower().endswith(video_extension
): 
 330         sys
.stderr
.write('Warning: video file name does not end in %s\n' % video_extension
) 
 332 # Retrieve video data 
 334         cond_print('Requesting video file... ') 
 335         video_data 
= perform_request(video_url_real
) 
 336         cond_print('done.\n') 
 337         cond_print('Video data found at %s\n' % video_data
.geturl()) 
 339         if cmdl_opts
.get_title
: 
 342         if cmdl_opts
.get_url
: 
 343                 print video_data
.geturl() 
 345         if cmdl_opts
.simulate 
or cmdl_opts
.get_url
: 
 349                 video_file 
= open(video_filename
, 'wb') 
 350         except (IOError, OSError): 
 351                 sys
.exit('Error: unable to open "%s" for writing.' % video_filename
) 
 353                 video_len 
= long(video_data
.info()['Content-length']) 
 354                 video_len_str 
= format_bytes(video_len
) 
 357                 video_len_str 
= 'N/A' 
 360         block_size 
= const_initial_block_size
 
 361         start_time 
= time
.time() 
 363                 if video_len 
is not None: 
 364                         percent 
= float(byte_counter
) / float(video_len
) * 100.0 
 365                         percent_str 
= '%.1f' % percent
 
 366                         eta_str 
= calc_eta(start_time
, time
.time(), video_len
, byte_counter
) 
 368                         percent_str 
= '---.-' 
 370                 counter 
= format_bytes(byte_counter
) 
 371                 speed_str 
= calc_speed(start_time
, time
.time(), byte_counter
) 
 372                 cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str
, counter
, video_len_str
, speed_str
, eta_str
)) 
 375                 video_block 
= video_data
.read(block_size
) 
 377                 dl_bytes 
= len(video_block
) 
 380                 byte_counter 
+= dl_bytes
 
 381                 video_file
.write(video_block
) 
 382                 block_size 
= new_block_size(before
, after
, dl_bytes
) 
 384         if video_len 
is not None and byte_counter 
!= video_len
: 
 385                 error_advice_exit('server did not send the expected ammount of data') 
 388         cond_print('done.\n') 
 389         cond_print('Video data saved to %s\n' % video_filename
) 
 391 except (urllib2
.URLError
, ValueError, httplib
.HTTPException
, TypeError, socket
.error
): 
 392         cond_print('failed.\n') 
 393         error_advice_exit('unable to download video data') 
 395 except KeyboardInterrupt: