3 # Copyright (c) 2006-2007 Ricardo Garcia Gonzalez 
   5 # Permission is hereby granted, free of charge, to any person obtaining a 
   6 # copy of this software and associated documentation files (the "Software"), 
   7 # to deal in the Software without restriction, including without limitation 
   8 # the rights to use, copy, modify, merge, publish, distribute, sublicense, 
   9 # and/or sell copies of the Software, and to permit persons to whom the 
  10 # Software is furnished to do so, subject to the following conditions: 
  12 # The above copyright notice and this permission notice shall be included 
  13 # in all copies or substantial portions of the Software. 
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
  18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
  19 # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
  20 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
  21 # OTHER DEALINGS IN THE SOFTWARE. 
  23 # Except as contained in this notice, the name(s) of the above copyright 
  24 # holders shall not be used in advertising or otherwise to promote the 
  25 # sale, use or other dealings in this Software without prior written 
  42 const_video_url_str 
= 'http://www.youtube.com/watch?v=%s' 
  43 const_video_url_re 
= re
.compile(r
'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$') 
  44 const_login_url_str 
= 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s' 
  45 const_login_post_str 
= 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In' 
  46 const_age_url_str 
= 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s' 
  47 const_age_post_str 
= 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm' 
  48 const_video_url_params_re 
= re
.compile(r
'player2\.swf\?([^"]+)"', re
.M
) 
  49 const_video_url_real_str 
= 'http://www.youtube.com/get_video?%s' 
  50 const_video_title_re 
= re
.compile(r
'<title>YouTube - ([^<]*)</title>', re
.M | re
.I
) 
  52 const_initial_block_size 
= 10 * const_1k
 
  # Writes 'Error: <error_text>.' followed by a fixed list of troubleshooting
  # hints to standard error.
  #
  # error_text: message interpolated into the first line; the final period is
  #             supplied by the format string.
  #
  # NOTE(review): the header comment says this function exits, but no exit call
  # is visible in this listing. The embedded numbering jumps from 67 to 70, so
  # the terminating statement was presumably lost in extraction -- confirm
  # against the original file before relying on this block.
  54 # Print error message, followed by standard advice information, and then exit 
  55 def error_advice_exit(error_text
): 
  56         sys
.stderr
.write('Error: %s.\n' % error_text
) 
  57         sys
.stderr
.write('Try again several times. It may be a temporary problem.\n') 
  58         sys
.stderr
.write('Other typical problems:\n\n') 
  59         sys
.stderr
.write('* Video no longer exists.\n') 
  60         sys
.stderr
.write('* Video requires age confirmation but you did not provide an account.\n') 
  61         sys
.stderr
.write('* You provided the account data, but it is not valid.\n') 
  62         sys
.stderr
.write('* The connection was cut suddenly for some reason.\n') 
  63         sys
.stderr
.write('* YouTube changed their system, and the program no longer works.\n') 
  64         sys
.stderr
.write('\nTry to confirm you are able to view the video using a web browser.\n') 
  65         sys
.stderr
.write('Use the same video URL and account information, if needed, with this program.\n') 
  66         sys
.stderr
.write('When using a proxy, make sure http_proxy has http://host:port format.\n') 
  67         sys
.stderr
.write('Try again several times and contact me if the problem persists.\n') 
  # Builds a urllib2.Request for `url` and adds browser-like User-Agent,
  # Accept-Charset, Accept and Accept-Language headers to it.
  #
  # url:  address passed to urllib2.Request.
  # data: optional payload (defaults to None); its handling is not visible in
  #       this listing.
  #
  # NOTE(review): the embedded numbering skips 73-74 and stops at 79, and no
  # return statement is visible even though perform_request() uses this
  # function's result -- lines were presumably lost in extraction; confirm
  # against the original file.
  70 # Wrapper to create custom requests with typical headers 
  71 def request_create(url
, data
=None): 
  72         retval 
= urllib2
.Request(url
) 
  75         # Try to mimic Firefox, at least a little bit 
  76         retval
.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0') 
  77         retval
.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7') 
  78         retval
.add_header('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5') 
  79         retval
.add_header('Accept-Language', 'en-us,en;q=0.5') 
  # Creates a request with request_create() and opens it with urllib2.urlopen().
  #
  # url, data: forwarded unchanged to request_create().
  #
  # NOTE(review): the embedded numbering skips 86-90, and nothing follows the
  # `if` on listing line 91 (which tests that neither the quiet nor the get_url
  # option is set). The header processing and the return promised by the header
  # comment are therefore not visible -- presumably lost in extraction; confirm
  # against the original file.
  82 # Perform a request, process headers and return response 
  83 def perform_request(url
, data
=None): 
  84         request 
= request_create(url
, data
) 
  85         response 
= urllib2
.urlopen(request
) 
  91         if not (cmdl_opts
.quiet 
or cmdl_opts
.get_url
): 
  # Replaces every character of `title` that is not an ASCII letter or digit
  # with a space, then joins the whitespace-separated pieces with underscores.
  #
  # The `cond and x or ' '` form is safe here because x is always a
  # one-character (hence truthy) string.
  #
  # NOTE(review): the embedded numbering stops at 98 and resumes at 102, and no
  # return statement is visible -- the end of the function was presumably lost
  # in extraction; confirm against the original file.
  95 # Title string normalization 
  96 def title_string_norm(title
): 
  97         title 
= ''.join((x 
in string
.ascii_letters 
or x 
in string
.digits
) and x 
or ' ' for x 
in title
) 
  98         title 
= '_'.join(title
.split()) 
 102 # Title string minimal transformation 
 103 def title_string_touch(title
): 
 104         return title
.replace(os
.sep
, '%') 
 # Prints '<step_title>... ', fetches `url` through perform_request() and reads
 # the whole response body, then prints 'done.'. When one of the listed
 # network/protocol exceptions is raised it prints 'failed.' and calls
 # error_advice_exit(step_error).
 #
 # return_data_flag: not used in the visible lines -- presumably selects
 #                   whether the downloaded data is returned; TODO confirm.
 # step_title:       text shown while the step runs.
 # step_error:       message handed to error_advice_exit() on failure.
 # url, post_data:   forwarded to perform_request().
 #
 # NOTE(review): listing lines 108, 112-115, 119 and 121 are absent. The
 # opening of the try block, whatever follows 'done.' (including any return)
 # and the KeyboardInterrupt handler body are not visible -- confirm against
 # the original file.
 106 # Generic download step 
 107 def download_step(return_data_flag
, step_title
, step_error
, url
, post_data
=None): 
 109                 cond_print('%s... ' % step_title
) 
 110                 data 
= perform_request(url
, post_data
).read() 
 111                 cond_print('done.\n') 
 116         except (urllib2
.URLError
, ValueError, httplib
.HTTPException
, TypeError, socket
.error
): 
 117                 cond_print('failed.\n') 
 118                 error_advice_exit(step_error
) 
 120         except KeyboardInterrupt: 
 # Prints '<step_title>... ', searches `data` with the compiled pattern
 # `regexp` and returns the text of its first capture group after printing
 # 'done.'. The failure path prints 'failed.' and calls
 # error_advice_exit(step_error).
 #
 # step_title: text shown while the step runs.
 # step_error: message handed to error_advice_exit() on failure.
 # regexp:     object with a search() method returning a match object.
 # data:       text to search.
 #
 # NOTE(review): listing lines 125, 128-129, 132, 136 and 138 are absent. The
 # opening of the try block, the condition guarding the failure path
 # (presumably a test that `match` is None) and the KeyboardInterrupt handler
 # body are not visible -- confirm against the original file.
 123 # Generic extract step 
 124 def extract_step(step_title
, step_error
, regexp
, data
): 
 126                 cond_print('%s... ' % step_title
) 
 127                 match 
= regexp
.search(data
) 
 130                         cond_print('failed.\n') 
 131                         error_advice_exit(step_error
) 
 133                 extracted_data 
= match
.group(1) 
 134                 cond_print('done.\n') 
 135                 return extracted_data
 
 137         except KeyboardInterrupt: 
 # Visible part: computes the lower and upper bound for the next block size as
 # half and double the byte count of the last block, neither below 1.0.
 #
 # before, after: not used in the visible lines -- presumably timestamps taken
 #                around the last read; TODO confirm against the caller.
 # bytes:         number of bytes received in the last block.
 #
 # NOTE(review): listing lines 144-152 are absent, so the way the bounds are
 # applied and the return statement are not visible -- confirm against the
 # original file.
 140 # Calculate new block size based on previous block size 
 141 def new_block_size(before
, after
, bytes): 
 142         new_min 
= max(bytes / 2.0, 1.0) 
 143         new_max 
= max(bytes * 2.0, 1.0) 
 # Returns the logarithm of `num_bytes` in base const_1k, truncated to a long.
 #
 # NOTE(review): listing lines 156-158 are absent. math.log() raises for a
 # zero argument, so the missing lines presumably guard that case -- confirm
 # against the original file.
 154 # Get optimum 1k exponent to represent a number of bytes 
 155 def optimum_k_exp(num_bytes
): 
 159         return long(math
.log(num_bytes
, const_1k
)) 
 # Formats a byte count with a unit suffix taken from 'bkMGTPEZY', indexed by
 # the exponent from optimum_k_exp(). One path returns the number unscaled as
 # '<num_bytes><suffix>'; the other divides by const_1k ** exp and returns the
 # result with two decimals. A further path exits with an internal-error
 # message.
 #
 # NOTE(review): listing lines 163-164, 167 and 171 are absent. The conditions
 # selecting between these paths (presumably a try header, a test on `exp` and
 # an except clause) are not visible -- confirm against the original file.
 161 # Get optimum representation of number of bytes 
 162 def format_bytes(num_bytes
): 
 165                 exp 
= optimum_k_exp(num_bytes
) 
 166                 suffix 
= 'bkMGTPEZY'[exp
] 
 168                         return '%s%s' % (num_bytes
, suffix
) 
 169                 converted 
= float(num_bytes
) / float(const_1k
**exp
) 
 170                 return '%.2f%s' % (converted
, suffix
) 
 172                 sys
.exit('Error: internal error formatting number of bytes.') 
 # Estimates the remaining time: the average rate is `current` divided by the
 # elapsed time (now - start), and the estimate is the remaining amount
 # (total - current) divided by that rate, truncated to a long. The result is
 # returned as a zero-padded 'MM:SS' string.
 #
 # NOTE(review): listing lines 176-177 and 180-183 are absent. The assignments
 # of eta_mins and eta_secs, and any guard against a zero elapsed time or a
 # zero rate, are not visible -- confirm against the original file.
 174 # Calculate ETA and return it in string format as MM:SS 
 175 def calc_eta(start
, now
, total
, current
): 
 178         rate 
= float(current
) / (now 
- start
) 
 179         eta 
= long((total 
- current
) / rate
) 
 184         return '%02d:%02d' % (eta_mins
, eta_secs
) 
 # Returns the average transfer rate, `bytes` divided by the elapsed time
 # (now - start), formatted by format_bytes().
 #
 # NOTE(review): listing lines 188-189 are absent. They presumably guard the
 # case where no time has elapsed, which would otherwise divide by zero --
 # confirm against the original file.
 186 # Calculate speed and return it in string format 
 187 def calc_speed(start
, now
, bytes): 
 190         return format_bytes(float(bytes) / (now 
- start
)) 
 192 # Create the command line options parser and parse command line 
 193 cmdl_usage 
= 'usage: %prog [options] video_url' 
 194 cmdl_version 
= '2007.06.22' 
 195 cmdl_parser 
= optparse
.OptionParser(usage
=cmdl_usage
, version
=cmdl_version
, conflict_handler
='resolve') 
 196 cmdl_parser
.add_option('-h', '--help', action
='help', help='print this help text and exit') 
 197 cmdl_parser
.add_option('-v', '--version', action
='version', help='print program version and exit') 
 198 cmdl_parser
.add_option('-u', '--username', dest
='username', metavar
='USERNAME', help='account username') 
 199 cmdl_parser
.add_option('-p', '--password', dest
='password', metavar
='PASSWORD', help='account password') 
 200 cmdl_parser
.add_option('-o', '--output', dest
='outfile', metavar
='FILE', help='output video file name') 
 201 cmdl_parser
.add_option('-q', '--quiet', action
='store_true', dest
='quiet', help='activates quiet mode') 
 202 cmdl_parser
.add_option('-s', '--simulate', action
='store_true', dest
='simulate', help='do not download video') 
 203 cmdl_parser
.add_option('-t', '--title', action
='store_true', dest
='use_title', help='use title in file name') 
 204 cmdl_parser
.add_option('-l', '--literal', action
='store_true', dest
='use_literal', help='use literal title in file name') 
 205 cmdl_parser
.add_option('-n', '--netrc', action
='store_true', dest
='use_netrc', help='use .netrc authentication data') 
 206 cmdl_parser
.add_option('-g', '--get-url', action
='store_true', dest
='get_url', help='print final video URL only') 
 207 cmdl_parser
.add_option('-2', '--title-too', action
='store_true', dest
='get_title', help='used with -g, print title too') 
 208 (cmdl_opts
, cmdl_args
) = cmdl_parser
.parse_args() 
 # Prints the help text when the number of positional arguments is not exactly
 # one, then takes the first positional argument as the video URL.
 #
 # NOTE(review): listing line 213 is absent. As shown, execution would continue
 # to cmdl_args[0] after printing the help, so the missing line presumably
 # terminates the program -- confirm against the original file.
 211 if len(cmdl_args
) != 1: 
 212         cmdl_parser
.print_help() 
 214 video_url_cmdl 
= cmdl_args
[0] 
 216 # Verify video URL format and convert to "standard" format 
 217 video_url_mo 
= const_video_url_re
.match(video_url_cmdl
) 
 218 if video_url_mo 
is None: 
 219         sys
.exit('Error: URL does not seem to be a youtube video URL. If it is, report a bug.') 
 220 video_url_id 
= video_url_mo
.group(2) 
 221 video_url 
= const_video_url_str 
% video_url_id
 
 223 # Check conflicting options 
 224 if cmdl_opts
.outfile 
is not None and (cmdl_opts
.simulate 
or cmdl_opts
.get_url
): 
 225         sys
.stderr
.write('Warning: video file name given but will not be used.\n') 
 227 if cmdl_opts
.outfile 
is not None and (cmdl_opts
.use_title 
or cmdl_opts
.use_literal
): 
 228         sys
.exit('Error: using the video title conflicts with using a given file name.') 
 230 if cmdl_opts
.use_netrc 
and cmdl_opts
.password 
is not None: 
 231         sys
.exit('Error: using netrc conflicts with giving command line password.') 
 233 if cmdl_opts
.use_title 
and cmdl_opts
.use_literal
: 
 234         sys
.exit('Error: cannot use title and literal title at the same time.') 
 236 if cmdl_opts
.quiet 
and cmdl_opts
.get_url
: 
 237         sys
.exit('Error: cannot be quiet and print final URL at the same time.') 
 239 # Incorrect option formatting 
 240 if cmdl_opts
.username 
is None and cmdl_opts
.password 
is not None: 
 241         sys
.exit('Error: password give but username is missing.') 
 243 if cmdl_opts
.get_url 
is None and cmdl_opts
.get_title 
is not None: 
 244         sys
.exit('Error: getting title requires getting URL.') 
 # Determines account_username / account_password, both None by default.
 # With the netrc option the credentials for machine 'youtube' are read from
 # the .netrc file (element 0 is used as username, element 2 as password).
 # A command line password is used together with the command line username.
 # Otherwise: a command line username combined with netrc must equal the netrc
 # username and takes the netrc password; a command line username alone
 # prompts for the password with getpass; netrc alone uses both netrc values
 # and rejects an empty username.
 #
 # NOTE(review): listing lines 251, 253, 257 and 265 are absent. The opening
 # of the try block, the condition in front of the 'no authenticators' exit,
 # the first except clause and (judging by the deeper indentation of listing
 # lines 266-278) an `else:` are not visible -- confirm against the original
 # file before restructuring.
 246 # Get account information if any 
 247 account_username 
= None 
 248 account_password 
= None 
 250 if cmdl_opts
.use_netrc
: 
 252                 info 
= netrc
.netrc().authenticators('youtube') 
 254                         sys
.exit('Error: no authenticators for machine youtube.') 
 255                 netrc_username 
= info
[0] 
 256                 netrc_password 
= info
[2] 
 258                 sys
.exit('Error: unable to read .netrc file.') 
 259         except netrc
.NetrcParseError
: 
 260                 sys
.exit('Error: unable to parse .netrc file.') 
 262 if cmdl_opts
.password 
is not None: 
 263         account_username 
= cmdl_opts
.username
 
 264         account_password 
= cmdl_opts
.password
 
 266         if cmdl_opts
.username 
is not None and cmdl_opts
.use_netrc
: 
 267                 if cmdl_opts
.username 
!= netrc_username
: 
 268                         sys
.exit('Error: conflicting username from .netrc and command line options.') 
 269                 account_username 
= cmdl_opts
.username
 
 270                 account_password 
= netrc_password
 
 271         elif cmdl_opts
.username 
is not None: 
 272                 account_username 
= cmdl_opts
.username
 
 273                 account_password 
= getpass
.getpass('Type YouTube password and press return: ') 
 274         elif cmdl_opts
.use_netrc
: 
 275                 if len(netrc_username
) == 0: 
 276                         sys
.exit('Error: empty username in .netrc file.') 
 277                 account_username 
= netrc_username
 
 278                 account_password 
= netrc_password
 
 # Chooses the output file name: '<video id>.flv' when no output file was
 # given, otherwise the given name. A name that does not end in '.flv'
 # (compared case-insensitively) only produces a warning. Unless the simulate
 # or get_url option is set, the file is opened for binary writing, and
 # failure to do so ends the program with an error message.
 #
 # NOTE(review): listing lines 283, 285-286, 289-290, 292 and 294-295 are
 # absent. The `else:` in front of the second assignment, the opening of the
 # try block and whatever is done with disk_test after opening it are not
 # visible -- confirm against the original file.
 280 # Get output file name  
 281 if cmdl_opts
.outfile 
is None: 
 282         video_filename 
= '%s.flv' % video_url_id
 
 284         video_filename 
= cmdl_opts
.outfile
 
 287 if not video_filename
.lower().endswith('.flv'): 
 288         sys
.stderr
.write('Warning: video file name does not end in .flv\n') 
 291 if not (cmdl_opts
.simulate 
or cmdl_opts
.get_url
): 
 293                 disk_test 
= open(video_filename
, 'wb') 
 296         except (OSError, IOError): 
 297                 sys
.exit('Error: unable to open %s for writing.' % video_filename
) 
 299 # Install cookie and proxy handlers 
 300 urllib2
.install_opener(urllib2
.build_opener(urllib2
.ProxyHandler())) 
 301 urllib2
.install_opener(urllib2
.build_opener(urllib2
.HTTPCookieProcessor())) 
 303 # Log in and confirm age if needed 
 304 if account_username 
is not None: 
 305         url 
= const_login_url_str 
% video_url_id
 
 306         post 
= const_login_post_str 
% (video_url_id
, account_username
, account_password
) 
 307         download_step(False, 'Logging in', 'unable to log in', url
, post
) 
 309         url 
= const_age_url_str 
% video_url_id
 
 310         post 
= const_age_post_str 
% video_url_id
 
 311         download_step(False, 'Confirming age', 'unable to confirm age', url
, post
) 
 313 # Retrieve video webpage 
 314 video_webpage 
= download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url
) 
 316 # Extract video title if needed 
 317 if cmdl_opts
.use_title 
or cmdl_opts
.use_literal 
or cmdl_opts
.get_title
: 
 318         video_title 
= extract_step('Extracting video title', 'unable to extract video title', const_video_title_re
, video_webpage
) 
 320 # Extract needed video URL parameters 
 321 video_url_params 
= extract_step('Extracting video URL parameters', 'unable to extract URL parameters', const_video_url_params_re
, video_webpage
) 
 322 video_url_real 
= const_video_url_real_str 
% video_url_params
 
 # Opens the real video URL with perform_request() and reports where the data
 # was found. With the get_url option the final URL is printed. Otherwise the
 # output file is opened for binary writing and the response is read in blocks
 # of block_size bytes (starting at const_initial_block_size), each block being
 # written to the file and counted in byte_counter, with the next block size
 # taken from new_block_size(). A progress line shows percentage, bytes so far,
 # total size, speed and ETA. If the Content-length header gave a size and the
 # byte count differs from it, error_advice_exit() is called. The listed
 # network/protocol exceptions print 'failed.' and call error_advice_exit().
 #
 # NOTE(review): many listing lines are absent from this extraction (among
 # them 325, 328, 330-331, 334, 336-337, 339, 342-343, 345-346, 349, 354, 356,
 # 360-361, 363, 365-366, 370, 373-374, 377, 381 and 383). Not visible: the
 # opening of the try blocks, the loop header and its exit condition, the
 # bodies of the get_title and simulate/get_url tests, the else branches, the
 # initialisation of byte_counter, `before`, `after` and of video_len when no
 # length is available, the closing of video_file and the KeyboardInterrupt
 # handler body. Restore them from the original file before changing any logic.
 324 # Retrieve video data 
 326         video_data 
= perform_request(video_url_real
) 
 327         cond_print('Video data found at %s\n' % video_data
.geturl()) 
 329         if cmdl_opts
.get_title
: 
 332         if cmdl_opts
.get_url
: 
 333                 print video_data
.geturl() 
 335         if cmdl_opts
.simulate 
or cmdl_opts
.get_url
: 
 338         video_file 
= open(video_filename
, 'wb') 
 340                 video_len 
= long(video_data
.info()['Content-length']) 
 341                 video_len_str 
= format_bytes(video_len
) 
 344                 video_len_str 
= 'N/A' 
 347         block_size 
= const_initial_block_size
 
 348         start_time 
= time
.time() 
 350                 if video_len 
is not None: 
 351                         percent 
= float(byte_counter
) / float(video_len
) * 100.0 
 352                         percent_str 
= '%.1f' % percent
 
 353                         eta_str 
= calc_eta(start_time
, time
.time(), video_len
, byte_counter
) 
 355                         percent_str 
= '---.-' 
 357                 counter 
= format_bytes(byte_counter
) 
 358                 speed_str 
= calc_speed(start_time
, time
.time(), byte_counter
) 
 359                 cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str
, counter
, video_len_str
, speed_str
, eta_str
)) 
 362                 video_block 
= video_data
.read(block_size
) 
 364                 dl_bytes 
= len(video_block
) 
 367                 byte_counter 
+= dl_bytes
 
 368                 video_file
.write(video_block
) 
 369                 block_size 
= new_block_size(before
, after
, dl_bytes
) 
 371         if video_len 
is not None and byte_counter 
!= video_len
: 
 372                 error_advice_exit('server did not send the expected amount of data') 
 375         cond_print('done.\n') 
 376         cond_print('Video data saved to %s\n' % video_filename
) 
 378 except (urllib2
.URLError
, ValueError, httplib
.HTTPException
, TypeError, socket
.error
): 
 379         cond_print('failed.\n') 
 380         error_advice_exit('unable to download video data') 
 382 except KeyboardInterrupt: 
 # When a title-based file name was requested, renames the downloaded file to
 # '<prefix>-<video id>.flv' and reports the new name. The prefix is the title
 # passed through title_string_norm() for the use_title option and through
 # title_string_touch() on the other visible path. A failed rename only
 # produces a warning on standard error.
 #
 # NOTE(review): listing lines 387, 390, 395-396 and 398 are absent, and the
 # body of the final KeyboardInterrupt handler lies beyond the end of this
 # listing. The opening of the try block, the `else:` in front of the second
 # prefix assignment and the except clause in front of the warning are not
 # visible -- confirm against the original file.
 385 # Rename video file if needed 
 386 if cmdl_opts
.use_title 
or cmdl_opts
.use_literal
: 
 388                 if cmdl_opts
.use_title
: 
 389                         prefix 
= title_string_norm(video_title
) 
 391                         prefix 
= title_string_touch(video_title
) 
 392                 final_filename 
= '%s-%s.flv' % (prefix
, video_url_id
) 
 393                 os
.rename(video_filename
, final_filename
) 
 394                 cond_print('Video file renamed to %s\n' % final_filename
) 
 397                 sys
.stderr
.write('Warning: unable to rename file.\n') 
 399         except KeyboardInterrupt: