3 # Copyright (c) 2006 Ricardo Garcia Gonzalez 
   5 # Permission is hereby granted, free of charge, to any person obtaining a 
   6 # copy of this software and associated documentation files (the "Software"), 
   7 # to deal in the Software without restriction, including without limitation 
   8 # the rights to use, copy, modify, merge, publish, distribute, sublicense, 
   9 # and/or sell copies of the Software, and to permit persons to whom the 
  10 # Software is furnished to do so, subject to the following conditions: 
  12 # The above copyright notice and this permission notice shall be included 
  13 # in all copies or substantial portions of the Software. 
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
  18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
  19 # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
  20 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
  21 # OTHER DEALINGS IN THE SOFTWARE. 
  23 # Except as contained in this notice, the name(s) of the above copyright 
  24 # holders shall not be used in advertising or otherwise to promote the 
  25 # sale, use or other dealings in this Software without prior written 
  42 const_video_url_str 
= 'http://www.youtube.com/watch?v=%s' 
  43 const_video_url_re 
= re
.compile(r
'(?:http://)?(?:www\d*\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=)([^&]+).*') 
  44 const_login_url_str 
= 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s' 
  45 const_login_post_str 
= 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In' 
  46 const_age_url_str 
= 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s' 
  47 const_age_post_str 
= 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm' 
  48 const_video_url_params_re 
= re
.compile(r
'player2\.swf\?([^"]+)"', re
.M
) 
  49 const_video_url_real_str 
= 'http://www.youtube.com/get_video?%s' 
  50 const_video_title_re 
= re
.compile(r
'<title>YouTube - ([^<]*)</title>', re
.M | re
.I
) 
  52 const_initial_block_size 
= 10 * const_1k
 
  54 # Print error message, followed by standard advice information, and then exit 
  55 def error_advice_exit(error_text
): 
  56         sys
.stderr
.write('Error: %s.\n' % error_text
) 
  57         sys
.stderr
.write('Try again several times. It may be a temporary problem.\n') 
  58         sys
.stderr
.write('Other typical problems:\n\n') 
  59         sys
.stderr
.write('* Video no longer exists.\n') 
  60         sys
.stderr
.write('* Video requires age confirmation but you did not provide an account.\n') 
  61         sys
.stderr
.write('* You provided the account data, but it is not valid.\n') 
  62         sys
.stderr
.write('* The connection was cut suddenly for some reason.\n') 
  63         sys
.stderr
.write('* YouTube changed their system, and the program no longer works.\n') 
  64         sys
.stderr
.write('\nTry to confirm you are able to view the video using a web browser.\n') 
  65         sys
.stderr
.write('Use the same video URL and account information, if needed, with this program.\n') 
  66         sys
.stderr
.write('When using a proxy, make sure http_proxy has http://host:port format.\n') 
  67         sys
.stderr
.write('Try again several times and contact me if the problem persists.\n') 
  70 # Wrapper to create custom requests with typical headers 
  71 def request_create(url
, data
=None): 
  72         retval 
= urllib2
.Request(url
) 
  75         # Try to mimic Firefox, at least a little bit 
  76         retval
.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0') 
  77         retval
.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7') 
  78         retval
.add_header('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5') 
  79         retval
.add_header('Accept-Language', 'en-us,en;q=0.5') 
  82 # Perform a request, process headers and return response 
  83 def perform_request(url
, data
=None): 
  84         request 
= request_create(url
, data
) 
  85         response 
= urllib2
.urlopen(request
) 
  91         if not (cmdl_opts
.quiet 
or cmdl_opts
.get_url
): 
  95 # Title string normalization 
  96 def title_string_norm(title
): 
  97         title 
= ''.join((x 
in string
.ascii_letters 
or x 
in string
.digits
) and x 
or ' ' for x 
in title
) 
  98         title 
= '_'.join(title
.split()) 
 102 # Title string minimal transformation 
 103 def title_string_touch(title
): 
 104         return title
.replace(os
.sep
, '%') 
 106 # Generic download step 
 107 def download_step(return_data_flag
, step_title
, step_error
, url
, post_data
=None): 
 109                 cond_print('%s... ' % step_title
) 
 110                 data 
= perform_request(url
, post_data
).read() 
 111                 cond_print('done.\n') 
 116         except (urllib2
.URLError
, ValueError, httplib
.HTTPException
, TypeError, socket
.error
): 
 117                 cond_print('failed.\n') 
 118                 error_advice_exit(step_error
) 
 120         except KeyboardInterrupt: 
 123 # Generic extract step 
 124 def extract_step(step_title
, step_error
, regexp
, data
): 
 126                 cond_print('%s... ' % step_title
) 
 127                 match 
= regexp
.search(data
) 
 130                         cond_print('failed.\n') 
 131                         error_advice_exit(step_error
) 
 133                 extracted_data 
= match
.group(1) 
 134                 cond_print('done.\n') 
 135                 return extracted_data
 
 137         except KeyboardInterrupt: 
 140 # Calculate new block size based on previous block size 
 141 def new_block_size(before
, after
, bytes): 
 142         new_min 
= max(bytes / 2.0, 1.0) 
 143         new_max 
= max(bytes * 2.0, 1.0) 
 154 # Get optimum 1k exponent to represent a number of bytes 
 155 def optimum_k_exp(num_bytes
): 
 159         return long(math
.log(num_bytes
, const_1k
)) 
 161 # Get optimum representation of number of bytes 
 162 def format_bytes(num_bytes
): 
 165                 exp 
= optimum_k_exp(num_bytes
) 
 166                 suffix 
= 'bkMGTPEZY'[exp
] 
 168                         return '%s%s' % (num_bytes
, suffix
) 
 169                 converted 
= float(num_bytes
) / float(const_1k
**exp
) 
 170                 return '%.2f%s' % (converted
, suffix
) 
 172                 sys
.exit('Error: internal error formatting number of bytes.') 
 174 # Calculate ETA and return it in string format as MM:SS 
 175 def calc_eta(start
, now
, total
, current
): 
 178         rate 
= float(current
) / (now 
- start
) 
 179         eta 
= long((total 
- current
) / rate
) 
 184         return '%02d:%02d' % (eta_mins
, eta_secs
) 
 186 # Calculate speed and return it in string format 
 187 def calc_speed(start
, now
, bytes): 
 190         return format_bytes(float(bytes) / (now 
- start
)) 
 192 # Create the command line options parser and parse command line 
 193 cmdl_usage 
= 'usage: %prog [options] video_url' 
 194 cmdl_version 
= '2007.03.27' 
 195 cmdl_parser 
= optparse
.OptionParser(usage
=cmdl_usage
, version
=cmdl_version
, conflict_handler
='resolve') 
 196 cmdl_parser
.add_option('-h', '--help', action
='help', help='print this help text and exit') 
 197 cmdl_parser
.add_option('-v', '--version', action
='version', help='print program version and exit') 
 198 cmdl_parser
.add_option('-u', '--username', dest
='username', metavar
='USERNAME', help='account username') 
 199 cmdl_parser
.add_option('-p', '--password', dest
='password', metavar
='PASSWORD', help='account password') 
 200 cmdl_parser
.add_option('-o', '--output', dest
='outfile', metavar
='FILE', help='output video file name') 
 201 cmdl_parser
.add_option('-q', '--quiet', action
='store_true', dest
='quiet', help='activates quiet mode') 
 202 cmdl_parser
.add_option('-s', '--simulate', action
='store_true', dest
='simulate', help='do not download video') 
 203 cmdl_parser
.add_option('-t', '--title', action
='store_true', dest
='use_title', help='use title in file name') 
 204 cmdl_parser
.add_option('-l', '--literal', action
='store_true', dest
='use_literal', help='use literal title in file name') 
 205 cmdl_parser
.add_option('-n', '--netrc', action
='store_true', dest
='use_netrc', help='use .netrc authentication data') 
 206 cmdl_parser
.add_option('-g', '--get-url', action
='store_true', dest
='get_url', help='print final video URL only') 
 207 (cmdl_opts
, cmdl_args
) = cmdl_parser
.parse_args() 
 210 if len(cmdl_args
) != 1: 
 211         cmdl_parser
.print_help() 
 213 video_url_cmdl 
= cmdl_args
[0] 
 215 # Verify video URL format and convert to "standard" format 
 216 video_url_mo 
= const_video_url_re
.match(video_url_cmdl
) 
 217 if video_url_mo 
is None: 
 218         sys
.exit('Error: URL does not seem to be a youtube video URL. If it is, report a bug.') 
 219 video_url_id 
= video_url_mo
.group(1) 
 220 video_url 
= const_video_url_str 
% video_url_id
 
 222 # Check conflicting options 
 223 if cmdl_opts
.outfile 
is not None and (cmdl_opts
.simulate 
or cmdl_opts
.get_url
): 
 224         sys
.stderr
.write('Warning: video file name given but will not be used.\n') 
 226 if cmdl_opts
.outfile 
is not None and (cmdl_opts
.use_title 
or cmdl_opts
.use_literal
): 
 227         sys
.exit('Error: using the video title conflicts with using a given file name.') 
 229 if cmdl_opts
.use_netrc 
and cmdl_opts
.password 
is not None: 
 230         sys
.exit('Error: using netrc conflicts with giving command line password.') 
 232 if cmdl_opts
.use_title 
and cmdl_opts
.use_literal
: 
 233         sys
.exit('Error: cannot use title and literal title at the same time.') 
 235 if cmdl_opts
.quiet 
and cmdl_opts
.get_url
: 
 236         sys
.exit('Error: cannot be quiet and print final URL at the same time.') 
 238 # Incorrect option formatting 
 239 if cmdl_opts
.username 
is None and cmdl_opts
.password 
is not None: 
 240         sys
.exit('Error: password give but username is missing.') 
 242 # Get account information if any 
 243 account_username 
= None 
 244 account_password 
= None 
 246 if cmdl_opts
.use_netrc
: 
 248                 info 
= netrc
.netrc().authenticators('youtube') 
 250                         sys
.exit('Error: no authenticators for machine youtube.') 
 251                 netrc_username 
= info
[0] 
 252                 netrc_password 
= info
[2] 
 254                 sys
.exit('Error: unable to read .netrc file.') 
 255         except netrc
.NetrcParseError
: 
 256                 sys
.exit('Error: unable to parse .netrc file.') 
 258 if cmdl_opts
.password 
is not None: 
 259         account_username 
= cmdl_opts
.username
 
 260         account_password 
= cmdl_opts
.password
 
 262         if cmdl_opts
.username 
is not None and cmdl_opts
.use_netrc
: 
 263                 if cmdl_opts
.username 
!= netrc_username
: 
 264                         sys
.exit('Error: conflicting username from .netrc and command line options.') 
 265                 account_username 
= cmdl_opts
.username
 
 266                 account_password 
= netrc_password
 
 267         elif cmdl_opts
.username 
is not None: 
 268                 account_username 
= cmdl_opts
.username
 
 269                 account_password 
= getpass
.getpass('Type YouTube password and press return: ') 
 270         elif cmdl_opts
.use_netrc
: 
 271                 if len(netrc_username
) == 0: 
 272                         sys
.exit('Error: empty username in .netrc file.') 
 273                 account_username 
= netrc_username
 
 274                 account_password 
= netrc_password
 
 276 # Get output file name  
 277 if cmdl_opts
.outfile 
is None: 
 278         video_filename 
= '%s.flv' % video_url_id
 
 280         video_filename 
= cmdl_opts
.outfile
 
 283 if not video_filename
.lower().endswith('.flv'): 
 284         sys
.stderr
.write('Warning: video file name does not end in .flv\n') 
 287 if not (cmdl_opts
.simulate 
or cmdl_opts
.get_url
): 
 289                 disk_test 
= open(video_filename
, 'wb') 
 292         except (OSError, IOError): 
 293                 sys
.exit('Error: unable to open %s for writing.' % video_filename
) 
 295 # Install cookie and proxy handlers 
 296 urllib2
.install_opener(urllib2
.build_opener(urllib2
.ProxyHandler())) 
 297 urllib2
.install_opener(urllib2
.build_opener(urllib2
.HTTPCookieProcessor())) 
 299 # Log in and confirm age if needed 
 300 if account_username 
is not None: 
 301         url 
= const_login_url_str 
% video_url_id
 
 302         post 
= const_login_post_str 
% (video_url_id
, account_username
, account_password
) 
 303         download_step(False, 'Logging in', 'unable to log in', url
, post
) 
 305         url 
= const_age_url_str 
% video_url_id
 
 306         post 
= const_age_post_str 
% video_url_id
 
 307         download_step(False, 'Confirming age', 'unable to confirm age', url
, post
) 
 309 # Retrieve video webpage 
 310 video_webpage 
= download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url
) 
 312 # Extract video title if needed 
 313 if cmdl_opts
.use_title 
or cmdl_opts
.use_literal
: 
 314         video_title 
= extract_step('Extracting video title', 'unable to extract video title', const_video_title_re
, video_webpage
) 
 316 # Extract needed video URL parameters 
 317 video_url_params 
= extract_step('Extracting video URL parameters', 'unable to extract URL parameters', const_video_url_params_re
, video_webpage
) 
 318 video_url_real 
= const_video_url_real_str 
% video_url_params
 
 320 # Retrieve video data 
 322         video_data 
= perform_request(video_url_real
) 
 323         cond_print('Video data found at %s\n' % video_data
.geturl()) 
 325         if cmdl_opts
.get_url
: 
 326                 print video_data
.geturl() 
 328         if cmdl_opts
.simulate 
or cmdl_opts
.get_url
: 
 331         video_file 
= open(video_filename
, 'wb') 
 333                 video_len 
= long(video_data
.info()['Content-length']) 
 334                 video_len_str 
= format_bytes(video_len
) 
 337                 video_len_str 
= 'N/A' 
 340         block_size 
= const_initial_block_size
 
 341         start_time 
= time
.time() 
 343                 if video_len 
is not None: 
 344                         percent 
= float(byte_counter
) / float(video_len
) * 100.0 
 345                         percent_str 
= '%.1f' % percent
 
 346                         eta_str 
= calc_eta(start_time
, time
.time(), video_len
, byte_counter
) 
 348                         percent_str 
= '---.-' 
 350                 counter 
= format_bytes(byte_counter
) 
 351                 speed_str 
= calc_speed(start_time
, time
.time(), byte_counter
) 
 352                 cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str
, counter
, video_len_str
, speed_str
, eta_str
)) 
 355                 video_block 
= video_data
.read(block_size
) 
 357                 dl_bytes 
= len(video_block
) 
 360                 byte_counter 
+= dl_bytes
 
 361                 video_file
.write(video_block
) 
 362                 block_size 
= new_block_size(before
, after
, dl_bytes
) 
 364         if video_len 
is not None and byte_counter 
!= video_len
: 
 365                 error_advice_exit('server did not send the expected ammount of data') 
 368         cond_print('done.\n') 
 369         cond_print('Video data saved to %s\n' % video_filename
) 
 371 except (urllib2
.URLError
, ValueError, httplib
.HTTPException
, TypeError, socket
.error
): 
 372         cond_print('failed.\n') 
 373         error_advice_exit('unable to download video data') 
 375 except KeyboardInterrupt: 
 378 # Rename video file if needed 
 379 if cmdl_opts
.use_title 
or cmdl_opts
.use_literal
: 
 381                 if cmdl_opts
.use_title
: 
 382                         prefix 
= title_string_norm(video_title
) 
 384                         prefix 
= title_string_touch(video_title
) 
 385                 final_filename 
= '%s-%s.flv' % (prefix
, video_url_id
) 
 386                 os
.rename(video_filename
, final_filename
) 
 387                 cond_print('Video file renamed to %s\n' % final_filename
) 
 390                 sys
.stderr
.write('Warning: unable to rename file.\n') 
 392         except KeyboardInterrupt: