]>
Raphaël G. Git Repositories - youtubedl/blob - youtube-dl
   3 # Copyright (c) 2006-2007 Ricardo Garcia Gonzalez 
   5 # Permission is hereby granted, free of charge, to any person obtaining a 
   6 # copy of this software and associated documentation files (the "Software"), 
   7 # to deal in the Software without restriction, including without limitation 
   8 # the rights to use, copy, modify, merge, publish, distribute, sublicense, 
   9 # and/or sell copies of the Software, and to permit persons to whom the 
  10 # Software is furnished to do so, subject to the following conditions: 
  12 # The above copyright notice and this permission notice shall be included 
  13 # in all copies or substantial portions of the Software. 
  15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
  16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
  17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
  18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
  19 # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
  20 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
  21 # OTHER DEALINGS IN THE SOFTWARE. 
  23 # Except as contained in this notice, the name(s) of the above copyright 
  24 # holders shall not be used in advertising or otherwise to promote the 
  25 # sale, use or other dealings in this Software without prior written 
  42 const_video_url_str 
= 'http://www.youtube.com/watch?v=%s' 
  43 const_video_url_re 
= re
.compile(r
'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$') 
  44 const_login_url_str 
= 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s' 
  45 const_login_post_str 
= 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In' 
  46 const_age_url_str 
= 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s' 
  47 const_age_post_str 
= 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm' 
  48 const_url_t_param_re 
= re
.compile(r
"[,{]t:'([^']*)'") 
  49 const_video_url_real_str 
= 'http://www.youtube.com/get_video?video_id=%s&t=%s' 
  50 const_video_title_re 
= re
.compile(r
'<title>YouTube - ([^<]*)</title>', re
.M | re
.I
) 
  52 const_initial_block_size 
= 10 * const_1k
 
  53 const_epsilon 
= 0.0001 
  55 # Print error message, followed by standard advice information, and then exit 
  56 def error_advice_exit(error_text
): 
  57         sys
.stderr
.write('Error: %s.\n' % error_text
) 
  58         sys
.stderr
.write('Try again several times. It may be a temporary problem.\n') 
  59         sys
.stderr
.write('Other typical problems:\n\n') 
  60         sys
.stderr
.write('* Video no longer exists.\n') 
  61         sys
.stderr
.write('* Video requires age confirmation but you did not provide an account.\n') 
  62         sys
.stderr
.write('* You provided the account data, but it is not valid.\n') 
  63         sys
.stderr
.write('* The connection was cut suddenly for some reason.\n') 
  64         sys
.stderr
.write('* YouTube changed their system, and the program no longer works.\n') 
  65         sys
.stderr
.write('\nTry to confirm you are able to view the video using a web browser.\n') 
  66         sys
.stderr
.write('Use the same video URL and account information, if needed, with this program.\n') 
  67         sys
.stderr
.write('When using a proxy, make sure http_proxy has http://host:port format.\n') 
  68         sys
.stderr
.write('Try again several times and contact me if the problem persists.\n') 
  71 # Wrapper to create custom requests with typical headers 
  72 def request_create(url
, data
=None): 
  73         retval 
= urllib2
.Request(url
) 
  76         # Try to mimic Firefox, at least a little bit 
  77         retval
.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0') 
  78         retval
.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7') 
  79         retval
.add_header('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5') 
  80         retval
.add_header('Accept-Language', 'en-us,en;q=0.5') 
  83 # Perform a request, process headers and return response 
  84 def perform_request(url
, data
=None): 
  85         request 
= request_create(url
, data
) 
  86         response 
= urllib2
.urlopen(request
) 
  92         if not (cmdl_opts
.quiet 
or cmdl_opts
.get_url
): 
  96 # Title string normalization 
  97 def title_string_norm(title
): 
  98         title 
= ''.join((x 
in string
.ascii_letters 
or x 
in string
.digits
) and x 
or ' ' for x 
in title
) 
  99         title 
= '_'.join(title
.split()) 
 100         title 
= title
.lower() 
 103 # Title string minimal transformation 
 104 def title_string_touch(title
): 
 105         return title
.replace(os
.sep
, '%') 
 107 # Generic download step 
 108 def download_step(return_data_flag
, step_title
, step_error
, url
, post_data
=None): 
 110                 cond_print('%s... ' % step_title
) 
 111                 data 
= perform_request(url
, post_data
).read() 
 112                 cond_print('done.\n') 
 117         except (urllib2
.URLError
, ValueError, httplib
.HTTPException
, TypeError, socket
.error
): 
 118                 cond_print('failed.\n') 
 119                 error_advice_exit(step_error
) 
 121         except KeyboardInterrupt: 
 124 # Generic extract step 
 125 def extract_step(step_title
, step_error
, regexp
, data
): 
 127                 cond_print('%s... ' % step_title
) 
 128                 match 
= regexp
.search(data
) 
 131                         cond_print('failed.\n') 
 132                         error_advice_exit(step_error
) 
 134                 extracted_data 
= match
.group(1) 
 135                 cond_print('done.\n') 
 136                 return extracted_data
 
 138         except KeyboardInterrupt: 
 141 # Calculate new block size based on previous block size 
 142 def new_block_size(before
, after
, bytes): 
 143         new_min 
= max(bytes / 2.0, 1.0) 
 144         new_max 
= max(bytes * 2.0, 1.0) 
 146         if dif 
< const_epsilon
: 
 155 # Get optimum 1k exponent to represent a number of bytes 
 156 def optimum_k_exp(num_bytes
): 
 160         return long(math
.log(num_bytes
, const_1k
)) 
 162 # Get optimum representation of number of bytes 
 163 def format_bytes(num_bytes
): 
 166                 exp 
= optimum_k_exp(num_bytes
) 
 167                 suffix 
= 'bkMGTPEZY'[exp
] 
 169                         return '%s%s' % (num_bytes
, suffix
) 
 170                 converted 
= float(num_bytes
) / float(const_1k
**exp
) 
 171                 return '%.2f%s' % (converted
, suffix
) 
 173                 sys
.exit('Error: internal error formatting number of bytes.') 
 175 # Calculate ETA and return it in string format as MM:SS 
 176 def calc_eta(start
, now
, total
, current
): 
 178         if current 
== 0 or dif 
< const_epsilon
: 
 180         rate 
= float(current
) / dif
 
 181         eta 
= long((total 
- current
) / rate
) 
 186         return '%02d:%02d' % (eta_mins
, eta_secs
) 
 188 # Calculate speed and return it in string format 
 189 def calc_speed(start
, now
, bytes): 
 191         if bytes == 0 or dif 
< const_epsilon
: 
 193         return format_bytes(float(bytes) / dif
) 
 195 # Create the command line options parser and parse command line 
 196 cmdl_usage 
= 'usage: %prog [options] video_url' 
 197 cmdl_version 
= '2007.10.09' 
 198 cmdl_parser 
= optparse
.OptionParser(usage
=cmdl_usage
, version
=cmdl_version
, conflict_handler
='resolve') 
 199 cmdl_parser
.add_option('-h', '--help', action
='help', help='print this help text and exit') 
 200 cmdl_parser
.add_option('-v', '--version', action
='version', help='print program version and exit') 
 201 cmdl_parser
.add_option('-u', '--username', dest
='username', metavar
='USERNAME', help='account username') 
 202 cmdl_parser
.add_option('-p', '--password', dest
='password', metavar
='PASSWORD', help='account password') 
 203 cmdl_parser
.add_option('-o', '--output', dest
='outfile', metavar
='FILE', help='output video file name') 
 204 cmdl_parser
.add_option('-q', '--quiet', action
='store_true', dest
='quiet', help='activates quiet mode') 
 205 cmdl_parser
.add_option('-s', '--simulate', action
='store_true', dest
='simulate', help='do not download video') 
 206 cmdl_parser
.add_option('-t', '--title', action
='store_true', dest
='use_title', help='use title in file name') 
 207 cmdl_parser
.add_option('-l', '--literal', action
='store_true', dest
='use_literal', help='use literal title in file name') 
 208 cmdl_parser
.add_option('-n', '--netrc', action
='store_true', dest
='use_netrc', help='use .netrc authentication data') 
 209 cmdl_parser
.add_option('-g', '--get-url', action
='store_true', dest
='get_url', help='print final video URL only') 
 210 cmdl_parser
.add_option('-2', '--title-too', action
='store_true', dest
='get_title', help='used with -g, print title too') 
 211 (cmdl_opts
, cmdl_args
) = cmdl_parser
.parse_args() 
 214 if len(cmdl_args
) != 1: 
 215         cmdl_parser
.print_help() 
 217 video_url_cmdl 
= cmdl_args
[0] 
 219 # Verify video URL format and convert to "standard" format 
 220 video_url_mo 
= const_video_url_re
.match(video_url_cmdl
) 
 221 if video_url_mo 
is None: 
 222         sys
.exit('Error: URL does not seem to be a youtube video URL. If it is, report a bug.') 
 223 video_url_id 
= video_url_mo
.group(2) 
 224 video_url 
= const_video_url_str 
% video_url_id
 
 226 # Check conflicting options 
 227 if cmdl_opts
.outfile 
is not None and (cmdl_opts
.simulate 
or cmdl_opts
.get_url
): 
 228         sys
.stderr
.write('Warning: video file name given but will not be used.\n') 
 230 if cmdl_opts
.outfile 
is not None and (cmdl_opts
.use_title 
or cmdl_opts
.use_literal
): 
 231         sys
.exit('Error: using the video title conflicts with using a given file name.') 
 233 if cmdl_opts
.use_netrc 
and cmdl_opts
.password 
is not None: 
 234         sys
.exit('Error: using netrc conflicts with giving command line password.') 
 236 if cmdl_opts
.use_title 
and cmdl_opts
.use_literal
: 
 237         sys
.exit('Error: cannot use title and literal title at the same time.') 
 239 if cmdl_opts
.quiet 
and cmdl_opts
.get_url
: 
 240         sys
.exit('Error: cannot be quiet and print final URL at the same time.') 
 242 # Incorrect option formatting 
 243 if cmdl_opts
.username 
is None and cmdl_opts
.password 
is not None: 
 244         sys
.exit('Error: password give but username is missing.') 
 246 if cmdl_opts
.get_url 
is None and cmdl_opts
.get_title 
is not None: 
 247         sys
.exit('Error: getting title requires getting URL.') 
 249 # Get account information if any 
 250 account_username 
= None 
 251 account_password 
= None 
 253 if cmdl_opts
.use_netrc
: 
 255                 info 
= netrc
.netrc().authenticators('youtube') 
 257                         sys
.exit('Error: no authenticators for machine youtube.') 
 258                 netrc_username 
= info
[0] 
 259                 netrc_password 
= info
[2] 
 261                 sys
.exit('Error: unable to read .netrc file.') 
 262         except netrc
.NetrcParseError
: 
 263                 sys
.exit('Error: unable to parse .netrc file.') 
 265 if cmdl_opts
.password 
is not None: 
 266         account_username 
= cmdl_opts
.username
 
 267         account_password 
= cmdl_opts
.password
 
 269         if cmdl_opts
.username 
is not None and cmdl_opts
.use_netrc
: 
 270                 if cmdl_opts
.username 
!= netrc_username
: 
 271                         sys
.exit('Error: conflicting username from .netrc and command line options.') 
 272                 account_username 
= cmdl_opts
.username
 
 273                 account_password 
= netrc_password
 
 274         elif cmdl_opts
.username 
is not None: 
 275                 account_username 
= cmdl_opts
.username
 
 276                 account_password 
= getpass
.getpass('Type YouTube password and press return: ') 
 277         elif cmdl_opts
.use_netrc
: 
 278                 if len(netrc_username
) == 0: 
 279                         sys
.exit('Error: empty username in .netrc file.') 
 280                 account_username 
= netrc_username
 
 281                 account_password 
= netrc_password
 
 283 # Get output file name  
 284 if cmdl_opts
.outfile 
is None: 
 285         video_filename 
= '%s.flv' % video_url_id
 
 287         video_filename 
= cmdl_opts
.outfile
 
 290 if not video_filename
.lower().endswith('.flv'): 
 291         sys
.stderr
.write('Warning: video file name does not end in .flv\n') 
 294 if not (cmdl_opts
.simulate 
or cmdl_opts
.get_url
): 
 296                 disk_test 
= open(video_filename
, 'wb') 
 299         except (OSError, IOError): 
 300                 sys
.exit('Error: unable to open %s for writing.' % video_filename
) 
 302 # Install cookie and proxy handlers 
 303 urllib2
.install_opener(urllib2
.build_opener(urllib2
.ProxyHandler())) 
 304 urllib2
.install_opener(urllib2
.build_opener(urllib2
.HTTPCookieProcessor())) 
 306 # Log in and confirm age if needed 
 307 if account_username 
is not None: 
 308         url 
= const_login_url_str 
% video_url_id
 
 309         post 
= const_login_post_str 
% (video_url_id
, account_username
, account_password
) 
 310         download_step(False, 'Logging in', 'unable to log in', url
, post
) 
 312         url 
= const_age_url_str 
% video_url_id
 
 313         post 
= const_age_post_str 
% video_url_id
 
 314         download_step(False, 'Confirming age', 'unable to confirm age', url
, post
) 
 316 # Retrieve video webpage 
 317 video_webpage 
= download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url
) 
 319 # Extract video title if needed 
 320 if cmdl_opts
.use_title 
or cmdl_opts
.use_literal 
or cmdl_opts
.get_title
: 
 321         video_title 
= extract_step('Extracting video title', 'unable to extract video title', const_video_title_re
, video_webpage
) 
 323 # Extract needed video URL parameters 
 324 video_url_t_param 
= extract_step('Extracting URL "t" parameter', 'unable to extract URL "t" parameter', const_url_t_param_re
, video_webpage
) 
 325 video_url_real 
= const_video_url_real_str 
% (video_url_id
, video_url_t_param
) 
 327 # Retrieve video data 
 329         cond_print('Requesting video file... ') 
 330         video_data 
= perform_request(video_url_real
) 
 332         cond_print('Video data found at %s\n' % video_data
.geturl()) 
 334         if cmdl_opts
.get_title
: 
 337         if cmdl_opts
.get_url
: 
 338                 print video_data
.geturl() 
 340         if cmdl_opts
.simulate 
or cmdl_opts
.get_url
: 
 343         video_file 
= open(video_filename
, 'wb') 
 345                 video_len 
= long(video_data
.info()['Content-length']) 
 346                 video_len_str 
= format_bytes(video_len
) 
 349                 video_len_str 
= 'N/A' 
 352         block_size 
= const_initial_block_size
 
 353         start_time 
= time
.time() 
 355                 if video_len 
is not None: 
 356                         percent 
= float(byte_counter
) / float(video_len
) * 100.0 
 357                         percent_str 
= '%.1f' % percent
 
 358                         eta_str 
= calc_eta(start_time
, time
.time(), video_len
, byte_counter
) 
 360                         percent_str 
= '---.-' 
 362                 counter 
= format_bytes(byte_counter
) 
 363                 speed_str 
= calc_speed(start_time
, time
.time(), byte_counter
) 
 364                 cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str
, counter
, video_len_str
, speed_str
, eta_str
)) 
 367                 video_block 
= video_data
.read(block_size
) 
 369                 dl_bytes 
= len(video_block
) 
 372                 byte_counter 
+= dl_bytes
 
 373                 video_file
.write(video_block
) 
 374                 block_size 
= new_block_size(before
, after
, dl_bytes
) 
 376         if video_len 
is not None and byte_counter 
!= video_len
: 
 377                 error_advice_exit('server did not send the expected ammount of data') 
 380         cond_print('done.\n') 
 381         cond_print('Video data saved to %s\n' % video_filename
) 
 383 except (urllib2
.URLError
, ValueError, httplib
.HTTPException
, TypeError, socket
.error
): 
 384         cond_print('failed.\n') 
 385         error_advice_exit('unable to download video data') 
 387 except KeyboardInterrupt: 
 390 # Rename video file if needed 
 391 if cmdl_opts
.use_title 
or cmdl_opts
.use_literal
: 
 393                 if cmdl_opts
.use_title
: 
 394                         prefix 
= title_string_norm(video_title
) 
 396                         prefix 
= title_string_touch(video_title
) 
 397                 final_filename 
= '%s-%s.flv' % (prefix
, video_url_id
) 
 398                 os
.rename(video_filename
, final_filename
) 
 399                 cond_print('Video file renamed to %s\n' % final_filename
) 
 402                 sys
.stderr
.write('Warning: unable to rename file.\n') 
 404         except KeyboardInterrupt: