3 # Copyright (c) 2006-2008 Ricardo Garcia Gonzalez
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the "Software"),
7 # to deal in the Software without restriction, including without limitation
8 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 # and/or sell copies of the Software, and to permit persons to whom the
10 # Software is furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included
13 # in all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 # OTHER DEALINGS IN THE SOFTWARE.
23 # Except as contained in this notice, the name(s) of the above copyright
24 # holders shall not be used in advertising or otherwise to promote the
25 # sale, use or other dealings in this Software without prior written
# Size of the first read issued against the video stream, and the threshold
# that measured differences (`dif`) are compared against by the rate helpers.
const_initial_block_size = 10 * const_1k
const_epsilon = 0.0001

# YouTube URL templates, POST bodies and page-scraping patterns.
# In const_video_url_re, group 2 captures the video identifier; group 1 is the
# optional URL prefix that precedes it.
const_video_url_str = 'http://www.youtube.com/watch?v=%s'
const_video_url_re = re.compile(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$')
const_video_url_format_suffix = '&fmt=%s'
const_best_quality_format = 18
const_login_url_str = 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s'
const_login_post_str = 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In'
const_age_url_str = 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s'
const_age_post_str = 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm'
# Captures the value of the "t" entry embedded in the watch page.
const_url_t_param_re = re.compile(r', "t": "([^"]+)"')
const_video_url_real_str = 'http://www.youtube.com/get_video?video_id=%s&t=%s'
# Captures the text after "YouTube - " in the page <title>.
const_video_title_re = re.compile(r'<title>YouTube - ([^<]*)</title>', re.M | re.I)
# Print error message, followed by standard advice information, and then exit
def error_advice_exit(error_text):
    """Report a fatal error on stderr with troubleshooting advice, then exit.

    error_text -- short description of the failure; a trailing period is
                  appended by this function.

    Never returns: raises SystemExit with status 1, so callers may rely on
    execution not continuing past the call.
    """
    advice_lines = (
        'Try again several times. It may be a temporary problem.\n',
        'Other typical problems:\n\n',
        '* Video no longer exists.\n',
        '* Video requires age confirmation but you did not provide an account.\n',
        '* You provided the account data, but it is not valid.\n',
        '* The connection was cut suddenly for some reason.\n',
        '* YouTube changed their system, and the program no longer works.\n',
        '\nTry to confirm you are able to view the video using a web browser.\n',
        'Use the same video URL and account information, if needed, with this program.\n',
        'When using a proxy, make sure http_proxy has http://host:port format.\n',
        'Try again several times and contact me if the problem persists.\n',
    )
    sys.stderr.write('Error: %s.\n' % error_text)
    for line in advice_lines:
        sys.stderr.write(line)
    # The advice is useless if the caller keeps running on bad data, so the
    # function terminates the program itself with a failure status.
    sys.exit(1)
75 # Wrapper to create custom requests with typical headers
76 def request_create(url
, extra_headers
, post_data
):
77 retval
= urllib2
.Request(url
)
78 if post_data
is not None:
79 retval
.add_data(post_data
)
80 # Try to mimic Firefox, at least a little bit
81 retval
.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11')
82 retval
.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
83 retval
.add_header('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5')
84 retval
.add_header('Accept-Language', 'en-us,en;q=0.5')
85 if extra_headers
is not None:
86 for header
in extra_headers
:
87 retval
.add_header(header
[0], header
[1])
# Perform a request, process headers and return response
def perform_request(url, headers=None, data=None):
    """Open url through urllib2 and return the response object.

    url     -- address to request.
    headers -- optional iterable of (name, value) pairs forwarded to
               request_create() as extra headers.
    data    -- optional request body forwarded to request_create().

    Returns the object produced by urllib2.urlopen(); callers in this file
    use its read(), geturl() and info() methods. Errors raised by urlopen()
    propagate to the caller.
    """
    request = request_create(url, headers, data)
    response = urllib2.urlopen(request)
    return response
# Conditional print
def cond_print(str):
    """Write str to stdout unless quiet mode or URL-only mode is active.

    str -- text to emit verbatim (no newline is appended). The parameter
           name shadows the builtin; it is kept because the existing body
           refers to it by that name.

    Reads the module-level cmdl_opts produced by the option parser.
    """
    if not (cmdl_opts.quiet or cmdl_opts.get_url):
        sys.stdout.write(str)
        # Progress messages end without a newline ('Logging in... ', '\r...'),
        # so flush to make them visible immediately on buffered terminals.
        sys.stdout.flush()
# Title string normalization
def title_string_norm(title):
    """Return title reduced to lowercase ASCII words joined by underscores.

    title -- the raw video title.

    Every character that is not an ASCII letter or digit is treated as a
    word separator; runs of separators collapse into a single underscore and
    leading/trailing separators are dropped. An empty or all-separator title
    yields the empty string.
    """
    allowed = string.ascii_letters + string.digits
    cleaned = []
    for char in title:
        if char in allowed:
            cleaned.append(char)
        else:
            cleaned.append(' ')
    words = ''.join(cleaned).split()
    # The caller uses the result as a file name prefix, so it must be returned.
    return '_'.join(words).lower()
# Generic download step
def download_step(return_data_flag, step_title, step_error, url, post_data=None):
    """Fetch url as one named step, reporting progress through cond_print().

    return_data_flag -- when true, return the downloaded body; otherwise the
                        body is discarded and None is returned.
    step_title       -- text shown before the request ('<title>... ').
    step_error       -- message passed to error_advice_exit() on failure.
    url              -- address to request.
    post_data        -- optional request body.

    Network and protocol errors end the program through error_advice_exit();
    Ctrl-C ends it through sys.exit().
    """
    try:
        cond_print('%s... ' % step_title)
        data = perform_request(url, data=post_data).read()
        cond_print('done.\n')
        if return_data_flag:
            return data
        return None

    except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error):
        cond_print('failed.\n')
        error_advice_exit(step_error)

    except KeyboardInterrupt:
        # The step title was printed without a newline; emit one on the way
        # out so the shell prompt starts on a fresh line.
        sys.exit('\n')
# Generic extract step
def extract_step(step_title, step_error, regexp, data):
    """Search data with regexp as one named step and return group 1.

    step_title -- text shown before the search ('<title>... ').
    step_error -- message passed to error_advice_exit() when nothing matches.
    regexp     -- compiled pattern with at least one capturing group.
    data       -- text to search.

    Returns the text captured by group 1. When the pattern does not match,
    the program ends through error_advice_exit(); Ctrl-C ends it through
    sys.exit().
    """
    try:
        cond_print('%s... ' % step_title)
        match = regexp.search(data)

        if match is None:
            cond_print('failed.\n')
            error_advice_exit(step_error)

        extracted_data = match.group(1)
        cond_print('done.\n')
        return extracted_data

    except KeyboardInterrupt:
        # Leave the partially printed status line cleanly before exiting.
        sys.exit('\n')
144 # Calculate new block size based on previous block size
# Called from the download loop as new_block_size(before, after, dl_bytes),
# where dl_bytes is the length of the block just read.
# NOTE(review): this extract is incomplete. The statement that assigns `dif`,
# the body of the `if dif < const_epsilon` test and the final result
# computation are not visible, and each statement is split across lines with a
# leading source-line number. Presumably `dif` is `after - before` and the
# result is bounded by new_min/new_max -- confirm against the original file
# before relying on this function.
145 def new_block_size(before
, after
, bytes):
# Half the previous block length, but never below 1.0.
146 new_min
= max(bytes / 2.0, 1.0)
# Twice the previous block length, but never below 1.0.
147 new_max
= max(bytes * 2.0, 1.0)
149 if dif
< const_epsilon
:
# Get optimum 1k exponent to represent a number of bytes
def optimum_k_exp(num_bytes):
    """Return the integer power of const_1k best suited to show num_bytes.

    num_bytes -- non-negative byte count (int, long or float).

    Returns the logarithm of num_bytes in base const_1k truncated to an
    integer, or 0 when num_bytes is 0 (the logarithm of zero is undefined and
    math.log would raise ValueError).
    """
    if num_bytes == 0:
        return 0
    return long(math.log(num_bytes, const_1k))
# Get optimum representation of number of bytes
def format_bytes(num_bytes):
    """Return num_bytes as a short string with a magnitude suffix.

    num_bytes -- non-negative byte count (int, long or float).

    The suffix is the character of 'bkMGTPEZY' selected by optimum_k_exp().
    Values with exponent 0 are shown unscaled ('<num_bytes>b'); larger values
    are divided by const_1k**exp and shown with two decimals. If the exponent
    falls outside the suffix table the program exits with an internal-error
    message.
    """
    try:
        exp = optimum_k_exp(num_bytes)
        suffix = 'bkMGTPEZY'[exp]
        if exp == 0:
            # Plain bytes need no scaling.
            return '%s%s' % (num_bytes, suffix)
        converted = float(num_bytes) / float(const_1k**exp)
        return '%.2f%s' % (converted, suffix)
    except IndexError:
        sys.exit('Error: internal error formatting number of bytes.')
178 # Calculate ETA and return it in string format as MM:SS
# Called from the download loop as
# calc_eta(start_time, time.time(), video_len, byte_counter).
# NOTE(review): this extract is incomplete. `dif` is used but its assignment
# is not visible (presumably `now - start` -- confirm), and the body of the
# guard below is missing, so what is returned when no data has arrived yet
# cannot be determined from here.
179 def calc_eta(start
, now
, total
, current
):
# Guard against dividing by zero progress or by a negligible `dif`.
181 if current
== 0 or dif
< const_epsilon
:
# Average rate so far: units of `current` per unit of `dif`.
183 rate
= float(current
) / dif
# Remaining amount divided by the rate, truncated to a whole number.
184 eta
= long((total
- current
) / rate
)
# Split the estimate into minutes and seconds.
185 (eta_mins
, eta_secs
) = divmod(eta
, 60)
# Both fields are zero-padded to at least two digits.
188 return '%02d:%02d' % (eta_mins
, eta_secs
)
190 # Calculate speed and return it in string format
# Called from the download loop as
# calc_speed(start_time, time.time(), byte_counter).
# NOTE(review): this extract is incomplete. `dif` is used but its assignment
# is not visible (presumably `now - start` -- confirm), and the body of the
# guard below is missing, so the string returned when nothing has been
# downloaded yet cannot be determined from here.
191 def calc_speed(start
, now
, bytes):
# Guard against a zero byte count or a negligible `dif`.
193 if bytes == 0 or dif
< const_epsilon
:
# The rate (bytes per unit of `dif`) is rendered by format_bytes().
195 return format_bytes(float(bytes) / dif
)
# Title string minimal transformation
def title_string_touch(title):
    """Return title with path separators replaced so it is a safe file name part.

    title -- the raw video title.

    Every occurrence of os.sep is replaced with '%'. On platforms that define
    an alternative separator (os.altsep, e.g. '/' on Windows) that character
    is replaced as well, since it would equally split the name into
    directories. All other characters are kept as they are.
    """
    touched = title.replace(os.sep, '%')
    if os.altsep is not None:
        touched = touched.replace(os.altsep, '%')
    return touched
# Create the command line options parser and parse command line
cmdl_usage = 'usage: %prog [options] video_url'
cmdl_version = '2008.03.22'

# conflict_handler='resolve' lets the explicit -h and -v definitions below
# replace the parser's built-in help/version options instead of clashing.
cmdl_parser = optparse.OptionParser(usage=cmdl_usage, version=cmdl_version, conflict_handler='resolve')
cmdl_parser.add_option('-h', '--help', action='help', help='print this help text and exit')
cmdl_parser.add_option('-v', '--version', action='version', help='print program version and exit')
cmdl_parser.add_option('-u', '--username', dest='username', metavar='USERNAME', help='account username')
cmdl_parser.add_option('-p', '--password', dest='password', metavar='PASSWORD', help='account password')
cmdl_parser.add_option('-o', '--output', dest='outfile', metavar='FILE', help='output video file name')
cmdl_parser.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode')
cmdl_parser.add_option('-s', '--simulate', action='store_true', dest='simulate', help='do not download video')
cmdl_parser.add_option('-t', '--title', action='store_true', dest='use_title', help='use title in file name')
cmdl_parser.add_option('-l', '--literal', action='store_true', dest='use_literal', help='use literal title in file name')
cmdl_parser.add_option('-n', '--netrc', action='store_true', dest='use_netrc', help='use .netrc authentication data')
cmdl_parser.add_option('-g', '--get-url', action='store_true', dest='get_url', help='print final video URL only')
cmdl_parser.add_option('-2', '--title-too', action='store_true', dest='get_title', help='used with -g, print title too')
cmdl_parser.add_option('-f', '--format', dest='video_format', metavar='FORMAT', help='append &fmt=FORMAT to the URL')
cmdl_parser.add_option('-b', '--best-quality', action='store_true', dest='best_quality', help='alias for -f 18')

# Options not given on the command line are left as None in cmdl_opts;
# positional arguments end up in cmdl_args.
(cmdl_opts, cmdl_args) = cmdl_parser.parse_args()
# Apply the configured timeout to every socket created from here on.
223 socket
.setdefaulttimeout(const_timeout
)
# Exactly one positional argument (the video URL) is expected.
# NOTE(review): only print_help() is visible in this branch; whatever followed
# it (presumably an exit) is missing from this extract, so as shown execution
# would continue to the cmdl_args[0] lookup below -- confirm.
226 if len(cmdl_args
) != 1:
227 cmdl_parser
.print_help()
229 video_url_cmdl
= cmdl_args
[0]
231 # Verify video URL format and convert to "standard" format
232 video_url_mo
= const_video_url_re
.match(video_url_cmdl
)
233 if video_url_mo
is None:
234 sys
.exit('Error: URL does not seem to be a youtube video URL. If it is, report a bug.')
# Group 2 of the pattern is the video identifier; rebuild a canonical watch URL.
235 video_url_id
= video_url_mo
.group(2)
236 video_url
= const_video_url_str
% video_url_id
# -b selects the best-quality format constant and an .mp4 extension.
# NOTE(review): two consecutive assignments to video_extension are visible;
# presumably an `else:` separated them and video_format was initialised to
# None for the non -b case -- those lines are missing from this extract.
239 if cmdl_opts
.best_quality
:
240 video_format
= const_best_quality_format
241 video_extension
= '.mp4'
243 video_extension
= '.flv'
# An explicit -f value overrides the format, unless it contradicts -b.
if cmdl_opts.video_format is not None:
    # optparse stores -f as a string while -b selects the integer constant,
    # so compare textual forms; otherwise "-b -f 18" is wrongly rejected.
    if video_format is not None and str(video_format) != cmdl_opts.video_format:
        sys.exit('Error: conflicting video formats specified\n')
    video_format = cmdl_opts.video_format

# Append the format selector to the watch URL when a format is requested.
if video_format is not None:
    video_url = '%s%s' % (video_url, const_video_url_format_suffix % video_format)
# Check conflicting options
# A file name is pointless when nothing is going to be written; warn only.
if cmdl_opts.outfile is not None and (cmdl_opts.simulate or cmdl_opts.get_url):
    sys.stderr.write('Warning: video file name given but will not be used.\n')

if cmdl_opts.outfile is not None and (cmdl_opts.use_title or cmdl_opts.use_literal):
    sys.exit('Error: using the video title conflicts with using a given file name.')

if cmdl_opts.use_title and cmdl_opts.use_literal:
    sys.exit('Error: cannot use title and literal title at the same time.')

if cmdl_opts.quiet and cmdl_opts.get_url:
    sys.exit('Error: cannot be quiet and print final URL at the same time.')

# Incorrect option formatting
if cmdl_opts.username is None and cmdl_opts.password is not None:
    sys.exit('Error: password given but username is missing.')

if cmdl_opts.use_netrc and (cmdl_opts.username is not None or cmdl_opts.password is not None):
    sys.exit('Error: cannot use netrc and username/password at the same time.')

# store_true options stay None unless given, hence the comparisons with None.
if cmdl_opts.get_url is None and cmdl_opts.get_title is not None:
    sys.exit('Error: getting title requires getting URL.')
276 # Get account information if any
277 account_username
= None
278 account_password
= None
# With -n the credentials come from the .netrc entry for machine 'youtube'.
# NOTE(review): this section is incomplete in this extract. The `try:` that
# pairs with the `except` below, the test guarding the "no authenticators"
# exit (presumably `info is None`), the handler guarding the "unable to read"
# exit and the `else:` introducing the command-line branch are not visible.
280 if cmdl_opts
.use_netrc
:
282 info
= netrc
.netrc().authenticators('youtube')
284 sys
.exit('Error: no authenticators for machine youtube.')
# Elements 0 and 2 of the authenticators tuple are used as login and password.
285 account_username
= info
[0]
286 account_password
= info
[2]
288 sys
.exit('Error: unable to read .netrc file.')
289 except netrc
.NetrcParseError
:
290 sys
.exit('Error: unable to parse .netrc file.')
# Command-line credentials: prompt for the password when only -u was given.
292 account_username
= cmdl_opts
.username
293 if account_username
is not None:
294 if cmdl_opts
.password
is None:
295 account_password
= getpass
.getpass('Type YouTube password and press return: ')
297 account_password
= cmdl_opts
.password
299 # Get output file name
# Default name is '<video id><extension>'.
# NOTE(review): presumably an `else:` preceded the second assignment so that
# -o only applies when given; that line is not visible here.
300 if cmdl_opts
.outfile
is None:
301 video_filename
= '%s%s' % (video_url_id
, video_extension
)
303 video_filename
= cmdl_opts
.outfile
# Install cookie and proxy handlers
# install_opener() replaces the global opener, so two consecutive calls leave
# only the second opener active. Build one opener carrying both handlers so
# the proxy handler and the cookie processor are installed together.
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), urllib2.HTTPCookieProcessor()))
309 # Log in and confirm age if needed
# NOTE(review): indentation is lost in this extract, so it cannot be told from
# here whether the age-confirmation request is nested under the same `if` as
# the login request -- confirm against the original file.
# NOTE(review): the username and password are interpolated into the POST body
# as-is; values containing '&', '=' or '%' would corrupt the form data unless
# they are URL-encoded first.
310 if account_username
is not None:
311 url
= const_login_url_str
% video_url_id
312 post
= const_login_post_str
% (video_url_id
, account_username
, account_password
)
313 download_step(False, 'Logging in', 'unable to log in', url
, post
)
315 url
= const_age_url_str
% video_url_id
316 post
= const_age_post_str
% video_url_id
317 download_step(False, 'Confirming age', 'unable to confirm age', url
, post
)
319 # Retrieve video webpage
# True asks download_step() to hand back the page body for scraping.
320 video_webpage
= download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url
)
322 # Extract video title if needed
323 if cmdl_opts
.use_title
or cmdl_opts
.use_literal
or cmdl_opts
.get_title
:
324 video_title
= extract_step('Extracting video title', 'unable to extract video title', const_video_title_re
, video_webpage
)
326 # Extract needed video URL parameters
327 video_url_t_param
= extract_step('Extracting URL "t" parameter', 'unable to extract URL "t" parameter', const_url_t_param_re
, video_webpage
)
# The download URL combines the video id with the scraped "t" value, plus the
# format suffix when a format was requested.
328 video_url_real
= const_video_url_real_str
% (video_url_id
, video_url_t_param
)
329 if video_format
is not None:
330 video_url_real
= '%s%s' % (video_url_real
, const_video_url_format_suffix
% video_format
)
332 # Rebuild filename if needed
# -t uses the normalized title, -l the minimally transformed one.
# NOTE(review): presumably an `else:` separated the two `prefix` assignments;
# that line is not visible in this extract.
333 if cmdl_opts
.use_title
or cmdl_opts
.use_literal
:
334 if cmdl_opts
.use_title
:
335 prefix
= title_string_norm(video_title
)
337 prefix
= title_string_touch(video_title
)
338 video_filename
= '%s-%s%s' % (prefix
, video_url_id
, video_extension
)
# Warn (without failing) when the chosen name lacks the expected extension.
341 if not video_filename
.lower().endswith(video_extension
):
342 sys
.stderr
.write('Warning: video file name does not end in %s\n' % video_extension
)
344 # Retrieve video data
# NOTE(review): this section is heavily incomplete in this extract. Not
# visible: the `try:` lines pairing with the `except` clauses, the bodies of
# the get_title and simulate/get_url tests, the handling when the
# Content-length header is absent, the initialisation of byte_counter, the
# loop construct and its exit test, the assignments of `before`/`after` and
# of `eta_str` for the unknown-length case, closing of video_file, and the
# body of the final KeyboardInterrupt handler. The comments below describe
# only what the visible statements do.
346 cond_print('Requesting video file... ')
347 video_data
= perform_request(video_url_real
)
348 cond_print('done.\n')
# geturl() reports the address the data is actually served from.
349 cond_print('Video data found at %s\n' % video_data
.geturl())
351 if cmdl_opts
.get_title
:
354 if cmdl_opts
.get_url
:
355 print video_data
.geturl()
357 if cmdl_opts
.simulate
or cmdl_opts
.get_url
:
# Open the destination in binary mode; failure to open is fatal.
361 video_file
= open(video_filename
, 'wb')
362 except (IOError, OSError):
363 sys
.exit('Error: unable to open "%s" for writing.' % video_filename
)
# Expected size comes from the Content-length response header.
365 video_len
= long(video_data
.info()['Content-length'])
366 video_len_str
= format_bytes(video_len
)
# Placeholder shown when the size is not available.
369 video_len_str
= 'N/A'
372 block_size
= const_initial_block_size
373 start_time
= time
.time()
# Progress figures: percentage and ETA need the total size.
375 if video_len
is not None:
376 percent
= float(byte_counter
) / float(video_len
) * 100.0
377 percent_str
= '%.1f' % percent
378 eta_str
= calc_eta(start_time
, time
.time(), video_len
, byte_counter
)
380 percent_str
= '---.-'
382 counter
= format_bytes(byte_counter
)
383 speed_str
= calc_speed(start_time
, time
.time(), byte_counter
)
# The leading '\r' rewrites the same terminal line on every update.
384 cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str
, counter
, video_len_str
, speed_str
, eta_str
))
# Read one block, account for it, store it, then pick the next block size.
387 video_block
= video_data
.read(block_size
)
389 dl_bytes
= len(video_block
)
392 byte_counter
+= dl_bytes
393 video_file
.write(video_block
)
394 block_size
= new_block_size(before
, after
, dl_bytes
)
# A short or long transfer relative to Content-length is treated as fatal.
396 if video_len
is not None and byte_counter
!= video_len
:
397 error_advice_exit('server did not send the expected ammount of data')
400 cond_print('done.\n')
401 cond_print('Video data saved to %s\n' % video_filename
)
403 except (urllib2
.URLError
, ValueError, httplib
.HTTPException
, TypeError, socket
.error
):
404 cond_print('failed.\n')
405 error_advice_exit('unable to download video data')
407 except KeyboardInterrupt: