Raphaël G. Git Repositories - youtubedl/blob - youtube-dl
Imported Upstream version 2007.10.09
[youtubedl] / youtube-dl
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2006-2007 Ricardo Garcia Gonzalez
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the "Software"),
7 # to deal in the Software without restriction, including without limitation
8 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 # and/or sell copies of the Software, and to permit persons to whom the
10 # Software is furnished to do so, subject to the following conditions:
11 #
12 # The above copyright notice and this permission notice shall be included
13 # in all copies or substantial portions of the Software.
14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 # OTHER DEALINGS IN THE SOFTWARE.
22 #
23 # Except as contained in this notice, the name(s) of the above copyright
24 # holders shall not be used in advertising or otherwise to promote the
25 # sale, use or other dealings in this Software without prior written
26 # authorization.
27 #
import getpass
import httplib
import math
import netrc
import optparse
import os
import re
import socket
import string
import sys
import time
import urllib
import urllib2
40
# Global constants

# Canonical watch-page URL; %s is the video id.
const_video_url_str = 'http://www.youtube.com/watch?v=%s'

# Accepts either a bare video id or a youtube.com URL (/v/ID, watch?v=ID,
# watch.php?v=ID, with an optional "http://" and host prefix).  Group 2 is
# the video id.  The trailing conditional only allows "&..." or "/..." after
# the id when the URL prefix (group 1) was present.
const_video_url_re = re.compile(
    r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$'
)

# Login form: URL and POST body.  "%%" is a literal "%" once the video id
# (and, for the POST body, the credentials) are interpolated.
const_login_url_str = 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s'
const_login_post_str = 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In'

# Age confirmation form: URL and POST body, both parameterised by video id.
const_age_url_str = 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s'
const_age_post_str = 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm'

# Captures the value of the "t" entry as it appears in the watch page.
const_url_t_param_re = re.compile(r"[,{]t:'([^']*)'")

# Real download URL; filled with (video id, "t" parameter).
const_video_url_real_str = 'http://www.youtube.com/get_video?video_id=%s&t=%s'

# Captures the video title from the page <title> element.
const_video_title_re = re.compile(r'<title>YouTube - ([^<]*)</title>', re.I | re.M)

# Size units and download tuning.
const_1k = 1024
const_initial_block_size = 10 * const_1k
# Time differences below this many seconds are treated as zero.
const_epsilon = 0.0001
54
55 # Print error message, followed by standard advice information, and then exit
def error_advice_exit(error_text):
    """Report an error with troubleshooting advice on stderr, then exit.

    error_text is inserted into an "Error: <text>." line.  The function
    never returns: it finishes with sys.exit('\\n').
    """
    advice = (
        'Error: %s.\n' % error_text,
        'Try again several times. It may be a temporary problem.\n',
        'Other typical problems:\n\n',
        '* Video no longer exists.\n',
        '* Video requires age confirmation but you did not provide an account.\n',
        '* You provided the account data, but it is not valid.\n',
        '* The connection was cut suddenly for some reason.\n',
        '* YouTube changed their system, and the program no longer works.\n',
        '\nTry to confirm you are able to view the video using a web browser.\n',
        'Use the same video URL and account information, if needed, with this program.\n',
        'When using a proxy, make sure http_proxy has http://host:port format.\n',
        'Try again several times and contact me if the problem persists.\n',
    )
    sys.stderr.write(''.join(advice))
    sys.exit('\n')
70
71 # Wrapper to create custom requests with typical headers
def request_create(url, data=None):
    """Build a urllib2.Request for url carrying browser-like headers.

    When data is not None it is attached to the request as its body.
    Returns the prepared request object; nothing is sent here.
    """
    # Try to mimic Firefox, at least a little bit
    browser_headers = (
        ('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1) Gecko/20061010 Firefox/2.0'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
        ('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'),
        ('Accept-Language', 'en-us,en;q=0.5'),
    )
    request = urllib2.Request(url)
    if data is not None:
        request.add_data(data)
    for header_name, header_value in browser_headers:
        request.add_header(header_name, header_value)
    return request
82
83 # Perform a request, process headers and return response
def perform_request(url, data=None):
    """Open url (with optional request body data) and return the response.

    The request is built by request_create() and opened through
    urllib2.urlopen(); any exception raised by urlopen propagates.
    """
    return urllib2.urlopen(request_create(url, data))
88
89 # Conditional print
def cond_print(str):
    """Write str to stdout and flush, unless output is suppressed.

    Nothing is printed when the parsed command line options have either
    the quiet flag or the get_url flag set.
    """
    if cmdl_opts.quiet or cmdl_opts.get_url:
        return
    sys.stdout.write(str)
    sys.stdout.flush()
95
96 # Title string normalization
def title_string_norm(title):
    """Return title reduced to lowercase ASCII words joined by underscores.

    Every character that is not an ASCII letter or digit acts as a word
    separator; runs of separators collapse and leading/trailing ones vanish.
    """
    keep = string.ascii_letters + string.digits
    cleaned = []
    for char in title:
        if char in keep:
            cleaned.append(char)
        else:
            cleaned.append(' ')
    words = ''.join(cleaned).split()
    return '_'.join(words).lower()
102
103 # Title string minimal transformation
def title_string_touch(title):
    """Return title with every path separator (os.sep) replaced by '%'."""
    pieces = title.split(os.sep)
    return '%'.join(pieces)
106
107 # Generic download step
def download_step(return_data_flag, step_title, step_error, url, post_data=None):
    """Fetch url as one named step, reporting progress via cond_print().

    step_title is shown as "<title>... " followed by "done." or "failed.".
    On a network/HTTP error the program ends through
    error_advice_exit(step_error); on Ctrl-C it ends through sys.exit.
    Returns the response body when return_data_flag is true, else None.
    """
    try:
        cond_print('%s... ' % step_title)
        body = perform_request(url, post_data).read()
        cond_print('done.\n')
    except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error):
        cond_print('failed.\n')
        error_advice_exit(step_error)
    except KeyboardInterrupt:
        sys.exit('\n')
    else:
        if return_data_flag:
            return body
    return None
123
124 # Generic extract step
def extract_step(step_title, step_error, regexp, data):
    """Search data with regexp as one named step and return group 1.

    Progress is reported through cond_print().  When the pattern does not
    match, the program ends through error_advice_exit(step_error); on
    Ctrl-C it ends through sys.exit.
    """
    try:
        cond_print('%s... ' % step_title)
        found = regexp.search(data)
        if found is None:
            cond_print('failed.\n')
            # Does not return: error_advice_exit finishes with sys.exit.
            error_advice_exit(step_error)
        captured = found.group(1)
        cond_print('done.\n')
        return captured
    except KeyboardInterrupt:
        sys.exit('\n')
140
141 # Calculate new block size based on previous block size
def new_block_size(before, after, bytes):
    """Return the size to request for the next block.

    before and after are the timestamps around the previous read and bytes
    is how much that read returned.  The result is the measured rate
    (bytes per second) clamped between half and double the previous
    amount, each bound being at least 1.  When the elapsed time is below
    const_epsilon the upper bound is returned.
    """
    lower = max(bytes / 2.0, 1.0)
    upper = max(bytes * 2.0, 1.0)
    elapsed = after - before
    if elapsed < const_epsilon:
        return int(upper)
    rate = bytes / elapsed
    return int(min(max(rate, lower), upper))
154
155 # Get optimum 1k exponent to represent a number of bytes
def optimum_k_exp(num_bytes):
    """Return the power of const_1k best suited to display num_bytes.

    The result is used by format_bytes() as an index into its unit-suffix
    string, so it must never be negative.  Any value below 1 (zero, a
    fractional rate such as a very slow transfer speed, or a negative
    number) therefore maps to exponent 0.  Previously a value below
    1/const_1k produced a negative exponent, which selected a suffix from
    the end of the string, and a negative value made math.log raise.
    """
    if num_bytes < 1:
        return 0
    # Truncation is intended: 1023 bytes still belongs to exponent 0.
    return int(math.log(num_bytes, const_1k))
161
162 # Get optimum representation of number of bytes
def format_bytes(num_bytes):
    """Return num_bytes as a short human-readable string.

    The unit suffix is picked from 'bkMGTPEZY' using optimum_k_exp().
    With exponent 0 the number is printed as-is followed by 'b'; otherwise
    it is scaled by the matching power of const_1k and printed with two
    decimals.  Exits the program if the exponent has no suffix.
    """
    suffixes = 'bkMGTPEZY'
    exponent = optimum_k_exp(num_bytes)
    try:
        unit = suffixes[exponent]
    except IndexError:
        sys.exit('Error: internal error formatting number of bytes.')
    if exponent == 0:
        return '%s%s' % (num_bytes, unit)
    scaled = float(num_bytes) / float(const_1k ** exponent)
    return '%.2f%s' % (scaled, unit)
174
175 # Calculate ETA and return it in string format as MM:SS
def calc_eta(start, now, total, current):
    """Return the estimated remaining time as an 'MM:SS' string.

    start and now are timestamps, total is the expected size and current
    the amount received so far.  '--:--' is returned when nothing has been
    received yet, when the elapsed time is below const_epsilon, or when
    the estimate exceeds 99 minutes.
    """
    unknown = '--:--'
    elapsed = now - start
    if current == 0 or elapsed < const_epsilon:
        return unknown
    bytes_per_second = float(current) / elapsed
    remaining = int((total - current) / bytes_per_second)
    minutes, seconds = divmod(remaining, 60)
    if minutes > 99:
        return unknown
    return '%02d:%02d' % (minutes, seconds)
187
188 # Calculate speed and return it in string format
def calc_speed(start, now, bytes):
    """Return the average transfer rate as a format_bytes() string.

    start and now are timestamps and bytes the amount received between
    them.  'N/A b' is returned when nothing has been received or when the
    elapsed time is below const_epsilon.
    """
    elapsed = now - start
    if bytes == 0 or elapsed < const_epsilon:
        return 'N/A b'
    rate = float(bytes) / elapsed
    return format_bytes(rate)
194
# Create the command line options parser and parse command line
cmdl_usage = 'usage: %prog [options] video_url'
cmdl_version = '2007.10.09'
# The 'resolve' conflict handler lets the -h/--help and -v/--version options
# registered below take the place of any the parser already defines.
cmdl_parser = optparse.OptionParser(usage=cmdl_usage, version=cmdl_version, conflict_handler='resolve')

# One entry per option, in the order they should appear in the help text:
# (flag strings, keyword settings for add_option).
for cmdl_flags, cmdl_settings in (
    (('-h', '--help'), dict(action='help', help='print this help text and exit')),
    (('-v', '--version'), dict(action='version', help='print program version and exit')),
    (('-u', '--username'), dict(dest='username', metavar='USERNAME', help='account username')),
    (('-p', '--password'), dict(dest='password', metavar='PASSWORD', help='account password')),
    (('-o', '--output'), dict(dest='outfile', metavar='FILE', help='output video file name')),
    (('-q', '--quiet'), dict(action='store_true', dest='quiet', help='activates quiet mode')),
    (('-s', '--simulate'), dict(action='store_true', dest='simulate', help='do not download video')),
    (('-t', '--title'), dict(action='store_true', dest='use_title', help='use title in file name')),
    (('-l', '--literal'), dict(action='store_true', dest='use_literal', help='use literal title in file name')),
    (('-n', '--netrc'), dict(action='store_true', dest='use_netrc', help='use .netrc authentication data')),
    (('-g', '--get-url'), dict(action='store_true', dest='get_url', help='print final video URL only')),
    (('-2', '--title-too'), dict(action='store_true', dest='get_title', help='used with -g, print title too')),
):
    cmdl_parser.add_option(*cmdl_flags, **cmdl_settings)
del cmdl_flags, cmdl_settings

(cmdl_opts, cmdl_args) = cmdl_parser.parse_args()
212
# Get video URL: exactly one positional argument is required
if len(cmdl_args) != 1:
    cmdl_parser.print_help()
    sys.exit('\n')
[video_url_cmdl] = cmdl_args

# Verify video URL format and convert to "standard" format
video_url_mo = const_video_url_re.match(video_url_cmdl)
if video_url_mo is None:
    sys.exit('Error: URL does not seem to be a youtube video URL. If it is, report a bug.')
# Group 2 of the pattern is the bare video id
video_url_id = video_url_mo.group(2)
video_url = const_video_url_str % video_url_id
225
# Check conflicting options
if cmdl_opts.outfile is not None:
    # A given file name is pointless when nothing is going to be saved...
    if cmdl_opts.simulate or cmdl_opts.get_url:
        sys.stderr.write('Warning: video file name given but will not be used.\n')

    # ...and contradictory when the name is to be derived from the title.
    if cmdl_opts.use_title or cmdl_opts.use_literal:
        sys.exit('Error: using the video title conflicts with using a given file name.')

if cmdl_opts.password is not None and cmdl_opts.use_netrc:
    sys.exit('Error: using netrc conflicts with giving command line password.')

if cmdl_opts.use_literal and cmdl_opts.use_title:
    sys.exit('Error: cannot use title and literal title at the same time.')

if cmdl_opts.get_url and cmdl_opts.quiet:
    sys.exit('Error: cannot be quiet and print final URL at the same time.')

# Incorrect option formatting
if cmdl_opts.password is not None and cmdl_opts.username is None:
    sys.exit('Error: password give but username is missing.')

# Flags that were not given on the command line are None
if cmdl_opts.get_title is not None and cmdl_opts.get_url is None:
    sys.exit('Error: getting title requires getting URL.')
248
# Get account information if any
account_username = None
account_password = None

# Read the "youtube" machine entry from .netrc when requested
if cmdl_opts.use_netrc:
    try:
        info = netrc.netrc().authenticators('youtube')
        if info is None:
            sys.exit('Error: no authenticators for machine youtube.')
        # The entry is indexed as: 0 = login, 2 = password
        netrc_username = info[0]
        netrc_password = info[2]
    except IOError:
        sys.exit('Error: unable to read .netrc file.')
    except netrc.NetrcParseError:
        sys.exit('Error: unable to parse .netrc file.')

# Decide which credentials to use, in order of precedence
if cmdl_opts.password is not None:
    # Both username and password came from the command line
    account_username = cmdl_opts.username
    account_password = cmdl_opts.password
elif cmdl_opts.username is not None and cmdl_opts.use_netrc:
    # Username from the command line must agree with .netrc
    if cmdl_opts.username != netrc_username:
        sys.exit('Error: conflicting username from .netrc and command line options.')
    account_username = cmdl_opts.username
    account_password = netrc_password
elif cmdl_opts.username is not None:
    # Username only: ask for the password interactively
    account_username = cmdl_opts.username
    account_password = getpass.getpass('Type YouTube password and press return: ')
elif cmdl_opts.use_netrc:
    # Everything comes from .netrc
    if len(netrc_username) == 0:
        sys.exit('Error: empty username in .netrc file.')
    account_username = netrc_username
    account_password = netrc_password
282
# Get output file name: the given one, or "<video id>.flv" by default
video_filename = cmdl_opts.outfile
if video_filename is None:
    video_filename = '%s.flv' % video_url_id

# Check name
if not video_filename.lower().endswith('.flv'):
    sys.stderr.write('Warning: video file name does not end in .flv\n')

# Test writable file (skipped when no file is going to be written).
# Opening in 'wb' mode creates the file, or empties an existing one.
if not cmdl_opts.simulate and not cmdl_opts.get_url:
    try:
        disk_test = open(video_filename, 'wb')
        disk_test.close()
    except (OSError, IOError):
        sys.exit('Error: unable to open %s for writing.' % video_filename)
301
# Install cookie and proxy handlers.
# install_opener() replaces the global opener, so installing two openers in
# a row would discard the first one.  Build a single opener that carries
# both handlers instead.
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), urllib2.HTTPCookieProcessor()))
305
# Log in and confirm age if needed
# NOTE(review): both steps are assumed to sit inside this "if"; the original
# indentation was lost in this copy of the file -- confirm.
if account_username is not None:
    # The credentials are free-form text going into a form-encoded POST
    # body, so they are percent-encoded: an unescaped '&', '=', '+' or '%'
    # would otherwise corrupt the request.  str() keeps the previous
    # "%s"-style rendering for non-string values.
    url = const_login_url_str % video_url_id
    post = const_login_post_str % (
        video_url_id,
        urllib.quote_plus(str(account_username)),
        urllib.quote_plus(str(account_password)))
    download_step(False, 'Logging in', 'unable to log in', url, post)

    url = const_age_url_str % video_url_id
    post = const_age_post_str % video_url_id
    download_step(False, 'Confirming age', 'unable to confirm age', url, post)
315
# Retrieve video webpage
video_webpage = download_step(
    True,
    'Retrieving video webpage',
    'unable to retrieve video webpage',
    video_url)

# Extract video title if needed (for the file name or for printing it)
if cmdl_opts.use_title or cmdl_opts.use_literal or cmdl_opts.get_title:
    video_title = extract_step(
        'Extracting video title',
        'unable to extract video title',
        const_video_title_re,
        video_webpage)

# Extract needed video URL parameters and build the real download URL
video_url_t_param = extract_step(
    'Extracting URL "t" parameter',
    'unable to extract URL "t" parameter',
    const_url_t_param_re,
    video_webpage)
video_url_real = const_video_url_real_str % (video_url_id, video_url_t_param)
326
# Retrieve video data
try:
    cond_print('Requesting video file... ')
    video_data = perform_request(video_url_real)
    # Newline added so the next message starts on its own line, as after
    # every other step.
    cond_print('done.\n')
    cond_print('Video data found at %s\n' % video_data.geturl())

    if cmdl_opts.get_title:
        sys.stdout.write('%s\n' % video_title)

    if cmdl_opts.get_url:
        sys.stdout.write('%s\n' % video_data.geturl())

    # Nothing is saved in simulate or get-url mode
    if cmdl_opts.simulate or cmdl_opts.get_url:
        sys.exit()

    # The expected size is optional: without a Content-length header the
    # percentage and ETA cannot be computed.
    try:
        video_len = int(video_data.info()['Content-length'])
        video_len_str = format_bytes(video_len)
    except KeyError:
        video_len = None
        video_len_str = 'N/A'

    byte_counter = 0
    block_size = const_initial_block_size
    video_file = open(video_filename, 'wb')
    try:
        start_time = time.time()
        while True:
            # Progress line.  A zero expected length is handled like an
            # unknown one, since a percentage of zero cannot be computed.
            if video_len is not None and video_len > 0:
                percent = float(byte_counter) / float(video_len) * 100.0
                percent_str = '%.1f' % percent
                eta_str = calc_eta(start_time, time.time(), video_len, byte_counter)
            else:
                percent_str = '---.-'
                eta_str = '--:--'
            counter = format_bytes(byte_counter)
            speed_str = calc_speed(start_time, time.time(), byte_counter)
            cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str, counter, video_len_str, speed_str, eta_str))

            # Read one block, timing it to size the next request
            before = time.time()
            video_block = video_data.read(block_size)
            after = time.time()
            dl_bytes = len(video_block)
            if dl_bytes == 0:
                # An empty read marks the end of the data
                break
            byte_counter += dl_bytes
            video_file.write(video_block)
            block_size = new_block_size(before, after, dl_bytes)
    finally:
        # Close the output file on every path, including errors and Ctrl-C
        video_file.close()

    if video_len is not None and byte_counter != video_len:
        error_advice_exit('server did not send the expected ammount of data')

    cond_print('done.\n')
    cond_print('Video data saved to %s\n' % video_filename)

except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error):
    cond_print('failed.\n')
    error_advice_exit('unable to download video data')

except KeyboardInterrupt:
    sys.exit('\n')
389
# Rename video file if needed: "<title prefix>-<video id>.flv"
if cmdl_opts.use_title or cmdl_opts.use_literal:
    # -t normalizes the title; -l only replaces path separators
    if cmdl_opts.use_title:
        make_prefix = title_string_norm
    else:
        make_prefix = title_string_touch

    try:
        prefix = make_prefix(video_title)
        final_filename = '%s-%s.flv' % (prefix, video_url_id)
        os.rename(video_filename, final_filename)
        cond_print('Video file renamed to %s\n' % final_filename)

    except OSError:
        # The download itself succeeded, so this is only a warning
        sys.stderr.write('Warning: unable to rename file.\n')

    except KeyboardInterrupt:
        sys.exit('\n')

# Finish
sys.exit()