Raphaël G. Git Repositories - youtubedl/blob - youtube-dl
Imported Upstream version 2008.03.08
[youtubedl] / youtube-dl
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2006-2008 Ricardo Garcia Gonzalez
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the "Software"),
7 # to deal in the Software without restriction, including without limitation
8 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 # and/or sell copies of the Software, and to permit persons to whom the
10 # Software is furnished to do so, subject to the following conditions:
11 #
12 # The above copyright notice and this permission notice shall be included
13 # in all copies or substantial portions of the Software.
14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 # OTHER DEALINGS IN THE SOFTWARE.
22 #
23 # Except as contained in this notice, the name(s) of the above copyright
24 # holders shall not be used in advertising or otherwise to promote the
25 # sale, use or other dealings in this Software without prior written
26 # authorization.
27 #
28 import getpass
29 import httplib
30 import math
31 import netrc
32 import optparse
33 import os
34 import re
35 import socket
36 import string
37 import sys
38 import time
39 import urllib2
40
# Global constants

# Sizes, tolerances and timeouts
const_1k = 1024
const_initial_block_size = 10 * const_1k  # size of the first read request, in bytes
const_epsilon = 0.0001                    # time differences below this are treated as zero
const_timeout = 120                       # passed to socket.setdefaulttimeout()

# Video page URL: canonical template, accepted command line forms, quality suffix
const_video_url_str = 'http://www.youtube.com/watch?v=%s'
const_video_url_re = re.compile(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$')
const_video_url_best_quality_suffix = '&fmt=18'

# Login and age confirmation forms ('%%' is a literal '%' after formatting)
const_login_url_str = 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s'
const_login_post_str = 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In'
const_age_url_str = 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s'
const_age_post_str = 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm'

# Data scraped from the video web page and the final download URL template
const_url_t_param_re = re.compile(r', "t": "([^"]+)"')
const_video_url_real_str = 'http://www.youtube.com/get_video?video_id=%s&t=%s'
const_video_title_re = re.compile(r'<title>YouTube - ([^<]*)</title>', re.M | re.I)
57
# Print error message, followed by standard advice information, and then exit
def error_advice_exit(error_text):
    """Write an error report plus troubleshooting advice to stderr and exit.

    error_text is interpolated into the leading 'Error: ...' line. This
    function never returns; it finishes by calling sys.exit() with a
    newline as the exit message.
    """
    report_lines = (
        'Error: %s.\n' % error_text,
        'Try again several times. It may be a temporary problem.\n',
        'Other typical problems:\n\n',
        '* Video no longer exists.\n',
        '* Video requires age confirmation but you did not provide an account.\n',
        '* You provided the account data, but it is not valid.\n',
        '* The connection was cut suddenly for some reason.\n',
        '* YouTube changed their system, and the program no longer works.\n',
        '\nTry to confirm you are able to view the video using a web browser.\n',
        'Use the same video URL and account information, if needed, with this program.\n',
        'When using a proxy, make sure http_proxy has http://host:port format.\n',
        'Try again several times and contact me if the problem persists.\n',
    )
    for report_line in report_lines:
        sys.stderr.write(report_line)
    sys.exit('\n')
73
74 # Wrapper to create custom requests with typical headers
75 def request_create(url, extra_headers, post_data):
76 retval = urllib2.Request(url)
77 if post_data is not None:
78 retval.add_data(post_data)
79 # Try to mimic Firefox, at least a little bit
80 retval.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11')
81 retval.add_header('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
82 retval.add_header('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5')
83 retval.add_header('Accept-Language', 'en-us,en;q=0.5')
84 if extra_headers is not None:
85 for header in extra_headers:
86 retval.add_header(header[0], header[1])
87 return retval
88
# Perform a request, process headers and return response
def perform_request(url, headers=None, data=None):
    """Open url through urllib2 and return the response object.

    headers and data are forwarded to request_create() as the extra
    headers and the POST body respectively.
    """
    return urllib2.urlopen(request_create(url, headers, data))
94
# Conditional print
def cond_print(str):
    """Write str to stdout and flush, unless output is suppressed.

    Nothing is written when either the quiet or the get_url command line
    option (read from the module-level cmdl_opts) is set.
    """
    if cmdl_opts.quiet or cmdl_opts.get_url:
        return
    sys.stdout.write(str)
    sys.stdout.flush()
101
# Title string normalization
def title_string_norm(title):
    """Return title reduced to lowercase ASCII words joined by underscores.

    Every character that is not an ASCII letter or digit acts as a word
    separator; runs of separators collapse into a single underscore and
    leading/trailing separators are dropped.
    """
    keep = string.ascii_letters + string.digits
    chars = []
    for char in title:
        if char in keep:
            chars.append(char)
        else:
            chars.append(' ')
    words = ''.join(chars).split()
    return '_'.join(words).lower()
108
# Generic download step
def download_step(return_data_flag, step_title, step_error, url, post_data=None):
    """Fetch url while reporting progress, exiting the program on failure.

    step_title is announced through cond_print() before the request;
    post_data, if given, is sent as the request body. Returns the response
    body when return_data_flag is true and None otherwise. On a network or
    HTTP failure the step is reported as failed and error_advice_exit() is
    called with step_error; a keyboard interrupt exits the program.
    """
    try:
        cond_print('%s... ' % step_title)
        response = perform_request(url, data=post_data)
        payload = response.read()
        cond_print('done.\n')
        if not return_data_flag:
            return None
        return payload

    except KeyboardInterrupt:
        sys.exit('\n')

    except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error):
        cond_print('failed.\n')
        error_advice_exit(step_error)
125
# Generic extract step
def extract_step(step_title, step_error, regexp, data):
    """Search data with regexp and return the first captured group.

    step_title is announced through cond_print() before searching. When
    the pattern does not match, the step is reported as failed and
    error_advice_exit() is called with step_error (which exits the
    program). A keyboard interrupt exits the program.
    """
    try:
        cond_print('%s... ' % step_title)
        found = regexp.search(data)

        if found is not None:
            value = found.group(1)
            cond_print('done.\n')
            return value

        cond_print('failed.\n')
        error_advice_exit(step_error)

    except KeyboardInterrupt:
        sys.exit('\n')
142
# Calculate new block size based on previous block size
def new_block_size(before, after, bytes):
    """Return the size to request for the next read, as an int.

    before and after are the timestamps taken around the previous read
    and bytes is how much that read returned. The result is the measured
    transfer rate (bytes per unit of elapsed time) limited to the range
    [bytes / 2, bytes * 2], with both limits at least 1. When the elapsed
    time is below const_epsilon the upper limit is returned.
    """
    lower = max(bytes / 2.0, 1.0)
    upper = max(bytes * 2.0, 1.0)
    elapsed = after - before
    if elapsed < const_epsilon:
        return int(upper)
    rate = bytes / elapsed
    # Clamp the measured rate into [lower, upper]
    return int(min(max(rate, lower), upper))
156
# Get optimum 1k exponent to represent a number of bytes
def optimum_k_exp(num_bytes):
    """Return the largest exponent e >= 0 such that const_1k ** e <= num_bytes.

    Returns 0 when num_bytes is zero or smaller than one (for example a
    transfer rate below one byte per second), so that callers indexing a
    suffix table with the result never receive a negative index.
    """
    if num_bytes == 0:
        return 0
    exp = int(math.log(num_bytes, const_1k))
    # math.log() with an explicit base is computed as a quotient of two
    # logarithms and may land just below or above an exact power; compare
    # against the real powers to correct a possible off-by-one.
    if const_1k ** (exp + 1) <= num_bytes:
        exp += 1
    elif exp > 0 and const_1k ** exp > num_bytes:
        exp -= 1
    # Values between 0 and 1 produce a negative logarithm; clamp them.
    return max(exp, 0)
163
# Get optimum representation of number of bytes
def format_bytes(num_bytes):
    """Return num_bytes as a short string with a 1024-based unit suffix.

    The exponent comes from optimum_k_exp(). With exponent 0 the number is
    printed as is followed by 'b'; otherwise it is divided by the matching
    power of const_1k and shown with two decimals. Exits the program when
    the exponent has no corresponding suffix.
    """
    exp = optimum_k_exp(num_bytes)
    try:
        suffix = 'bkMGTPEZY'[exp]
    except IndexError:
        sys.exit('Error: internal error formatting number of bytes.')
    if exp == 0:
        return '%s%s' % (num_bytes, suffix)
    scaled = float(num_bytes) / float(const_1k**exp)
    return '%.2f%s' % (scaled, suffix)
176
# Calculate ETA and return it in string format as MM:SS
def calc_eta(start, now, total, current):
    """Return the estimated remaining time as 'MM:SS', or '--:--'.

    start and now are timestamps, total is the expected size and current
    the amount received so far. The placeholder is returned when nothing
    has been received yet, when the elapsed time is below const_epsilon,
    or when the estimate exceeds 99 minutes.
    """
    unknown = '--:--'
    elapsed = now - start
    if current == 0 or elapsed < const_epsilon:
        return unknown
    rate = float(current) / elapsed
    remaining = long((total - current) / rate)
    minutes, seconds = divmod(remaining, 60)
    if minutes > 99:
        return unknown
    return '%02d:%02d' % (minutes, seconds)
188
# Calculate speed and return it in string format
def calc_speed(start, now, bytes):
    """Return the average transfer rate since start, formatted by format_bytes().

    Returns 'N/A b' when no bytes were transferred or when the elapsed
    time is below const_epsilon.
    """
    elapsed = now - start
    if bytes != 0 and not elapsed < const_epsilon:
        return format_bytes(float(bytes) / elapsed)
    return 'N/A b'
195
196
# Title string minimal transformation
def title_string_touch(title):
    """Return title with every path separator (os.sep) replaced by '%'."""
    pieces = title.split(os.sep)
    return '%'.join(pieces)
200
# Create the command line options parser and parse command line
cmdl_usage = 'usage: %prog [options] video_url'
cmdl_version = '2008.03.08'
# conflict_handler='resolve' lets the explicit -h and -v definitions below
# replace the help and version options optparse would add by itself.
cmdl_parser = optparse.OptionParser(
    usage=cmdl_usage,
    version=cmdl_version,
    conflict_handler='resolve')
cmdl_parser.add_option('-h', '--help', action='help', help='print this help text and exit')
cmdl_parser.add_option('-v', '--version', action='version', help='print program version and exit')

# Options that take a value: (short, long, dest, metavar, help)
for (opt_short, opt_long, opt_dest, opt_metavar, opt_help) in (
        ('-u', '--username', 'username', 'USERNAME', 'account username'),
        ('-p', '--password', 'password', 'PASSWORD', 'account password'),
        ('-o', '--output', 'outfile', 'FILE', 'output video file name'),
):
    cmdl_parser.add_option(opt_short, opt_long, dest=opt_dest, metavar=opt_metavar, help=opt_help)

# Boolean switches: (short, long, dest, help)
for (opt_short, opt_long, opt_dest, opt_help) in (
        ('-q', '--quiet', 'quiet', 'activates quiet mode'),
        ('-s', '--simulate', 'simulate', 'do not download video'),
        ('-t', '--title', 'use_title', 'use title in file name'),
        ('-l', '--literal', 'use_literal', 'use literal title in file name'),
        ('-n', '--netrc', 'use_netrc', 'use .netrc authentication data'),
        ('-g', '--get-url', 'get_url', 'print final video URL only'),
        ('-2', '--title-too', 'get_title', 'used with -g, print title too'),
        ('-b', '--best-quality', 'best_quality', 'try to download the best quality version'),
):
    cmdl_parser.add_option(opt_short, opt_long, action='store_true', dest=opt_dest, help=opt_help)

(cmdl_opts, cmdl_args) = cmdl_parser.parse_args()
219
# Set socket timeout
socket.setdefaulttimeout(const_timeout)

# Get video URL: exactly one positional argument is required
if len(cmdl_args) != 1:
    cmdl_parser.print_help()
    sys.exit('\n')
(video_url_cmdl,) = cmdl_args

# Verify video URL format and convert to "standard" format
video_url_mo = const_video_url_re.match(video_url_cmdl)
if video_url_mo is None:
    sys.exit('Error: URL does not seem to be a youtube video URL. If it is, report a bug.')
video_url_id = video_url_mo.group(2)
video_url = const_video_url_str % video_url_id

# The best quality variant is requested through a URL suffix and is saved as .mp4
video_extension = '.flv'
if cmdl_opts.best_quality:
    video_url = video_url + const_video_url_best_quality_suffix
    video_extension = '.mp4'
240
# Check conflicting options
if cmdl_opts.outfile is not None and (cmdl_opts.simulate or cmdl_opts.get_url):
    # Not fatal: the name is simply ignored when nothing is downloaded
    sys.stderr.write('Warning: video file name given but will not be used.\n')

if cmdl_opts.outfile is not None and (cmdl_opts.use_title or cmdl_opts.use_literal):
    sys.exit('Error: using the video title conflicts with using a given file name.')

if cmdl_opts.use_title and cmdl_opts.use_literal:
    sys.exit('Error: cannot use title and literal title at the same time.')

if cmdl_opts.quiet and cmdl_opts.get_url:
    sys.exit('Error: cannot be quiet and print final URL at the same time.')

# Incorrect option formatting
if cmdl_opts.username is None and cmdl_opts.password is not None:
    sys.exit('Error: password given but username is missing.')

if cmdl_opts.use_netrc and (cmdl_opts.username is not None or cmdl_opts.password is not None):
    sys.exit('Error: cannot use netrc and username/password at the same time.')

# Test truthiness rather than identity with None, so the check does not
# depend on the default value optparse assigns to unset boolean switches.
if cmdl_opts.get_title and not cmdl_opts.get_url:
    sys.exit('Error: getting title requires getting URL.')
263
# Get account information if any
account_username = None
account_password = None

if not cmdl_opts.use_netrc:
    # Credentials from the command line; prompt for the password if only
    # the user name was supplied.
    account_username = cmdl_opts.username
    if account_username is not None:
        account_password = cmdl_opts.password
        if account_password is None:
            account_password = getpass.getpass('Type YouTube password and press return: ')
else:
    # Credentials from the 'youtube' machine entry of the .netrc file
    try:
        info = netrc.netrc().authenticators('youtube')
        if info is None:
            sys.exit('Error: no authenticators for machine youtube.')
        account_username = info[0]
        account_password = info[2]
    except IOError:
        sys.exit('Error: unable to read .netrc file.')
    except netrc.NetrcParseError:
        sys.exit('Error: unable to parse .netrc file.')
286
# Get output file name: the one given with -o, else "<video id><extension>"
if cmdl_opts.outfile is not None:
    video_filename = cmdl_opts.outfile
else:
    video_filename = video_url_id + video_extension
292
# Install cookie and proxy handlers
# Both handlers go into ONE opener: install_opener() replaces the global
# opener, so installing two openers in a row would discard the first one.
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), urllib2.HTTPCookieProcessor()))
296
# Log in and confirm age if needed
if account_username is not None:
    # Local import: quote_plus() is only needed when credentials are sent
    import urllib

    # The credentials are inserted into an application/x-www-form-urlencoded
    # body, so characters such as '&', '=', '%', '+' or spaces must be
    # escaped or they would corrupt the form fields. The video id needs no
    # escaping: the URL pattern only accepts letters, digits, '_' and '-'.
    url = const_login_url_str % video_url_id
    post = const_login_post_str % (
        video_url_id,
        urllib.quote_plus(account_username),
        urllib.quote_plus(account_password))
    download_step(False, 'Logging in', 'unable to log in', url, post)

    url = const_age_url_str % video_url_id
    post = const_age_post_str % video_url_id
    download_step(False, 'Confirming age', 'unable to confirm age', url, post)
306
# Retrieve video webpage
video_webpage = download_step(
    True,
    'Retrieving video webpage',
    'unable to retrieve video webpage',
    video_url)

# Extract video title if needed (for the file name or for printing it)
if cmdl_opts.use_title or cmdl_opts.use_literal or cmdl_opts.get_title:
    video_title = extract_step(
        'Extracting video title',
        'unable to extract video title',
        const_video_title_re,
        video_webpage)

# Extract needed video URL parameters and build the real download URL
video_url_t_param = extract_step(
    'Extracting URL "t" parameter',
    'unable to extract URL "t" parameter',
    const_url_t_param_re,
    video_webpage)
video_url_real = const_video_url_real_str % (video_url_id, video_url_t_param)
if cmdl_opts.best_quality:
    video_url_real = video_url_real + const_video_url_best_quality_suffix
319
# Rebuild filename if needed: "<title prefix>-<video id><extension>"
if cmdl_opts.use_title:
    prefix = title_string_norm(video_title)
elif cmdl_opts.use_literal:
    prefix = title_string_touch(video_title)
if cmdl_opts.use_title or cmdl_opts.use_literal:
    video_filename = '%s-%s%s' % (prefix, video_url_id, video_extension)

# Check name: warn (without failing) about an unexpected extension
if video_filename.lower().endswith(video_extension):
    pass
else:
    sys.stderr.write('Warning: video file name does not end in %s\n' % video_extension)
331
# Retrieve video data
try:
    cond_print('Requesting video file... ')
    video_data = perform_request(video_url_real)
    cond_print('done.\n')
    cond_print('Video data found at %s\n' % video_data.geturl())

    if cmdl_opts.get_title:
        sys.stdout.write('%s\n' % video_title)

    if cmdl_opts.get_url:
        sys.stdout.write('%s\n' % video_data.geturl())

    # Nothing is downloaded when simulating or only printing the URL
    if cmdl_opts.simulate or cmdl_opts.get_url:
        sys.exit()

    try:
        video_file = open(video_filename, 'wb')
    except (IOError, OSError):
        sys.exit('Error: unable to open "%s" for writing.' % video_filename)

    # The try/finally guarantees the output file is closed on every path,
    # including errors and the exits raised from inside the loop.
    try:
        try:
            video_len = int(video_data.info()['Content-length'])
            video_len_str = format_bytes(video_len)
        except KeyError:
            # Server did not announce a length: percentage and ETA unknown
            video_len = None
            video_len_str = 'N/A'

        byte_counter = 0
        block_size = const_initial_block_size
        start_time = time.time()
        while True:
            # A zero Content-length is treated like an unknown length for
            # display purposes to avoid dividing by zero.
            if video_len is not None and video_len > 0:
                percent = float(byte_counter) / float(video_len) * 100.0
                percent_str = '%.1f' % percent
                eta_str = calc_eta(start_time, time.time(), video_len, byte_counter)
            else:
                percent_str = '---.-'
                eta_str = '--:--'
            counter = format_bytes(byte_counter)
            speed_str = calc_speed(start_time, time.time(), byte_counter)
            cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str, counter, video_len_str, speed_str, eta_str))

            # Time each read so the next block size can follow the transfer rate
            before = time.time()
            video_block = video_data.read(block_size)
            after = time.time()
            dl_bytes = len(video_block)
            if dl_bytes == 0:
                # An empty read marks the end of the data
                break
            byte_counter += dl_bytes
            video_file.write(video_block)
            block_size = new_block_size(before, after, dl_bytes)

        if video_len is not None and byte_counter != video_len:
            error_advice_exit('server did not send the expected amount of data')
    finally:
        video_file.close()

    cond_print('done.\n')
    cond_print('Video data saved to %s\n' % video_filename)

except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error):
    cond_print('failed.\n')
    error_advice_exit('unable to download video data')

except KeyboardInterrupt:
    sys.exit('\n')

# Finish
sys.exit()