]> Raphaël G. Git Repositories - youtubedl/blob - youtube-dl
1aa612340933a1f4c55b889f018e1034e977af43
[youtubedl] / youtube-dl
1 #!/usr/bin/env python
2 #
3 # Copyright (c) 2006-2008 Ricardo Garcia Gonzalez
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the "Software"),
7 # to deal in the Software without restriction, including without limitation
8 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 # and/or sell copies of the Software, and to permit persons to whom the
10 # Software is furnished to do so, subject to the following conditions:
11 #
12 # The above copyright notice and this permission notice shall be included
13 # in all copies or substantial portions of the Software.
14 #
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19 # OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21 # OTHER DEALINGS IN THE SOFTWARE.
22 #
23 # Except as contained in this notice, the name(s) of the above copyright
24 # holders shall not be used in advertising or otherwise to promote the
25 # sale, use or other dealings in this Software without prior written
26 # authorization.
27 #
import getpass
import httplib
import math
import netrc
import optparse
import os
import re
import socket
import string
import sys
import time
import urllib
import urllib2
40
41 # Global constants
# Scaling base for byte counts: each unit suffix step is a factor of const_1k
const_1k = 1024
# Number of bytes requested by the first read of the download loop
const_initial_block_size = 10 * const_1k
# Time differences smaller than this are treated as "no time elapsed" when
# computing rates, block sizes and ETAs
const_epsilon = 0.0001
# Value handed to socket.setdefaulttimeout()
const_timeout = 120

# Canonical watch page URL; %s is the video ID
const_video_url_str = 'http://www.youtube.com/watch?v=%s'
# Accepts either a bare video ID or a youtube.com URL.  Group 1 is the
# optional URL prefix up to the ID, group 2 is the video ID itself; the
# trailing "[&/]..." part is only allowed when group 1 matched.
const_video_url_re = re.compile(r'^((?:http://)?(?:\w+\.)?youtube\.com/(?:v/|(?:watch(?:\.php)?)?\?(?:.+&)?v=))?([0-9A-Za-z_-]+)(?(1)[&/].*)?$')
# Login form URL and POST body.  "%%" is a literal percent sign once the
# string has gone through the % operator, so "%%3F" ends up as "%3F".
const_login_url_str = 'http://www.youtube.com/login?next=/watch%%3Fv%%3D%s'
const_login_post_str = 'current_form=loginForm&next=%%2Fwatch%%3Fv%%3D%s&username=%s&password=%s&action_login=Log+In'
# Age confirmation form URL and POST body; %s is the video ID
const_age_url_str = 'http://www.youtube.com/verify_age?next_url=/watch%%3Fv%%3D%s'
const_age_post_str = 'next_url=%%2Fwatch%%3Fv%%3D%s&action_confirm=Confirm'
# Captures the value of the "t" entry found in the watch page text
const_url_t_param_re = re.compile(r', "t": "([^"]+)"')
# Download URL template; filled with (video ID, "t" parameter)
const_video_url_real_str = 'http://www.youtube.com/get_video?video_id=%s&t=%s'
# Captures the text following "YouTube - " inside the page <title> element
const_video_title_re = re.compile(r'<title>YouTube - ([^<]*)</title>', re.M | re.I)
56
57 # Print error message, followed by standard advice information, and then exit
def error_advice_exit(error_text):
    """Report *error_text* on stderr with troubleshooting advice, then exit.

    The message is written as 'Error: <error_text>.' followed by a fixed
    list of hints.  The function never returns: it ends by calling
    sys.exit('\\n').
    """
    advice = (
        'Try again several times. It may be a temporary problem.\n',
        'Other typical problems:\n\n',
        '* Video no longer exists.\n',
        '* Video requires age confirmation but you did not provide an account.\n',
        '* You provided the account data, but it is not valid.\n',
        '* The connection was cut suddenly for some reason.\n',
        '* YouTube changed their system, and the program no longer works.\n',
        '\nTry to confirm you are able to view the video using a web browser.\n',
        'Use the same video URL and account information, if needed, with this program.\n',
        'When using a proxy, make sure http_proxy has http://host:port format.\n',
        'Try again several times and contact me if the problem persists.\n',
    )
    sys.stderr.write('Error: %s.\n' % error_text)
    for advice_line in advice:
        sys.stderr.write(advice_line)
    sys.exit('\n')
72
73 # Wrapper to create custom requests with typical headers
def request_create(url, data=None):
    """Build a urllib2.Request for *url* carrying browser-like headers.

    When *data* is not None it is attached to the request with add_data().
    The request is only prepared here, not sent; the Request object is
    returned.
    """
    # Try to mimic Firefox, at least a little bit
    browser_headers = (
        ('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'),
        ('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'),
        ('Accept', 'text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5'),
        ('Accept-Language', 'en-us,en;q=0.5'),
    )
    request = urllib2.Request(url)
    if data is not None:
        request.add_data(data)
    for header_name, header_value in browser_headers:
        request.add_header(header_name, header_value)
    return request
84
85 # Perform a request, process headers and return response
def perform_request(url, data=None):
    """Open *url* (sending *data* when given) and return the response.

    The request is prepared by request_create() and opened with
    urllib2.urlopen(); whatever urlopen() returns or raises is passed on
    unchanged.
    """
    return urllib2.urlopen(request_create(url, data))
90
91 # Conditional print
def cond_print(str):
    """Write *str* to stdout and flush it, unless output is suppressed.

    Nothing is written when either the quiet or the get_url command line
    option (read from the module-level cmdl_opts) is set.
    """
    if cmdl_opts.quiet or cmdl_opts.get_url:
        return
    sys.stdout.write(str)
    sys.stdout.flush()
97
98 # Title string normalization
def title_string_norm(title):
    """Return *title* reduced to lowercase ASCII words joined by '_'.

    Every character that is not an ASCII letter or digit acts as a word
    separator; runs of separators collapse into a single underscore and
    leading/trailing separators disappear.
    """
    keep = string.ascii_letters + string.digits
    pieces = []
    for char in title:
        if char in keep:
            pieces.append(char)
        else:
            pieces.append(' ')
    words = ''.join(pieces).split()
    return '_'.join(words).lower()
104
105 # Generic download step
def download_step(return_data_flag, step_title, step_error, url, post_data=None):
    """Fetch *url* as one reported step of the program.

    Prints '<step_title>... ' before the request and 'done.' after the
    whole response body has been read.  Returns the body when
    *return_data_flag* is true, otherwise None.

    On a network/HTTP level failure prints 'failed.' and terminates
    through error_advice_exit(step_error).  A keyboard interrupt ends the
    program with sys.exit('\\n').
    """
    try:
        cond_print('%s... ' % step_title)
        response = perform_request(url, post_data)
        body = response.read()
        cond_print('done.\n')
    except KeyboardInterrupt:
        sys.exit('\n')
    except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error):
        cond_print('failed.\n')
        error_advice_exit(step_error)
    else:
        if not return_data_flag:
            return None
        return body
121
122 # Generic extract step
def extract_step(step_title, step_error, regexp, data):
    """Search *data* with *regexp* as one reported step of the program.

    Prints '<step_title>... ' first.  When the pattern is found, prints
    'done.' and returns the text of its first group.  When it is not
    found, prints 'failed.' and terminates through
    error_advice_exit(step_error).  A keyboard interrupt ends the program
    with sys.exit('\\n').
    """
    try:
        cond_print('%s... ' % step_title)
        found = regexp.search(data)
        if found is not None:
            value = found.group(1)
            cond_print('done.\n')
            return value
        # No match: report and leave the program
        cond_print('failed.\n')
        error_advice_exit(step_error)
    except KeyboardInterrupt:
        sys.exit('\n')
138
139 # Calculate new block size based on previous block size
def new_block_size(before, after, bytes):
    """Return the size to request for the next read.

    *before* and *after* are the timestamps taken around the previous
    read and *bytes* the amount it returned.  The result is the measured
    rate (bytes divided by elapsed time), kept between half and double
    the previous amount and never below 1.  When the elapsed time is
    below const_epsilon the upper bound is returned.
    """
    upper = max(bytes * 2.0, 1.0)
    elapsed = after - before
    if elapsed < const_epsilon:
        # Too fast to measure: grow as much as allowed
        return int(upper)
    lower = max(bytes / 2.0, 1.0)
    rate = bytes / elapsed
    return int(min(max(rate, lower), upper))
152
153 # Get optimum 1k exponent to represent a number of bytes
def optimum_k_exp(num_bytes):
    """Return the power of const_1k best suited to display *num_bytes*.

    The result is the logarithm of *num_bytes* in base const_1k,
    truncated to an integer: 0 for plain bytes, 1 for "k", 2 for "M" and
    so on.  Zero and fractional amounts below one byte yield 0.

    Raises ValueError (from math.log) for negative amounts.
    """
    # Amounts below one byte have a negative logarithm.  Truncating it can
    # give a negative exponent, which callers would use as a negative
    # index into the unit suffix table, so these are treated as plain bytes.
    if 0 <= num_bytes < 1:
        return 0
    return int(math.log(num_bytes, const_1k))
159
160 # Get optimum representation of number of bytes
def format_bytes(num_bytes):
    """Return *num_bytes* as a short string with a unit suffix.

    The unit is chosen with optimum_k_exp().  Amounts that stay in plain
    bytes are shown as given, followed by 'b'; larger amounts are scaled
    and shown with two decimals, e.g. '1.50k'.  If the exponent has no
    matching suffix the program exits with an internal error message.
    """
    suffixes = 'bkMGTPEZY'
    exponent = optimum_k_exp(num_bytes)
    try:
        unit = suffixes[exponent]
    except IndexError:
        sys.exit('Error: internal error formatting number of bytes.')
    if exponent == 0:
        return '%s%s' % (num_bytes, unit)
    scaled = float(num_bytes) / float(const_1k**exponent)
    return '%.2f%s' % (scaled, unit)
172
173 # Calculate ETA and return it in string format as MM:SS
def calc_eta(start, now, total, current):
    """Return the estimated remaining time as an 'MM:SS' string.

    *start* and *now* are timestamps, *total* the expected number of
    bytes and *current* the number received so far.  The estimate assumes
    the average rate seen so far continues.

    Returns '--:--' when nothing has been received yet, when the elapsed
    time is below const_epsilon, or when the estimate exceeds 99 minutes.
    """
    elapsed = now - start
    if current == 0 or elapsed < const_epsilon:
        return '--:--'
    rate = float(current) / elapsed
    # More data than announced would make the remaining amount negative and
    # produce strings such as '-1:57'; report no time left instead.
    remaining = max(total - current, 0)
    eta = int(remaining / rate)
    (eta_mins, eta_secs) = divmod(eta, 60)
    if eta_mins > 99:
        return '--:--'
    return '%02d:%02d' % (eta_mins, eta_secs)
184
185 # Calculate speed and return it in string format
def calc_speed(start, now, bytes):
    """Return the average transfer rate since *start* as a formatted string.

    The rate is *bytes* divided by the time between *start* and *now*,
    rendered by format_bytes().  Returns 'N/A b' when no bytes were
    transferred or the elapsed time is below const_epsilon.
    """
    if bytes == 0:
        return 'N/A b'
    elapsed = now - start
    if elapsed < const_epsilon:
        return 'N/A b'
    rate = float(bytes) / elapsed
    return format_bytes(rate)
191
192
193 # Title string minimal transformation
def title_string_touch(title):
    """Return *title* with every os.sep occurrence replaced by '%'.

    This is the only change made to the title, so the result cannot be
    taken for a path with directory components on this platform.
    """
    components = title.split(os.sep)
    return '%'.join(components)
196
197 # Create the command line options parser and parse command line
cmdl_usage = 'usage: %prog [options] video_url'
cmdl_version = '2008.01.24'
# conflict_handler='resolve' lets the -h/--help and -v/--version options
# added below take the place of the ones optparse registers by itself.
cmdl_parser = optparse.OptionParser(usage=cmdl_usage, version=cmdl_version, conflict_handler='resolve')
cmdl_parser.add_option('-h', '--help', action='help', help='print this help text and exit')
cmdl_parser.add_option('-v', '--version', action='version', help='print program version and exit')

# Options that take a value: (short, long, dest, metavar, help)
for cmdl_short, cmdl_long, cmdl_dest, cmdl_metavar, cmdl_help in (
    ('-u', '--username', 'username', 'USERNAME', 'account username'),
    ('-p', '--password', 'password', 'PASSWORD', 'account password'),
    ('-o', '--output', 'outfile', 'FILE', 'output video file name'),
):
    cmdl_parser.add_option(cmdl_short, cmdl_long, dest=cmdl_dest, metavar=cmdl_metavar, help=cmdl_help)

# Boolean switches: (short, long, dest, help)
for cmdl_short, cmdl_long, cmdl_dest, cmdl_help in (
    ('-q', '--quiet', 'quiet', 'activates quiet mode'),
    ('-s', '--simulate', 'simulate', 'do not download video'),
    ('-t', '--title', 'use_title', 'use title in file name'),
    ('-l', '--literal', 'use_literal', 'use literal title in file name'),
    ('-n', '--netrc', 'use_netrc', 'use .netrc authentication data'),
    ('-g', '--get-url', 'get_url', 'print final video URL only'),
    ('-2', '--title-too', 'get_title', 'used with -g, print title too'),
):
    cmdl_parser.add_option(cmdl_short, cmdl_long, action='store_true', dest=cmdl_dest, help=cmdl_help)

(cmdl_opts, cmdl_args) = cmdl_parser.parse_args()
214
215 # Set socket timeout
socket.setdefaulttimeout(const_timeout)

# Exactly one positional argument is accepted: the video URL (or bare ID)
if len(cmdl_args) == 1:
    video_url_cmdl = cmdl_args[0]
else:
    cmdl_parser.print_help()
    sys.exit('\n')

# Verify the argument and rebuild the URL in "standard" form from the
# video ID, which is the second group of the pattern
video_url_mo = const_video_url_re.match(video_url_cmdl)
if video_url_mo is None:
    sys.exit('Error: URL does not seem to be a youtube video URL. If it is, report a bug.')
video_url_id = video_url_mo.group(2)
video_url = const_video_url_str % video_url_id
230
231 # Check conflicting options
# Not fatal: the given file name is simply not used in these modes
if cmdl_opts.outfile is not None and (cmdl_opts.simulate or cmdl_opts.get_url):
    sys.stderr.write('Warning: video file name given but will not be used.\n')

# Fatal combinations and incorrectly formatted options, as
# (condition, message) pairs.  They are checked in order and the first one
# that holds ends the program with its message.
cmdl_conflicts = (
    (cmdl_opts.outfile is not None and (cmdl_opts.use_title or cmdl_opts.use_literal),
     'Error: using the video title conflicts with using a given file name.'),
    (cmdl_opts.use_title and cmdl_opts.use_literal,
     'Error: cannot use title and literal title at the same time.'),
    (cmdl_opts.quiet and cmdl_opts.get_url,
     'Error: cannot be quiet and print final URL at the same time.'),
    (cmdl_opts.username is None and cmdl_opts.password is not None,
     'Error: password give but username is missing.'),
    (cmdl_opts.use_netrc and (cmdl_opts.username is not None or cmdl_opts.password is not None),
     'Error: cannot use netrc and username/password at the same time.'),
    (cmdl_opts.get_url is None and cmdl_opts.get_title is not None,
     'Error: getting title requires getting URL.'),
)
for cmdl_conflict, cmdl_conflict_msg in cmdl_conflicts:
    if cmdl_conflict:
        sys.exit(cmdl_conflict_msg)
253
254 # Get account information if any
# Both stay None when no account was requested
account_username = None
account_password = None

if cmdl_opts.use_netrc:
    # Credentials come from the .netrc entry for machine "youtube"
    try:
        info = netrc.netrc().authenticators('youtube')
    except IOError:
        sys.exit('Error: unable to read .netrc file.')
    except netrc.NetrcParseError:
        sys.exit('Error: unable to parse .netrc file.')
    if info is None:
        sys.exit('Error: no authenticators for machine youtube.')
    account_username = info[0]
    account_password = info[2]
elif cmdl_opts.username is not None:
    # Credentials come from the command line; a missing password is asked
    # for interactively
    account_username = cmdl_opts.username
    account_password = cmdl_opts.password
    if account_password is None:
        account_password = getpass.getpass('Type YouTube password and press return: ')
276
277 # Get output file name
# Use the name given with -o/--output, or fall back to "<video id>.flv"
video_filename = cmdl_opts.outfile
if video_filename is None:
    video_filename = '%s.flv' % video_url_id
282
283 # Install cookie and proxy handlers
# install_opener() replaces whatever opener was installed before, so the
# proxy and cookie handlers must be combined into one opener; installing
# them one after the other keeps only the last opener.
urllib2.install_opener(urllib2.build_opener(urllib2.ProxyHandler(), urllib2.HTTPCookieProcessor()))
286
287 # Log in and confirm age if needed
if account_username is not None:
    # The POST body is form-encoded, so the credentials must be escaped:
    # an unescaped '&', '=', '%' or '+' would otherwise corrupt or
    # truncate the submitted values.  str() keeps the previous behaviour
    # for a value that is not a string (for example a .netrc entry with no
    # password yields None).
    url = const_login_url_str % video_url_id
    post = const_login_post_str % (
        video_url_id,
        urllib.quote_plus(str(account_username)),
        urllib.quote_plus(str(account_password)),
    )
    download_step(False, 'Logging in', 'unable to log in', url, post)

    # Age confirmation is only attempted once logged in
    url = const_age_url_str % video_url_id
    post = const_age_post_str % video_url_id
    download_step(False, 'Confirming age', 'unable to confirm age', url, post)
296
297 # Retrieve video webpage
video_webpage = download_step(True, 'Retrieving video webpage', 'unable to retrieve video webpage', video_url)

# The title is only extracted when an option needs it
if cmdl_opts.use_title or cmdl_opts.use_literal or cmdl_opts.get_title:
    video_title = extract_step('Extracting video title', 'unable to extract video title', const_video_title_re, video_webpage)

# The "t" parameter from the page is required to build the download URL
video_url_t_param = extract_step('Extracting URL "t" parameter', 'unable to extract URL "t" parameter', const_url_t_param_re, video_webpage)
video_url_real = const_video_url_real_str % (video_url_id, video_url_t_param)

# Rebuild the file name as "<title>-<video id>.flv" when a title option is
# set: normalized title for -t, minimally changed title for -l
if cmdl_opts.use_title:
    video_filename = '%s-%s.flv' % (title_string_norm(video_title), video_url_id)
elif cmdl_opts.use_literal:
    video_filename = '%s-%s.flv' % (title_string_touch(video_title), video_url_id)

# Only a warning: the name is used as given
if not video_filename.lower().endswith('.flv'):
    sys.stderr.write('Warning: video file name does not end in .flv\n')
319
320 # Retrieve video data
# Request the real video URL, print the title and/or final URL if asked,
# and, unless simulating, stream the response into video_filename while
# showing progress.
try:
    cond_print('Requesting video file... ')
    video_data = perform_request(video_url_real)
    cond_print('done.\n')
    cond_print('Video data found at %s\n' % video_data.geturl())

    if cmdl_opts.get_title:
        sys.stdout.write('%s\n' % video_title)

    if cmdl_opts.get_url:
        sys.stdout.write('%s\n' % video_data.geturl())

    if cmdl_opts.simulate or cmdl_opts.get_url:
        sys.exit()

    try:
        video_file = open(video_filename, 'wb')
    except (IOError, OSError):
        sys.exit('Error: unable to open "%s" for writing.' % video_filename)

    # try/finally closes the output file on every way out of the transfer:
    # normal end, network error or keyboard interrupt.
    try:
        try:
            video_len = int(video_data.info()['Content-length'])
            video_len_str = format_bytes(video_len)
        except (KeyError, ValueError):
            # Header missing or not a usable number: total size is unknown
            video_len = None
            video_len_str = 'N/A'

        byte_counter = 0
        block_size = const_initial_block_size
        start_time = time.time()
        while True:
            # A declared length of 0 cannot be used as a divisor, so it is
            # displayed like an unknown length.
            if video_len is not None and video_len > 0:
                percent = float(byte_counter) / float(video_len) * 100.0
                percent_str = '%.1f' % percent
                eta_str = calc_eta(start_time, time.time(), video_len, byte_counter)
            else:
                percent_str = '---.-'
                eta_str = '--:--'
            counter = format_bytes(byte_counter)
            speed_str = calc_speed(start_time, time.time(), byte_counter)
            cond_print('\rRetrieving video data: %5s%% (%8s of %s) at %8s/s ETA %s ' % (percent_str, counter, video_len_str, speed_str, eta_str))

            # Time each read so the next block size can follow the rate
            before = time.time()
            video_block = video_data.read(block_size)
            after = time.time()
            dl_bytes = len(video_block)
            if dl_bytes == 0:
                # An empty read marks the end of the data
                break
            byte_counter += dl_bytes
            video_file.write(video_block)
            block_size = new_block_size(before, after, dl_bytes)
    finally:
        video_file.close()

    if video_len is not None and byte_counter != video_len:
        error_advice_exit('server did not send the expected ammount of data')

    cond_print('done.\n')
    cond_print('Video data saved to %s\n' % video_filename)

except (urllib2.URLError, ValueError, httplib.HTTPException, TypeError, socket.error):
    cond_print('failed.\n')
    error_advice_exit('unable to download video data')

except KeyboardInterrupt:
    sys.exit('\n')

# Finish
sys.exit()