X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/b4c12b5580c7412a055e109387d704425557755b..d1b3edec11dc27709c45b3b16fa4e50a40726794:/youtube-dl diff --git a/youtube-dl b/youtube-dl index 5fd331e..d546949 100755 --- a/youtube-dl +++ b/youtube-dl @@ -27,7 +27,7 @@ except ImportError: from cgi import parse_qs std_headers = { - 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.7) Gecko/20100720 Firefox/3.6.7', + 'User-Agent': 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.8) Gecko/20100723 Firefox/3.6.8', 'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language': 'en-us,en;q=0.5', @@ -287,16 +287,6 @@ class FileDownloader(object): multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) return long(round(number * multiplier)) - @staticmethod - def verify_url(url): - """Verify a URL is valid and data could be downloaded. Return real data URL.""" - request = urllib2.Request(url, None, std_headers) - data = urllib2.urlopen(request) - data.read(1) - url = data.geturl() - data.close() - return url - def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" self._ies.append(ie) @@ -396,13 +386,6 @@ class FileDownloader(object): """Process a single dictionary returned by an InfoExtractor.""" # Do nothing else if in simulate mode if self.params.get('simulate', False): - # Verify URL if it's an HTTP one - if info_dict['url'].startswith('http'): - try: - self.verify_url(info_dict['url'].encode('utf-8')).decode('utf-8') - except (OSError, IOError, urllib2.URLError, httplib.HTTPException, socket.error), err: - raise UnavailableVideoError - # Forced printings if self.params.get('forcetitle', False): print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') @@ -539,32 +522,50 @@ class FileDownloader(object): count = 0 retries = self.params.get('retries', 0) - while True: + while count <= retries: # Establish connection try: data = urllib2.urlopen(request) break except (urllib2.HTTPError, ), err: - if err.code == 503: - # Retry in case of HTTP error 503 - count += 1 - if count <= retries: - self.report_retry(count, retries) - continue - if err.code != 416: # 416 is 'Requested range not satisfiable' + if err.code != 503 and err.code != 416: + # Unexpected HTTP error raise - # Unable to resume - data = urllib2.urlopen(basic_request) - content_length = data.info()['Content-Length'] - - if content_length is not None and long(content_length) == resume_len: - # Because the file had already been fully downloaded - self.report_file_already_downloaded(filename) - return True - else: - # Because the server didn't let us - self.report_unable_to_resume() - open_mode = 'wb' + elif err.code == 416: + # Unable to resume (requested range not satisfiable) + try: + # Open the connection again without the range header + data = urllib2.urlopen(basic_request) + content_length = data.info()['Content-Length'] + except (urllib2.HTTPError, ), err: + if err.code != 503: + raise + else: + # Examine the reported length + if (content_length is not None and + (resume_len - 100 < long(content_length) < resume_len + 100)): + # The file had already been fully downloaded. + # Explanation to the above condition: in issue #175 it was revealed that + # YouTube sometimes adds or removes a few bytes from the end of the file, + # changing the file size slightly and causing problems for some users. So + # I decided to implement a suggested change and consider the file + # completely downloaded if the file size differs less than 100 bytes from + # the one in the hard drive. + self.report_file_already_downloaded(filename) + return True + else: + # The length does not match, we start the download over + self.report_unable_to_resume() + open_mode = 'wb' + break + # Retry + count += 1 + if count <= retries: + self.report_retry(count, retries) + + if count > retries: + self.trouble(u'ERROR: giving up after %s retries' % retries) + return False data_len = data.info().get('Content-length', None) data_len_str = self.format_bytes(data_len) @@ -844,6 +845,14 @@ class YoutubeIE(InfoExtractor): except (urllib2.URLError, httplib.HTTPException, socket.error), err: self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) return + if 'token' not in video_info: + if 'reason' in video_info: + self._downloader.trouble(u'ERROR: YouTube said: %s' % video_info['reason'][0]) + else: + self._downloader.trouble(u'ERROR: "token" parameter not in video info for unknown reason') + return + + # Start extracting information self.report_information_extraction(video_id) # uploader @@ -878,7 +887,13 @@ class YoutubeIE(InfoExtractor): if mobj is not None: video_description = mobj.group(1) + # token + video_token = urllib.unquote_plus(video_info['token'][0]) + # Decide which formats to download + requested_format = self._downloader.params.get('format', None) + get_video_template = 'http://www.youtube.com/get_video?video_id=%s&t=%s&eurl=&el=&ps=&asv=&fmt=%%s' % (video_id, video_token) + if 'fmt_url_map' in video_info: url_map = dict(tuple(pair.split('|')) for pair in video_info['fmt_url_map'][0].split(',')) format_limit = self._downloader.params.get('format_limit', None) @@ -890,19 +905,17 @@ class YoutubeIE(InfoExtractor): if len(existing_formats) == 0: self._downloader.trouble(u'ERROR: no known formats available for video') return - requested_format = self._downloader.params.get('format', None) if requested_format is None: - video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality + video_url_list = [(existing_formats[0], get_video_template % existing_formats[0])] # Best quality elif requested_format == '-1': - video_url_list = url_map.items() # All formats + video_url_list = [(f, get_video_template % f) for f in existing_formats] # All formats else: - if requested_format not in existing_formats: - self._downloader.trouble(u'ERROR: format not available for video') - return - video_url_list = [(requested_format, url_map[requested_format])] # Specific format + video_url_list = [(requested_format, get_video_template % requested_format)] # Specific format + elif 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() video_url_list = [(None, video_info['conn'][0])] + else: self._downloader.trouble(u'ERROR: no fmt_url_map or conn information found in video info') return @@ -930,7 +943,7 @@ class YoutubeIE(InfoExtractor): 'player_url': player_url, }) except UnavailableVideoError, err: - self._downloader.trouble(u'ERROR: unable to download video') + self._downloader.trouble(u'ERROR: unable to download video (format may not be available)') class MetacafeIE(InfoExtractor): @@ -1026,15 +1039,15 @@ class MetacafeIE(InfoExtractor): return mediaURL = urllib.unquote(mobj.group(1)) - #mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) - #if mobj is None: - # self._downloader.trouble(u'ERROR: unable to extract gdaKey') - # return - #gdaKey = mobj.group(1) - # - #video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) - - video_url = mediaURL + # Extract gdaKey if available + mobj = re.search(r'(?m)&gdaKey=(.*?)&', webpage) + if mobj is None: + video_url = mediaURL + #self._downloader.trouble(u'ERROR: unable to extract gdaKey') + #return + else: + gdaKey = mobj.group(1) + video_url = '%s?__gda__=%s' % (mediaURL, gdaKey) mobj = re.search(r'(?im)(.*) - Video', webpage) if mobj is None: @@ -1928,6 +1941,11 @@ class YoutubePlaylistIE(InfoExtractor): break pagenum = pagenum + 1 + playliststart = self._downloader.params.get('playliststart', 1) + playliststart -= 1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -1983,6 +2001,11 @@ class YoutubeUserIE(InfoExtractor): ids_in_page.append(mobj.group(1)) video_ids.extend(ids_in_page) + playliststart = self._downloader.params.get('playliststart', 1) + playliststart = playliststart-1 #our arrays are zero-based but the playlist is 1-based + if playliststart > 0: + video_ids = video_ids[playliststart:] + for id in video_ids: self._youtube_ie.extract('http://www.youtube.com/watch?v=%s' % id) return @@ -2064,7 +2087,7 @@ if __name__ == '__main__': # Parse command line parser = optparse.OptionParser( usage='Usage: %prog [options] url...', - version='2010.07.22', + version='2010.08.04', conflict_handler='resolve', ) @@ -2080,6 +2103,8 @@ if __name__ == '__main__': dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') parser.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is 10)', default=10) + parser.add_option('--playlist-start', + dest='playliststart', metavar='NUMBER', help='playlist video to start at (default is 1)', default=1) authentication = optparse.OptionGroup(parser, 'Authentication Options') authentication.add_option('-u', '--username', @@ -2099,6 +2124,8 @@ if __name__ == '__main__': action='store_const', dest='format', help='download all available video formats', const='-1') video_format.add_option('--max-quality', action='store', dest='format_limit', metavar='FORMAT', help='highest quality format to download') + video_format.add_option('-b', '--best-quality', + action='store_true', dest='bestquality', help='download the best video quality (DEPRECATED)') parser.add_option_group(video_format) verbosity = optparse.OptionGroup(parser, 'Verbosity / Simulation Options') @@ -2151,6 +2178,8 @@ if __name__ == '__main__': all_urls = batchurls + args # Conflicting, missing and erroneous options + if opts.bestquality: + print >>sys.stderr, u'\nWARNING: -b/--best-quality IS DEPRECATED AS IT IS THE DEFAULT BEHAVIOR NOW\n' if opts.usenetrc and (opts.username is not None or opts.password is not None): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: @@ -2171,6 +2200,11 @@ if __name__ == '__main__': opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') + if opts.playliststart is not None: + try: + opts.playliststart = long(opts.playliststart) + except (TypeError, ValueError), err: + parser.error(u'invalid playlist page specified') # Information extractors youtube_ie = YoutubeIE() @@ -2212,6 +2246,7 @@ if __name__ == '__main__': 'retries': opts.retries, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, + 'playliststart': opts.playliststart, }) fd.add_info_extractor(youtube_search_ie) fd.add_info_extractor(youtube_pl_ie)