From: Rogério Brito <rbrito@ime.usp.br>
Date: Fri, 25 Feb 2011 23:27:47 +0000 (-0300)
Subject: Imported Debian patch 2011.02.25b-1
X-Git-Url: https://git.rapsys.eu/youtubedl/commitdiff_plain/15c7085f5de281daf62445c926cda39e648108a4?hp=a475f326a4099c540835dacdda9dbbf92a56130d

Imported Debian patch 2011.02.25b-1
---

diff --git a/LATEST_VERSION b/LATEST_VERSION
index 4ab2093..4851877 100644
--- a/LATEST_VERSION
+++ b/LATEST_VERSION
@@ -1 +1 @@
-2011.01.30
+2011.02.25b
diff --git a/debian/changelog b/debian/changelog
index f1cfcaa..faacb24 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,19 @@
+youtube-dl (2011.02.25b-1) unstable; urgency=low
+
+  * New upstream release. Some highlights:
+    + Enable artist playlists in YoutubePlaylistIE.
+    + Add support for getting videos from Facebook.
+    + Support more common YouTube playlist URLs
+    + Extract audio from your videos using ffmpeg as a postprocessor.
+  * debian/control:
+    + Add recommends on ffmpeg.
+    + Update the list of sites supported by youtube-dl in the long
+      description.
+  * debian/patches/*:
+    + Refresh.
+
+ -- Rogério Brito <rbrito@ime.usp.br>  Fri, 25 Feb 2011 20:27:47 -0300
+
 youtube-dl (2011.01.30-2) unstable; urgency=low
 
   * debian/control:
diff --git a/debian/control b/debian/control
index a2523c5..e92f6ca 100644
--- a/debian/control
+++ b/debian/control
@@ -11,6 +11,7 @@ Homepage: http://rg3.github.com/youtube-dl/
 Package: youtube-dl
 Architecture: all
 Depends: python (>= 2.5), ${misc:Depends}
+Recommends: ffmpeg
 Suggests: rtmpdump
 Description: download videos from youtube
  youtube-dl is a small command-line program to download videos from
@@ -22,5 +23,5 @@ Description: download videos from youtube
  automatically determine the best quality video to grab. It supports
  downloading entire playlists and all videos from a given user.
  .
- Currently supported sites are video.google.com, youtube, photobucket,
- dailymotion, and metacafe.
+ Currently supported sites are video.google.com, Youtube, Photobucket,
+ Dailymotion, Metacafe, Facebook, Yahoo!, and depositfiles.com.
diff --git a/debian/patches/01-prefer-open-formats.patch b/debian/patches/01-prefer-open-formats.patch
index 817d3d5..289b21d 100644
--- a/debian/patches/01-prefer-open-formats.patch
+++ b/debian/patches/01-prefer-open-formats.patch
@@ -5,7 +5,7 @@ Last-Update: 2010-10-13
 
 --- a/youtube-dl
 +++ b/youtube-dl
-@@ -863,7 +863,7 @@
+@@ -864,7 +864,7 @@
  	_AGE_URL = 'http://www.youtube.com/verify_age?next_url=/&gl=US&hl=en'
  	_NETRC_MACHINE = 'youtube'
  	# Listed in order of quality
diff --git a/debian/youtube-dl.1 b/debian/youtube-dl.1
index d6aa91b..bcbce6c 100644
--- a/debian/youtube-dl.1
+++ b/debian/youtube-dl.1
@@ -124,7 +124,7 @@
 .\" ========================================================================
 .\"
 .IX Title "YOUTUBE-DL 1"
-.TH YOUTUBE-DL 1 "2011-01-31" "perl v5.10.1" "User Contributed Perl Documentation"
+.TH YOUTUBE-DL 1 "2011-02-25" "perl v5.10.1" "User Contributed Perl Documentation"
 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
 .\" way too many mistakes in technical documents.
 .if n .ad l
@@ -311,6 +311,14 @@ Do not append the \fI.part\fR suffix do files that have not yet been completed.
 .IP "\-\-no\-mtime" 4
 .IX Item "--no-mtime"
 Do not use the \fILast-modified\fR header to set the file modification time.
+.IP "\-\-extract\-audio" 4
+.IX Item "--extract-audio"
+Create an audio-only file extracted from the video downloaded. Requires that
+ffmpeg and ffprobe be installed.
+.IP "\-\-audio\-format=\fB\s-1FORMAT\s0\fR" 4
+.IX Item "--audio-format=FORMAT"
+Set the audio format to be used for the extraction. Possible values are
+\&\fIbest\fR, \fIaac\fR, \fImp3\fR, with \fIbest\fR being the default.
 .SH "OUTPUT TEMPLATE"
 .IX Header "OUTPUT TEMPLATE"
 The \-o option allows users to indicate a template for the output file
diff --git a/debian/youtube-dl.pod b/debian/youtube-dl.pod
index 9f25ab1..ae2d2f0 100644
--- a/debian/youtube-dl.pod
+++ b/debian/youtube-dl.pod
@@ -216,6 +216,16 @@ Do not append the I<.part> suffix do files that have not yet been completed.
 
 Do not use the I<Last-modified> header to set the file modification time.
 
+=item --extract-audio
+
+Create an audio-only file extracted from the video downloaded. Requires that
+ffmpeg and ffprobe be installed.
+
+=item --audio-format=B<FORMAT>
+
+Set the audio format to be used for the extraction. Possible values are
+I<best>, I<aac>, I<mp3>, with I<best> being the default.
+
 =back
 
 =head1 OUTPUT TEMPLATE
diff --git a/youtube-dl b/youtube-dl
index a4c8f24..072a919 100755
--- a/youtube-dl
+++ b/youtube-dl
@@ -6,6 +6,7 @@
 # Author: Vasyl' Vavrychuk
 # Author: Witold Baryluk
 # Author: Paweł Paprota
+# Author: Gergely Imreh
 # License: Public domain code
 import cookielib
 import ctypes
@@ -37,7 +38,7 @@ except ImportError:
 	from cgi import parse_qs
 
 std_headers = {
-	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b10) Gecko/20100101 Firefox/4.0b10',
+	'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:2.0b11) Gecko/20100101 Firefox/4.0b11',
 	'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.7',
 	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
 	'Accept-Encoding': 'gzip, deflate',
@@ -1058,7 +1059,7 @@ class YoutubeIE(InfoExtractor):
 		mobj = re.search(r'id="eow-date".*?>(.*?)</span>', video_webpage, re.DOTALL)
 		if mobj is not None:
 			upload_date = ' '.join(re.sub(r'[/,-]', r' ', mobj.group(1)).split())
-			format_expressions = ['%d %B %Y', '%B %d %Y']
+			format_expressions = ['%d %B %Y', '%B %d %Y', '%b %d %Y']
 			for expression in format_expressions:
 				try:
 					upload_date = datetime.datetime.strptime(upload_date, expression).strftime('%Y%m%d')
@@ -2096,8 +2097,8 @@ class YahooSearchIE(InfoExtractor):
 class YoutubePlaylistIE(InfoExtractor):
 	"""Information Extractor for YouTube playlists."""
 
-	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists)\?.*?p=|user/.*?/user/|p/)([^&]+).*'
-	_TEMPLATE_URL = 'http://www.youtube.com/view_play_list?p=%s&page=%s&gl=US&hl=en'
+	_VALID_URL = r'(?:http://)?(?:\w+\.)?youtube.com/(?:(?:view_play_list|my_playlists|artist)\?.*?(p|a)=|user/.*?/user/|p/|user/.*?#[pg]/c/)([0-9A-Za-z]+)(?:/.*?/([0-9A-Za-z_-]+))?.*'
+	_TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en'
 	_VIDEO_INDICATOR = r'/watch\?v=(.+?)&'
 	_MORE_PAGES_INDICATOR = r'(?m)>\s*Next\s*</a>'
 	_youtube_ie = None
@@ -2124,14 +2125,26 @@ class YoutubePlaylistIE(InfoExtractor):
 			self._downloader.trouble(u'ERROR: invalid url: %s' % url)
 			return
 
+		# Single video case
+		if mobj.group(3) is not None:
+			self._youtube_ie.extract(mobj.group(3))
+			return
+
 		# Download playlist pages
-		playlist_id = mobj.group(1)
+		# prefix is 'p' as default for playlists but there are other types that need extra care
+		playlist_prefix = mobj.group(1)
+		if playlist_prefix == 'a':
+			playlist_access = 'artist'
+		else:
+			playlist_prefix = 'p'
+			playlist_access = 'view_play_list'
+		playlist_id = mobj.group(2)
 		video_ids = []
 		pagenum = 1
 
 		while True:
 			self.report_download_page(playlist_id, pagenum)
-			request = urllib2.Request(self._TEMPLATE_URL % (playlist_id, pagenum))
+			request = urllib2.Request(self._TEMPLATE_URL % (playlist_access, playlist_prefix, playlist_id, pagenum))
 			try:
 				page = urllib2.urlopen(request).read()
 			except (urllib2.URLError, httplib.HTTPException, socket.error), err:
@@ -2327,6 +2340,229 @@ class DepositFilesIE(InfoExtractor):
 		except UnavailableVideoError, err:
 			self._downloader.trouble(u'ERROR: unable to download file')
 
+class FacebookIE(InfoExtractor):
+	"""Information Extractor for Facebook"""
+
+	_VALID_URL = r'^(?:https?://)?(?:\w+\.)?facebook.com/video/video.php\?(?:.*?)v=(?P<ID>\d+)(?:.*)'
+	_LOGIN_URL = 'https://login.facebook.com/login.php?m&next=http%3A%2F%2Fm.facebook.com%2Fhome.php&'
+	_NETRC_MACHINE = 'facebook'
+	_available_formats = ['highqual', 'lowqual']
+	_video_extensions = {
+		'highqual': 'mp4',
+		'lowqual': 'mp4',
+	}
+
+	def __init__(self, downloader=None):
+		InfoExtractor.__init__(self, downloader)
+
+	@staticmethod
+	def suitable(url):
+		return (re.match(FacebookIE._VALID_URL, url) is not None)
+
+	def _reporter(self, message):
+		"""Add header and report message."""
+		self._downloader.to_screen(u'[facebook] %s' % message)
+
+	def report_login(self):
+		"""Report attempt to log in."""
+		self._reporter(u'Logging in')
+
+	def report_video_webpage_download(self, video_id):
+		"""Report attempt to download video webpage."""
+		self._reporter(u'%s: Downloading video webpage' % video_id)
+
+	def report_information_extraction(self, video_id):
+		"""Report attempt to extract video information."""
+		self._reporter(u'%s: Extracting video information' % video_id)
+
+	def _parse_page(self, video_webpage):
+		"""Extract video information from page"""
+		# General data
+		data = {'title': r'class="video_title datawrap">(.*?)</',
+			'description': r'<div class="datawrap">(.*?)</div>',
+			'owner': r'\("video_owner_name", "(.*?)"\)',
+			'upload_date': r'data-date="(.*?)"',
+			'thumbnail':  r'\("thumb_url", "(?P<THUMB>.*?)"\)',
+			}
+		video_info = {}
+		for piece in data.keys():
+			mobj = re.search(data[piece], video_webpage)
+			if mobj is not None:
+				video_info[piece] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
+
+		# Video urls
+		video_urls = {}
+		for fmt in self._available_formats:
+			mobj = re.search(r'\("%s_src\", "(.+?)"\)' % fmt, video_webpage)
+			if mobj is not None:
+				# URL is in a Javascript segment inside an escaped Unicode format within
+				# the generally utf-8 page
+				video_urls[fmt] = urllib.unquote_plus(mobj.group(1).decode("unicode_escape"))
+		video_info['video_urls'] = video_urls
+
+		return video_info
+
+	def _real_initialize(self):
+		if self._downloader is None:
+			return
+
+		useremail = None
+		password = None
+		downloader_params = self._downloader.params
+
+		# Attempt to use provided username and password or .netrc data
+		if downloader_params.get('username', None) is not None:
+			useremail = downloader_params['username']
+			password = downloader_params['password']
+		elif downloader_params.get('usenetrc', False):
+			try:
+				info = netrc.netrc().authenticators(self._NETRC_MACHINE)
+				if info is not None:
+					useremail = info[0]
+					password = info[2]
+				else:
+					raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE)
+			except (IOError, netrc.NetrcParseError), err:
+				self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err))
+				return
+
+		if useremail is None:
+			return
+
+		# Log in
+		login_form = {
+			'email': useremail,
+			'pass': password,
+			'login': 'Log+In'
+			}
+		request = urllib2.Request(self._LOGIN_URL, urllib.urlencode(login_form))
+		try:
+			self.report_login()
+			login_results = urllib2.urlopen(request).read()
+			if re.search(r'<form(.*)name="login"(.*)</form>', login_results) is not None:
+				self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.')
+				return
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err))
+			return
+
+	def _real_extract(self, url):
+		mobj = re.match(self._VALID_URL, url)
+		if mobj is None:
+			self._downloader.trouble(u'ERROR: invalid URL: %s' % url)
+			return
+		video_id = mobj.group('ID')
+
+		# Get video webpage
+		self.report_video_webpage_download(video_id)
+		request = urllib2.Request('https://www.facebook.com/video/video.php?v=%s' % video_id)
+		try:
+			page = urllib2.urlopen(request)
+			video_webpage = page.read()
+		except (urllib2.URLError, httplib.HTTPException, socket.error), err:
+			self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err))
+			return
+
+		# Start extracting information
+		self.report_information_extraction(video_id)
+
+		# Extract information
+		video_info = self._parse_page(video_webpage)
+
+		# uploader
+		if 'owner' not in video_info:
+			self._downloader.trouble(u'ERROR: unable to extract uploader nickname')
+			return
+		video_uploader = video_info['owner']
+
+		# title
+		if 'title' not in video_info:
+			self._downloader.trouble(u'ERROR: unable to extract video title')
+			return
+		video_title = video_info['title']
+		video_title = video_title.decode('utf-8')
+		video_title = sanitize_title(video_title)
+
+		# simplified title
+		simple_title = re.sub(ur'(?u)([^%s]+)' % simple_title_chars, ur'_', video_title)
+		simple_title = simple_title.strip(ur'_')
+
+		# thumbnail image
+		if 'thumbnail' not in video_info:
+			self._downloader.trouble(u'WARNING: unable to extract video thumbnail')
+			video_thumbnail = ''
+		else:
+			video_thumbnail = video_info['thumbnail']
+
+		# upload date
+		upload_date = u'NA'
+		if 'upload_date' in video_info:
+			upload_time = video_info['upload_date']
+			timetuple = email.utils.parsedate_tz(upload_time)
+			if timetuple is not None:
+				try:
+					upload_date = time.strftime('%Y%m%d', timetuple[0:9])
+				except:
+					pass
+
+		# description
+		video_description = 'No description available.'
+		if (self._downloader.params.get('forcedescription', False) and
+		    'description' in video_info):
+			video_description = video_info['description']
+
+		url_map = video_info['video_urls']
+		if len(url_map.keys()) > 0:
+			# Decide which formats to download
+			req_format = self._downloader.params.get('format', None)
+			format_limit = self._downloader.params.get('format_limit', None)
+
+			if format_limit is not None and format_limit in self._available_formats:
+				format_list = self._available_formats[self._available_formats.index(format_limit):]
+			else:
+				format_list = self._available_formats
+			existing_formats = [x for x in format_list if x in url_map]
+			if len(existing_formats) == 0:
+				self._downloader.trouble(u'ERROR: no known formats available for video')
+				return
+			if req_format is None:
+				video_url_list = [(existing_formats[0], url_map[existing_formats[0]])] # Best quality
+			elif req_format == '-1':
+				video_url_list = [(f, url_map[f]) for f in existing_formats] # All formats
+			else:
+				# Specific format
+				if req_format not in url_map:
+					self._downloader.trouble(u'ERROR: requested format not available')
+					return
+				video_url_list = [(req_format, url_map[req_format])] # Specific format
+
+		for format_param, video_real_url in video_url_list:
+
+			# At this point we have a new video
+			self._downloader.increment_downloads()
+
+			# Extension
+			video_extension = self._video_extensions.get(format_param, 'mp4')
+
+			# Find the video URL in fmt_url_map or conn paramters
+			try:
+				# Process video information
+				self._downloader.process_info({
+					'id':		video_id.decode('utf-8'),
+					'url':		video_real_url.decode('utf-8'),
+					'uploader':	video_uploader.decode('utf-8'),
+					'upload_date':	upload_date,
+					'title':	video_title,
+					'stitle':	simple_title,
+					'ext':		video_extension.decode('utf-8'),
+					'format':	(format_param is None and u'NA' or format_param.decode('utf-8')),
+					'thumbnail':	video_thumbnail.decode('utf-8'),
+					'description':	video_description.decode('utf-8'),
+					'player_url':	None,
+				})
+			except UnavailableVideoError, err:
+				self._downloader.trouble(u'\nERROR: unable to download video')
+
 class PostProcessor(object):
 	"""Post Processor class.
 
@@ -2373,6 +2609,88 @@ class PostProcessor(object):
 		"""
 		return information # by default, do nothing
 
+class FFmpegExtractAudioPP(PostProcessor):
+
+	def __init__(self, downloader=None, preferredcodec=None):
+		PostProcessor.__init__(self, downloader)
+		if preferredcodec is None:
+			preferredcodec = 'best'
+		self._preferredcodec = preferredcodec
+
+	@staticmethod
+	def get_audio_codec(path):
+		try:
+			handle = subprocess.Popen(['ffprobe', '-show_streams', path],
+					stderr=file(os.path.devnull, 'w'), stdout=subprocess.PIPE)
+			output = handle.communicate()[0]
+			if handle.wait() != 0:
+				return None
+		except (IOError, OSError):
+			return None
+		audio_codec = None
+		for line in output.split('\n'):
+			if line.startswith('codec_name='):
+				audio_codec = line.split('=')[1].strip()
+			elif line.strip() == 'codec_type=audio' and audio_codec is not None:
+				return audio_codec
+		return None
+
+	@staticmethod
+	def run_ffmpeg(path, out_path, codec, more_opts):
+		try:
+			ret = subprocess.call(['ffmpeg', '-y', '-i', path, '-vn', '-acodec', codec] + more_opts + [out_path],
+					stdout=file(os.path.devnull, 'w'), stderr=subprocess.STDOUT)
+			return (ret == 0)
+		except (IOError, OSError):
+			return False
+
+	def run(self, information):
+		path = information['filepath']
+
+		filecodec = self.get_audio_codec(path)
+		if filecodec is None:
+			self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe')
+			return None
+
+		more_opts = []
+		if self._preferredcodec == 'best' or self._preferredcodec == filecodec:
+			if filecodec == 'aac' or filecodec == 'mp3':
+				# Lossless if possible
+				acodec = 'copy'
+				extension = filecodec
+				if filecodec == 'aac':
+					more_opts = ['-f', 'adts']
+			else:
+				# MP3 otherwise.
+				acodec = 'libmp3lame'
+				extension = 'mp3'
+				more_opts = ['-ab', '128k']
+		else:
+			# We convert the audio (lossy)
+			acodec = {'mp3': 'libmp3lame', 'aac': 'aac'}[self._preferredcodec]
+			extension = self._preferredcodec
+			more_opts = ['-ab', '128k']
+			if self._preferredcodec == 'aac':
+				more_opts += ['-f', 'adts']
+
+		(prefix, ext) = os.path.splitext(path)
+		new_path = prefix + '.' + extension
+		self._downloader.to_screen(u'[ffmpeg] Destination: %s' % new_path)
+		status = self.run_ffmpeg(path, new_path, acodec, more_opts)
+
+		if not status:
+			self._downloader.to_stderr(u'WARNING: error running ffmpeg')
+			return None
+
+		try:
+			os.remove(path)
+		except (IOError, OSError):
+			self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file')
+			return None
+
+		information['filepath'] = new_path
+		return information
+
 ### MAIN PROGRAM ###
 if __name__ == '__main__':
 	try:
@@ -2405,7 +2723,7 @@ if __name__ == '__main__':
 		# Parse command line
 		parser = optparse.OptionParser(
 			usage='Usage: %prog [options] url...',
-			version='2011.01.30',
+			version='2011.02.25b',
 			conflict_handler='resolve',
 		)
 
@@ -2497,6 +2815,13 @@ if __name__ == '__main__':
 				help='do not use the Last-modified header to set the file modification time', default=True)
 		parser.add_option_group(filesystem)
 
+		postproc = optparse.OptionGroup(parser, 'Post-processing Options')
+		postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False,
+				help='convert video files to audio-only files (requires ffmpeg and ffprobe)')
+		postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best',
+				help='"best", "aac" or "mp3"; best by default')
+		parser.add_option_group(postproc)
+
 		(opts, args) = parser.parse_args()
 
 		# Open appropriate CookieJar
@@ -2568,6 +2893,9 @@ if __name__ == '__main__':
 				raise ValueError
 		except (TypeError, ValueError), err:
 			parser.error(u'invalid playlist end number specified')
+		if opts.extractaudio:
+			if opts.audioformat not in ['best', 'aac', 'mp3']:
+				parser.error(u'invalid audio format specified')
 
 		# Information extractors
 		youtube_ie = YoutubeIE()
@@ -2582,6 +2910,7 @@ if __name__ == '__main__':
 		yahoo_ie = YahooIE()
 		yahoo_search_ie = YahooSearchIE(yahoo_ie)
 		deposit_files_ie = DepositFilesIE()
+		facebook_ie = FacebookIE()
 		generic_ie = GenericIE()
 
 		# File downloader
@@ -2633,11 +2962,16 @@ if __name__ == '__main__':
 		fd.add_info_extractor(yahoo_ie)
 		fd.add_info_extractor(yahoo_search_ie)
 		fd.add_info_extractor(deposit_files_ie)
+		fd.add_info_extractor(facebook_ie)
 
 		# This must come last since it's the
 		# fallback if none of the others work
 		fd.add_info_extractor(generic_ie)
 
+		# PostProcessors
+		if opts.extractaudio:
+			fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat))
+
 		# Update version
 		if opts.update_self:
 			update_self(fd, sys.argv[0])