X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/9c8b63077a48f758bf0c9a7351669557071bbd74..2459bd35438081164499efc6ea2ea8cd89eceb04:/youtube_dl/extractor/vimeo.py diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 1125513..f27763a 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1,3 +1,4 @@ +# encoding: utf-8 import json import re import itertools @@ -10,6 +11,7 @@ from ..utils import ( clean_html, get_element_by_attribute, ExtractorError, + RegexNotFoundError, std_headers, unsmuggle_url, ) @@ -18,12 +20,12 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?$' + _VALID_URL = r'(?Phttps?://)?(?:(?:www|(?Pplayer))\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?(?:#.*)?$' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TESTS = [ { - u'url': u'http://vimeo.com/56015672', + u'url': u'http://vimeo.com/56015672#at=0', u'file': u'56015672.mp4', u'md5': u'8879b6cc097e987f02484baf890129e5', u'info_dict': { @@ -54,7 +56,22 @@ class VimeoIE(InfoExtractor): u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software', u'uploader': u'The BLN & Business of Software', }, - } + }, + { + u'url': u'http://vimeo.com/68375962', + u'file': u'68375962.mp4', + u'md5': u'aaf896bdb7ddd6476df50007a0ac0ae7', + u'note': u'Video protected with password', + u'info_dict': { + u'title': u'youtube-dl password protected test video', + u'upload_date': u'20130614', + u'uploader_id': u'user18948128', + u'uploader': u'Jaime Marquínez Ferrándiz', + }, + u'params': { + u'videopassword': u'youtube-dl', + }, + }, ] def _login(self): @@ -111,11 +128,9 @@ class VimeoIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('id') - if not mobj.group('proto'): - url = 'https://' + url - elif mobj.group('pro'): + if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id - elif mobj.group('direct_link'): + else: url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information @@ -129,18 +144,26 @@ class VimeoIE(InfoExtractor): # Extract the config JSON try: - config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], - webpage, u'info section', flags=re.DOTALL) - config = json.loads(config) - except: + try: + config_url = self._html_search_regex( + r' data-config-url="(.+?)"', webpage, u'config URL') + config_json = self._download_webpage(config_url, video_id) + config = json.loads(config_json) + except RegexNotFoundError: + # For pro videos or player.vimeo.com urls + config = self._search_regex([r' = {config:({.+?}),assets:', r'(?:c|b)=({.+?});'], + webpage, u'info section', flags=re.DOTALL) + config = json.loads(config) + except Exception as e: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option') - if re.search('If so please provide the correct password.', webpage): + if re.search(']+?id="pw_form"', webpage) is not None: self._verify_video_password(url, video_id, webpage) return self._real_extract(url) else: - raise ExtractorError(u'Unable to extract info section') + raise ExtractorError(u'Unable to extract info section', + cause=e) # Extract title video_title = config["video"]["title"] @@ -180,7 +203,7 @@ class VimeoIE(InfoExtractor): # Vimeo specific: extract video codec and quality information # First consider quality, then codecs, then take everything codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')] - files = { 'hd': [], 'sd': [], 'other': []} + files = {'hd': [], 'sd': [], 'other': []} config_files = config["video"].get("files") or config["request"].get("files") for codec_name, codec_extension in codecs: for quality in config_files.get(codec_name, []): @@ -209,7 +232,7 @@ class VimeoIE(InfoExtractor): if len(formats) == 0: raise ExtractorError(u'No known codec found') - return [{ + return { 'id': video_id, 'uploader': video_uploader, 'uploader_id': video_uploader_id, @@ -218,32 +241,54 @@ class VimeoIE(InfoExtractor): 'thumbnail': video_thumbnail, 'description': video_description, 'formats': formats, - }] + 'webpage_url': url, + } class VimeoChannelIE(InfoExtractor): IE_NAME = u'vimeo:channel' _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P[^/]+)' _MORE_PAGES_INDICATOR = r']+?title="(.*?)"' - def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - channel_id = mobj.group('id') + def _extract_videos(self, list_id, base_url): video_ids = [] - for pagenum in itertools.count(1): - webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum), - channel_id, u'Downloading page %s' % pagenum) + webpage = self._download_webpage( + '%s/videos/page:%d/' % (base_url, pagenum),list_id, + u'Downloading page %s' % pagenum) video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage)) if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: break entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') for video_id in video_ids] - channel_title = self._html_search_regex(r'(.*?)' % channel_id, - webpage, u'channel title') + list_title = self._html_search_regex(self._TITLE_RE, webpage, + u'list title') return {'_type': 'playlist', - 'id': channel_id, - 'title': channel_title, + 'id': list_id, + 'title': list_title, 'entries': entries, } + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + channel_id = mobj.group('id') + return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id) + + +class VimeoUserIE(VimeoChannelIE): + IE_NAME = u'vimeo:user' + _VALID_URL = r'(?:https?://)?vimeo.\com/(?P[^/]+)' + _TITLE_RE = r']+?class="user">([^<>]+?)' + + @classmethod + def suitable(cls, url): + if VimeoChannelIE.suitable(url) or VimeoIE.suitable(url): + return False + return super(VimeoUserIE, cls).suitable(url) + + def _real_extract(self, url): + mobj = re.match(self._VALID_URL, url) + name = mobj.group('name') + return self._extract_videos(name, 'http://vimeo.com/%s' % name)