X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/9c8b63077a48f758bf0c9a7351669557071bbd74..10aeb0702c7c06db2096fcaf319711f1f09d0508:/youtube_dl/extractor/vimeo.py diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 1125513..d465bf2 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -1,3 +1,4 @@ +# encoding: utf-8 import json import re import itertools @@ -10,6 +11,7 @@ from ..utils import ( clean_html, get_element_by_attribute, ExtractorError, + RegexNotFoundError, std_headers, unsmuggle_url, ) @@ -18,12 +20,12 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?Phttps?://)?(?:(?:www|player)\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?$' + _VALID_URL = r'(?Phttps?://)?(?:(?:www|(?Pplayer))\.)?vimeo(?Ppro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?Pplay_redirect_hls\?clip_id=)?(?:videos?/)?(?P[0-9]+)/?(?:[?].*)?(?:#.*)?$' _NETRC_MACHINE = 'vimeo' IE_NAME = u'vimeo' _TESTS = [ { - u'url': u'http://vimeo.com/56015672', + u'url': u'http://vimeo.com/56015672#at=0', u'file': u'56015672.mp4', u'md5': u'8879b6cc097e987f02484baf890129e5', u'info_dict': { @@ -54,7 +56,22 @@ class VimeoIE(InfoExtractor): u'title': u'Kathy Sierra: Building the minimum Badass User, Business of Software', u'uploader': u'The BLN & Business of Software', }, - } + }, + { + u'url': u'http://vimeo.com/68375962', + u'file': u'68375962.mp4', + u'md5': u'aaf896bdb7ddd6476df50007a0ac0ae7', + u'note': u'Video protected with password', + u'info_dict': { + u'title': u'youtube-dl password protected test video', + u'upload_date': u'20130614', + u'uploader_id': u'user18948128', + u'uploader': u'Jaime Marquínez Ferrándiz', + }, + u'params': { + u'videopassword': u'youtube-dl', + }, + }, ] def _login(self): @@ -111,11 +128,9 @@ class VimeoIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_id = mobj.group('id') - if not mobj.group('proto'): - url = 'https://' + url - elif mobj.group('pro'): + if mobj.group('pro') or mobj.group('player'): url = 'http://player.vimeo.com/video/' + video_id - elif mobj.group('direct_link'): + else: url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information @@ -129,18 +144,26 @@ class VimeoIE(InfoExtractor): # Extract the config JSON try: - config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], - webpage, u'info section', flags=re.DOTALL) - config = json.loads(config) - except: + try: + config_url = self._html_search_regex( + r' data-config-url="(.+?)"', webpage, u'config URL') + config_json = self._download_webpage(config_url, video_id) + config = json.loads(config_json) + except RegexNotFoundError: + # For pro videos or player.vimeo.com urls + config = self._search_regex([r' = {config:({.+?}),assets:', r'c=({.+?);'], + webpage, u'info section', flags=re.DOTALL) + config = json.loads(config) + except Exception as e: if re.search('The creator of this video has not given you permission to embed it on this domain.', webpage): raise ExtractorError(u'The author has restricted the access to this video, try with the "--referer" option') - if re.search('If so please provide the correct password.', webpage): + if re.search(']+?id="pw_form"', webpage) is not None: self._verify_video_password(url, video_id, webpage) return self._real_extract(url) else: - raise ExtractorError(u'Unable to extract info section') + raise ExtractorError(u'Unable to extract info section', + cause=e) # Extract title video_title = config["video"]["title"] @@ -180,7 +203,7 @@ class VimeoIE(InfoExtractor): # Vimeo specific: extract video codec and quality information # First consider quality, then codecs, then take everything codecs = [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')] - files = { 'hd': [], 'sd': [], 'other': []} + files = {'hd': [], 'sd': [], 'other': []} config_files = config["video"].get("files") or config["request"].get("files") for codec_name, codec_extension in codecs: for quality in config_files.get(codec_name, []): @@ -209,7 +232,7 @@ class VimeoIE(InfoExtractor): if len(formats) == 0: raise ExtractorError(u'No known codec found') - return [{ + return { 'id': video_id, 'uploader': video_uploader, 'uploader_id': video_uploader_id, @@ -218,7 +241,8 @@ class VimeoIE(InfoExtractor): 'thumbnail': video_thumbnail, 'description': video_description, 'formats': formats, - }] + 'webpage_url': url, + } class VimeoChannelIE(InfoExtractor):