X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/0865c28fb29a6481cd837cf8c1ef0cd134c6ef8e..82d01c1a88911e89e3e7ed4518d54c028f9e4792:/youtube_dl/extractor/vimeo.py diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 8f540f5..10d6745 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import json import re import itertools -import hashlib from .common import InfoExtractor from ..compat import ( @@ -20,8 +19,10 @@ from ..utils import ( RegexNotFoundError, smuggle_url, std_headers, + unified_strdate, unsmuggle_url, urlencode_postdata, + unescapeHTML, ) @@ -38,7 +39,7 @@ class VimeoBaseInfoExtractor(InfoExtractor): self.report_login() login_url = 'https://vimeo.com/log_in' webpage = self._download_webpage(login_url, None, False) - token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') + token = self._search_regex(r'xsrft":"(.*?)"', webpage, 'login token') data = urlencode_postdata({ 'email': username, 'password': password, @@ -140,6 +141,7 @@ class VimeoIE(VimeoBaseInfoExtractor): 'description': 'md5:8678b246399b070816b12313e8b4eb5c', 'uploader_id': 'atencio', 'uploader': 'Peter Atencio', + 'upload_date': '20130927', 'duration': 187, }, }, @@ -172,21 +174,34 @@ class VimeoIE(VimeoBaseInfoExtractor): }, ] + @staticmethod + def _extract_vimeo_url(url, webpage): + # Look for embedded (iframe) Vimeo player + mobj = re.search( + r']+?src=(["\'])(?P(?:https?:)?//player\.vimeo\.com/video/.+?)\1', webpage) + if mobj: + player_url = unescapeHTML(mobj.group('url')) + surl = smuggle_url(player_url, {'Referer': url}) + return surl + # Look for embedded (swf embed) Vimeo player + mobj = re.search( + r']+?src="((?:https?:)?//(?:www\.)?vimeo\.com/moogaloop\.swf.+?)"', webpage) + if mobj: + return mobj.group(1) + def _verify_video_password(self, url, video_id, webpage): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This video is protected by a password, use the --video-password option', expected=True) - token = self._search_regex(r'xsrft: \'(.*?)\'', webpage, 'login token') - data = compat_urllib_parse.urlencode({ + token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token') + data = urlencode_postdata({ 'password': password, 'token': token, }) - # I didn't manage to use the password with https - if url.startswith('https'): - pass_url = url.replace('https', 'http') - else: - pass_url = url - password_request = compat_urllib_request.Request(pass_url + '/password', data) + if url.startswith('http://'): + # vimeo only supports https now, but the user can give an http url + url = url.replace('http://', 'https://') + password_request = compat_urllib_request.Request(url + '/password', data) password_request.add_header('Content-Type', 'application/x-www-form-urlencoded') password_request.add_header('Cookie', 'xsrft=%s' % token) return self._download_webpage( @@ -223,12 +238,9 @@ class VimeoIE(VimeoBaseInfoExtractor): video_id = mobj.group('id') orig_url = url if mobj.group('pro') or mobj.group('player'): - url = 'http://player.vimeo.com/video/' + video_id - - password = self._downloader.params.get('videopassword', None) - if password: - headers['Cookie'] = '%s_password=%s' % ( - video_id, hashlib.md5(password.encode('utf-8')).hexdigest()) + url = 'https://player.vimeo.com/video/' + video_id + else: + url = 'https://vimeo.com/' + video_id # Retrieve video webpage to extract further information request = compat_urllib_request.Request(url, None, headers) @@ -250,6 +262,16 @@ class VimeoIE(VimeoBaseInfoExtractor): # and latter we extract those that are Vimeo specific. self.report_extraction(video_id) + vimeo_config = self._search_regex( + r'vimeo\.config\s*=\s*({.+?});', webpage, + 'vimeo config', default=None) + if vimeo_config: + seed_status = self._parse_json(vimeo_config, video_id).get('seed_status', {}) + if seed_status.get('state') == 'failed': + raise ExtractorError( + '%s returned error: %s' % (self.IE_NAME, seed_status['title']), + expected=True) + # Extract the config JSON try: try: @@ -323,9 +345,9 @@ class VimeoIE(VimeoBaseInfoExtractor): # Extract upload date video_upload_date = None - mobj = re.search(r'[^/?#]+)/?(?:$|[?#])' + _VALID_URL = r'https://vimeo\.com/channels/(?P[^/?#]+)/?(?:$|[?#])' _MORE_PAGES_INDICATOR = r']+?title="(.*?)"' _TESTS = [{ - 'url': 'http://vimeo.com/channels/tributes', + 'url': 'https://vimeo.com/channels/tributes', 'info_dict': { 'id': 'tributes', 'title': 'Vimeo Tributes', @@ -430,15 +452,11 @@ class VimeoChannelIE(InfoExtractor): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True) - fields = dict(re.findall(r'''(?x)[^/]+)(?:/videos|[#?]|$)' + _VALID_URL = r'https://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P[^/]+)(?:/videos|[#?]|$)' _TITLE_RE = r']+?class="user">([^<>]+?)' _TESTS = [{ - 'url': 'http://vimeo.com/nkistudio/videos', + 'url': 'https://vimeo.com/nkistudio/videos', 'info_dict': { 'title': 'Nki', 'id': 'nkistudio', @@ -495,15 +513,15 @@ class VimeoUserIE(VimeoChannelIE): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) name = mobj.group('name') - return self._extract_videos(name, 'http://vimeo.com/%s' % name) + return self._extract_videos(name, 'https://vimeo.com/%s' % name) class VimeoAlbumIE(VimeoChannelIE): IE_NAME = 'vimeo:album' - _VALID_URL = r'https?://vimeo\.com/album/(?P\d+)' + _VALID_URL = r'https://vimeo\.com/album/(?P\d+)' _TITLE_RE = r'