2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
   9 from .subtitles 
import SubtitlesInfoExtractor
 
  13     compat_urllib_request
, 
  15     get_element_by_attribute
, 
  25 class VimeoBaseInfoExtractor(InfoExtractor
): 
  26     _NETRC_MACHINE 
= 'vimeo' 
  27     _LOGIN_REQUIRED 
= False 
  30         (username
, password
) = self
._get
_login
_info
() 
  32             if self
._LOGIN
_REQUIRED
: 
  33                 raise ExtractorError('No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True) 
  36         login_url 
= 'https://vimeo.com/log_in' 
  37         webpage 
= self
._download
_webpage
(login_url
, None, False) 
  38         token 
= self
._search
_regex
(r
'xsrft: \'(.*?
)\'', webpage, 'login token
') 
  39         data = urlencode_postdata({ 
  46         login_request = compat_urllib_request.Request(login_url, data) 
  47         login_request.add_header('Content
-Type
', 'application
/x
-www
-form
-urlencoded
') 
  48         login_request.add_header('Cookie
', 'xsrft
=%s' % token) 
  49         self._download_webpage(login_request, None, False, 'Wrong login info
') 
  52 class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 
  53     """Information extractor for vimeo.com.""" 
  55     # _VALID_URL matches Vimeo URLs 
  57         (?P<proto>(?:https?:)?//)? 
  58         (?:(?:www|(?P<player>player))\.)? 
  59         vimeo(?P<pro>pro)?\.com/ 
  61         (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)? 
  64         /?(?:[?&].*)?(?:[#].*)?$''' 
  68             'url
': 'http
://vimeo
.com
/56015672#at=0', 
  69             'md5': '8879b6cc097e987f02484baf890129e5', 
  73                 "upload_date": "20121220", 
  74                 "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
  75                 "uploader_id": "user7108434", 
  76                 "uploader": "Filippo Valsorda", 
  77                 "title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
  82             'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', 
  83             'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', 
  84             'note': 'Vimeo Pro video (#1197)', 
  88                 'uploader_id': 'openstreetmapus', 
  89                 'uploader': 'OpenStreetMap US', 
  90                 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', 
  95             'url': 'http://player.vimeo.com/video/54469442', 
  96             'md5': '619b811a4417aa4abe78dc653becf511', 
  97             'note': 'Videos that embed the url in the player page', 
 101                 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software', 
 102                 'uploader': 'The BLN & Business of Software', 
 103                 'uploader_id': 'theblnbusinessofsoftware', 
 108             'url': 'http://vimeo.com/68375962', 
 109             'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', 
 110             'note': 'Video protected with password', 
 114                 'title': 'youtube-dl password protected test video', 
 115                 'upload_date': '20130614', 
 116                 'uploader_id': 'user18948128', 
 117                 'uploader': 'Jaime Marquínez Ferrándiz', 
 121                 'videopassword': 'youtube-dl', 
 125             'url': 'http://vimeo.com/76979871', 
 126             'md5': '3363dd6ffebe3784d56f4132317fd446', 
 127             'note': 'Video with subtitles', 
 131                 'title': 'The New Vimeo Player (You Know, For Videos)', 
 132                 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', 
 133                 'upload_date': '20131015', 
 134                 'uploader_id': 'staff', 
 135                 'uploader': 'Vimeo Staff', 
 142     def suitable(cls
, url
): 
 143         if VimeoChannelIE
.suitable(url
): 
 144             # Otherwise channel urls like http://vimeo.com/channels/31259 would 
 148             return super(VimeoIE
, cls
).suitable(url
) 
 150     def _verify_video_password(self
, url
, video_id
, webpage
): 
 151         password 
= self
._downloader
.params
.get('videopassword', None) 
 153             raise ExtractorError('This video is protected by a password, use the --video-password option') 
 154         token 
= self
._search
_regex
(r
'xsrft: \'(.*?
)\'', webpage, 'login token
') 
 155         data = compat_urllib_parse.urlencode({ 
 156             'password
': password, 
 159         # I didn't manage to use the password 
with https
 
 160         if url
.startswith('https'): 
 161             pass_url 
= url
.replace('https', 'http') 
 164         password_request 
= compat_urllib_request
.Request(pass_url 
+ '/password', data
) 
 165         password_request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 166         password_request
.add_header('Cookie', 'xsrft=%s' % token
) 
 167         self
._download
_webpage
(password_request
, video_id
, 
 168                                'Verifying the password', 
 171     def _verify_player_video_password(self
, url
, video_id
): 
 172         password 
= self
._downloader
.params
.get('videopassword', None) 
 174             raise ExtractorError('This video is protected by a password, use the --video-password option') 
 175         data 
= compat_urllib_parse
.urlencode({'password': password
}) 
 176         pass_url 
= url 
+ '/check-password' 
 177         password_request 
= compat_urllib_request
.Request(pass_url
, data
) 
 178         password_request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 179         return self
._download
_json
( 
 180             password_request
, video_id
, 
 181             'Verifying the password', 
 184     def _real_initialize(self
): 
 187     def _real_extract(self
, url
): 
 188         url
, data 
= unsmuggle_url(url
) 
 189         headers 
= std_headers
 
 191             headers 
= headers
.copy() 
 194         # Extract ID from URL 
 195         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 196         video_id 
= mobj
.group('id') 
 197         if mobj
.group('pro') or mobj
.group('player'): 
 198             url 
= 'http://player.vimeo.com/video/' + video_id
 
 200             url 
= 'https://vimeo.com/' + video_id
 
 202         # Retrieve video webpage to extract further information 
 203         request 
= compat_urllib_request
.Request(url
, None, headers
) 
 205             webpage 
= self
._download
_webpage
(request
, video_id
) 
 206         except ExtractorError 
as ee
: 
 207             if isinstance(ee
.cause
, compat_HTTPError
) and ee
.cause
.code 
== 403: 
 208                 errmsg 
= ee
.cause
.read() 
 209                 if b
'Because of its privacy settings, this video cannot be played here' in errmsg
: 
 210                     raise ExtractorError( 
 211                         'Cannot download embed-only video without embedding ' 
 212                         'URL. Please call youtube-dl with the URL of the page ' 
 213                         'that embeds this video.', 
 217         # Now we begin extracting as much information as we can from what we 
 218         # retrieved. First we extract the information common to all extractors, 
 219         # and latter we extract those that are Vimeo specific. 
 220         self
.report_extraction(video_id
) 
 222         # Extract the config JSON 
 225                 config_url 
= self
._html
_search
_regex
( 
 226                     r
' data-config-url="(.+?)"', webpage
, 'config URL') 
 227                 config_json 
= self
._download
_webpage
(config_url
, video_id
) 
 228                 config 
= json
.loads(config_json
) 
 229             except RegexNotFoundError
: 
 230                 # For pro videos or player.vimeo.com urls 
 231                 # We try to find out to which variable is assigned the config dic 
 232                 m_variable_name 
= re
.search('(\w)\.video\.id', webpage
) 
 233                 if m_variable_name 
is not None: 
 234                     config_re 
= r
'%s=({.+?});' % re
.escape(m_variable_name
.group(1)) 
 236                     config_re 
= [r
' = {config:({.+?}),assets:', r
'(?:[abc])=({.+?});'] 
 237                 config 
= self
._search
_regex
(config_re
, webpage
, 'info section', 
 239                 config 
= json
.loads(config
) 
 240         except Exception as e
: 
 241             if re
.search('The creator of this video has not given you permission to embed it on this domain.', webpage
): 
 242                 raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') 
 244             if re
.search('<form[^>]+?id="pw_form"', webpage
) is not None: 
 245                 self
._verify
_video
_password
(url
, video_id
, webpage
) 
 246                 return self
._real
_extract
(url
) 
 248                 raise ExtractorError('Unable to extract info section', 
 251             if config
.get('view') == 4: 
 252                 config 
= self
._verify
_player
_video
_password
(url
, video_id
) 
 255         video_title 
= config
["video"]["title"] 
 257         # Extract uploader and uploader_id 
 258         video_uploader 
= config
["video"]["owner"]["name"] 
 259         video_uploader_id 
= config
["video"]["owner"]["url"].split('/')[-1] if config
["video"]["owner"]["url"] else None 
 261         # Extract video thumbnail 
 262         video_thumbnail 
= config
["video"].get("thumbnail") 
 263         if video_thumbnail 
is None: 
 264             video_thumbs 
= config
["video"].get("thumbs") 
 265             if video_thumbs 
and isinstance(video_thumbs
, dict): 
 266                 _
, video_thumbnail 
= sorted((int(width
), t_url
) for (width
, t_url
) in video_thumbs
.items())[-1] 
 268         # Extract video description 
 269         video_description 
= None 
 271             video_description 
= get_element_by_attribute("class", "description_wrapper", webpage
) 
 272             if video_description
: 
 273                 video_description 
= clean_html(video_description
) 
 274         except AssertionError as err
: 
 275             # On some pages like (http://player.vimeo.com/video/54469442) the 
 276             # html tags are not closed, python 2.6 cannot handle it 
 277             if err
.args
[0] == 'we should not get here!': 
 282         # Extract video duration 
 283         video_duration 
= int_or_none(config
["video"].get("duration")) 
 285         # Extract upload date 
 286         video_upload_date 
= None 
 287         mobj 
= re
.search(r
'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage
) 
 289             video_upload_date 
= mobj
.group(1) + mobj
.group(2) + mobj
.group(3) 
 292             view_count 
= int(self
._search
_regex
(r
'UserPlays:(\d+)', webpage
, 'view count')) 
 293             like_count 
= int(self
._search
_regex
(r
'UserLikes:(\d+)', webpage
, 'like count')) 
 294             comment_count 
= int(self
._search
_regex
(r
'UserComments:(\d+)', webpage
, 'comment count')) 
 295         except RegexNotFoundError
: 
 296             # This info is only available in vimeo.com/{id} urls 
 301         # Vimeo specific: extract request signature and timestamp 
 302         sig 
= config
['request']['signature'] 
 303         timestamp 
= config
['request']['timestamp'] 
 305         # Vimeo specific: extract video codec and quality information 
 306         # First consider quality, then codecs, then take everything 
 307         codecs 
= [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')] 
 308         files 
= {'hd': [], 'sd': [], 'other': []} 
 309         config_files 
= config
["video"].get("files") or config
["request"].get("files") 
 310         for codec_name
, codec_extension 
in codecs
: 
 311             for quality 
in config_files
.get(codec_name
, []): 
 312                 format_id 
= '-'.join((codec_name
, quality
)).lower() 
 313                 key 
= quality 
if quality 
in files 
else 'other' 
 315                 if isinstance(config_files
[codec_name
], dict): 
 316                     file_info 
= config_files
[codec_name
][quality
] 
 317                     video_url 
= file_info
.get('url') 
 320                 if video_url 
is None: 
 321                     video_url 
= "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
 
 322                         % (video_id
, sig
, timestamp
, quality
, codec_name
.upper()) 
 325                     'ext': codec_extension
, 
 327                     'format_id': format_id
, 
 328                     'width': file_info
.get('width'), 
 329                     'height': file_info
.get('height'), 
 332         for key 
in ('other', 'sd', 'hd'): 
 333             formats 
+= files
[key
] 
 334         if len(formats
) == 0: 
 335             raise ExtractorError('No known codec found') 
 338         text_tracks 
= config
['request'].get('text_tracks') 
 340             for tt 
in text_tracks
: 
 341                 subtitles
[tt
['lang']] = 'http://vimeo.com' + tt
['url'] 
 343         video_subtitles 
= self
.extract_subtitles(video_id
, subtitles
) 
 344         if self
._downloader
.params
.get('listsubtitles', False): 
 345             self
._list
_available
_subtitles
(video_id
, subtitles
) 
 350             'uploader': video_uploader
, 
 351             'uploader_id': video_uploader_id
, 
 352             'upload_date': video_upload_date
, 
 353             'title': video_title
, 
 354             'thumbnail': video_thumbnail
, 
 355             'description': video_description
, 
 356             'duration': video_duration
, 
 359             'view_count': view_count
, 
 360             'like_count': like_count
, 
 361             'comment_count': comment_count
, 
 362             'subtitles': video_subtitles
, 
 366 class VimeoChannelIE(InfoExtractor
): 
 367     IE_NAME 
= 'vimeo:channel' 
 368     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)/?(\?.*)?$' 
 369     _MORE_PAGES_INDICATOR 
= r
'<a.+?rel="next"' 
 370     _TITLE_RE 
= r
'<link rel="alternate"[^>]+?title="(.*?)"' 
 372     def _page_url(self
, base_url
, pagenum
): 
 373         return '%s/videos/page:%d/' % (base_url
, pagenum
) 
 375     def _extract_list_title(self
, webpage
): 
 376         return self
._html
_search
_regex
(self
._TITLE
_RE
, webpage
, 'list title') 
 378     def _extract_videos(self
, list_id
, base_url
): 
 380         for pagenum 
in itertools
.count(1): 
 381             webpage 
= self
._download
_webpage
( 
 382                 self
._page
_url
(base_url
, pagenum
), list_id
, 
 383                 'Downloading page %s' % pagenum
) 
 384             video_ids
.extend(re
.findall(r
'id="clip_(\d+?)"', webpage
)) 
 385             if re
.search(self
._MORE
_PAGES
_INDICATOR
, webpage
, re
.DOTALL
) is None: 
 388         entries 
= [self
.url_result('http://vimeo.com/%s' % video_id
, 'Vimeo') 
 389                    for video_id 
in video_ids
] 
 390         return {'_type': 'playlist', 
 392                 'title': self
._extract
_list
_title
(webpage
), 
 396     def _real_extract(self
, url
): 
 397         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 398         channel_id 
= mobj
.group('id') 
 399         return self
._extract
_videos
(channel_id
, 'http://vimeo.com/channels/%s' % channel_id
) 
 402 class VimeoUserIE(VimeoChannelIE
): 
 403     IE_NAME 
= 'vimeo:user' 
 404     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/(?P<name>[^/]+)(?:/videos|[#?]|$)' 
 405     _TITLE_RE 
= r
'<a[^>]+?class="user">([^<>]+?)</a>' 
 408     def suitable(cls
, url
): 
 409         if VimeoChannelIE
.suitable(url
) or VimeoIE
.suitable(url
) or VimeoAlbumIE
.suitable(url
) or VimeoGroupsIE
.suitable(url
): 
 411         return super(VimeoUserIE
, cls
).suitable(url
) 
 413     def _real_extract(self
, url
): 
 414         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 415         name 
= mobj
.group('name') 
 416         return self
._extract
_videos
(name
, 'http://vimeo.com/%s' % name
) 
 419 class VimeoAlbumIE(VimeoChannelIE
): 
 420     IE_NAME 
= 'vimeo:album' 
 421     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/album/(?P<id>\d+)' 
 422     _TITLE_RE 
= r
'<header id="page_header">\n\s*<h1>(.*?)</h1>' 
 424     def _page_url(self
, base_url
, pagenum
): 
 425         return '%s/page:%d/' % (base_url
, pagenum
) 
 427     def _real_extract(self
, url
): 
 428         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 429         album_id 
= mobj
.group('id') 
 430         return self
._extract
_videos
(album_id
, 'http://vimeo.com/album/%s' % album_id
) 
 433 class VimeoGroupsIE(VimeoAlbumIE
): 
 434     IE_NAME 
= 'vimeo:group' 
 435     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)' 
 437     def _extract_list_title(self
, webpage
): 
 438         return self
._og
_search
_title
(webpage
) 
 440     def _real_extract(self
, url
): 
 441         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 442         name 
= mobj
.group('name') 
 443         return self
._extract
_videos
(name
, 'http://vimeo.com/groups/%s' % name
) 
 446 class VimeoReviewIE(InfoExtractor
): 
 447     IE_NAME 
= 'vimeo:review' 
 448     IE_DESC 
= 'Review pages on vimeo' 
 449     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/[^/]+/review/(?P<id>[^/]+)' 
 451         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 
 452         'file': '75524534.mp4', 
 453         'md5': 'c507a72f780cacc12b2248bb4006d253', 
 455             'title': "DICK HARDWICK 'Comedian'", 
 456             'uploader': 'Richard Hardwick', 
 460     def _real_extract(self
, url
): 
 461         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 462         video_id 
= mobj
.group('id') 
 463         player_url 
= 'https://player.vimeo.com/player/' + video_id
 
 464         return self
.url_result(player_url
, 'Vimeo', video_id
) 
 467 class VimeoWatchLaterIE(VimeoBaseInfoExtractor
, VimeoChannelIE
): 
 468     IE_NAME 
= 'vimeo:watchlater' 
 469     IE_DESC 
= 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' 
 470     _VALID_URL 
= r
'https?://vimeo\.com/home/watchlater|:vimeowatchlater' 
 471     _LOGIN_REQUIRED 
= True 
 472     _TITLE_RE 
= r
'href="/home/watchlater".*?>(.*?)<' 
 474     def _real_initialize(self
): 
 477     def _page_url(self
, base_url
, pagenum
): 
 478         url 
= '%s/page:%d/' % (base_url
, pagenum
) 
 479         request 
= compat_urllib_request
.Request(url
) 
 480         # Set the header to get a partial html page with the ids, 
 481         # the normal page doesn't contain them. 
 482         request
.add_header('X-Requested-With', 'XMLHttpRequest') 
 485     def _real_extract(self
, url
): 
 486         return self
._extract
_videos
('watchlater', 'https://vimeo.com/home/watchlater')