2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
   9 from .subtitles 
import SubtitlesInfoExtractor
 
  10 from ..compat 
import ( 
  13     compat_urllib_request
, 
  27 class VimeoBaseInfoExtractor(InfoExtractor
): 
  28     _NETRC_MACHINE 
= 'vimeo' 
  29     _LOGIN_REQUIRED 
= False 
  32         (username
, password
) = self
._get
_login
_info
() 
  34             if self
._LOGIN
_REQUIRED
: 
  35                 raise ExtractorError('No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True) 
  38         login_url 
= 'https://vimeo.com/log_in' 
  39         webpage 
= self
._download
_webpage
(login_url
, None, False) 
  40         token 
= self
._search
_regex
(r
'xsrft: \'(.*?
)\'', webpage, 'login token
') 
  41         data = urlencode_postdata({ 
  48         login_request = compat_urllib_request.Request(login_url, data) 
  49         login_request.add_header('Content
-Type
', 'application
/x
-www
-form
-urlencoded
') 
  50         login_request.add_header('Cookie
', 'xsrft
=%s' % token) 
  51         self._download_webpage(login_request, None, False, 'Wrong login info
') 
  54 class VimeoIE(VimeoBaseInfoExtractor, SubtitlesInfoExtractor): 
  55     """Information extractor for vimeo.com.""" 
  57     # _VALID_URL matches Vimeo URLs 
  60         (?:(?:www|(?P<player>player))\.)? 
  61         vimeo(?P<pro>pro)?\.com/ 
  62         (?!channels/[^/?#]+/?(?:$|[?#])|album/) 
  64         (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)? 
  67         /?(?:[?&].*)?(?:[#].*)?$''' 
  71             'url
': 'http
://vimeo
.com
/56015672#at=0', 
  72             'md5': '8879b6cc097e987f02484baf890129e5', 
  76                 "upload_date": "20121220", 
  77                 "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
  78                 "uploader_id": "user7108434", 
  79                 "uploader": "Filippo Valsorda", 
  80                 "title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
  85             'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', 
  86             'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', 
  87             'note': 'Vimeo Pro video (#1197)', 
  91                 'uploader_id': 'openstreetmapus', 
  92                 'uploader': 'OpenStreetMap US', 
  93                 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', 
  94                 'description': 'md5:380943ec71b89736ff4bf27183233d09', 
  99             'url': 'http://player.vimeo.com/video/54469442', 
 100             'md5': '619b811a4417aa4abe78dc653becf511', 
 101             'note': 'Videos that embed the url in the player page', 
 105                 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012', 
 106                 'uploader': 'The BLN & Business of Software', 
 107                 'uploader_id': 'theblnbusinessofsoftware', 
 113             'url': 'http://vimeo.com/68375962', 
 114             'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', 
 115             'note': 'Video protected with password', 
 119                 'title': 'youtube-dl password protected test video', 
 120                 'upload_date': '20130614', 
 121                 'uploader_id': 'user18948128', 
 122                 'uploader': 'Jaime Marquínez Ferrándiz', 
 124                 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.', 
 127                 'videopassword': 'youtube-dl', 
 131             'url': 'http://vimeo.com/channels/keypeele/75629013', 
 132             'md5': '2f86a05afe9d7abc0b9126d229bbe15d', 
 133             'note': 'Video is freely available via original URL ' 
 134                     'and protected with password when accessed via http://vimeo.com/75629013', 
 138                 'title': 'Key & Peele: Terrorist Interrogation', 
 139                 'description': 'md5:8678b246399b070816b12313e8b4eb5c', 
 140                 'uploader_id': 'atencio', 
 141                 'uploader': 'Peter Atencio', 
 146             'url': 'http://vimeo.com/76979871', 
 147             'md5': '3363dd6ffebe3784d56f4132317fd446', 
 148             'note': 'Video with subtitles', 
 152                 'title': 'The New Vimeo Player (You Know, For Videos)', 
 153                 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', 
 154                 'upload_date': '20131015', 
 155                 'uploader_id': 'staff', 
 156                 'uploader': 'Vimeo Staff', 
 161             # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/ 
 162             'url': 'https://player.vimeo.com/video/98044508', 
 163             'note': 'The js code contains assignments to the same variable as the config', 
 167                 'title': 'Pier Solar OUYA Official Trailer', 
 168                 'uploader': 'Tulio Gonçalves', 
 169                 'uploader_id': 'user28849593', 
 174     def _verify_video_password(self
, url
, video_id
, webpage
): 
 175         password 
= self
._downloader
.params
.get('videopassword', None) 
 177             raise ExtractorError('This video is protected by a password, use the --video-password option') 
 178         token 
= self
._search
_regex
(r
'xsrft: \'(.*?
)\'', webpage, 'login token
') 
 179         data = compat_urllib_parse.urlencode({ 
 180             'password
': password, 
 183         # I didn't manage to use the password 
with https
 
 184         if url
.startswith('https'): 
 185             pass_url 
= url
.replace('https', 'http') 
 188         password_request 
= compat_urllib_request
.Request(pass_url 
+ '/password', data
) 
 189         password_request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 190         password_request
.add_header('Cookie', 'xsrft=%s' % token
) 
 191         self
._download
_webpage
(password_request
, video_id
, 
 192                                'Verifying the password', 
 195     def _verify_player_video_password(self
, url
, video_id
): 
 196         password 
= self
._downloader
.params
.get('videopassword', None) 
 198             raise ExtractorError('This video is protected by a password, use the --video-password option') 
 199         data 
= compat_urllib_parse
.urlencode({'password': password
}) 
 200         pass_url 
= url 
+ '/check-password' 
 201         password_request 
= compat_urllib_request
.Request(pass_url
, data
) 
 202         password_request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 203         return self
._download
_json
( 
 204             password_request
, video_id
, 
 205             'Verifying the password', 
 208     def _real_initialize(self
): 
 211     def _real_extract(self
, url
): 
 212         url
, data 
= unsmuggle_url(url
) 
 213         headers 
= std_headers
 
 215             headers 
= headers
.copy() 
 217         if 'Referer' not in headers
: 
 218             headers
['Referer'] = url
 
 220         # Extract ID from URL 
 221         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 222         video_id 
= mobj
.group('id') 
 224         if mobj
.group('pro') or mobj
.group('player'): 
 225             url 
= 'http://player.vimeo.com/video/' + video_id
 
 227         # Retrieve video webpage to extract further information 
 228         request 
= compat_urllib_request
.Request(url
, None, headers
) 
 230             webpage 
= self
._download
_webpage
(request
, video_id
) 
 231         except ExtractorError 
as ee
: 
 232             if isinstance(ee
.cause
, compat_HTTPError
) and ee
.cause
.code 
== 403: 
 233                 errmsg 
= ee
.cause
.read() 
 234                 if b
'Because of its privacy settings, this video cannot be played here' in errmsg
: 
 235                     raise ExtractorError( 
 236                         'Cannot download embed-only video without embedding ' 
 237                         'URL. Please call youtube-dl with the URL of the page ' 
 238                         'that embeds this video.', 
 242         # Now we begin extracting as much information as we can from what we 
 243         # retrieved. First we extract the information common to all extractors, 
 244         # and later we extract those that are Vimeo specific.
 245         self
.report_extraction(video_id
) 
 247         # Extract the config JSON 
 250                 config_url 
= self
._html
_search
_regex
( 
 251                     r
' data-config-url="(.+?)"', webpage
, 'config URL') 
 252                 config_json 
= self
._download
_webpage
(config_url
, video_id
) 
 253                 config 
= json
.loads(config_json
) 
 254             except RegexNotFoundError
: 
 255                 # For pro videos or player.vimeo.com urls 
 256                 # We try to find out which variable the config dict is assigned to
 257                 m_variable_name 
= re
.search('(\w)\.video\.id', webpage
) 
 258                 if m_variable_name 
is not None: 
 259                     config_re 
= r
'%s=({[^}].+?});' % re
.escape(m_variable_name
.group(1)) 
 261                     config_re 
= [r
' = {config:({.+?}),assets:', r
'(?:[abc])=({.+?});'] 
 262                 config 
= self
._search
_regex
(config_re
, webpage
, 'info section', 
 264                 config 
= json
.loads(config
) 
 265         except Exception as e
: 
 266             if re
.search('The creator of this video has not given you permission to embed it on this domain.', webpage
): 
 267                 raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') 
 269             if re
.search('<form[^>]+?id="pw_form"', webpage
) is not None: 
 270                 self
._verify
_video
_password
(url
, video_id
, webpage
) 
 271                 return self
._real
_extract
(url
) 
 273                 raise ExtractorError('Unable to extract info section', 
 276             if config
.get('view') == 4: 
 277                 config 
= self
._verify
_player
_video
_password
(url
, video_id
) 
 280         video_title 
= config
["video"]["title"] 
 282         # Extract uploader and uploader_id 
 283         video_uploader 
= config
["video"]["owner"]["name"] 
 284         video_uploader_id 
= config
["video"]["owner"]["url"].split('/')[-1] if config
["video"]["owner"]["url"] else None 
 286         # Extract video thumbnail 
 287         video_thumbnail 
= config
["video"].get("thumbnail") 
 288         if video_thumbnail 
is None: 
 289             video_thumbs 
= config
["video"].get("thumbs") 
 290             if video_thumbs 
and isinstance(video_thumbs
, dict): 
 291                 _
, video_thumbnail 
= sorted((int(width 
if width
.isdigit() else 0), t_url
) for (width
, t_url
) in video_thumbs
.items())[-1] 
 293         # Extract video description 
 295         video_description 
= self
._html
_search
_regex
( 
 296             r
'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>', 
 297             webpage
, 'description', default
=None) 
 298         if not video_description
: 
 299             video_description 
= self
._html
_search
_meta
( 
 300                 'description', webpage
, default
=None) 
 301         if not video_description 
and mobj
.group('pro'): 
 302             orig_webpage 
= self
._download
_webpage
( 
 304                 note
='Downloading webpage for description', 
 307                 video_description 
= self
._html
_search
_meta
( 
 308                     'description', orig_webpage
, default
=None) 
 309         if not video_description 
and not mobj
.group('player'): 
 310             self
._downloader
.report_warning('Cannot find video description') 
 312         # Extract video duration 
 313         video_duration 
= int_or_none(config
["video"].get("duration")) 
 315         # Extract upload date 
 316         video_upload_date 
= None 
 317         mobj 
= re
.search(r
'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage
) 
 319             video_upload_date 
= mobj
.group(1) + mobj
.group(2) + mobj
.group(3) 
 322             view_count 
= int(self
._search
_regex
(r
'UserPlays:(\d+)', webpage
, 'view count')) 
 323             like_count 
= int(self
._search
_regex
(r
'UserLikes:(\d+)', webpage
, 'like count')) 
 324             comment_count 
= int(self
._search
_regex
(r
'UserComments:(\d+)', webpage
, 'comment count')) 
 325         except RegexNotFoundError
: 
 326             # This info is only available in vimeo.com/{id} urls 
 331         # Vimeo specific: extract request signature and timestamp 
 332         sig 
= config
['request']['signature'] 
 333         timestamp 
= config
['request']['timestamp'] 
 335         # Vimeo specific: extract video codec and quality information 
 336         # First consider quality, then codecs, then take everything 
 337         codecs 
= [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')] 
 338         files 
= {'hd': [], 'sd': [], 'other': []} 
 339         config_files 
= config
["video"].get("files") or config
["request"].get("files") 
 340         for codec_name
, codec_extension 
in codecs
: 
 341             for quality 
in config_files
.get(codec_name
, []): 
 342                 format_id 
= '-'.join((codec_name
, quality
)).lower() 
 343                 key 
= quality 
if quality 
in files 
else 'other' 
 345                 if isinstance(config_files
[codec_name
], dict): 
 346                     file_info 
= config_files
[codec_name
][quality
] 
 347                     video_url 
= file_info
.get('url') 
 350                 if video_url 
is None: 
 351                     video_url 
= "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
 
 352                         % (video_id
, sig
, timestamp
, quality
, codec_name
.upper()) 
 355                     'ext': codec_extension
, 
 357                     'format_id': format_id
, 
 358                     'width': file_info
.get('width'), 
 359                     'height': file_info
.get('height'), 
 362         for key 
in ('other', 'sd', 'hd'): 
 363             formats 
+= files
[key
] 
 364         if len(formats
) == 0: 
 365             raise ExtractorError('No known codec found') 
 368         text_tracks 
= config
['request'].get('text_tracks') 
 370             for tt 
in text_tracks
: 
 371                 subtitles
[tt
['lang']] = 'http://vimeo.com' + tt
['url'] 
 373         video_subtitles 
= self
.extract_subtitles(video_id
, subtitles
) 
 374         if self
._downloader
.params
.get('listsubtitles', False): 
 375             self
._list
_available
_subtitles
(video_id
, subtitles
) 
 380             'uploader': video_uploader
, 
 381             'uploader_id': video_uploader_id
, 
 382             'upload_date': video_upload_date
, 
 383             'title': video_title
, 
 384             'thumbnail': video_thumbnail
, 
 385             'description': video_description
, 
 386             'duration': video_duration
, 
 389             'view_count': view_count
, 
 390             'like_count': like_count
, 
 391             'comment_count': comment_count
, 
 392             'subtitles': video_subtitles
, 
 396 class VimeoChannelIE(InfoExtractor
): 
 397     IE_NAME 
= 'vimeo:channel' 
 398     _VALID_URL 
= r
'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])' 
 399     _MORE_PAGES_INDICATOR 
= r
'<a.+?rel="next"' 
 400     _TITLE_RE 
= r
'<link rel="alternate"[^>]+?title="(.*?)"' 
 402         'url': 'http://vimeo.com/channels/tributes', 
 404             'title': 'Vimeo Tributes', 
 406         'playlist_mincount': 25, 
 409     def _page_url(self
, base_url
, pagenum
): 
 410         return '%s/videos/page:%d/' % (base_url
, pagenum
) 
 412     def _extract_list_title(self
, webpage
): 
 413         return self
._html
_search
_regex
(self
._TITLE
_RE
, webpage
, 'list title') 
 415     def _extract_videos(self
, list_id
, base_url
): 
 417         for pagenum 
in itertools
.count(1): 
 418             webpage 
= self
._download
_webpage
( 
 419                 self
._page
_url
(base_url
, pagenum
), list_id
, 
 420                 'Downloading page %s' % pagenum
) 
 421             video_ids
.extend(re
.findall(r
'id="clip_(\d+?)"', webpage
)) 
 422             if re
.search(self
._MORE
_PAGES
_INDICATOR
, webpage
, re
.DOTALL
) is None: 
 425         entries 
= [self
.url_result('http://vimeo.com/%s' % video_id
, 'Vimeo') 
 426                    for video_id 
in video_ids
] 
 427         return {'_type': 'playlist', 
 429                 'title': self
._extract
_list
_title
(webpage
), 
 433     def _real_extract(self
, url
): 
 434         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 435         channel_id 
= mobj
.group('id') 
 436         return self
._extract
_videos
(channel_id
, 'http://vimeo.com/channels/%s' % channel_id
) 
 439 class VimeoUserIE(VimeoChannelIE
): 
 440     IE_NAME 
= 'vimeo:user' 
 441     _VALID_URL 
= r
'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)' 
 442     _TITLE_RE 
= r
'<a[^>]+?class="user">([^<>]+?)</a>' 
 444         'url': 'http://vimeo.com/nkistudio/videos', 
 448         'playlist_mincount': 66, 
 451     def _real_extract(self
, url
): 
 452         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 453         name 
= mobj
.group('name') 
 454         return self
._extract
_videos
(name
, 'http://vimeo.com/%s' % name
) 
 457 class VimeoAlbumIE(VimeoChannelIE
): 
 458     IE_NAME 
= 'vimeo:album' 
 459     _VALID_URL 
= r
'https?://vimeo\.com/album/(?P<id>\d+)' 
 460     _TITLE_RE 
= r
'<header id="page_header">\n\s*<h1>(.*?)</h1>' 
 462         'url': 'http://vimeo.com/album/2632481', 
 464             'title': 'Staff Favorites: November 2013', 
 466         'playlist_mincount': 13, 
 469     def _page_url(self
, base_url
, pagenum
): 
 470         return '%s/page:%d/' % (base_url
, pagenum
) 
 472     def _real_extract(self
, url
): 
 473         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 474         album_id 
= mobj
.group('id') 
 475         return self
._extract
_videos
(album_id
, 'http://vimeo.com/album/%s' % album_id
) 
 478 class VimeoGroupsIE(VimeoAlbumIE
): 
 479     IE_NAME 
= 'vimeo:group' 
 480     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)' 
 482         'url': 'http://vimeo.com/groups/rolexawards', 
 484             'title': 'Rolex Awards for Enterprise', 
 486         'playlist_mincount': 73, 
 489     def _extract_list_title(self
, webpage
): 
 490         return self
._og
_search
_title
(webpage
) 
 492     def _real_extract(self
, url
): 
 493         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 494         name 
= mobj
.group('name') 
 495         return self
._extract
_videos
(name
, 'http://vimeo.com/groups/%s' % name
) 
 498 class VimeoReviewIE(InfoExtractor
): 
 499     IE_NAME 
= 'vimeo:review' 
 500     IE_DESC 
= 'Review pages on vimeo' 
 501     _VALID_URL 
= r
'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)' 
 503         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 
 504         'file': '75524534.mp4', 
 505         'md5': 'c507a72f780cacc12b2248bb4006d253', 
 507             'title': "DICK HARDWICK 'Comedian'", 
 508             'uploader': 'Richard Hardwick', 
 511         'note': 'video player needs Referer', 
 512         'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053', 
 513         'md5': '6295fdab8f4bf6a002d058b2c6dce276', 
 517             'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn', 
 518             'uploader': 'DevWeek Events', 
 520             'thumbnail': 're:^https?://.*\.jpg$', 
 524     def _real_extract(self
, url
): 
 525         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 526         video_id 
= mobj
.group('id') 
 527         player_url 
= 'https://player.vimeo.com/player/' + video_id
 
 528         return self
.url_result(player_url
, 'Vimeo', video_id
) 
 531 class VimeoWatchLaterIE(VimeoBaseInfoExtractor
, VimeoChannelIE
): 
 532     IE_NAME 
= 'vimeo:watchlater' 
 533     IE_DESC 
= 'Vimeo watch later list, "vimeowatchlater" keyword (requires authentication)' 
 534     _VALID_URL 
= r
'https?://vimeo\.com/home/watchlater|:vimeowatchlater' 
 535     _LOGIN_REQUIRED 
= True 
 536     _TITLE_RE 
= r
'href="/home/watchlater".*?>(.*?)<' 
 538         'url': 'http://vimeo.com/home/watchlater', 
 539         'only_matching': True, 
 542     def _real_initialize(self
): 
 545     def _page_url(self
, base_url
, pagenum
): 
 546         url 
= '%s/page:%d/' % (base_url
, pagenum
) 
 547         request 
= compat_urllib_request
.Request(url
) 
 548         # Set the header to get a partial html page with the ids, 
 549         # the normal page doesn't contain them. 
 550         request
.add_header('X-Requested-With', 'XMLHttpRequest') 
 553     def _real_extract(self
, url
): 
 554         return self
._extract
_videos
('watchlater', 'https://vimeo.com/home/watchlater') 
 557 class VimeoLikesIE(InfoExtractor
): 
 558     _VALID_URL 
= r
'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)' 
 559     IE_NAME 
= 'vimeo:likes' 
 560     IE_DESC 
= 'Vimeo user likes' 
 562         'url': 'https://vimeo.com/user755559/likes/', 
 563         'playlist_mincount': 293, 
 565             "description": "See all the videos urza likes", 
 566             "title": 'Videos urza likes', 
 570     def _real_extract(self
, url
): 
 571         user_id 
= self
._match
_id
(url
) 
 572         webpage 
= self
._download
_webpage
(url
, user_id
) 
 573         page_count 
= self
._int
( 
 575                 r
'''(?x)<li><a\s+href="[^"]+"\s+data-page="([0-9]+)"> 
 576                     .*?</a></li>\s*<li\s+class="pagination_next"> 
 577                 ''', webpage
, 'page count'), 
 578             'page count', fatal
=True) 
 580         title 
= self
._html
_search
_regex
( 
 581             r
'(?s)<h1>(.+?)</h1>', webpage
, 'title', fatal
=False) 
 582         description 
= self
._html
_search
_meta
('description', webpage
) 
 585             page_url 
= '%s//vimeo.com/user%s/likes/page:%d/sort:date' % ( 
 586                 self
.http_scheme(), user_id
, idx 
+ 1) 
 587             webpage 
= self
._download
_webpage
( 
 589                 note
='Downloading page %d/%d' % (idx 
+ 1, page_count
)) 
 590             video_list 
= self
._search
_regex
( 
 591                 r
'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>', 
 592                 webpage
, 'video content') 
 594                 r
'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list
) 
 598                     'url': compat_urlparse
.urljoin(page_url
, path
), 
 601         pl 
= InAdvancePagedList(_get_page
, page_count
, PAGE_SIZE
) 
 605             'id': 'user%s_likes' % user_id
, 
 607             'description': description
,