2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
   9 from .subtitles 
import SubtitlesInfoExtractor
 
  12     compat_urllib_request
, 
  14     get_element_by_attribute
, 
  22 class VimeoIE(SubtitlesInfoExtractor
): 
  23     """Information extractor for vimeo.com.""" 
  25     # _VALID_URL matches Vimeo URLs 
  27         (?P<proto>(?:https?:)?//)? 
  28         (?:(?:www|(?P<player>player))\.)? 
  29         vimeo(?P<pro>pro)?\.com/ 
  31         (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)? 
  34         /?(?:[?&].*)?(?:[#].*)?$''' 
  35     _NETRC_MACHINE 
= 'vimeo' 
  39             'url': 'http://vimeo.com/56015672#at=0', 
  40             'file': '56015672.mp4', 
  41             'md5': '8879b6cc097e987f02484baf890129e5', 
  43                 "upload_date": "20121220",  
  44                 "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",  
  45                 "uploader_id": "user7108434",  
  46                 "uploader": "Filippo Valsorda",  
  47                 "title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
  51             'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', 
  52             'file': '68093876.mp4', 
  53             'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', 
  54             'note': 'Vimeo Pro video (#1197)', 
  56                 'uploader_id': 'openstreetmapus', 
  57                 'uploader': 'OpenStreetMap US', 
  58                 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', 
  62             'url': 'http://player.vimeo.com/video/54469442', 
  63             'file': '54469442.mp4', 
  64             'md5': '619b811a4417aa4abe78dc653becf511', 
  65             'note': 'Videos that embed the url in the player page', 
  67                 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software', 
  68                 'uploader': 'The BLN & Business of Software', 
  69                 'uploader_id': 'theblnbusinessofsoftware', 
  73             'url': 'http://vimeo.com/68375962', 
  74             'file': '68375962.mp4', 
  75             'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', 
  76             'note': 'Video protected with password', 
  78                 'title': 'youtube-dl password protected test video', 
  79                 'upload_date': '20130614', 
  80                 'uploader_id': 'user18948128', 
  81                 'uploader': 'Jaime Marquínez Ferrándiz', 
  84                 'videopassword': 'youtube-dl', 
  88             'url': 'http://vimeo.com/76979871', 
  89             'md5': '3363dd6ffebe3784d56f4132317fd446', 
  90             'note': 'Video with subtitles', 
  94                 'title': 'The New Vimeo Player (You Know, For Videos)', 
  95                 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', 
  96                 'upload_date': '20131015', 
  97                 'uploader_id': 'staff', 
  98                 'uploader': 'Vimeo Staff', 
 104         (username
, password
) = self
._get
_login
_info
() 
 108         login_url 
= 'https://vimeo.com/log_in' 
 109         webpage 
= self
._download
_webpage
(login_url
, None, False) 
 110         token 
= self
._search
_regex
(r
'xsrft: \'(.*?
)\'', webpage, 'login token
') 
 111         data = compat_urllib_parse.urlencode({'email
': username, 
 112                                               'password
': password, 
 117         login_request = compat_urllib_request.Request(login_url, data) 
 118         login_request.add_header('Content
-Type
', 'application
/x
-www
-form
-urlencoded
') 
 119         login_request.add_header('Cookie
', 'xsrft
=%s' % token) 
 120         self._download_webpage(login_request, None, False, 'Wrong login info
') 
 122     def _verify_video_password(self, url, video_id, webpage): 
 123         password = self._downloader.params.get('videopassword
', None) 
 125             raise ExtractorError('This video 
is protected by a password
, use the 
--video
-password option
') 
 126         token = self._search_regex(r'xsrft
: \'(.*?
)\'', webpage, 'login token
') 
 127         data = compat_urllib_parse.urlencode({'password
': password, 
 129         # I didn't manage to use the password 
with https
 
 130         if url
.startswith('https'): 
 131             pass_url 
= url
.replace('https','http') 
 134         password_request 
= compat_urllib_request
.Request(pass_url
+'/password', data
) 
 135         password_request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 136         password_request
.add_header('Cookie', 'xsrft=%s' % token
) 
 137         self
._download
_webpage
(password_request
, video_id
, 
 138                                'Verifying the password', 
 141     def _verify_player_video_password(self
, url
, video_id
): 
 142         password 
= self
._downloader
.params
.get('videopassword', None) 
 144             raise ExtractorError('This video is protected by a password, use the --video-password option') 
 145         data 
= compat_urllib_parse
.urlencode({'password': password
}) 
 146         pass_url 
= url 
+ '/check-password' 
 147         password_request 
= compat_urllib_request
.Request(pass_url
, data
) 
 148         password_request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 149         return self
._download
_json
( 
 150             password_request
, video_id
, 
 151             'Verifying the password', 
 154     def _real_initialize(self
): 
 157     def _real_extract(self
, url
): 
 158         url
, data 
= unsmuggle_url(url
) 
 159         headers 
= std_headers
 
 161             headers 
= headers
.copy() 
 164         # Extract ID from URL 
 165         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 166         video_id 
= mobj
.group('id') 
 167         if mobj
.group('pro') or mobj
.group('player'): 
 168             url 
= 'http://player.vimeo.com/video/' + video_id
 
 170             url 
= 'https://vimeo.com/' + video_id
 
 172         # Retrieve video webpage to extract further information 
 173         request 
= compat_urllib_request
.Request(url
, None, headers
) 
 174         webpage 
= self
._download
_webpage
(request
, video_id
) 
 176         # Now we begin extracting as much information as we can from what we 
 177         # retrieved. First we extract the information common to all extractors, 
 178         # and later we extract those that are Vimeo specific. 
 179         self
.report_extraction(video_id
) 
 181         # Extract the config JSON 
 184                 config_url 
= self
._html
_search
_regex
( 
 185                     r
' data-config-url="(.+?)"', webpage
, 'config URL') 
 186                 config_json 
= self
._download
_webpage
(config_url
, video_id
) 
 187                 config 
= json
.loads(config_json
) 
 188             except RegexNotFoundError
: 
 189                 # For pro videos or player.vimeo.com urls 
 190                 # We try to find out which variable the config dict is assigned to 
 191                 m_variable_name 
= re
.search('(\w)\.video\.id', webpage
) 
 192                 if m_variable_name 
is not None: 
 193                     config_re 
= r
'%s=({.+?});' % re
.escape(m_variable_name
.group(1)) 
 195                     config_re 
= [r
' = {config:({.+?}),assets:', r
'(?:[abc])=({.+?});'] 
 196                 config 
= self
._search
_regex
(config_re
, webpage
, 'info section', 
 198                 config 
= json
.loads(config
) 
 199         except Exception as e
: 
 200             if re
.search('The creator of this video has not given you permission to embed it on this domain.', webpage
): 
 201                 raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') 
 203             if re
.search('<form[^>]+?id="pw_form"', webpage
) is not None: 
 204                 self
._verify
_video
_password
(url
, video_id
, webpage
) 
 205                 return self
._real
_extract
(url
) 
 207                 raise ExtractorError('Unable to extract info section', 
 210             if config
.get('view') == 4: 
 211                 config 
= self
._verify
_player
_video
_password
(url
, video_id
) 
 214         video_title 
= config
["video"]["title"] 
 216         # Extract uploader and uploader_id 
 217         video_uploader 
= config
["video"]["owner"]["name"] 
 218         video_uploader_id 
= config
["video"]["owner"]["url"].split('/')[-1] if config
["video"]["owner"]["url"] else None 
 220         # Extract video thumbnail 
 221         video_thumbnail 
= config
["video"].get("thumbnail") 
 222         if video_thumbnail 
is None: 
 223             _
, video_thumbnail 
= sorted((int(width
), t_url
) for (width
, t_url
) in config
["video"]["thumbs"].items())[-1] 
 225         # Extract video description 
 226         video_description 
= None 
 228             video_description 
= get_element_by_attribute("itemprop", "description", webpage
) 
 229             if video_description
: video_description 
= clean_html(video_description
) 
 230         except AssertionError as err
: 
 231             # On some pages like (http://player.vimeo.com/video/54469442) the 
 232             # html tags are not closed, python 2.6 cannot handle it 
 233             if err
.args
[0] == 'we should not get here!': 
 238         # Extract upload date 
 239         video_upload_date 
= None 
 240         mobj 
= re
.search(r
'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage
) 
 242             video_upload_date 
= mobj
.group(1) + mobj
.group(2) + mobj
.group(3) 
 245             view_count 
= int(self
._search
_regex
(r
'UserPlays:(\d+)', webpage
, 'view count')) 
 246             like_count 
= int(self
._search
_regex
(r
'UserLikes:(\d+)', webpage
, 'like count')) 
 247             comment_count 
= int(self
._search
_regex
(r
'UserComments:(\d+)', webpage
, 'comment count')) 
 248         except RegexNotFoundError
: 
 249             # This info is only available in vimeo.com/{id} urls 
 254         # Vimeo specific: extract request signature and timestamp 
 255         sig 
= config
['request']['signature'] 
 256         timestamp 
= config
['request']['timestamp'] 
 258         # Vimeo specific: extract video codec and quality information 
 259         # First consider quality, then codecs, then take everything 
 260         codecs 
= [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')] 
 261         files 
= {'hd': [], 'sd': [], 'other': []} 
 262         config_files 
= config
["video"].get("files") or config
["request"].get("files") 
 263         for codec_name
, codec_extension 
in codecs
: 
 264             for quality 
in config_files
.get(codec_name
, []): 
 265                 format_id 
= '-'.join((codec_name
, quality
)).lower() 
 266                 key 
= quality 
if quality 
in files 
else 'other' 
 268                 if isinstance(config_files
[codec_name
], dict): 
 269                     file_info 
= config_files
[codec_name
][quality
] 
 270                     video_url 
= file_info
.get('url') 
 273                 if video_url 
is None: 
 274                     video_url 
= "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
 
 275                         %(video_id
, sig
, timestamp
, quality
, codec_name
.upper()) 
 278                     'ext': codec_extension
, 
 280                     'format_id': format_id
, 
 281                     'width': file_info
.get('width'), 
 282                     'height': file_info
.get('height'), 
 285         for key 
in ('other', 'sd', 'hd'): 
 286             formats 
+= files
[key
] 
 287         if len(formats
) == 0: 
 288             raise ExtractorError('No known codec found') 
 291         text_tracks 
= config
['request'].get('text_tracks') 
 293             for tt 
in text_tracks
: 
 294                 subtitles
[tt
['lang']] = 'http://vimeo.com' + tt
['url'] 
 296         video_subtitles 
= self
.extract_subtitles(video_id
, subtitles
) 
 297         if self
._downloader
.params
.get('listsubtitles', False): 
 298             self
._list
_available
_subtitles
(video_id
, subtitles
) 
 303             'uploader': video_uploader
, 
 304             'uploader_id': video_uploader_id
, 
 305             'upload_date': video_upload_date
, 
 306             'title': video_title
, 
 307             'thumbnail': video_thumbnail
, 
 308             'description': video_description
, 
 311             'view_count': view_count
, 
 312             'like_count': like_count
, 
 313             'comment_count': comment_count
, 
 314             'subtitles': video_subtitles
, 
 318 class VimeoChannelIE(InfoExtractor
): 
 319     IE_NAME 
= 'vimeo:channel' 
 320     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/channels/(?P<id>[^/]+)' 
 321     _MORE_PAGES_INDICATOR 
= r
'<a.+?rel="next"' 
 322     _TITLE_RE 
= r
'<link rel="alternate"[^>]+?title="(.*?)"' 
 324     def _page_url(self
, base_url
, pagenum
): 
 325         return '%s/videos/page:%d/' % (base_url
, pagenum
) 
 327     def _extract_list_title(self
, webpage
): 
 328         return self
._html
_search
_regex
(self
._TITLE
_RE
, webpage
, 'list title') 
 330     def _extract_videos(self
, list_id
, base_url
): 
 332         for pagenum 
in itertools
.count(1): 
 333             webpage 
= self
._download
_webpage
( 
 334                 self
._page
_url
(base_url
, pagenum
) ,list_id
, 
 335                 'Downloading page %s' % pagenum
) 
 336             video_ids
.extend(re
.findall(r
'id="clip_(\d+?)"', webpage
)) 
 337             if re
.search(self
._MORE
_PAGES
_INDICATOR
, webpage
, re
.DOTALL
) is None: 
 340         entries 
= [self
.url_result('http://vimeo.com/%s' % video_id
, 'Vimeo') 
 341                    for video_id 
in video_ids
] 
 342         return {'_type': 'playlist', 
 344                 'title': self
._extract
_list
_title
(webpage
), 
 348     def _real_extract(self
, url
): 
 349         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 350         channel_id 
=  mobj
.group('id') 
 351         return self
._extract
_videos
(channel_id
, 'http://vimeo.com/channels/%s' % channel_id
) 
 354 class VimeoUserIE(VimeoChannelIE
): 
 355     IE_NAME 
= 'vimeo:user' 
 356     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/(?P<name>[^/]+)(?:/videos|[#?]|$)' 
 357     _TITLE_RE 
= r
'<a[^>]+?class="user">([^<>]+?)</a>' 
 360     def suitable(cls
, url
): 
 361         if VimeoChannelIE
.suitable(url
) or VimeoIE
.suitable(url
) or VimeoAlbumIE
.suitable(url
) or VimeoGroupsIE
.suitable(url
): 
 363         return super(VimeoUserIE
, cls
).suitable(url
) 
 365     def _real_extract(self
, url
): 
 366         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 367         name 
= mobj
.group('name') 
 368         return self
._extract
_videos
(name
, 'http://vimeo.com/%s' % name
) 
 371 class VimeoAlbumIE(VimeoChannelIE
): 
 372     IE_NAME 
= 'vimeo:album' 
 373     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/album/(?P<id>\d+)' 
 374     _TITLE_RE 
= r
'<header id="page_header">\n\s*<h1>(.*?)</h1>' 
 376     def _page_url(self
, base_url
, pagenum
): 
 377         return '%s/page:%d/' % (base_url
, pagenum
) 
 379     def _real_extract(self
, url
): 
 380         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 381         album_id 
= mobj
.group('id') 
 382         return self
._extract
_videos
(album_id
, 'http://vimeo.com/album/%s' % album_id
) 
 385 class VimeoGroupsIE(VimeoAlbumIE
): 
 386     IE_NAME 
= 'vimeo:group' 
 387     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)' 
 389     def _extract_list_title(self
, webpage
): 
 390         return self
._og
_search
_title
(webpage
) 
 392     def _real_extract(self
, url
): 
 393         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 394         name 
= mobj
.group('name') 
 395         return self
._extract
_videos
(name
, 'http://vimeo.com/groups/%s' % name
) 
 398 class VimeoReviewIE(InfoExtractor
): 
 399     IE_NAME 
= 'vimeo:review' 
 400     IE_DESC 
= 'Review pages on vimeo' 
 401     _VALID_URL 
= r
'(?:https?://)?vimeo\.com/[^/]+/review/(?P<id>[^/]+)' 
 403         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 
 404         'file': '75524534.mp4', 
 405         'md5': 'c507a72f780cacc12b2248bb4006d253', 
 407             'title': "DICK HARDWICK 'Comedian'", 
 408             'uploader': 'Richard Hardwick', 
 412     def _real_extract(self
, url
): 
 413         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 414         video_id 
= mobj
.group('id') 
 415         player_url 
= 'https://player.vimeo.com/player/' + video_id
 
 416         return self
.url_result(player_url
, 'Vimeo', video_id
)