5 from .common 
import InfoExtractor
 
  11     get_element_by_attribute
, 
  16 class VimeoIE(InfoExtractor
): 
  17     """Information extractor for vimeo.com.""" 
  19     # _VALID_URL matches Vimeo URLs 
  20     _VALID_URL 
= r
'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)(?:[?].*)?$' 
  21     _NETRC_MACHINE 
= 'vimeo' 
  25             u
'url': u
'http://vimeo.com/56015672', 
  26             u
'file': u
'56015672.mp4', 
  27             u
'md5': u
'8879b6cc097e987f02484baf890129e5', 
  29                 u
"upload_date": u
"20121220",  
  30                 u
"description": u
"This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550",  
  31                 u
"uploader_id": u
"user7108434",  
  32                 u
"uploader": u
"Filippo Valsorda",  
  33                 u
"title": u
"youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
  37             u
'url': u
'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', 
  38             u
'file': u
'68093876.mp4', 
  39             u
'md5': u
'3b5ca6aa22b60dfeeadf50b72e44ed82', 
  40             u
'note': u
'Vimeo Pro video (#1197)', 
  42                 u
'uploader_id': u
'openstreetmapus',  
  43                 u
'uploader': u
'OpenStreetMap US',  
  44                 u
'title': u
'Andy Allan - Putting the Carto into OpenStreetMap Cartography', 
  48             u
'url': u
'http://player.vimeo.com/video/54469442', 
  49             u
'file': u
'54469442.mp4', 
  50             u
'md5': u
'619b811a4417aa4abe78dc653becf511', 
  51             u
'note': u
'Videos that embed the url in the player page', 
  53                 u
'title': u
'Kathy Sierra: Building the minimum Badass User, Business of Software', 
  54                 u
'uploader': u
'The BLN & Business of Software', 
  60         (username
, password
) = self
._get
_login
_info
() 
  64         login_url 
= 'https://vimeo.com/log_in' 
  65         webpage 
= self
._download
_webpage
(login_url
, None, False) 
  66         token 
= re
.search(r
'xsrft: \'(.*?
)\'', webpage).group(1) 
  67         data = compat_urllib_parse.urlencode({'email
': username, 
  73         login_request = compat_urllib_request.Request(login_url, data) 
  74         login_request.add_header('Content
-Type
', 'application
/x
-www
-form
-urlencoded
') 
  75         login_request.add_header('Cookie
', 'xsrft
=%s' % token) 
  76         self._download_webpage(login_request, None, False, u'Wrong login info
') 
  78     def _verify_video_password(self, url, video_id, webpage): 
  79         password = self._downloader.params.get('videopassword
', None) 
  81             raise ExtractorError(u'This video 
is protected by a password
, use the 
--video
-password option
') 
  82         token = re.search(r'xsrft
: \'(.*?
)\'', webpage).group(1) 
  83         data = compat_urllib_parse.urlencode({'password
': password, 
  85         # I didn't manage to use the password with https, so fall back to http
  86         if url
.startswith('https'): 
  87             pass_url 
= url
.replace('https','http') 
  90         password_request 
= compat_urllib_request
.Request(pass_url
+'/password', data
) 
  91         password_request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
  92         password_request
.add_header('Cookie', 'xsrft=%s' % token
) 
  93         self
._download
_webpage
(password_request
, video_id
, 
  94                                u
'Verifying the password', 
  97     def _real_initialize(self
): 
 100     def _real_extract(self
, url
, new_video
=True): 
 101         # Extract ID from URL 
 102         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 104             raise ExtractorError(u
'Invalid URL: %s' % url
) 
 106         video_id 
= mobj
.group('id') 
 107         if not mobj
.group('proto'): 
 108             url 
= 'https://' + url
 
 109         elif mobj
.group('pro'): 
 110             url 
= 'http://player.vimeo.com/video/' + video_id
 
 111         elif mobj
.group('direct_link'): 
 112             url 
= 'https://vimeo.com/' + video_id
 
 114         # Retrieve video webpage to extract further information 
 115         request 
= compat_urllib_request
.Request(url
, None, std_headers
) 
 116         webpage 
= self
._download
_webpage
(request
, video_id
) 
 118         # Now we begin extracting as much information as we can from what we 
 119         # retrieved. First we extract the information common to all extractors, 
 120         # and later we extract those that are Vimeo-specific.
 121         self
.report_extraction(video_id
) 
 123         # Extract the config JSON 
 125             config 
= self
._search
_regex
([r
' = {config:({.+?}),assets:', r
'c=({.+?);'], 
 126                 webpage
, u
'info section', flags
=re
.DOTALL
) 
 127             config 
= json
.loads(config
) 
 129             if re
.search('The creator of this video has not given you permission to embed it on this domain.', webpage
): 
 130                 raise ExtractorError(u
'The author has restricted the access to this video, try with the "--referer" option') 
 132             if re
.search('If so please provide the correct password.', webpage
): 
 133                 self
._verify
_video
_password
(url
, video_id
, webpage
) 
 134                 return self
._real
_extract
(url
) 
 136                 raise ExtractorError(u
'Unable to extract info section') 
 139         video_title 
= config
["video"]["title"] 
 141         # Extract uploader and uploader_id 
 142         video_uploader 
= config
["video"]["owner"]["name"] 
 143         video_uploader_id 
= config
["video"]["owner"]["url"].split('/')[-1] if config
["video"]["owner"]["url"] else None 
 145         # Extract video thumbnail 
 146         video_thumbnail 
= config
["video"].get("thumbnail") 
 147         if video_thumbnail 
is None: 
 148             _
, video_thumbnail 
= sorted((int(width
), t_url
) for (width
, t_url
) in config
["video"]["thumbs"].items())[-1] 
 150         # Extract video description 
 151         video_description 
= None 
 153             video_description 
= get_element_by_attribute("itemprop", "description", webpage
) 
 154             if video_description
: video_description 
= clean_html(video_description
) 
 155         except AssertionError as err
: 
 156             # On some pages like (http://player.vimeo.com/video/54469442) the 
 157             # html tags are not closed, python 2.6 cannot handle it 
 158             if err
.args
[0] == 'we should not get here!': 
 163         # Extract upload date 
 164         video_upload_date 
= None 
 165         mobj 
= re
.search(r
'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage
) 
 167             video_upload_date 
= mobj
.group(1) + mobj
.group(2) + mobj
.group(3) 
 169         # Vimeo specific: extract request signature and timestamp 
 170         sig 
= config
['request']['signature'] 
 171         timestamp 
= config
['request']['timestamp'] 
 173         # Vimeo specific: extract video codec and quality information 
 174         # First consider quality, then codecs, then take everything 
 175         # TODO bind to format param 
 176         codecs 
= [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')] 
 177         files 
= { 'hd': [], 'sd': [], 'other': []} 
 178         config_files 
= config
["video"].get("files") or config
["request"].get("files") 
 179         for codec_name
, codec_extension 
in codecs
: 
 180             if codec_name 
in config_files
: 
 181                 if 'hd' in config_files
[codec_name
]: 
 182                     files
['hd'].append((codec_name
, codec_extension
, 'hd')) 
 183                 elif 'sd' in config_files
[codec_name
]: 
 184                     files
['sd'].append((codec_name
, codec_extension
, 'sd')) 
 186                     files
['other'].append((codec_name
, codec_extension
, config_files
[codec_name
][0])) 
 188         for quality 
in ('hd', 'sd', 'other'): 
 189             if len(files
[quality
]) > 0: 
 190                 video_quality 
= files
[quality
][0][2] 
 191                 video_codec 
= files
[quality
][0][0] 
 192                 video_extension 
= files
[quality
][0][1] 
 193                 self
.to_screen(u
'%s: Downloading %s file at %s quality' % (video_id
, video_codec
.upper(), video_quality
)) 
 196             raise ExtractorError(u
'No known codec found') 
 199         if isinstance(config_files
[video_codec
], dict): 
 200             video_url 
= config_files
[video_codec
][video_quality
].get("url") 
 201         if video_url 
is None: 
 202             video_url 
= "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
 
 203                         %(video_id
, sig
, timestamp
, video_quality
, video_codec
.upper()) 
 208             'uploader': video_uploader
, 
 209             'uploader_id': video_uploader_id
, 
 210             'upload_date':  video_upload_date
, 
 211             'title':    video_title
, 
 212             'ext':      video_extension
, 
 213             'thumbnail':    video_thumbnail
, 
 214             'description':  video_description
, 
 218 class VimeoChannelIE(InfoExtractor
): 
 219     IE_NAME 
= u
'vimeo:channel' 
 220     _VALID_URL 
= r
'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)' 
 221     _MORE_PAGES_INDICATOR 
= r
'<a.+?rel="next"' 
 223     def _real_extract(self
, url
): 
 224         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 225         channel_id 
=  mobj
.group('id') 
 228         for pagenum 
in itertools
.count(1): 
 229             webpage 
= self
._download
_webpage
('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id
, pagenum
), 
 230                                              channel_id
, u
'Downloading page %s' % pagenum
) 
 231             video_ids
.extend(re
.findall(r
'id="clip_(\d+?)"', webpage
)) 
 232             if re
.search(self
._MORE
_PAGES
_INDICATOR
, webpage
, re
.DOTALL
) is None: 
 235         entries 
= [self
.url_result('http://vimeo.com/%s' % video_id
, 'Vimeo') 
 236                    for video_id 
in video_ids
] 
 237         channel_title 
= self
._html
_search
_regex
(r
'<a href="/channels/%s">(.*?)</a>' % channel_id
, 
 238                                                 webpage
, u
'channel title') 
 239         return {'_type': 'playlist', 
 241                 'title': channel_title
,