2 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  10 from ..compat 
import ( 
  13     compat_urllib_request
, 
  28 class VimeoBaseInfoExtractor(InfoExtractor
): 
  29     _NETRC_MACHINE 
= 'vimeo' 
  30     _LOGIN_REQUIRED 
= False 
  33         (username
, password
) = self
._get
_login
_info
() 
  35             if self
._LOGIN
_REQUIRED
: 
  36                 raise ExtractorError('No login info available, needed for using %s.' % self
.IE_NAME
, expected
=True) 
  39         login_url 
= 'https://vimeo.com/log_in' 
  40         webpage 
= self
._download
_webpage
(login_url
, None, False) 
  41         token 
= self
._search
_regex
(r
'xsrft: \'(.*?
)\'', webpage, 'login token
') 
  42         data = urlencode_postdata({ 
  49         login_request = compat_urllib_request.Request(login_url, data) 
  50         login_request.add_header('Content
-Type
', 'application
/x
-www
-form
-urlencoded
') 
  51         login_request.add_header('Cookie
', 'xsrft
=%s' % token) 
  52         self._download_webpage(login_request, None, False, 'Wrong login info
') 
  55 class VimeoIE(VimeoBaseInfoExtractor): 
  56     """Information extractor for vimeo.com.""" 
  58     # _VALID_URL matches Vimeo URLs 
  61         (?:(?:www|(?P<player>player))\.)? 
  62         vimeo(?P<pro>pro)?\.com/ 
  63         (?!channels/[^/?#]+/?(?:$|[?#])|album/) 
  65         (?:(?:play_redirect_hls|moogaloop\.swf)\?clip_id=)? 
  68         /?(?:[?&].*)?(?:[#].*)?$''' 
  72             'url
': 'http
://vimeo
.com
/56015672#at=0', 
  73             'md5': '8879b6cc097e987f02484baf890129e5', 
  77                 "upload_date": "20121220", 
  78                 "description": "This is a test case for youtube-dl.\nFor more information, see github.com/rg3/youtube-dl\nTest chars: \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
  79                 "uploader_id": "user7108434", 
  80                 "uploader": "Filippo Valsorda", 
  81                 "title": "youtube-dl test video - \u2605 \" ' \u5e78 / \\ \u00e4 \u21ad \U0001d550", 
  86             'url': 'http://vimeopro.com/openstreetmapus/state-of-the-map-us-2013/video/68093876', 
  87             'md5': '3b5ca6aa22b60dfeeadf50b72e44ed82', 
  88             'note': 'Vimeo Pro video (#1197)', 
  92                 'uploader_id': 'openstreetmapus', 
  93                 'uploader': 'OpenStreetMap US', 
  94                 'title': 'Andy Allan - Putting the Carto into OpenStreetMap Cartography', 
  95                 'description': 'md5:380943ec71b89736ff4bf27183233d09', 
 100             'url': 'http://player.vimeo.com/video/54469442', 
 101             'md5': '619b811a4417aa4abe78dc653becf511', 
 102             'note': 'Videos that embed the url in the player page', 
 106                 'title': 'Kathy Sierra: Building the minimum Badass User, Business of Software 2012', 
 107                 'uploader': 'The BLN & Business of Software', 
 108                 'uploader_id': 'theblnbusinessofsoftware', 
 114             'url': 'http://vimeo.com/68375962', 
 115             'md5': 'aaf896bdb7ddd6476df50007a0ac0ae7', 
 116             'note': 'Video protected with password', 
 120                 'title': 'youtube-dl password protected test video', 
 121                 'upload_date': '20130614', 
 122                 'uploader_id': 'user18948128', 
 123                 'uploader': 'Jaime Marquínez Ferrándiz', 
 125                 'description': 'This is "youtube-dl password protected test video" by Jaime Marquínez Ferrándiz on Vimeo, the home for high quality videos and the people who love them.', 
 128                 'videopassword': 'youtube-dl', 
 132             'url': 'http://vimeo.com/channels/keypeele/75629013', 
 133             'md5': '2f86a05afe9d7abc0b9126d229bbe15d', 
 134             'note': 'Video is freely available via original URL ' 
 135                     'and protected with password when accessed via http://vimeo.com/75629013', 
 139                 'title': 'Key & Peele: Terrorist Interrogation', 
 140                 'description': 'md5:8678b246399b070816b12313e8b4eb5c', 
 141                 'uploader_id': 'atencio', 
 142                 'uploader': 'Peter Atencio', 
 147             'url': 'http://vimeo.com/76979871', 
 148             'md5': '3363dd6ffebe3784d56f4132317fd446', 
 149             'note': 'Video with subtitles', 
 153                 'title': 'The New Vimeo Player (You Know, For Videos)', 
 154                 'description': 'md5:2ec900bf97c3f389378a96aee11260ea', 
 155                 'upload_date': '20131015', 
 156                 'uploader_id': 'staff', 
 157                 'uploader': 'Vimeo Staff', 
 162             # from https://www.ouya.tv/game/Pier-Solar-and-the-Great-Architects/ 
 163             'url': 'https://player.vimeo.com/video/98044508', 
 164             'note': 'The js code contains assignments to the same variable as the config', 
 168                 'title': 'Pier Solar OUYA Official Trailer', 
 169                 'uploader': 'Tulio Gonçalves', 
 170                 'uploader_id': 'user28849593', 
 175     def _verify_video_password(self
, url
, video_id
, webpage
): 
 176         password 
= self
._downloader
.params
.get('videopassword', None) 
 178             raise ExtractorError('This video is protected by a password, use the --video-password option', expected
=True) 
 179         token 
= self
._search
_regex
(r
'xsrft: \'(.*?
)\'', webpage, 'login token
') 
 180         data = compat_urllib_parse.urlencode({ 
 181             'password
': password, 
 184         # I didn't manage to use the password 
with https
 
 185         if url
.startswith('https'): 
 186             pass_url 
= url
.replace('https', 'http') 
 189         password_request 
= compat_urllib_request
.Request(pass_url 
+ '/password', data
) 
 190         password_request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 191         password_request
.add_header('Cookie', 'xsrft=%s' % token
) 
 192         return self
._download
_webpage
( 
 193             password_request
, video_id
, 
 194             'Verifying the password', 'Wrong password') 
 196     def _verify_player_video_password(self
, url
, video_id
): 
 197         password 
= self
._downloader
.params
.get('videopassword', None) 
 199             raise ExtractorError('This video is protected by a password, use the --video-password option') 
 200         data 
= compat_urllib_parse
.urlencode({'password': password
}) 
 201         pass_url 
= url 
+ '/check-password' 
 202         password_request 
= compat_urllib_request
.Request(pass_url
, data
) 
 203         password_request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 204         return self
._download
_json
( 
 205             password_request
, video_id
, 
 206             'Verifying the password', 
 209     def _real_initialize(self
): 
 212     def _real_extract(self
, url
): 
 213         url
, data 
= unsmuggle_url(url
) 
 214         headers 
= std_headers
 
 216             headers 
= headers
.copy() 
 218         if 'Referer' not in headers
: 
 219             headers
['Referer'] = url
 
 221         # Extract ID from URL 
 222         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 223         video_id 
= mobj
.group('id') 
 225         if mobj
.group('pro') or mobj
.group('player'): 
 226             url 
= 'http://player.vimeo.com/video/' + video_id
 
 228         password 
= self
._downloader
.params
.get('videopassword', None) 
 230             headers
['Cookie'] = '%s_password=%s' % ( 
 231                 video_id
, hashlib
.md5(password
.encode('utf-8')).hexdigest()) 
 233         # Retrieve video webpage to extract further information 
 234         request 
= compat_urllib_request
.Request(url
, None, headers
) 
 236             webpage 
= self
._download
_webpage
(request
, video_id
) 
 237         except ExtractorError 
as ee
: 
 238             if isinstance(ee
.cause
, compat_HTTPError
) and ee
.cause
.code 
== 403: 
 239                 errmsg 
= ee
.cause
.read() 
 240                 if b
'Because of its privacy settings, this video cannot be played here' in errmsg
: 
 241                     raise ExtractorError( 
 242                         'Cannot download embed-only video without embedding ' 
 243                         'URL. Please call youtube-dl with the URL of the page ' 
 244                         'that embeds this video.', 
 248         # Now we begin extracting as much information as we can from what we 
 249         # retrieved. First we extract the information common to all extractors, 
 250         # and later we extract those that are Vimeo specific. 
 251         self
.report_extraction(video_id
) 
 253         # Extract the config JSON 
 256                 config_url 
= self
._html
_search
_regex
( 
 257                     r
' data-config-url="(.+?)"', webpage
, 'config URL') 
 258                 config_json 
= self
._download
_webpage
(config_url
, video_id
) 
 259                 config 
= json
.loads(config_json
) 
 260             except RegexNotFoundError
: 
 261                 # For pro videos or player.vimeo.com urls 
 262                 # We try to find out which variable the config dict is assigned to 
 263                 m_variable_name 
= re
.search('(\w)\.video\.id', webpage
) 
 264                 if m_variable_name 
is not None: 
 265                     config_re 
= r
'%s=({[^}].+?});' % re
.escape(m_variable_name
.group(1)) 
 267                     config_re 
= [r
' = {config:({.+?}),assets:', r
'(?:[abc])=({.+?});'] 
 268                 config 
= self
._search
_regex
(config_re
, webpage
, 'info section', 
 270                 config 
= json
.loads(config
) 
 271         except Exception as e
: 
 272             if re
.search('The creator of this video has not given you permission to embed it on this domain.', webpage
): 
 273                 raise ExtractorError('The author has restricted the access to this video, try with the "--referer" option') 
 275             if re
.search(r
'<form[^>]+?id="pw_form"', webpage
) is not None: 
 276                 if data 
and '_video_password_verified' in data
: 
 277                     raise ExtractorError('video password verification failed!') 
 278                 self
._verify
_video
_password
(url
, video_id
, webpage
) 
 279                 return self
._real
_extract
( 
 280                     smuggle_url(url
, {'_video_password_verified': 'verified'})) 
 282                 raise ExtractorError('Unable to extract info section', 
 285             if config
.get('view') == 4: 
 286                 config 
= self
._verify
_player
_video
_password
(url
, video_id
) 
 289         video_title 
= config
["video"]["title"] 
 291         # Extract uploader and uploader_id 
 292         video_uploader 
= config
["video"]["owner"]["name"] 
 293         video_uploader_id 
= config
["video"]["owner"]["url"].split('/')[-1] if config
["video"]["owner"]["url"] else None 
 295         # Extract video thumbnail 
 296         video_thumbnail 
= config
["video"].get("thumbnail") 
 297         if video_thumbnail 
is None: 
 298             video_thumbs 
= config
["video"].get("thumbs") 
 299             if video_thumbs 
and isinstance(video_thumbs
, dict): 
 300                 _
, video_thumbnail 
= sorted((int(width 
if width
.isdigit() else 0), t_url
) for (width
, t_url
) in video_thumbs
.items())[-1] 
 302         # Extract video description 
 304         video_description 
= self
._html
_search
_regex
( 
 305             r
'(?s)<div\s+class="[^"]*description[^"]*"[^>]*>(.*?)</div>', 
 306             webpage
, 'description', default
=None) 
 307         if not video_description
: 
 308             video_description 
= self
._html
_search
_meta
( 
 309                 'description', webpage
, default
=None) 
 310         if not video_description 
and mobj
.group('pro'): 
 311             orig_webpage 
= self
._download
_webpage
( 
 313                 note
='Downloading webpage for description', 
 316                 video_description 
= self
._html
_search
_meta
( 
 317                     'description', orig_webpage
, default
=None) 
 318         if not video_description 
and not mobj
.group('player'): 
 319             self
._downloader
.report_warning('Cannot find video description') 
 321         # Extract video duration 
 322         video_duration 
= int_or_none(config
["video"].get("duration")) 
 324         # Extract upload date 
 325         video_upload_date 
= None 
 326         mobj 
= re
.search(r
'<meta itemprop="dateCreated" content="(\d{4})-(\d{2})-(\d{2})T', webpage
) 
 328             video_upload_date 
= mobj
.group(1) + mobj
.group(2) + mobj
.group(3) 
 331             view_count 
= int(self
._search
_regex
(r
'UserPlays:(\d+)', webpage
, 'view count')) 
 332             like_count 
= int(self
._search
_regex
(r
'UserLikes:(\d+)', webpage
, 'like count')) 
 333             comment_count 
= int(self
._search
_regex
(r
'UserComments:(\d+)', webpage
, 'comment count')) 
 334         except RegexNotFoundError
: 
 335             # This info is only available in vimeo.com/{id} urls 
 340         # Vimeo specific: extract request signature and timestamp 
 341         sig 
= config
['request']['signature'] 
 342         timestamp 
= config
['request']['timestamp'] 
 344         # Vimeo specific: extract video codec and quality information 
 345         # First consider quality, then codecs, then take everything 
 346         codecs 
= [('vp6', 'flv'), ('vp8', 'flv'), ('h264', 'mp4')] 
 347         files 
= {'hd': [], 'sd': [], 'other': []} 
 348         config_files 
= config
["video"].get("files") or config
["request"].get("files") 
 349         for codec_name
, codec_extension 
in codecs
: 
 350             for quality 
in config_files
.get(codec_name
, []): 
 351                 format_id 
= '-'.join((codec_name
, quality
)).lower() 
 352                 key 
= quality 
if quality 
in files 
else 'other' 
 354                 if isinstance(config_files
[codec_name
], dict): 
 355                     file_info 
= config_files
[codec_name
][quality
] 
 356                     video_url 
= file_info
.get('url') 
 359                 if video_url 
is None: 
 360                     video_url 
= "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \
 
 361                         % (video_id
, sig
, timestamp
, quality
, codec_name
.upper()) 
 364                     'ext': codec_extension
, 
 366                     'format_id': format_id
, 
 367                     'width': file_info
.get('width'), 
 368                     'height': file_info
.get('height'), 
 371         for key 
in ('other', 'sd', 'hd'): 
 372             formats 
+= files
[key
] 
 373         if len(formats
) == 0: 
 374             raise ExtractorError('No known codec found') 
 377         text_tracks 
= config
['request'].get('text_tracks') 
 379             for tt 
in text_tracks
: 
 380                 subtitles
[tt
['lang']] = [{ 
 382                     'url': 'http://vimeo.com' + tt
['url'], 
 387             'uploader': video_uploader
, 
 388             'uploader_id': video_uploader_id
, 
 389             'upload_date': video_upload_date
, 
 390             'title': video_title
, 
 391             'thumbnail': video_thumbnail
, 
 392             'description': video_description
, 
 393             'duration': video_duration
, 
 396             'view_count': view_count
, 
 397             'like_count': like_count
, 
 398             'comment_count': comment_count
, 
 399             'subtitles': subtitles
, 
 403 class VimeoChannelIE(InfoExtractor
): 
 404     IE_NAME 
= 'vimeo:channel' 
 405     _VALID_URL 
= r
'https?://vimeo\.com/channels/(?P<id>[^/?#]+)/?(?:$|[?#])' 
 406     _MORE_PAGES_INDICATOR 
= r
'<a.+?rel="next"' 
 407     _TITLE_RE 
= r
'<link rel="alternate"[^>]+?title="(.*?)"' 
 409         'url': 'http://vimeo.com/channels/tributes', 
 412             'title': 'Vimeo Tributes', 
 414         'playlist_mincount': 25, 
 417     def _page_url(self
, base_url
, pagenum
): 
 418         return '%s/videos/page:%d/' % (base_url
, pagenum
) 
 420     def _extract_list_title(self
, webpage
): 
 421         return self
._html
_search
_regex
(self
._TITLE
_RE
, webpage
, 'list title') 
 423     def _login_list_password(self
, page_url
, list_id
, webpage
): 
 424         login_form 
= self
._search
_regex
( 
 425             r
'(?s)<form[^>]+?id="pw_form"(.*?)</form>', 
 426             webpage
, 'login form', default
=None) 
 430         password 
= self
._downloader
.params
.get('videopassword', None) 
 432             raise ExtractorError('This album is protected by a password, use the --video-password option', expected
=True) 
 433         fields 
= dict(re
.findall(r
'''(?x)<input\s+ 
 438         token 
= self
._search
_regex
(r
'xsrft: \'(.*?
)\'', webpage, 'login token
') 
 439         fields['token
'] = token 
 440         fields['password
'] = password 
 441         post = compat_urllib_parse.urlencode(fields) 
 442         password_path = self._search_regex( 
 443             r'action
="([^"]+)"', login_form, 'password URL') 
 444         password_url = compat_urlparse.urljoin(page_url, password_path) 
 445         password_request = compat_urllib_request.Request(password_url, post) 
 446         password_request.add_header('Content-type', 'application/x-www-form-urlencoded') 
 447         self._set_cookie('vimeo.com', 'xsrft', token) 
 449         return self._download_webpage( 
 450             password_request, list_id, 
 451             'Verifying the password', 'Wrong password') 
 453     def _extract_videos(self, list_id, base_url): 
 455         for pagenum in itertools.count(1): 
 456             page_url = self._page_url(base_url, pagenum) 
 457             webpage = self._download_webpage( 
 459                 'Downloading page %s' % pagenum) 
 462                 webpage = self._login_list_password(page_url, list_id, webpage) 
 464             video_ids.extend(re.findall(r'id="clip_(\d
+?
)"', webpage)) 
 465             if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None: 
 468         entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo') 
 469                    for video_id in video_ids] 
 470         return {'_type': 'playlist', 
 472                 'title': self._extract_list_title(webpage), 
 476     def _real_extract(self, url): 
 477         mobj = re.match(self._VALID_URL, url) 
 478         channel_id = mobj.group('id') 
 479         return self._extract_videos(channel_id, 'http://vimeo.com/channels/%s' % channel_id) 
 482 class VimeoUserIE(VimeoChannelIE): 
 483     IE_NAME = 'vimeo:user' 
 484     _VALID_URL = r'https?://vimeo\.com/(?![0-9]+(?:$|[?#/]))(?P<name>[^/]+)(?:/videos|[#?]|$)' 
 485     _TITLE_RE = r'<a[^>]+?class="user
">([^<>]+?)</a>' 
 487         'url': 'http://vimeo.com/nkistudio/videos', 
 492         'playlist_mincount': 66, 
 495     def _real_extract(self, url): 
 496         mobj = re.match(self._VALID_URL, url) 
 497         name = mobj.group('name') 
 498         return self._extract_videos(name, 'http://vimeo.com/%s' % name) 
 501 class VimeoAlbumIE(VimeoChannelIE): 
 502     IE_NAME = 'vimeo:album' 
 503     _VALID_URL = r'https?://vimeo\.com/album/(?P<id>\d+)' 
 504     _TITLE_RE = r'<header id="page_header
">\n\s*<h1>(.*?)</h1>' 
 506         'url': 'http://vimeo.com/album/2632481', 
 509             'title': 'Staff Favorites: November 2013', 
 511         'playlist_mincount': 13, 
 513         'note': 'Password-protected album', 
 514         'url': 'https://vimeo.com/album/3253534', 
 521             'videopassword': 'youtube-dl', 
 525     def _page_url(self, base_url, pagenum): 
 526         return '%s/page:%d/' % (base_url, pagenum) 
 528     def _real_extract(self, url): 
 529         album_id = self._match_id(url) 
 530         return self._extract_videos(album_id, 'http://vimeo.com/album/%s' % album_id) 
 533 class VimeoGroupsIE(VimeoAlbumIE): 
 534     IE_NAME = 'vimeo:group' 
 535     _VALID_URL = r'(?:https?://)?vimeo\.com/groups/(?P<name>[^/]+)' 
 537         'url': 'http://vimeo.com/groups/rolexawards', 
 540             'title': 'Rolex Awards for Enterprise', 
 542         'playlist_mincount': 73, 
 545     def _extract_list_title(self, webpage): 
 546         return self._og_search_title(webpage) 
 548     def _real_extract(self, url): 
 549         mobj = re.match(self._VALID_URL, url) 
 550         name = mobj.group('name') 
 551         return self._extract_videos(name, 'http://vimeo.com/groups/%s' % name) 
 554 class VimeoReviewIE(InfoExtractor): 
 555     IE_NAME = 'vimeo:review' 
 556     IE_DESC = 'Review pages on vimeo' 
 557     _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)' 
 559         'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', 
 560         'md5': 'c507a72f780cacc12b2248bb4006d253', 
 564             'title': "DICK HARDWICK 
'Comedian'", 
 565             'uploader': 'Richard Hardwick', 
 568         'note': 'video player needs Referer', 
 569         'url': 'http://vimeo.com/user22258446/review/91613211/13f927e053', 
 570         'md5': '6295fdab8f4bf6a002d058b2c6dce276', 
 574             'title': 're:(?i)^Death by dogma versus assembling agile . Sander Hoogendoorn', 
 575             'uploader': 'DevWeek Events', 
 577             'thumbnail': 're:^https?://.*\.jpg$', 
 581     def _real_extract(self, url): 
 582         mobj = re.match(self._VALID_URL, url) 
 583         video_id = mobj.group('id') 
 584         player_url = 'https://player.vimeo.com/player/' + video_id 
 585         return self.url_result(player_url, 'Vimeo', video_id) 
 588 class VimeoWatchLaterIE(VimeoBaseInfoExtractor, VimeoChannelIE): 
 589     IE_NAME = 'vimeo:watchlater' 
 590     IE_DESC = 'Vimeo watch later list, "vimeowatchlater
" keyword (requires authentication)' 
 591     _VALID_URL = r'https?://vimeo\.com/home/watchlater|:vimeowatchlater' 
 592     _LOGIN_REQUIRED = True 
 593     _TITLE_RE = r'href="/home
/watchlater
".*?>(.*?)<' 
 595         'url': 'http://vimeo.com/home/watchlater', 
 596         'only_matching': True, 
 599     def _real_initialize(self): 
 602     def _page_url(self, base_url, pagenum): 
 603         url = '%s/page:%d/' % (base_url, pagenum) 
 604         request = compat_urllib_request.Request(url) 
 605         # Set the header to get a partial html page with the ids, 
 606         # the normal page doesn't contain them. 
 607         request.add_header('X-Requested-With', 'XMLHttpRequest') 
 610     def _real_extract(self, url): 
 611         return self._extract_videos('watchlater', 'https://vimeo.com/home/watchlater') 
 614 class VimeoLikesIE(InfoExtractor): 
 615     _VALID_URL = r'https?://(?:www\.)?vimeo\.com/user(?P<id>[0-9]+)/likes/?(?:$|[?#]|sort:)' 
 616     IE_NAME = 'vimeo:likes' 
 617     IE_DESC = 'Vimeo user likes' 
 619         'url': 'https://vimeo.com/user755559/likes/', 
 620         'playlist_mincount': 293, 
 622             'id': 'user755559_likes', 
 623             "description
": "See all the videos urza likes
", 
 624             "title
": 'Videos urza likes', 
 628     def _real_extract(self, url): 
 629         user_id = self._match_id(url) 
 630         webpage = self._download_webpage(url, user_id) 
 631         page_count = self._int( 
 633                 r'''(?x)<li><a\s+href="[^
"]+"\s
+data
-page
="([0-9]+)"> 
 634                     .*?
</a
></li
>\s
*<li\s
+class="pagination_next"> 
 635                 ''', webpage, 'page count'), 
 636             'page count', fatal=True) 
 638         title = self._html_search_regex( 
 639             r'(?s)<h1>(.+?)</h1>', webpage, 'title', fatal=False) 
 640         description = self._html_search_meta('description', webpage) 
 643             page_url = '%s//vimeo.com/user%s/likes/page:%d/sort:date' % ( 
 644                 self.http_scheme(), user_id, idx + 1) 
 645             webpage = self._download_webpage( 
 647                 note='Downloading page %d/%d' % (idx + 1, page_count)) 
 648             video_list = self._search_regex( 
 649                 r'(?s)<ol class="js-browse_list[^"]+"[^>]*>(.*?)</ol>', 
 650                 webpage, 'video content') 
 652                 r'<li[^>]*>\s*<a\s+href="([^"]+)"', video_list) 
 656                     'url': compat_urlparse.urljoin(page_url, path), 
 659         pl = InAdvancePagedList(_get_page, page_count, PAGE_SIZE) 
 663             'id': 'user%s_likes' % user_id, 
 665             'description': description,