]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vk.py
   2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
   9 from ..compat 
import compat_urlparse
 
  24 from .dailymotion 
import DailymotionIE
 
  25 from .odnoklassniki 
import OdnoklassnikiIE
 
  26 from .pladform 
import PladformIE
 
  27 from .vimeo 
import VimeoIE
 
  28 from .youtube 
import YoutubeIE
 
  31 class VKBaseIE(InfoExtractor
): 
  35         username
, password 
= self
._get
_login
_info
() 
  39         login_page
, url_handle 
= self
._download
_webpage
_handle
( 
  40             'https://vk.com', None, 'Downloading login page') 
  42         login_form 
= self
._hidden
_inputs
(login_page
) 
  45             'email': username
.encode('cp1251'), 
  46             'pass': password
.encode('cp1251'), 
  49         # vk serves two same remixlhk cookies in Set-Cookie header and expects 
  50         # first one to be actually set 
  51         self
._apply
_first
_set
_cookie
_header
(url_handle
, 'remixlhk') 
  53         login_page 
= self
._download
_webpage
( 
  54             'https://login.vk.com/?act=login', None, 
  56             data
=urlencode_postdata(login_form
)) 
  58         if re
.search(r
'onLoginFailed', login_page
): 
  60                 'Unable to login, incorrect username and/or password', expected
=True) 
  62     def _real_initialize(self
): 
  65     def _download_payload(self
, path
, video_id
, data
, fatal
=True): 
  67         code
, payload 
= self
._download
_json
( 
  68             'https://vk.com/%s.php' % path
, video_id
, 
  69             data
=urlencode_postdata(data
), fatal
=fatal
, 
  70             headers
={'X-Requested-With': 'XMLHttpRequest'})['payload'] 
  72             self
.raise_login_required() 
  74             raise ExtractorError(clean_html(payload
[0][1:-1]), expected
=True) 
  85                                 (?:(?:m|new)\.)?vk\.com/video_| 
  88                             ext\.php\?(?P<embed_query>.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+).*)| 
  90                                 (?:(?:m|new)\.)?vk\.com/(?:.+?\?.*?z=)?video| 
  91                                 (?:www\.)?daxab.com/embed/ 
  93                             (?P<videoid>-?\d+_\d+)(?:.*\blist=(?P<list_id>[\da-f]+))? 
  98             'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 
  99             'md5': '7babad3b85ea2e91948005b1b8b0cb84', 
 101                 'id': '-77521_162222515', 
 103                 'title': 'ProtivoGunz - Хуёвая песня', 
 104                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 
 105                 'uploader_id': '-77521', 
 107                 'timestamp': 1329049880, 
 108                 'upload_date': '20120212', 
 112             'url': 'http://vk.com/video205387401_165548505', 
 114                 'id': '205387401_165548505', 
 117                 'uploader': 'Tom Cruise', 
 118                 'uploader_id': '205387401', 
 120                 'timestamp': 1374364108, 
 121                 'upload_date': '20130720', 
 125             'note': 'Embedded video', 
 126             'url': 'https://vk.com/video_ext.php?oid=-77521&id=162222515&hash=87b046504ccd8bfa', 
 127             'md5': '7babad3b85ea2e91948005b1b8b0cb84', 
 129                 'id': '-77521_162222515', 
 131                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 
 132                 'title': 'ProtivoGunz - Хуёвая песня', 
 134                 'upload_date': '20120212', 
 135                 'timestamp': 1329049880, 
 136                 'uploader_id': '-77521', 
 141             # please update if you find a video whose URL follows the same pattern 
 142             'url': 'http://vk.com/video-8871596_164049491', 
 143             'md5': 'a590bcaf3d543576c9bd162812387666', 
 144             'note': 'Only available for registered users', 
 146                 'id': '-8871596_164049491', 
 148                 'uploader': 'Триллеры', 
 149                 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', 
 151                 'upload_date': '20121218', 
 157             'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', 
 159                 'id': '-43215063_168067957', 
 161                 'uploader': 'Bro Mazter', 
 164                 'upload_date': '20140328', 
 165                 'uploader_id': '223413403', 
 166                 'timestamp': 1396018030, 
 168             'skip': 'Requires vk account credentials', 
 171             'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540', 
 172             'md5': '0c45586baa71b7cb1d0784ee3f4e00a6', 
 173             'note': 'ivi.ru embed', 
 175                 'id': '-43215063_169084319', 
 177                 'title': 'Книга Илая', 
 179                 'upload_date': '20140626', 
 185             # video (removed?) only available with list id 
 186             'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4', 
 187             'md5': '091287af5402239a1051c37ec7b92913', 
 189                 'id': '30481095_171201961', 
 191                 'title': 'ТюменцевВВ_09.07.2015', 
 192                 'uploader': 'Anton Ivanov', 
 194                 'upload_date': '20150709', 
 201             'url': 'https://vk.com/video276849682_170681728', 
 205                 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", 
 206                 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', 
 208                 'upload_date': '20130116', 
 209                 'uploader': "Children's Joy Foundation Inc.", 
 210                 'uploader_id': 'thecjf', 
 216             'url': 'https://vk.com/video-37468416_456239855', 
 218                 'id': 'k3lz2cmXyRuJQSjGHUv', 
 220                 'title': 'md5:d52606645c20b0ddbb21655adaa4f56f', 
 221                 'description': 'md5:424b8e88cc873217f520e582ba28bb36', 
 222                 'uploader': 'AniLibria.Tv', 
 223                 'upload_date': '20160914', 
 224                 'uploader_id': 'x1p5vl5', 
 225                 'timestamp': 1473877246, 
 228                 'skip_download': True, 
 232             # video key is extra_data not url\d+ 
 233             'url': 'http://vk.com/video-110305615_171782105', 
 234             'md5': 'e13fcda136f99764872e739d13fac1d1', 
 236                 'id': '-110305615_171782105', 
 238                 'title': 'S-Dance, репетиции к The way show', 
 239                 'uploader': 'THE WAY SHOW | 17 апреля', 
 240                 'uploader_id': '-110305615', 
 241                 'timestamp': 1454859345, 
 242                 'upload_date': '20160207', 
 245                 'skip_download': True, 
 249             # finished live stream, postlive_mp4 
 250             'url': 'https://vk.com/videos-387766?z=video-387766_456242764%2Fpl_-387766_-2', 
 252                 'id': '-387766_456242764', 
 254                 'title': 'ИгроМир 2016 День 1 — Игромания Утром', 
 255                 'uploader': 'Игромания', 
 257                 # TODO: use act=show to extract view_count 
 259                 'upload_date': '20160929', 
 260                 'uploader_id': '-387766', 
 261                 'timestamp': 1475137527, 
 264                 'skip_download': True, 
 268             # live stream, hls and rtmp links, most likely already finished live 
 269             # stream by the time you are reading this comment 
 270             'url': 'https://vk.com/video-140332_456239111', 
 271             'only_matching': True, 
 274             # removed video, just testing that we match the pattern 
 275             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', 
 276             'only_matching': True, 
 279             # age restricted video, requires vk account credentials 
 280             'url': 'https://vk.com/video205387401_164765225', 
 281             'only_matching': True, 
 285             'url': 'https://vk.com/video-76116461_171554880', 
 286             'only_matching': True, 
 289             'url': 'http://new.vk.com/video205387401_165548505', 
 290             'only_matching': True, 
 293             # This video is no longer available, because its author has been blocked. 
 294             'url': 'https://vk.com/video-10639516_456240611', 
 295             'only_matching': True, 
 298             # The video is not available in your region. 
 299             'url': 'https://vk.com/video-51812607_171445436', 
 300             'only_matching': True, 
 303     def _real_extract(self
, url
): 
 304         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 305         video_id 
= mobj
.group('videoid') 
 310                 'act': 'show_inline', 
 313             # Some videos (removed?) can only be downloaded with list id specified 
 314             list_id 
= mobj
.group('list_id') 
 316                 data
['list'] = list_id
 
 318             payload 
= self
._download
_payload
('al_video', video_id
, data
) 
 319             info_page 
= payload
[1] 
 321             mv_data 
= opts
.get('mvData') or {} 
 322             player 
= opts
.get('player') or {} 
 324             video_id 
= '%s_%s' % (mobj
.group('oid'), mobj
.group('id')) 
 326             info_page 
= self
._download
_webpage
( 
 327                 'http://vk.com/video_ext.php?' + mobj
.group('embed_query'), video_id
) 
 329             error_message 
= self
._html
_search
_regex
( 
 330                 [r
'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>', 
 331                     r
'(?s)<div[^>]+id="video_ext_msg"[^>]*>(.+?)</div>'], 
 332                 info_page
, 'error message', default
=None) 
 334                 raise ExtractorError(error_message
, expected
=True) 
 336             if re
.search(r
'<!>/login\.php\?.*\bact=security_check', info_page
): 
 337                 raise ExtractorError( 
 338                     'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', 
 341             ERROR_COPYRIGHT 
= 'Video %s has been removed from public access due to rightholder complaint.' 
 344                 r
'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': 
 347                 r
'>The video .*? was removed from public access by request of the copyright holder.<': 
 350                 r
'<!>Please log in or <': 
 351                 'Video %s is only available for registered users, ' 
 352                 'use --username and --password options to provide account credentials.', 
 355                 'Video %s does not exist.', 
 357                 r
'<!>Видео временно недоступно': 
 358                 'Video %s is temporarily unavailable.', 
 361                 'Access denied to video %s.', 
 363                 r
'<!>Видеозапись недоступна, так как её автор был заблокирован.': 
 364                 'Video %s is no longer available, because its author has been blocked.', 
 366                 r
'<!>This video is no longer available, because its author has been blocked.': 
 367                 'Video %s is no longer available, because its author has been blocked.', 
 369                 r
'<!>This video is no longer available, because it has been deleted.': 
 370                 'Video %s is no longer available, because it has been deleted.', 
 372                 r
'<!>The video .+? is not available in your region.': 
 373                 'Video %s is not available in your region.', 
 376             for error_re
, error_msg 
in ERRORS
.items(): 
 377                 if re
.search(error_re
, info_page
): 
 378                     raise ExtractorError(error_msg 
% video_id
, expected
=True) 
 380             player 
= self
._parse
_json
(self
._search
_regex
( 
 381                 r
'var\s+playerParams\s*=\s*({.+?})\s*;\s*\n', 
 382                 info_page
, 'player params'), video_id
) 
 384         youtube_url 
= YoutubeIE
._extract
_url
(info_page
) 
 386             return self
.url_result(youtube_url
, YoutubeIE
.ie_key()) 
 388         vimeo_url 
= VimeoIE
._extract
_url
(url
, info_page
) 
 389         if vimeo_url 
is not None: 
 390             return self
.url_result(vimeo_url
, VimeoIE
.ie_key()) 
 392         pladform_url 
= PladformIE
._extract
_url
(info_page
) 
 394             return self
.url_result(pladform_url
, PladformIE
.ie_key()) 
 396         m_rutube 
= re
.search( 
 397             r
'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page
) 
 398         if m_rutube 
is not None: 
 399             rutube_url 
= self
._proto
_relative
_url
( 
 400                 m_rutube
.group(1).replace('\\', '')) 
 401             return self
.url_result(rutube_url
) 
 403         dailymotion_urls 
= DailymotionIE
._extract
_urls
(info_page
) 
 405             return self
.url_result(dailymotion_urls
[0], DailymotionIE
.ie_key()) 
 407         odnoklassniki_url 
= OdnoklassnikiIE
._extract
_url
(info_page
) 
 408         if odnoklassniki_url
: 
 409             return self
.url_result(odnoklassniki_url
, OdnoklassnikiIE
.ie_key()) 
 411         m_opts 
= re
.search(r
'(?s)var\s+opts\s*=\s*({.+?});', info_page
) 
 413             m_opts_url 
= re
.search(r
"url\s*:\s*'((?!/\b)[^']+)", m_opts
.group(1)) 
 415                 opts_url 
= m_opts_url
.group(1) 
 416                 if opts_url
.startswith('//'): 
 417                     opts_url 
= 'http:' + opts_url
 
 418                 return self
.url_result(opts_url
) 
 420         data 
= player
['params'][0] 
 421         title 
= unescapeHTML(data
['md_title']) 
 424         # 3 = post live (finished live) 
 425         is_live 
= data
.get('live') == 2 
 427             title 
= self
._live
_title
(title
) 
 429         timestamp 
= unified_timestamp(self
._html
_search
_regex
( 
 430             r
'class=["\']mv_info_date
[^
>]+>([^
<]+)(?
:<|
from)', info_page, 
 431             'upload date
', default=None)) or int_or_none(data.get('date
')) 
 433         view_count = str_to_int(self._search_regex( 
 434             r'class=["\']mv_views_count[^>]+>\s*([\d,.]+)', 
 435             info_page, 'view count', default=None)) 
 438         for format_id, format_url in data.items(): 
 439             format_url = url_or_none(format_url) 
 440             if not format_url or not format_url.startswith(('http', '//', 'rtmp')): 
 442             if (format_id.startswith(('url', 'cache')) 
 443                     or format_id in ('extra_data', 'live_mp4', 'postlive_mp4')): 
 444                 height = int_or_none(self._search_regex( 
 445                     r'^(?:url|cache)(\d+)', format_id, 'height', default=None)) 
 447                     'format_id': format_id, 
 451             elif format_id == 'hls': 
 452                 formats.extend(self._extract_m3u8_formats( 
 453                     format_url, video_id, 'mp4', 'm3u8_native', 
 454                     m3u8_id=format_id, fatal=False, live=is_live)) 
 455             elif format_id == 'rtmp': 
 457                     'format_id': format_id, 
 461         self._sort_formats(formats) 
 467             'thumbnail': data.get('jpg'), 
 468             'uploader': data.get('md_author'), 
 469             'uploader_id': str_or_none(data.get('author_id') or mv_data.get('authorId')), 
 470             'duration': int_or_none(data.get('duration') or mv_data.get('duration')), 
 471             'timestamp': timestamp, 
 472             'view_count': view_count, 
 473             'like_count': int_or_none(mv_data.get('likes')), 
 474             'comment_count': int_or_none(mv_data.get('commcount')), 
 479 class VKUserVideosIE(VKBaseIE): 
 480     IE_NAME = 'vk:uservideos' 
 481     IE_DESC = "VK 
- User
's Videos" 
 482     _VALID_URL = r'https?
://(?
:(?
:m|new
)\
.)?vk\
.com
/videos(?P
<id>-?
[0-9]+)(?
!\?.*\bz
=video
)(?
:[/?
#&](?:.*?\bsection=(?P<section>\w+))?|$)' 
 483     _TEMPLATE_URL 
= 'https://vk.com/videos' 
 485         'url': 'https://vk.com/videos-767561', 
 489         'playlist_mincount': 1150, 
 491         'url': 'https://vk.com/videos-767561?section=uploaded', 
 493             'id': '-767561_uploaded', 
 495         'playlist_mincount': 425, 
 497         'url': 'http://vk.com/videos205387401', 
 498         'only_matching': True, 
 500         'url': 'http://vk.com/videos-77521', 
 501         'only_matching': True, 
 503         'url': 'http://vk.com/videos-97664626?section=all', 
 504         'only_matching': True, 
 506         'url': 'http://m.vk.com/videos205387401', 
 507         'only_matching': True, 
 509         'url': 'http://new.vk.com/videos205387401', 
 510         'only_matching': True, 
 513     _VIDEO 
= collections
.namedtuple('Video', ['owner_id', 'id']) 
 515     def _fetch_page(self
, page_id
, section
, page
): 
 516         l 
= self
._download
_payload
('al_video', page_id
, { 
 517             'act': 'load_videos_silent', 
 518             'offset': page 
* self
._PAGE
_SIZE
, 
 521         })[0][section
]['list'] 
 524             v 
= self
._VIDEO
._make
(video
[:2]) 
 525             video_id 
= '%d_%d' % (v
.owner_id
, v
.id) 
 526             yield self
.url_result( 
 527                 'http://vk.com/video' + video_id
, VKIE
.ie_key(), video_id
) 
 529     def _real_extract(self
, url
): 
 530         page_id
, section 
= re
.match(self
._VALID
_URL
, url
).groups() 
 534         entries 
= OnDemandPagedList( 
 535             functools
.partial(self
._fetch
_page
, page_id
, section
), 
 538         return self
.playlist_result(entries
, '%s_%s' % (page_id
, section
)) 
 541 class VKWallPostIE(VKBaseIE
): 
 542     IE_NAME 
= 'vk:wallpost' 
 543     _VALID_URL 
= r
'https?://(?:(?:(?:(?:m|new)\.)?vk\.com/(?:[^?]+\?.*\bw=)?wall(?P<id>-?\d+_\d+)))' 
 545         # public page URL, audio playlist 
 546         'url': 'https://vk.com/bs.official?w=wall-23538238_35', 
 548             'id': '-23538238_35', 
 549             'title': 'Black Shadow - Wall post -23538238_35', 
 550             'description': 'md5:3f84b9c4f9ef499731cf1ced9998cc0c', 
 553             'md5': '5ba93864ec5b85f7ce19a9af4af080f6', 
 555                 'id': '135220665_111806521', 
 557                 'title': 'Black Shadow - Слепое Верование', 
 559                 'uploader': 'Black Shadow', 
 560                 'artist': 'Black Shadow', 
 561                 'track': 'Слепое Верование', 
 564             'md5': '4cc7e804579122b17ea95af7834c9233', 
 566                 'id': '135220665_111802303', 
 568                 'title': 'Black Shadow - Война - Негасимое Бездны Пламя!', 
 570                 'uploader': 'Black Shadow', 
 571                 'artist': 'Black Shadow', 
 572                 'track': 'Война - Негасимое Бездны Пламя!', 
 576             'skip_download': True, 
 579         'skip': 'Requires vk account credentials', 
 581         # single YouTube embed, no leading - 
 582         'url': 'https://vk.com/wall85155021_6319', 
 584             'id': '85155021_6319', 
 585             'title': 'Сергей Горбунов - Wall post 85155021_6319', 
 591         'skip': 'Requires vk account credentials', 
 594         'url': 'https://vk.com/wall-23538238_35', 
 595         'only_matching': True, 
 597         # mobile wall page URL 
 598         'url': 'https://m.vk.com/wall-23538238_35', 
 599         'only_matching': True, 
 601     _BASE64_CHARS 
= 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMN0PQRSTUVWXYZO123456789+/=' 
 602     _AUDIO 
= collections
.namedtuple('Audio', ['id', 'owner_id', 'url', 'title', 'performer', 'duration', 'album_id', 'unk', 'author_link', 'lyrics', 'flags', 'context', 'extra', 'hashes', 'cover_url', 'ads']) 
 604     def _decode(self
, enc
): 
 608             r 
= self
._BASE
64_CHARS
.index(c
) 
 610             e 
= 64 * e 
+ r 
if cond 
else r
 
 613                 dec 
+= chr(255 & e 
>> (-2 * n 
& 6)) 
 616     def _unmask_url(self
, mask_url
, vk_id
): 
 617         if 'audio_api_unavailable' in mask_url
: 
 618             extra 
= mask_url
.split('?extra=')[1].split('#') 
 619             func
, base 
= self
._decode
(extra
[1]).split(chr(11)) 
 620             mask_url 
= list(self
._decode
(extra
[0])) 
 621             url_len 
= len(mask_url
) 
 622             indexes 
= [None] * url_len
 
 623             index 
= int(base
) ^ vk_id
 
 624             for n 
in range(url_len 
- 1, -1, -1): 
 625                 index 
= (url_len 
* (n 
+ 1) ^ index 
+ n
) % url_len
 
 627             for n 
in range(1, url_len
): 
 629                 index 
= indexes
[url_len 
- 1 - n
] 
 630                 mask_url
[n
] = mask_url
[index
] 
 632             mask_url 
= ''.join(mask_url
) 
 635     def _real_extract(self
, url
): 
 636         post_id 
= self
._match
_id
(url
) 
 638         webpage 
= self
._download
_payload
('wkview', post_id
, { 
 640             'w': 'wall' + post_id
, 
 643         description 
= clean_html(get_element_by_class('wall_post_text', webpage
)) 
 644         uploader 
= clean_html(get_element_by_class('author', webpage
)) 
 648         for audio 
in re
.findall(r
'data-audio="([^"]+)', webpage
): 
 649             audio 
= self
._parse
_json
(unescapeHTML(audio
), post_id
) 
 650             a 
= self
._AUDIO
._make
(audio
[:16]) 
 653             title 
= unescapeHTML(a
.title
) 
 654             performer 
= unescapeHTML(a
.performer
) 
 656                 'id': '%s_%s' % (a
.owner_id
, a
.id), 
 657                 'url': self
._unmask
_url
(a
.url
, a
.ads
['vk_id']), 
 658                 'title': '%s - %s' % (performer
, title
) if performer 
else title
, 
 659                 'thumbnails': [{'url': c_url
} for c_url 
in a
.cover_url
.split(',')] if a
.cover_url 
else None, 
 660                 'duration': int_or_none(a
.duration
), 
 661                 'uploader': uploader
, 
 668         for video 
in re
.finditer( 
 669                 r
'<a[^>]+href=(["\'])(?P
<url
>/video(?
:-?
[\d_
]+).*?
)\
1', webpage): 
 670             entries.append(self.url_result( 
 671                 compat_urlparse.urljoin(url, video.group('url
')), VKIE.ie_key())) 
 673         title = 'Wall post 
%s' % post_id 
 675         return self.playlist_result( 
 676             orderedSet(entries), post_id, 
 677             '%s - %s' % (uploader, title) if uploader else title,