]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vk.py
0805e3c083937d7f58acbe4b872d41c19f8d1c02
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  20 from .vimeo 
import VimeoIE
 
  21 from .pladform 
import PladformIE
 
  24 class VKIE(InfoExtractor
): 
  30                             (?:m\.)?vk\.com/video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)| 
  32                                 (?:m\.)?vk\.com/(?:.+?\?.*?z=)?video| 
  33                                 (?:www\.)?biqle\.ru/watch/ 
  35                             (?P<videoid>[^s].*?)(?:\?(?:.*\blist=(?P<list_id>[\da-f]+))?|%2F|$) 
  42             'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 
  43             'md5': '0deae91935c54e00003c2a00646315f0', 
  47                 'title': 'ProtivoGunz - Хуёвая песня', 
  48                 'uploader': 're:(?:Noize MC|Alexander Ilyashenko).*', 
  50                 'upload_date': '20120212', 
  55             'url': 'http://vk.com/video205387401_165548505', 
  56             'md5': '6c0aeb2e90396ba97035b9cbde548700', 
  60                 'uploader': 'Tom Cruise', 
  63                 'upload_date': '20130721', 
  68             'note': 'Embedded video', 
  69             'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', 
  70             'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', 
  74                 'uploader': 'Vladimir Gavrin', 
  77                 'upload_date': '20120730', 
  83             # please update if you find a video whose URL follows the same pattern 
  84             'url': 'http://vk.com/video-8871596_164049491', 
  85             'md5': 'a590bcaf3d543576c9bd162812387666', 
  86             'note': 'Only available for registered users', 
  90                 'uploader': 'Триллеры', 
  91                 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', 
  93                 'upload_date': '20121218', 
  96             'skip': 'Requires vk account credentials', 
  99             'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', 
 100             'md5': '4d7a5ef8cf114dfa09577e57b2993202', 
 104                 'uploader': 'Киномания - лучшее из мира кино', 
 107                 'upload_date': '20140328', 
 109             'skip': 'Requires vk account credentials', 
 112             'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540', 
 113             'md5': '0c45586baa71b7cb1d0784ee3f4e00a6', 
 114             'note': 'ivi.ru embed', 
 118                 'title': 'Книга Илая', 
 120                 'upload_date': '20140626', 
 123             'skip': 'Only works from Russia', 
 126             # video (removed?) only available with list id 
 127             'url': 'https://vk.com/video30481095_171201961?list=8764ae2d21f14088d4', 
 128             'md5': '091287af5402239a1051c37ec7b92913', 
 132                 'title': 'ТюменцевВВ_09.07.2015', 
 133                 'uploader': 'Anton Ivanov', 
 135                 'upload_date': '20150709', 
 141             'url': 'https://vk.com/video276849682_170681728', 
 145                 'title': "DSWD Awards 'Children's Joy Foundation, Inc.' Certificate of Registration and License to Operate", 
 146                 'description': 'md5:bf9c26cfa4acdfb146362682edd3827a', 
 148                 'upload_date': '20130116', 
 149                 'uploader': "Children's Joy Foundation", 
 150                 'uploader_id': 'thecjf', 
 155             # removed video, just testing that we match the pattern 
 156             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', 
 157             'only_matching': True, 
 160             # age restricted video, requires vk account credentials 
 161             'url': 'https://vk.com/video205387401_164765225', 
 162             'only_matching': True, 
 166             'url': 'http://www.biqle.ru/watch/847655_160197695', 
 167             'only_matching': True, 
 171             'url': 'https://vk.com/video-76116461_171554880', 
 172             'only_matching': True, 
 177         (username
, password
) = self
._get
_login
_info
() 
 181         login_page 
= self
._download
_webpage
( 
 182             'https://vk.com', None, 'Downloading login page') 
 184         login_form 
= self
._hidden
_inputs
(login_page
) 
 187             'email': username
.encode('cp1251'), 
 188             'pass': password
.encode('cp1251'), 
 191         request 
= sanitized_Request( 
 192             'https://login.vk.com/?act=login', 
 193             compat_urllib_parse
.urlencode(login_form
).encode('utf-8')) 
 194         login_page 
= self
._download
_webpage
( 
 195             request
, None, note
='Logging in as %s' % username
) 
 197         if re
.search(r
'onLoginFailed', login_page
): 
 198             raise ExtractorError( 
 199                 'Unable to login, incorrect username and/or password', expected
=True) 
 201     def _real_initialize(self
): 
 204     def _real_extract(self
, url
): 
 205         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 206         video_id 
= mobj
.group('videoid') 
 209             video_id 
= '%s_%s' % (mobj
.group('oid'), mobj
.group('id')) 
 211         info_url 
= 'https://vk.com/al_video.php?act=show&al=1&module=video&video=%s' % video_id
 
 213         # Some videos (removed?) can only be downloaded with list id specified 
 214         list_id 
= mobj
.group('list_id') 
 216             info_url 
+= '&list=%s' % list_id
 
 218         info_page 
= self
._download
_webpage
(info_url
, video_id
) 
 220         error_message 
= self
._html
_search
_regex
( 
 221             r
'(?s)<!><div[^>]+class="video_layer_message"[^>]*>(.+?)</div>', 
 222             info_page
, 'error message', default
=None) 
 224             raise ExtractorError(error_message
, expected
=True) 
 226         if re
.search(r
'<!>/login\.php\?.*\bact=security_check', info_page
): 
 227             raise ExtractorError( 
 228                 'You are trying to log in from an unusual location. You should confirm ownership at vk.com to log in with this IP.', 
 232             r
'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': 
 233             'Video %s has been removed from public access due to rightholder complaint.', 
 235             r
'<!>Please log in or <': 
 236             'Video %s is only available for registered users, ' 
 237             'use --username and --password options to provide account credentials.', 
 240             'Video %s does not exist.', 
 242             r
'<!>Видео временно недоступно': 
 243             'Video %s is temporarily unavailable.', 
 246             'Access denied to video %s.', 
 249         for error_re
, error_msg 
in ERRORS
.items(): 
 250             if re
.search(error_re
, info_page
): 
 251                 raise ExtractorError(error_msg 
% video_id
, expected
=True) 
 253         youtube_url 
= self
._search
_regex
( 
 254             r
'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"', 
 255             info_page
, 'youtube iframe', default
=None) 
 257             return self
.url_result(youtube_url
, 'Youtube') 
 259         vimeo_url 
= VimeoIE
._extract
_vimeo
_url
(url
, info_page
) 
 260         if vimeo_url 
is not None: 
 261             return self
.url_result(vimeo_url
) 
 263         pladform_url 
= PladformIE
._extract
_url
(info_page
) 
 265             return self
.url_result(pladform_url
) 
 267         m_rutube 
= re
.search( 
 268             r
'\ssrc="((?:https?:)?//rutube\.ru\\?/(?:video|play)\\?/embed(?:.*?))\\?"', info_page
) 
 269         if m_rutube 
is not None: 
 270             rutube_url 
= self
._proto
_relative
_url
( 
 271                 m_rutube
.group(1).replace('\\', '')) 
 272             return self
.url_result(rutube_url
) 
 274         m_opts 
= re
.search(r
'(?s)var\s+opts\s*=\s*({.+?});', info_page
) 
 276             m_opts_url 
= re
.search(r
"url\s*:\s*'((?!/\b)[^']+)", m_opts
.group(1)) 
 278                 opts_url 
= m_opts_url
.group(1) 
 279                 if opts_url
.startswith('//'): 
 280                     opts_url 
= 'http:' + opts_url
 
 281                 return self
.url_result(opts_url
) 
 283         data_json 
= self
._search
_regex
(r
'var\s+vars\s*=\s*({.+?});', info_page
, 'vars') 
 284         data 
= json
.loads(data_json
) 
 286         # Extract upload date 
 288         mobj 
= re
.search(r
'id="mv_date(?:_views)?_wrap"[^>]*>([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page
) 
 290             mobj
.group(1) + ' ' + mobj
.group(2) 
 291             upload_date 
= unified_strdate(mobj
.group(1) + ' ' + mobj
.group(2)) 
 294         views 
= self
._html
_search
_regex
( 
 295             r
'"mv_views_count_number"[^>]*>(.+?\bviews?)<', 
 296             info_page
, 'view count', fatal
=False) 
 298             view_count 
= str_to_int(self
._search
_regex
( 
 299                 r
'([\d,.]+)', views
, 'view count', fatal
=False)) 
 304             'width': int(k
[len('url'):]), 
 305         } for k
, v 
in data
.items() 
 306             if k
.startswith('url')] 
 307         self
._sort
_formats
(formats
) 
 310             'id': compat_str(data
['vid']), 
 312             'title': unescapeHTML(data
['md_title']), 
 313             'thumbnail': data
.get('jpg'), 
 314             'uploader': data
.get('md_author'), 
 315             'duration': data
.get('duration'), 
 316             'upload_date': upload_date
, 
 317             'view_count': view_count
, 
 321 class VKUserVideosIE(InfoExtractor
): 
 322     IE_NAME 
= 'vk:uservideos' 
 323     IE_DESC 
= "VK - User's Videos" 
 324     _VALID_URL 
= r
'https?://vk\.com/videos(?P<id>-?[0-9]+)(?!\?.*\bz=video)(?:[/?#&]|$)' 
 325     _TEMPLATE_URL 
= 'https://vk.com/videos' 
 327         'url': 'http://vk.com/videos205387401', 
 330             'title': "Tom Cruise's Videos", 
 332         'playlist_mincount': 4, 
 334         'url': 'http://vk.com/videos-77521', 
 335         'only_matching': True, 
 337         'url': 'http://vk.com/videos-97664626?section=all', 
 338         'only_matching': True, 
 341     def _real_extract(self
, url
): 
 342         page_id 
= self
._match
_id
(url
) 
 344         webpage 
= self
._download
_webpage
(url
, page_id
) 
 348                 'http://vk.com/video' + video_id
, 'VK', video_id
=video_id
) 
 349             for video_id 
in orderedSet(re
.findall(r
'href="/video(-?[0-9_]+)"', webpage
))] 
 351         title 
= unescapeHTML(self
._search
_regex
( 
 352             r
'<title>\s*([^<]+?)\s+\|\s+\d+\s+videos', 
 353             webpage
, 'title', default
=page_id
)) 
 355         return self
.playlist_result(entries
, page_id
, title
)