]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vk.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  11     compat_urllib_request
, 
  21 class VKIE(InfoExtractor
): 
  23     _VALID_URL 
= r
'https?://(?:m\.)?vk\.com/(?:video_ext\.php\?.*?\boid=(?P<oid>-?\d+).*?\bid=(?P<id>\d+)|(?:.+?\?.*?z=)?video(?P<videoid>[^s].*?)(?:\?|%2F|$))' 
  28             'url': 'http://vk.com/videos-77521?z=video-77521_162222515%2Fclub77521', 
  29             'md5': '0deae91935c54e00003c2a00646315f0', 
  33                 'title': 'ProtivoGunz - Хуёвая песня', 
  34                 'uploader': 're:Noize MC.*', 
  36                 'upload_date': '20120212', 
  40             'url': 'http://vk.com/video205387401_165548505', 
  41             'md5': '6c0aeb2e90396ba97035b9cbde548700', 
  45                 'uploader': 'Tom Cruise', 
  48                 'upload_date': '20130721' 
  52             'note': 'Embedded video', 
  53             'url': 'http://vk.com/video_ext.php?oid=32194266&id=162925554&hash=7d8c2e0d5e05aeaa&hd=1', 
  54             'md5': 'c7ce8f1f87bec05b3de07fdeafe21a0a', 
  58                 'uploader': 'Vladimir Gavrin', 
  61                 'upload_date': '20120730', 
  66             # please update if you find a video whose URL follows the same pattern 
  67             'url': 'http://vk.com/video-8871596_164049491', 
  68             'md5': 'a590bcaf3d543576c9bd162812387666', 
  69             'note': 'Only available for registered users', 
  73                 'uploader': 'Триллеры', 
  74                 'title': '► Бойцовский клуб / Fight Club 1999 [HD 720]', 
  76                 'upload_date': '20121218' 
  78             'skip': 'Requires vk account credentials', 
  81             'url': 'http://vk.com/hd_kino_mania?z=video-43215063_168067957%2F15c66b9b533119788d', 
  82             'md5': '4d7a5ef8cf114dfa09577e57b2993202', 
  86                 'uploader': 'Киномания - лучшее из мира кино', 
  89                 'upload_date': '20140328', 
  91             'skip': 'Requires vk account credentials', 
  94             'url': 'http://m.vk.com/video-43215063_169084319?list=125c627d1aa1cebb83&from=wall-43215063_2566540', 
  95             'md5': '0c45586baa71b7cb1d0784ee3f4e00a6', 
  96             'note': 'ivi.ru embed', 
 100                 'title': 'Книга Илая', 
 102                 'upload_date': '20140626', 
 104             'skip': 'Only works from Russia', 
 107             # removed video, just testing that we match the pattern 
 108             'url': 'http://vk.com/feed?z=video-43215063_166094326%2Fbb50cacd3177146d7a', 
 109             'only_matching': True, 
 114         (username
, password
) = self
._get
_login
_info
() 
 126         request 
= compat_urllib_request
.Request('https://login.vk.com/?act=login', 
 127                                                 compat_urllib_parse
.urlencode(login_form
).encode('utf-8')) 
 128         login_page 
= self
._download
_webpage
(request
, None, note
='Logging in as %s' % username
) 
 130         if re
.search(r
'onLoginFailed', login_page
): 
 131             raise ExtractorError('Unable to login, incorrect username and/or password', expected
=True) 
 133     def _real_initialize(self
): 
 136     def _real_extract(self
, url
): 
 137         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 138         video_id 
= mobj
.group('videoid') 
 141             video_id 
= '%s_%s' % (mobj
.group('oid'), mobj
.group('id')) 
 143         info_url 
= 'http://vk.com/al_video.php?act=show&al=1&video=%s' % video_id
 
 144         info_page 
= self
._download
_webpage
(info_url
, video_id
) 
 147             r
'>Видеозапись .*? была изъята из публичного доступа в связи с обращением правообладателя.<': 
 148             'Video %s has been removed from public access due to rightholder complaint.', 
 150             r
'<!>Please log in or <': 
 151             'Video %s is only available for registered users, ' 
 152             'use --username and --password options to provide account credentials.', 
 155             'Video %s does not exist.' 
 158         for error_re
, error_msg 
in ERRORS
.items(): 
 159             if re
.search(error_re
, info_page
): 
 160                 raise ExtractorError(error_msg 
% video_id
, expected
=True) 
 162         m_yt 
= re
.search(r
'src="(http://www.youtube.com/.*?)"', info_page
) 
 164             self
.to_screen('Youtube video detected') 
 165             return self
.url_result(m_yt
.group(1), 'Youtube') 
 167         m_rutube 
= re
.search( 
 168             r
'\ssrc="((?:https?:)?//rutube\.ru\\?/video\\?/embed(?:.*?))\\?"', info_page
) 
 169         if m_rutube 
is not None: 
 170             self
.to_screen('rutube video detected') 
 171             rutube_url 
= self
._proto
_relative
_url
( 
 172                 m_rutube
.group(1).replace('\\', '')) 
 173             return self
.url_result(rutube_url
) 
 175         m_opts 
= re
.search(r
'(?s)var\s+opts\s*=\s*({.*?});', info_page
) 
 177             m_opts_url 
= re
.search(r
"url\s*:\s*'([^']+)", m_opts
.group(1)) 
 179                 opts_url 
= m_opts_url
.group(1) 
 180                 if opts_url
.startswith('//'): 
 181                     opts_url 
= 'http:' + opts_url
 
 182                 return self
.url_result(opts_url
) 
 184         data_json 
= self
._search
_regex
(r
'var vars = ({.*?});', info_page
, 'vars') 
 185         data 
= json
.loads(data_json
) 
 187         # Extract upload date 
 189         mobj 
= re
.search(r
'id="mv_date_wrap".*?Added ([a-zA-Z]+ [0-9]+), ([0-9]+) at', info_page
) 
 191             mobj
.group(1) + ' ' + mobj
.group(2) 
 192             upload_date 
= unified_strdate(mobj
.group(1) + ' ' + mobj
.group(2)) 
 197             'width': int(k
[len('url'):]), 
 198         } for k
, v 
in data
.items() 
 199             if k
.startswith('url')] 
 200         self
._sort
_formats
(formats
) 
 203             'id': compat_str(data
['vid']), 
 205             'title': unescapeHTML(data
['md_title']), 
 206             'thumbnail': data
.get('jpg'), 
 207             'uploader': data
.get('md_author'), 
 208             'duration': data
.get('duration'), 
 209             'upload_date': upload_date
, 
 213 class VKUserVideosIE(InfoExtractor
): 
 214     IE_NAME 
= 'vk.com:user-videos' 
 215     IE_DESC 
= 'vk.com:All of a user\'s videos' 
 216     _VALID_URL 
= r
'https?://vk\.com/videos(?P<id>[0-9]+)(?:m\?.*)?' 
 217     _TEMPLATE_URL 
= 'https://vk.com/videos' 
 219         'url': 'http://vk.com/videos205387401', 
 223         'playlist_mincount': 4, 
 226     def _real_extract(self
, url
): 
 227         page_id 
= self
._match
_id
(url
) 
 228         page 
= self
._download
_webpage
(url
, page_id
) 
 229         video_ids 
= orderedSet( 
 230             m
.group(1) for m 
in re
.finditer(r
'href="/video([0-9_]+)"', page
)) 
 233                 'http://vk.com/video' + video_id
, 'VK', video_id
=video_id
) 
 234             for video_id 
in video_ids
] 
 235         return self
.playlist_result(url_entries
, page_id
)