Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vbox7.py
Imported Upstream version 2014.01.17.2
[youtubedl] / youtube_dl / extractor / vbox7.py
1 import re
2
3 from .common import InfoExtractor
4 from ..utils import (
5 compat_urllib_parse,
6 compat_urllib_request,
7
8 ExtractorError,
9 )
10
11
class Vbox7IE(InfoExtractor):
    """Information Extractor for Vbox7

    Extraction takes three requests: the play page (which only contains a
    JavaScript redirect), the page the redirect points to (used for the
    title), and a POST to the ``magare.do`` endpoint that answers with the
    media and thumbnail URLs.
    """
    _VALID_URL = r'(?:http://)?(?:www\.)?vbox7\.com/play:([^/]+)'
    _TEST = {
        u'url': u'http://vbox7.com/play:249bb972c2',
        u'file': u'249bb972c2.flv',
        u'md5': u'99f65c0c9ef9b682b97313e052734c3f',
        u'info_dict': {
            u"title": u"\u0421\u043c\u044f\u0445! \u0427\u0443\u0434\u043e - \u0447\u0438\u0441\u0442 \u0437\u0430 \u0441\u0435\u043a\u0443\u043d\u0434\u0438 - \u0421\u043a\u0440\u0438\u0442\u0430 \u043a\u0430\u043c\u0435\u0440\u0430"
        }
    }

    @staticmethod
    def _parse_info_response(info_response):
        """Return ``(media_url, thumbnail_url)`` parsed from *info_response*.

        The response is treated as exactly two ``key=value`` fields joined
        by ``&``; the first value is the media URL and the second one the
        thumbnail URL. The key names themselves are ignored.

        Raises ExtractorError if the response does not have that shape.
        """
        values = []
        for field in info_response.split('&'):
            # Split on the first '=' only: the value is a URL and may
            # contain '=' itself (e.g. in a query string), which a plain
            # split('=')[1] would silently cut off.
            _key, separator, value = field.partition('=')
            if not separator:
                raise ExtractorError(u'Unable to extract the media url')
            values.append(value)
        if len(values) != 2:
            raise ExtractorError(u'Unable to extract the media url')
        return values[0], values[1]

    def _real_extract(self, url):
        """Extract the video information for a Vbox7 play URL.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title`` and ``thumbnail``.

        Raises ExtractorError if *url* does not match ``_VALID_URL`` or if
        the info response cannot be parsed.
        """
        mobj = re.match(self._VALID_URL, url)
        if mobj is None:
            raise ExtractorError(u'Invalid URL: %s' % url)
        video_id = mobj.group(1)

        # The play page only redirects via JavaScript; the target is
        # appended to the URL the first request actually ended up at.
        redirect_page, urlh = self._download_webpage_handle(url, video_id)
        new_location = self._search_regex(r'window\.location = \'(.*)\';', redirect_page, u'redirect location')
        redirect_url = urlh.geturl() + new_location
        webpage = self._download_webpage(redirect_url, video_id, u'Downloading redirect page')

        # Only the part of <title> before the first '/' is used as title.
        title = self._html_search_regex(r'<title>(.*)</title>',
            webpage, u'title').split('/')[0].strip()

        ext = "flv"
        info_url = "http://vbox7.com/play/magare.do"
        data = compat_urllib_parse.urlencode({'as3': '1', 'vid': video_id})
        info_request = compat_urllib_request.Request(info_url, data)
        info_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
        info_response = self._download_webpage(info_request, video_id, u'Downloading info webpage')
        if info_response is None:
            raise ExtractorError(u'Unable to extract the media url')
        final_url, thumbnail_url = self._parse_info_response(info_response)

        return [{
            'id': video_id,
            'url': final_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail_url,
        }]