Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/thisav.py
debian/control: Update list of extractors.
[youtubedl] / youtube_dl / extractor / thisav.py
1 #coding: utf-8
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7 determine_ext,
8 )
9
class ThisAVIE(InfoExtractor):
    """Extractor for single video pages on thisav.com.

    The page URL supplies the numeric video id; the title, the media URL and
    the uploader details are scraped from the downloaded page with regular
    expressions.
    """

    # The named group ``id`` captures the numeric video id from the URL path.
    _VALID_URL = r'https?://(?:www\.)?thisav\.com/video/(?P<id>[0-9]+)/.*'
    # Sample URL together with the expected file name, MD5 and metadata.
    # NOTE(review): presumably consumed by the project's download tests --
    # confirm against the test harness.
    _TEST = {
        u"url": u"http://www.thisav.com/video/47734/%98%26sup1%3B%83%9E%83%82---just-fit.html",
        u"file": u"47734.flv",
        u"md5": u"0480f1ef3932d901f0e0e719f188f19b",
        u"info_dict": {
            u"title": u"高樹マリア - Just fit",
            u"uploader": u"dj7970",
            u"uploader_id": u"dj7970"
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video page at *url*.

        Returns a dict with the keys ``_type``, ``id``, ``url``, ``uploader``,
        ``uploader_id``, ``title`` and ``ext``.
        """
        mobj = re.match(self._VALID_URL, url)

        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        title = self._html_search_regex(r'<h1>([^<]*)</h1>', webpage, u'title')
        # The media URL is read from the player setup call embedded in the page.
        video_url = self._html_search_regex(
            r"addVariable\('file','([^']+)'\);", webpage, u'video url')
        # Both uploader lookups target the same profile link: the first
        # captures the link text, the second the last path segment of the
        # href. The host part mirrors _VALID_URL (escaped dots, http or https,
        # optional "www.") so the two stay consistent.
        # NOTE(review): fatal=False presumably turns a miss into a non-fatal
        # result instead of aborting -- confirm in InfoExtractor.
        uploader = self._html_search_regex(
            r': <a href="https?://(?:www\.)?thisav\.com/user/[0-9]+/(?:[^"]+)">([^<]+)</a>',
            webpage, u'uploader name', fatal=False)
        uploader_id = self._html_search_regex(
            r': <a href="https?://(?:www\.)?thisav\.com/user/[0-9]+/([^"]+)">(?:[^<]+)</a>',
            webpage, u'uploader id', fatal=False)
        ext = determine_ext(video_url)

        return {
            '_type': 'video',
            'id': video_id,
            'url': video_url,
            'uploader': uploader,
            'uploader_id': uploader_id,
            'title': title,
            'ext': ext,
        }