Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/viddler.py
Imported Upstream version 2013.12.23
[youtubedl] / youtube_dl / extractor / viddler.py
1 import json
2 import re
3
4 from .common import InfoExtractor
5
6
class ViddlerIE(InfoExtractor):
    """Extractor for videos on viddler.com.

    Accepts ``/v/<id>``, ``/embed/<id>`` and ``/player/<id>`` URLs (see
    ``_VALID_URL``) and scrapes the metadata out of the embed page with
    regular expressions.
    """

    _VALID_URL = r'(?P<domain>https?://(?:www\.)?viddler\.com)/(?:v|embed|player)/(?P<id>[a-z0-9]+)'
    _TEST = {
        u"url": u"http://www.viddler.com/v/43903784",
        u'file': u'43903784.mp4',
        u'md5': u'fbbaedf7813e514eb7ca30410f439ac9',
        u'info_dict': {
            u"title": u"Video Made Easy",
            u"uploader": u"viddler",
            u"duration": 100.89,
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video at *url*.

        The embed page for the video is downloaded and the media sources,
        title, uploader, duration and thumbnail are pulled from it.  The
        sources and the title are looked up as mandatory fields; uploader,
        duration and thumbnail are looked up with ``fatal=False``.

        Returns a dict with the keys ``_type``, ``id``, ``title``,
        ``thumbnail``, ``uploader``, ``duration`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        # Whatever URL flavour was given, always fetch the /embed/ page of
        # the same scheme and host.
        embed_url = mobj.group('domain') + u'/embed/' + video_id
        webpage = self._download_webpage(embed_url, video_id)

        video_sources_code = self._search_regex(
            r"(?ms)sources\s*:\s*(\{.*?\})", webpage, u'video URLs')
        # Single quotes are swapped for double quotes so that json.loads
        # accepts the captured object literal.
        video_sources = json.loads(video_sources_code.replace("'", '"'))

        # One format entry per item of the sources mapping: the item's key
        # is stored as 'url' and its value as 'format'.
        formats = [{
            'url': video_url,
            'format': format_id,
        } for video_url, format_id in video_sources.items()]

        title = self._html_search_regex(
            r"title\s*:\s*'([^']*)'", webpage, u'title')
        uploader = self._html_search_regex(
            r"authorName\s*:\s*'([^']*)'", webpage, u'uploader', fatal=False)
        duration_s = self._html_search_regex(
            r"duration\s*:\s*([0-9.]*)", webpage, u'duration', fatal=False)
        # The pattern also captures non-numeric strings such as "." or
        # "1.2.3".  Duration is an optional field, so a malformed value is
        # dropped instead of aborting the whole extraction.
        try:
            duration = float(duration_s) if duration_s else None
        except ValueError:
            duration = None
        thumbnail = self._html_search_regex(
            r"thumbnail\s*:\s*'([^']*)'",
            webpage, u'thumbnail', fatal=False)

        return {
            '_type': 'video',
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
        }