]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/trutube.py
57f9566832401ff8eb2d1a90aaf91d3e68cdf873
[youtubedl] / youtube_dl / extractor / trutube.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6
7
class TruTubeIE(InfoExtractor):
    """Information extractor for video pages on trutube.tv."""

    # Matches video page URLs; the numeric path segment after /video/ is
    # captured as the video id, and anything may follow the next slash.
    _VALID_URL = r'https?://(?:www\.)?trutube\.tv/video/(?P<id>[0-9]+)/.*'
    _TEST = {
        'url': 'http://trutube.tv/video/14880/Ramses-II-Proven-To-Be-A-Red-Headed-Caucasoid-',
        'md5': 'c5b6e301b0a2040b074746cbeaa26ca1',
        'info_dict': {
            'id': '14880',
            'ext': 'flv',
            'title': 'Ramses II - Proven To Be A Red Headed Caucasoid',
            # Raw literal: '\.' is not a valid escape sequence in a normal
            # string and triggers a warning on recent Python versions.
            'thumbnail': r're:^http:.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single trutube.tv video page.

        The page is downloaded once; the title, thumbnail and media URLs
        are all read from that HTML.

        Returns a dict with the keys 'id', 'title', 'formats' and
        'thumbnail'.  'thumbnail' comes from a search made with
        fatal=False, so it is optional.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        video_title = self._og_search_title(webpage).strip()
        # The splash image is declared as a JavaScript variable in the page.
        thumbnail = self._search_regex(
            r"var splash_img = '([^']+)';", webpage, 'thumbnail', fatal=False)

        # Each media file is declared as `var <key>_video_file = '<url>';`;
        # the <key> prefix is used as the format id.
        all_formats = re.finditer(
            r"var (?P<key>[a-z]+)_video_file\s*=\s*'(?P<url>[^']+)';", webpage)
        formats = [{
            'format_id': m.group('key'),
            # Later declarations get a lower 'quality' value; presumably
            # _sort_formats then prefers the earlier ones (verify against
            # the base class).
            'quality': -i,
            'url': m.group('url'),
        } for i, m in enumerate(all_formats)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': video_title,
            'formats': formats,
            'thumbnail': thumbnail,
        }