Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/pladform.py
Merge tag 'upstream/2015.11.27.1'
[youtubedl] / youtube_dl / extractor / pladform.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 int_or_none,
8 xpath_text,
9 qualities,
10 )
11
12
class PladformIE(InfoExtractor):
    """Information extractor for videos served through pladform.ru.

    Handles the embeddable player URLs (``out.pladform.ru/player`` and
    ``static.pladform.ru/player.swf``, both carrying a ``videoid`` query
    parameter) as well as ``video.pladform.ru`` catalog page URLs.
    """

    # Verbose-mode regex: whitespace inside the pattern is ignored.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:
                                out\.pladform\.ru/player|
                                static\.pladform\.ru/player\.swf
                            )
                            \?.*\bvideoid=|
                            video\.pladform\.ru/catalog/video/videoid/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        # http://muz-tv.ru/kinozal/view/7400/
        'url': 'http://out.pladform.ru/player?pl=24822&videoid=100183293',
        'md5': '61f37b575dd27f1bb2e1854777fe31f4',
        'info_dict': {
            'id': '100183293',
            'ext': 'mp4',
            'title': 'Тайны перевала Дятлова • 1 серия 2 часть',
            'description': 'Документальный сериал-расследование одной из самых жутких тайн ХХ века',
            # Raw string: ``\.`` is a regex escape, not a string escape.
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 694,
            'age_limit': 0,
        },
    }, {
        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
        'only_matching': True,
    }, {
        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract metadata and formats for the video referenced by *url*.

        Fetches the XML description from ``out.pladform.ru/getVideo`` and
        the catalog page from ``video.pladform.ru``. The page is the
        preferred source for title and thumbnail; the XML is the fallback.

        Returns the info dict with ``id``, ``title``, ``description``,
        ``thumbnail``, ``duration``, ``age_limit`` and ``formats``.

        Raises ExtractorError (marked as expected) when the XML root
        element is ``error``.
        """
        video_id = self._match_id(url)

        video = self._download_xml(
            'http://out.pladform.ru/getVideo?pl=1&videoid=%s' % video_id,
            video_id)

        # The service reports failures as a document whose root is <error>.
        if video.tag == 'error':
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, video.text),
                expected=True)

        # Rank the known quality labels from worst to best.
        quality = qualities(('ld', 'sd', 'hd'))

        formats = []
        for src in video.findall('./src'):
            format_url = src.text
            # An empty <src> element carries no URL; emitting it would
            # produce a format entry with 'url': None, so skip it.
            if not format_url:
                continue
            format_id = src.get('quality')
            formats.append({
                'url': format_url,
                'format_id': format_id,
                'quality': quality(format_id),
            })
        self._sort_formats(formats)

        webpage = self._download_webpage(
            'http://video.pladform.ru/catalog/video/videoid/%s' % video_id,
            video_id)

        # Prefer Open Graph data from the page; fall back to the XML. The
        # title is mandatory, so the XML lookup is fatal when both fail.
        title = self._og_search_title(webpage, fatal=False) or xpath_text(
            video, './/title', 'title', fatal=True)
        description = self._search_regex(
            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
            video, './/cover', 'cover')

        duration = int_or_none(xpath_text(video, './/time', 'duration'))
        # NOTE(review): the <age18> value is passed through as-is; if it is
        # a 0/1 flag rather than an age, this needs mapping - confirm.
        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }