Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/kanalplay.py
Fix extraction from youtube.
[youtubedl] / youtube_dl / extractor / kanalplay.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 ExtractorError,
9 float_or_none,
10 srt_subtitles_timecode,
11 )
12
13
class KanalPlayIE(InfoExtractor):
    """Extractor for video pages on kanal5play.se, kanal9play.se and kanal11play.se."""

    IE_DESC = 'Kanal 5/9/11 Play'
    _VALID_URL = r'https?://(?:www\.)?kanal(?P<channel_id>5|9|11)play\.se/(?:#!/)?(?:play/)?program/\d+/video/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://www.kanal5play.se/#!/play/program/3060212363/video/3270012277',
            'info_dict': {
                'id': '3270012277',
                'ext': 'flv',
                'title': 'Saknar både dusch och avlopp',
                'description': 'md5:6023a95832a06059832ae93bc3c7efb7',
                'duration': 2636.36,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.kanal9play.se/#!/play/program/335032/video/246042',
            'only_matching': True,
        },
        {
            'url': 'http://www.kanal11play.se/#!/play/program/232835958/video/367135199',
            'only_matching': True,
        },
    ]

    def _fix_subtitles(self, subs):
        """Render the subtitle cue list *subs* as SRT text.

        Each cue must provide 'startMillis', 'endMillis' and 'text'. Cues are
        numbered from 1 and separated by a blank CRLF line.
        """
        srt_cues = []
        for index, cue in enumerate(subs, 1):
            # The millisecond offsets are divided by 1000.0 before formatting.
            start = srt_subtitles_timecode(cue['startMillis'] / 1000.0)
            end = srt_subtitles_timecode(cue['endMillis'] / 1000.0)
            srt_cues.append(
                '%s\r\n%s --> %s\r\n%s' % (index, start, end, cue['text']))
        return '\r\n\r\n'.join(srt_cues)

    def _get_subtitles(self, channel_id, video_id):
        """Fetch the subtitles JSON for a video and wrap it as an 'sv' SRT track.

        The download is non-fatal; an empty dict is returned when nothing
        usable comes back.
        """
        subtitles_url = 'http://www.kanal%splay.se/api/subtitles/%s' % (channel_id, video_id)
        cues = self._download_json(
            subtitles_url, video_id, 'Downloading subtitles JSON', fatal=False)
        if not cues:
            return {}
        return {'sv': [{'ext': 'srt', 'data': self._fix_subtitles(cues)}]}

    def _real_extract(self, url):
        """Build the info dict for the video addressed by *url*.

        Raises ExtractorError (expected=True) when the API response lists
        reasons for not providing streams.
        """
        match = re.match(self._VALID_URL, url)
        video_id = match.group('id')
        channel_id = match.group('channel_id')

        api_url = 'http://www.kanal%splay.se/api/getVideo?format=FLASH&videoId=%s' % (channel_id, video_id)
        video = self._download_json(api_url, video_id)

        no_stream_reasons = video.get('reasonsForNoStreams')
        if no_stream_reasons:
            message = '%s returned error: %s' % (self.IE_NAME, '\n'.join(no_stream_reasons))
            raise ExtractorError(message, expected=True)

        # Mandatory fields are looked up in the same order as before so a
        # missing key surfaces identically.
        title = video['title']
        duration = float_or_none(video.get('length'), 1000)
        stream_base_url = video['streamBaseUrl']

        # Every stream shares the base URL and is told apart by its play path.
        formats = []
        for stream in video['streams']:
            formats.append({
                'url': stream_base_url,
                'play_path': stream['source'],
                'ext': 'flv',
                'tbr': float_or_none(stream.get('bitrate'), 1000),
                'rtmp_real_time': True,
            })
        self._sort_formats(formats)

        # Subtitles are only requested when the API flags them as available.
        if video.get('hasSubtitle'):
            subtitles = self.extract_subtitles(channel_id, video_id)
        else:
            subtitles = {}

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': video.get('posterUrl'),
            'duration': duration,
            'formats': formats,
            'subtitles': subtitles,
        }