Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hotstar.py
debian/control: Update list of supported sites/extractors.
[youtubedl] / youtube_dl / extractor / hotstar.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 determine_ext,
10 ExtractorError,
11 int_or_none,
12 )
13
14
class HotStarBaseIE(InfoExtractor):
    """Common helpers shared by the Hotstar extractors in this module.

    Hotstar's AVS endpoints wrap every payload in an envelope carrying
    'resultCode', 'resultObj' and (on failure) 'errorDescription'; the
    overridden _download_json below unwraps that envelope.
    """

    # Country codes handed to the base extractor (presumably for its
    # geo-restriction handling, which lives outside this file).
    _GEO_COUNTRIES = ['IN']

    def _download_json(self, *args, **kwargs):
        """Download an AVS JSON document and return its 'resultObj' payload.

        Accepts exactly the arguments of InfoExtractor._download_json and
        forwards them untouched.

        Returns the value stored under 'resultObj' when the envelope's
        'resultCode' is 'OK'.  When the request is non-fatal (fatal=False
        passed as a keyword) and either the download or the API call failed,
        returns None.

        Raises ExtractorError (expected=True) carrying the server supplied
        'errorDescription' when the request is fatal and the API did not
        answer with 'OK'.
        """
        # The parent method treats requests as fatal unless told otherwise,
        # so an absent 'fatal' keyword has to mean True here too.  Reading it
        # with a bare .get() made every default call silently return None on
        # API errors, which then crashed callers with a TypeError.
        fatal = kwargs.get('fatal', True)

        response = super(HotStarBaseIE, self)._download_json(*args, **kwargs)

        # A non-fatal download that failed yields a non-dict (falsy) result
        # from the parent; subscripting it would raise TypeError.
        is_envelope = isinstance(response, dict)
        if is_envelope and response.get('resultCode') == 'OK':
            return response['resultObj']

        if not fatal:
            return None

        description = response.get('errorDescription') if is_envelope else None
        raise ExtractorError(
            description or 'Hotstar API returned an invalid response',
            expected=True)

    def _download_content_info(self, content_id):
        """Return the first 'contentInfo' entry describing *content_id*.

        Queries the GetAggregatedContentDetails action; the request is fatal,
        so API errors surface as ExtractorError.
        """
        return self._download_json(
            'https://account.hotstar.com/AVS/besc', content_id, query={
                'action': 'GetAggregatedContentDetails',
                'appVersion': '5.0.40',
                'channel': 'PCTV',
                'contentId': content_id,
            })['contentInfo'][0]
35
36
class HotStarIE(HotStarBaseIE):
    """Extractor for a single Hotstar video identified by a 10-digit id."""

    _VALID_URL = r'https?://(?:www\.)?hotstar\.com/(?:.+?[/-])?(?P<id>\d{10})'
    _TESTS = [
        {
            'url': 'http://www.hotstar.com/on-air-with-aib--english-1000076273',
            'info_dict': {
                'id': '1000076273',
                'ext': 'mp4',
                'title': 'On Air With AIB',
                'description': 'md5:c957d8868e9bc793ccb813691cc4c434',
                'timestamp': 1447227000,
                'upload_date': '20151111',
                'duration': 381,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.hotstar.com/sports/cricket/rajitha-sizzles-on-debut-with-329/2001477583',
            'only_matching': True,
        },
        {
            'url': 'http://www.hotstar.com/1000000515',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Build the info dict (metadata plus formats) for the video at *url*.

        Raises ExtractorError when the content details flag the video as
        encrypted.
        """
        video_id = self._match_id(url)
        video_data = self._download_content_info(video_id)

        # The title lookup intentionally precedes the DRM check.
        title = video_data['episodeTitle']

        if video_data.get('encrypted') == 'Y':
            raise ExtractorError('This video is DRM protected.', expected=True)

        formats = []
        for channel in ('JIO',):
            cdn_query = {
                'action': 'GetCDN',
                'asJson': 'Y',
                'channel': channel,
                'id': video_id,
                'type': 'VOD',
            }
            cdn_data = self._download_json(
                'http://getcdn.hotstar.com/AVS/besc',
                video_id, 'Downloading %s JSON metadata' % channel,
                fatal=False, query=cdn_query)
            # Non-fatal request: nothing usable came back for this channel.
            if not cdn_data:
                continue

            media_url = cdn_data.get('src')
            if not media_url:
                continue

            media_ext = determine_ext(media_url)
            if media_ext == 'f4m':
                # produce broken files
                continue
            if media_ext == 'm3u8':
                hls_formats = self._extract_m3u8_formats(
                    media_url, video_id, 'mp4',
                    m3u8_id='hls', fatal=False)
                formats.extend(hls_formats)
                continue

            # Anything else is offered as a direct download.
            formats.append({
                'url': media_url,
                'width': int_or_none(cdn_data.get('width')),
                'height': int_or_none(cdn_data.get('height')),
            })

        self._sort_formats(formats)

        info = {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'duration': int_or_none(video_data.get('duration')),
            'timestamp': int_or_none(video_data.get('broadcastDate')),
            'formats': formats,
            'episode': title,
            'episode_number': int_or_none(video_data.get('episodeNumber')),
            'series': video_data.get('contentTitle'),
        }
        return info
115
116
class HotStarPlaylistIE(HotStarBaseIE):
    """Extractor for Hotstar TV-show listings (episodes / popular clips)."""

    IE_NAME = 'hotstar:playlist'
    _VALID_URL = r'(?P<url>https?://(?:www\.)?hotstar\.com/tv/[^/]+/(?P<content_id>\d+))/(?P<type>[^/]+)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'http://www.hotstar.com/tv/pratidaan/14982/episodes/14812/9993',
            'info_dict': {
                'id': '14812',
            },
            'playlist_mincount': 75,
        },
        {
            'url': 'http://www.hotstar.com/tv/pratidaan/14982/popular-clips/9998/9998',
            'only_matching': True,
        },
    ]
    # Maps the listing type found in the URL path to the search API 'type'.
    _ITEM_TYPES = {
        'episodes': 'EPISODE',
        'popular-clips': 'CLIPS',
    }

    def _real_extract(self, url):
        """Return a playlist whose entries point at the individual videos.

        The playlist id is the 'categoryId' reported for the show's content
        id; entries are delegated to HotStarIE.
        """
        match = re.match(self._VALID_URL, url)
        base_url, content_id, playlist_type = match.group(
            'url', 'content_id', 'type')

        content_info = self._download_content_info(content_id)
        playlist_id = compat_str(content_info['categoryId'])

        search_query = {
            'action': 'SearchContents',
            'appVersion': '5.0.40',
            'channel': 'PCTV',
            'moreFilters': 'series:%s;' % playlist_id,
            'query': '*',
            'searchOrder': 'last_broadcast_date desc,year desc,title asc',
            # Unknown listing types fall back to episodes.
            'type': self._ITEM_TYPES.get(playlist_type, 'EPISODE'),
        }
        collection = self._download_json(
            'https://search.hotstar.com/AVS/besc', playlist_id,
            query=search_query)

        entries = []
        for doc in collection['response']['docs']:
            # Skip search hits that carry no usable content id.
            if not doc.get('contentId'):
                continue
            item_id = doc['contentId']
            entries.append(self.url_result(
                '%s/_/%s' % (base_url, item_id),
                ie=HotStarIE.ie_key(), video_id=item_id))

        return self.playlist_result(entries, playlist_id)
160 if video.get('contentId')]
161
162 return self.playlist_result(entries, playlist_id)