]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hrti.py
2 from __future__
import unicode_literals
7 from .common
import InfoExtractor
8 from ..compat
import compat_HTTPError
19 class HRTiBaseIE(InfoExtractor
):
21 Base Information Extractor for Croatian Radiotelevision
22 video on demand site https://hrti.hrt.hr
23 Reverse engineered from the JavaScript app in app.min.js
25 _NETRC_MACHINE
= 'hrti'
29 _APP_PUBLICATION_ID
= 'all_in_one'
30 _API_URL
= 'http://clientapi.hrt.hr/client_api.php/config/identify/format/json'
32 def _initialize_api(self
):
34 'application_publication_id': self
._APP
_PUBLICATION
_ID
37 uuid
= self
._download
_json
(
38 self
._API
_URL
, None, note
='Downloading uuid',
39 errnote
='Unable to download uuid',
40 data
=json
.dumps(init_data
).encode('utf-8'))['uuid']
44 'application_publication_id': self
._APP
_PUBLICATION
_ID
,
45 'application_version': self
._APP
_VERSION
48 req
= sanitized_Request(self
._API
_URL
, data
=json
.dumps(app_data
).encode('utf-8'))
49 req
.get_method
= lambda: 'PUT'
51 resources
= self
._download
_json
(
52 req
, None, note
='Downloading session information',
53 errnote
='Unable to download session information')
55 self
._session
_id
= resources
['session_id']
57 modules
= resources
['modules']
59 self
._search
_url
= modules
['vod_catalog']['resources']['search']['uri'].format(
60 language
=self
._APP
_LANGUAGE
,
61 application_id
=self
._APP
_PUBLICATION
_ID
)
63 self
._login
_url
= (modules
['user']['resources']['login']['uri'] +
64 '/format/json').format(session_id
=self
._session
_id
)
66 self
._logout
_url
= modules
['user']['resources']['logout']['uri']
69 (username
, password
) = self
._get
_login
_info
()
70 # TODO: figure out authentication with cookies
71 if username
is None or password
is None:
72 self
.raise_login_required()
80 auth_info
= self
._download
_json
(
81 self
._login
_url
, None, note
='Logging in', errnote
='Unable to log in',
82 data
=json
.dumps(auth_data
).encode('utf-8'))
83 except ExtractorError
as e
:
84 if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code
== 406:
85 auth_info
= self
._parse
_json
(e
.cause
.read().encode('utf-8'), None)
89 error_message
= auth_info
.get('error', {}).get('message')
92 '%s said: %s' % (self
.IE_NAME
, error_message
),
95 self
._token
= auth_info
['secure_streaming_token']
97 def _real_initialize(self
):
98 self
._initialize
_api
()
102 class HRTiIE(HRTiBaseIE
):
103 _VALID_URL
= r
'''(?x)
105 hrti:(?P<short_id>[0-9]+)|
107 hrti\.hrt\.hr/\#/video/show/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?
111 'url': 'https://hrti.hrt.hr/#/video/show/2181385/republika-dokumentarna-serija-16-hd',
114 'display_id': 'republika-dokumentarna-serija-16-hd',
116 'title': 'REPUBLIKA, dokumentarna serija (1/6) (HD)',
117 'description': 'md5:48af85f620e8e0e1df4096270568544f',
120 'average_rating': int,
121 'episode_number': int,
122 'season_number': int,
125 'skip': 'Requires account credentials',
127 'url': 'https://hrti.hrt.hr/#/video/show/2181385/',
128 'only_matching': True,
130 'url': 'hrti:2181385',
131 'only_matching': True,
134 def _real_extract(self
, url
):
135 mobj
= re
.match(self
._VALID
_URL
, url
)
136 video_id
= mobj
.group('short_id') or mobj
.group('id')
137 display_id
= mobj
.group('display_id') or video_id
139 video
= self
._download
_json
(
140 '%s/video_id/%s/format/json' % (self
._search
_url
, video_id
),
141 display_id
, 'Downloading video metadata JSON')['video'][0]
143 title_info
= video
['title']
144 title
= title_info
['title_long']
146 movie
= video
['video_assets']['movie'][0]
147 m3u8_url
= movie
['url'].format(TOKEN
=self
._token
)
148 formats
= self
._extract
_m
3u8_formats
(
149 m3u8_url
, display_id
, 'mp4', entry_protocol
='m3u8_native',
151 self
._sort
_formats
(formats
)
153 description
= clean_html(title_info
.get('summary_long'))
154 age_limit
= parse_age_limit(video
.get('parental_control', {}).get('rating'))
155 view_count
= int_or_none(video
.get('views'))
156 average_rating
= int_or_none(video
.get('user_rating'))
157 duration
= int_or_none(movie
.get('duration'))
161 'display_id': display_id
,
163 'description': description
,
164 'duration': duration
,
165 'view_count': view_count
,
166 'average_rating': average_rating
,
167 'age_limit': age_limit
,
172 class HRTiPlaylistIE(HRTiBaseIE
):
173 _VALID_URL
= r
'https?://hrti.hrt.hr/#/video/list/category/(?P<id>[0-9]+)/(?P<display_id>[^/]+)?'
175 'url': 'https://hrti.hrt.hr/#/video/list/category/212/ekumena',
180 'playlist_mincount': 8,
181 'skip': 'Requires account credentials',
183 'url': 'https://hrti.hrt.hr/#/video/list/category/212/',
184 'only_matching': True,
187 def _real_extract(self
, url
):
188 mobj
= re
.match(self
._VALID
_URL
, url
)
189 category_id
= mobj
.group('id')
190 display_id
= mobj
.group('display_id') or category_id
192 response
= self
._download
_json
(
193 '%s/category_id/%s/format/json' % (self
._search
_url
, category_id
),
194 display_id
, 'Downloading video metadata JSON')
197 response
, lambda x
: x
['video_listings'][0]['alternatives'][0]['list'],
198 list) or [video
['id'] for video
in response
.get('videos', []) if video
.get('id')]
200 entries
= [self
.url_result('hrti:%s' % video_id
) for video_id
in video_ids
]
202 return self
.playlist_result(entries
, category_id
, display_id
)