- film_id = self._search_regex(r'filmId=([\da-f-]{36})"', webpage, 'film id')
-
- snag = self._parse_json(
- self._search_regex(
- r'Snag\.page\.data\s*=\s*(\[.+?\]);', webpage, 'snag'),
- display_id)
-
- for item in snag:
- if item.get('data', {}).get('film', {}).get('id') == film_id:
- data = item['data']['film']
- title = data['title']
- description = clean_html(data.get('synopsis'))
- thumbnail = data.get('image')
- duration = int_or_none(data.get('duration') or data.get('runtime'))
- categories = [
- category['title'] for category in data.get('categories', [])
- if category.get('title')]
- break
+ # The page may carry its state as window.initialStoreState, encoded as
+ # JSON.parse(unescape(atob('...'))). default=None makes a missing blob
+ # non-fatal, so the branch below is simply skipped.
+ initial_store_state = self._search_regex(
+     r"window\.initialStoreState\s*=.*?JSON\.parse\(unescape\(atob\('([^']+)'\)\)\)",
+     webpage, 'Initial Store State', default=None)
+ if initial_store_state:
+     # Undo the page's encoding in reverse order:
+     # base64 -> percent-unescape -> JSON.
+     modules = self._parse_json(compat_urllib_parse_unquote(base64.b64decode(
+         initial_store_state).decode()), display_id)['page']['data']['modules']
+     # NOTE(review): next() has no default here, so a page without a
+     # VideoDetailModule raises StopIteration - confirm this is intended.
+     content_data = next(m['contentData'][0] for m in modules if m.get('moduleType') == 'VideoDetailModule')
+     gist = content_data['gist']
+     film_id = gist['id']
+     title = gist['title']
+     video_assets = content_data['streamingInfo']['videoAssets']
+
+     # Progressive (direct HTTP) renditions.
+     formats = []
+     mpeg_video_assets = video_assets.get('mpeg') or []
+     for video_asset in mpeg_video_assets:
+         video_asset_url = video_asset.get('url')
+         # Skip assets that have no URL. The check has to be on the URL:
+         # the asset dict itself is truthy even when 'url' is missing.
+         if not video_asset_url:
+             continue
+         bitrate = int_or_none(video_asset.get('bitrate'))
+         # The pattern accepts values such as "720p" or "_720p". Fall back
+         # to '' so a missing renditionValue gives height=None rather than
+         # a TypeError from searching None.
+         height = int_or_none(self._search_regex(
+             r'^_?(\d+)[pP]$', video_asset.get('renditionValue') or '',
+             'height', default=None))
+         formats.append({
+             'url': video_asset_url,
+             'format_id': 'http%s' % ('-%d' % bitrate if bitrate else ''),
+             'tbr': bitrate,
+             'height': height,
+             'vcodec': video_asset.get('codec'),
+         })
+
+     # HLS renditions; fatal=False is passed so the call is best-effort.
+     hls_url = video_assets.get('hls')
+     if hls_url:
+         formats.extend(self._extract_m3u8_formats(
+             hls_url, film_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
+     self._sort_formats(formats, ('height', 'tbr', 'format_id'))
+
+     info = {
+         'id': film_id,
+         'display_id': display_id,
+         'title': title,
+         'description': gist.get('description'),
+         'thumbnail': gist.get('videoImageUrl'),
+         'duration': int_or_none(gist.get('runtime')),
+         'age_limit': parse_age_limit(content_data.get('parentalRating')),
+         # publishDate is scaled down by 1000 before being used as timestamp.
+         'timestamp': int_or_none(gist.get('publishDate'), 1000),
+         'formats': formats,
+     }
+     # 'or []' also covers a key that is present but null.
+     for k in ('categories', 'tags'):
+         info[k] = [v['title'] for v in content_data.get(k) or [] if v.get('title')]
+     return info