+ description = self._og_search_description(webpage) or self._html_search_meta(
+ 'description', webpage, 'description')
+
+ view_count = str_to_int(self._search_regex(
+ [r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:(\d+)"',
+ r'video_views_count[^>]+>\s+([\d\.,]+)'],
+ webpage, 'view count', fatal=False))
+ comment_count = int_or_none(self._search_regex(
+ r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
+ webpage, 'comment count', fatal=False))
+
+ player_v5 = self._search_regex(
+ [r'buildPlayer\(({.+?})\);', r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);'],
+ webpage, 'player v5', default=None)
+ if player_v5:
+ player = self._parse_json(player_v5, video_id)
+ metadata = player['metadata']
+
+ self._check_error(metadata)
+
+ formats = []
+ for quality, media_list in metadata['qualities'].items():
+ for media in media_list:
+ media_url = media.get('url')
+ if not media_url:
+ continue
+ type_ = media.get('type')
+ if type_ == 'application/vnd.lumberjack.manifest':
+ continue
+ ext = determine_ext(media_url)
+ if type_ == 'application/x-mpegURL' or ext == 'm3u8':
+ m3u8_formats = self._extract_m3u8_formats(
+ media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+ if m3u8_formats:
+ formats.extend(m3u8_formats)
+ elif type_ == 'application/f4m' or ext == 'f4m':
+ f4m_formats = self._extract_f4m_formats(
+ media_url, video_id, preference=-1, f4m_id='hds', fatal=False)
+ if f4m_formats:
+ formats.extend(f4m_formats)
+ else:
+ f = {
+ 'url': media_url,
+ 'format_id': quality,
+ }
+ m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
+ if m:
+ f.update({
+ 'width': int(m.group('width')),
+ 'height': int(m.group('height')),
+ })
+ formats.append(f)
+ self._sort_formats(formats)
+
+ title = metadata['title']
+ duration = int_or_none(metadata.get('duration'))
+ timestamp = int_or_none(metadata.get('created_time'))
+ thumbnail = metadata.get('poster_url')
+ uploader = metadata.get('owner', {}).get('screenname')
+ uploader_id = metadata.get('owner', {}).get('id')
+
+ subtitles = {}
+ for subtitle_lang, subtitle in metadata.get('subtitles', {}).get('data', {}).items():
+ subtitles[subtitle_lang] = [{
+ 'ext': determine_ext(subtitle_url),
+ 'url': subtitle_url,
+ } for subtitle_url in subtitle.get('urls', [])]
+
+ return {
+ 'id': video_id,
+ 'title': title,
+ 'description': description,
+ 'thumbnail': thumbnail,
+ 'duration': duration,
+ 'timestamp': timestamp,
+ 'uploader': uploader,
+ 'uploader_id': uploader_id,
+ 'age_limit': age_limit,
+ 'view_count': view_count,
+ 'comment_count': comment_count,
+ 'formats': formats,
+ 'subtitles': subtitles,
+ }