-        # Retrieve video webpage to extract further information
-        request = compat_urllib_request.Request(url)
-        request.add_header('Cookie', 'family_filter=off')
-        webpage = self._download_webpage(request, video_id)
-
-        # Extract URL, uploader and title from webpage
-        self.report_extraction(video_id)
-
-        video_uploader = self._search_regex([r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>',
-                                             # Looking for official user
-                                             r'<(?:span|a) .*?rel="author".*?>([^<]+?)</'],
-                                            webpage, 'video uploader')
-
-        video_upload_date = None
-        mobj = re.search(r'<div class="[^"]*uploaded_cont[^"]*" title="[^"]*">([0-9]{2})-([0-9]{2})-([0-9]{4})</div>', webpage)
-        if mobj is not None:
-            video_upload_date = mobj.group(3) + mobj.group(2) + mobj.group(1)
-
-        embed_url = 'http://www.dailymotion.com/embed/video/%s' % video_id
-        embed_page = self._download_webpage(embed_url, video_id,
-                                            u'Downloading embed page')
-        info = self._search_regex(r'var info = ({.*?}),', embed_page, 'video info')
-        info = json.loads(info)
-
-        # TODO: support choosing qualities
-
-        for key in ['stream_h264_hd1080_url','stream_h264_hd_url',
-                    'stream_h264_hq_url','stream_h264_url',
-                    'stream_h264_ld_url']:
-            if info.get(key):#key in info and info[key]:
-                max_quality = key
-                self.to_screen(u'Using %s' % key)
-                break
-        else:
-            raise ExtractorError(u'Unable to extract video URL')
-        video_url = info[max_quality]
-
-        return [{
-            'id': video_id,
-            'url': video_url,
-            'uploader': video_uploader,
-            'upload_date': video_upload_date,
-            'title': self._og_search_title(webpage),
-            'ext': video_extension,
-            'thumbnail': info['thumbnail_url']
-        }]
-
-
-class DailymotionPlaylistIE(InfoExtractor):
+        description = self._og_search_description(webpage) or self._html_search_meta(
+            'description', webpage, 'description')
+
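+        # View and comment counts are read from the itemprop="interactionCount" meta tags
+        # (with an HTML fallback for the view count).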
+        view_count = str_to_int(self._search_regex(
+            [r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserPlays:(\d+)"',
+             r'video_views_count[^>]+>\s+([\d\.,]+)'],
+            webpage, 'view count', fatal=False))
+        comment_count = int_or_none(self._search_regex(
+            r'<meta[^>]+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"',
+            webpage, 'comment count', fatal=False))
+
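+        # Newer pages embed a "player v5" JSON config; prefer its metadata when it is present.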
+        player_v5 = self._search_regex(
+            [r'buildPlayer\(({.+?})\);', r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);'],
+            webpage, 'player v5', default=None)
+        if player_v5:
+            player = self._parse_json(player_v5, video_id)
+            metadata = player['metadata']
+
+            self._check_error(metadata)
+
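+            # metadata['qualities'] maps each quality label to a list of media entries;
+            # HLS/HDS manifests are expanded into formats, plain URLs are added directly.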
+            formats = []
+            for quality, media_list in metadata['qualities'].items():
+                for media in media_list:
+                    media_url = media.get('url')
+                    if not media_url:
+                        continue
+                    type_ = media.get('type')
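+                    # Skip lumberjack manifests; the remaining entries are handled as HLS, HDS or direct URLs.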
+                    if type_ == 'application/vnd.lumberjack.manifest':
+                        continue
+                    ext = determine_ext(media_url)
+                    if type_ == 'application/x-mpegURL' or ext == 'm3u8':
+                        m3u8_formats = self._extract_m3u8_formats(
+                            media_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
+                        if m3u8_formats:
+                            formats.extend(m3u8_formats)
+                    elif type_ == 'application/f4m' or ext == 'f4m':
+                        f4m_formats = self._extract_f4m_formats(
+                            media_url, video_id, preference=-1, f4m_id='hds', fatal=False)
+                        if f4m_formats:
+                            formats.extend(f4m_formats)
+                    else:
+                        f = {
+                            'url': media_url,
+                            'format_id': quality,
+                        }
+                        m = re.search(r'H264-(?P<width>\d+)x(?P<height>\d+)', media_url)
+                        if m:
+                            f.update({
+                                'width': int(m.group('width')),
+                                'height': int(m.group('height')),
+                            })
+                        formats.append(f)
+            self._sort_formats(formats)
+
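+            # Map the remaining metadata fields onto the info dict.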
+            title = metadata['title']
+            duration = int_or_none(metadata.get('duration'))
+            timestamp = int_or_none(metadata.get('created_time'))
+            thumbnail = metadata.get('poster_url')
+            uploader = metadata.get('owner', {}).get('screenname')
+            uploader_id = metadata.get('owner', {}).get('id')
+
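+            # Subtitles are grouped by language, each entry carrying one or more URLs.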
+            subtitles = {}
+            for subtitle_lang, subtitle in metadata.get('subtitles', {}).get('data', {}).items():
+                subtitles[subtitle_lang] = [{
+                    'ext': determine_ext(subtitle_url),
+                    'url': subtitle_url,
+                } for subtitle_url in subtitle.get('urls', [])]
+
+            return {
+                'id': video_id,
+                'title': title,
+                'description': description,
+                'thumbnail': thumbnail,
+                'duration': duration,
+                'timestamp': timestamp,
+                'uploader': uploader,
+                'uploader_id': uploader_id,
+                'age_limit': age_limit,
+                'view_count': view_count,
+                'comment_count': comment_count,
+                'formats': formats,
+                'subtitles': subtitles,
+            }
+
+        # vevo embed
+        vevo_id = self._search_regex(
+            r'<link rel="video_src" href="[^"]*?vevo.com[^"]*?video=(?P<id>[\w]*)',
+            webpage, 'vevo embed', default=None)
+        if vevo_id:
+            return self.url_result('vevo:%s' % vevo_id, 'Vevo')
+
+        # fallback old player
+        embed_page = self._download_webpage_no_ff(
+            'https://www.dailymotion.com/embed/video/%s' % video_id,
+            video_id, 'Downloading embed page')
+
+        timestamp = parse_iso8601(self._html_search_meta(
+            'video:release_date', webpage, 'upload date'))
+
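+        # The embed page exposes the stream data as a JavaScript "var info = {...}" assignment.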
+        info = self._parse_json(
+            self._search_regex(
+                r'var info = ({.*?}),$', embed_page,
+                'video info', flags=re.MULTILINE),
+            video_id)
+
+        self._check_error(info)
+
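+        # self._FORMATS maps info keys to format ids; the frame size is parsed
+        # from the "H264-<width>x<height>" part of the URL when present.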
+        formats = []
+        for (key, format_id) in self._FORMATS:
+            video_url = info.get(key)
+            if video_url is not None:
+                m_size = re.search(r'H264-(\d+)x(\d+)', video_url)
+                if m_size is not None:
+                    width, height = map(int_or_none, (m_size.group(1), m_size.group(2)))
+                else:
+                    width, height = None, None
+                formats.append({
+                    'url': video_url,
+                    'ext': 'mp4',
+                    'format_id': format_id,
+                    'width': width,
+                    'height': height,
+                })
+        self._sort_formats(formats)
+
+        # subtitles
+        video_subtitles = self.extract_subtitles(video_id, webpage)
+
+        title = self._og_search_title(webpage, default=None)
+        if title is None:
+            title = self._html_search_regex(
+                r'(?s)<span\s+id="video_title"[^>]*>(.*?)</span>', webpage,
+                'title')
+
+        return {
+            'id': video_id,
+            'formats': formats,
+            'uploader': info['owner.screenname'],
+            'timestamp': timestamp,
+            'title': title,
+            'description': description,
+            'subtitles': video_subtitles,
+            'thumbnail': info['thumbnail_url'],
+            'age_limit': age_limit,
+            'view_count': view_count,
+            'duration': info['duration']
+        }
+
+    def _check_error(self, info):
+        if info.get('error') is not None:
+            raise ExtractorError(
+                '%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True)
+
+    def _get_subtitles(self, video_id, webpage):
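+        # Fetch the list of subtitle tracks from the Dailymotion API.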
+        try:
+            sub_list = self._download_webpage(
+                'https://api.dailymotion.com/video/%s/subtitles?fields=id,language,url' % video_id,
+                video_id, note=False)
+        except ExtractorError as err:
+            self._downloader.report_warning('unable to download video subtitles: %s' % compat_str(err))
+            return {}
+        info = json.loads(sub_list)
+        if info['total'] > 0:
+            sub_lang_list = dict((l['language'], [{'url': l['url'], 'ext': 'srt'}]) for l in info['list'])
+            return sub_lang_list
+        self._downloader.report_warning('video doesn\'t have subtitles')
+        return {}
+
+
+class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
+    IE_NAME = 'dailymotion:playlist'