- thumbnail = video_info.find('.//thumbnail_url').text
- description = video_info.find('.//description').text
- upload_date = unified_strdate(video_info.find('.//first_retrieve').text.split('+')[0])
- view_count = int_or_none(video_info.find('.//view_counter').text)
- comment_count = int_or_none(video_info.find('.//comment_num').text)
- duration = parse_duration(video_info.find('.//length').text)
- webpage_url = video_info.find('.//watch_url').text
+
+ thumbnail = (
+ xpath_text(video_info, './/thumbnail_url') or
+ self._html_search_meta('image', webpage, 'thumbnail', default=None) or
+ video_detail.get('thumbnail'))
+
+ description = xpath_text(video_info, './/description')
+
+ timestamp = parse_iso8601(xpath_text(video_info, './/first_retrieve'))
+ if not timestamp:
+ match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
+ if match:
+ timestamp = parse_iso8601(match.replace('+', ':00+'))
+ if not timestamp and video_detail.get('postedAt'):
+ timestamp = parse_iso8601(
+ video_detail['postedAt'].replace('/', '-'),
+ delimiter=' ', timezone=datetime.timedelta(hours=9))
+
+ view_count = int_or_none(xpath_text(video_info, './/view_counter'))
+ if not view_count:
+ match = self._html_search_regex(
+ r'>Views: <strong[^>]*>([^<]+)</strong>',
+ webpage, 'view count', default=None)
+ if match:
+ view_count = int_or_none(match.replace(',', ''))
+ view_count = view_count or video_detail.get('viewCount')
+
+ comment_count = int_or_none(xpath_text(video_info, './/comment_num'))
+ if not comment_count:
+ match = self._html_search_regex(
+ r'>Comments: <strong[^>]*>([^<]+)</strong>',
+ webpage, 'comment count', default=None)
+ if match:
+ comment_count = int_or_none(match.replace(',', ''))
+ comment_count = comment_count or video_detail.get('commentCount')
+
+ duration = (parse_duration(
+ xpath_text(video_info, './/length') or
+ self._html_search_meta(
+ 'video:duration', webpage, 'video duration', default=None)) or
+ video_detail.get('length'))
+
+ webpage_url = xpath_text(video_info, './/watch_url') or url