- view_count = self._html_search_regex(
- r'<strong>Views: </strong>([\d,\.]+)\s*</li>', webpage, 'view count', fatal=False)
- if view_count:
- view_count = str_to_int(view_count)
- comment_count = self._html_search_regex(
- r'<span id="allCommentsCount">(\d+)</span>', webpage, 'comment count', fatal=False)
- if comment_count:
- comment_count = str_to_int(comment_count)
+ view_count = str_to_int(self._search_regex(
+ r'Views:\s*</dt>\s*<dd>([\d,\.]+)',
+ webpage, 'view count', fatal=False))
+ comment_count = str_to_int(self._search_regex(
+ r'<span id="allCommentsCount">(\d+)</span>',
+ webpage, 'comment count', fatal=False))
+
+ category = self._search_regex(
+ r'Category:\s*</dt>\s*<dd>\s*<a[^>]+href=[^>]+>([^<]+)',
+ webpage, 'category', fatal=False)
+ categories = [category] if category else None
+
+ tags_str = self._search_regex(
+ r'(?s)Tags:\s*</dt>\s*<dd>(.+?)</(?!a)',
+ webpage, 'tags', fatal=False)
+ tags = [t for t in re.findall(
+ r'<a[^>]+href=[^>]+>([^<]+)', tags_str)] if tags_str else None