xpath_element,
xpath_with_ns,
find_xpath_attr,
+ orderedSet,
parse_duration,
parse_iso8601,
parse_age_limit,
+ strip_or_none,
int_or_none,
ExtractorError,
)
def _real_extract(self, url):
    """Build a playlist from the media embedded in the page at *url*.

    The page is downloaded once and scanned for two kinds of embeds:
    ``CBC.APP.Caffeine.initInstance({...});`` calls, whose JSON-like
    argument is handed to ``self._extract_player_init``, and numeric
    media IDs, which are turned into ``cbcplayer:<id>`` URL results
    for the ``CBCPlayer`` extractor.

    :param url: page URL; its display id is taken from ``self._match_id``.
    :return: whatever ``self.playlist_result`` returns for the collected
        entries, the display id, the title and the OpenGraph description.
    """
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)

    # Title sources, tried in order until one yields a truthy value:
    # OpenGraph title, the twitter:title meta tag, then the <title> element.
    # Each lookup is non-fatal, so the title may still be None here.
    title = (
        self._og_search_title(webpage, default=None)
        or self._html_search_meta(
            'twitter:title', webpage, 'title', default=None)
        or self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title', fatal=False))

    # One entry per inline player initialisation call found in the page.
    entries = [
        self._extract_player_init(player_init, display_id)
        for player_init in re.findall(
            r'CBC\.APP\.Caffeine\.initInstance\(({.+?})\);', webpage)]

    # Media IDs can appear in several markup shapes: an iframe src with a
    # mediaId query parameter, a <div id="player-NNN">, or a "guid" key.
    media_ids = []
    for media_id_re in (
            r'<iframe[^>]+src="[^"]+?mediaId=(\d+)"',
            r'<div[^>]+\bid=["\']player-(\d+)',
            r'guid["\']\s*:\s*["\'](\d+)'):
        media_ids.extend(re.findall(media_id_re, webpage))

    # The same ID may be matched by more than one pattern; orderedSet is
    # presumably what removes the repeats while keeping first-seen order
    # (NOTE(review): confirm against the utils implementation).
    entries.extend([
        self.url_result('cbcplayer:%s' % media_id, 'CBCPlayer', media_id)
        for media_id in orderedSet(media_ids)])

    # strip_or_none is applied because title may be None at this point
    # (assumes it passes None through unchanged -- TODO confirm).
    return self.playlist_result(
        entries, display_id, strip_or_none(title),
        self._og_search_description(webpage))