+class YoutubePlaylistBaseInfoExtractor(InfoExtractor):
+ # Extract the video ids from the playlist pages
+ def _entries(self, page, playlist_id):
+ more_widget_html = content_html = page
+ for page_num in itertools.count(1):
+ for video_id, video_title in self.extract_videos_from_page(content_html):
+ yield self.url_result(
+ video_id, 'Youtube', video_id=video_id,
+ video_title=video_title)
+
+ mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
+ if not mobj:
+ break
+
+ more = self._download_json(
+ 'https://youtube.com/%s' % mobj.group('more'), playlist_id,
+ 'Downloading page #%s' % page_num,
+ transform_source=uppercase_escape)
+ content_html = more['content_html']
+ if not content_html.strip():
+ # Some webpages show a "Load more" button but they don't
+ # have more videos
+ break
+ more_widget_html = more['load_more_widget_html']
+
+ def extract_videos_from_page(self, page):
+ ids_in_page = []
+ titles_in_page = []
+ for mobj in re.finditer(self._VIDEO_RE, page):
+ # The link with index 0 is not the first video of the playlist (not sure if still actual)
+ if 'index' in mobj.groupdict() and mobj.group('id') == '0':
+ continue
+ video_id = mobj.group('id')
+ video_title = unescapeHTML(mobj.group('title'))
+ if video_title:
+ video_title = video_title.strip()
+ try:
+ idx = ids_in_page.index(video_id)
+ if video_title and not titles_in_page[idx]:
+ titles_in_page[idx] = video_title
+ except ValueError:
+ ids_in_page.append(video_id)
+ titles_in_page.append(video_title)
+ return zip(ids_in_page, titles_in_page)
+
+