# coding: utf-8
from __future__ import unicode_literals
+import re
+
from .common import InfoExtractor
from ..utils import int_or_none
description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage)
- story_filename = self._search_regex(
- r'\.storyFileName\("([^"]+)"\)', webpage, 'story filename')
- speaker_id = self._search_regex(
- r'\.speakerId\("([^"]+)"\)', webpage, 'speaker ID')
- story_id = self._search_regex(
- r'\.storyId\((\d+)\)', webpage, 'story ID')
- speaker_type = self._search_regex(
- r'\.speakerType\("([^"]+)"\)', webpage, 'speaker type')
- great_life = self._search_regex(
- r'isGreatLifeStory\s*=\s*(true|false)', webpage, 'great life story')
+ embed_params = [s.strip(" \r\n\t'") for s in self._search_regex(
+ r'(?s)\$\("#embedCode"\).html\(getEmbedCode\((.*?)\)',
+ webpage, 'embed params').split(',')]
+
+ (
+ _, speaker_id, story_id, story_duration,
+ speaker_type, great_life, _thumbnail, _has_subtitles,
+ story_filename, _story_order) = embed_params
+
is_great_life_series = great_life == 'true'
- duration = int_or_none(self._search_regex(
- r'\.duration\((\d+)\)', webpage, 'duration', fatal=False))
+ duration = int_or_none(story_duration)
# URL building, see: http://www.webofstories.com/scripts/player.js
ms_prefix = ''
'description': description,
'duration': duration,
}
+
+
class WebOfStoriesPlaylistIE(InfoExtractor):
    """Playlist extractor for webofstories.com "playAll" pages.

    The page at ``/playAll/<id>`` is scanned for links of the form
    ``/playAll/<id>?sId=<number>``; every distinct number becomes one
    playlist entry pointing at ``/play/<number>``, delegated to the
    extractor registered under the key ``WebOfStories``.
    """

    _VALID_URL = r'https?://(?:www\.)?webofstories\.com/playAll/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.webofstories.com/playAll/donald.knuth',
        'info_dict': {
            'id': 'donald.knuth',
            'title': 'Donald Knuth (Scientist)',
        },
        'playlist_mincount': 97,
    }

    def _real_extract(self, url):
        """Build the playlist result for the "playAll" page at *url*.

        The playlist title is the speaker name, with the speaker's primary
        field appended in parentheses when present; if no speaker name is
        found, the title is taken from the page's ``<title>`` element
        instead (that last lookup uses no default, unlike the first two).
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # The pattern is a raw string so that ``\?`` and ``\d`` reach the
        # regex engine untouched, and the playlist id is escaped because it
        # is taken from the URL and typically contains a ``.``, which would
        # otherwise match any character.
        story_link_re = (
            r'href="/playAll/%s\?sId=(\d+)"' % re.escape(playlist_id))

        # Drop duplicate links while keeping the order in which the stories
        # appear on the page; a plain set() would shuffle the playlist.
        seen_numbers = set()
        video_numbers = []
        for video_number in re.findall(story_link_re, webpage):
            if video_number in seen_numbers:
                continue
            seen_numbers.add(video_number)
            video_numbers.append(video_number)

        entries = [
            self.url_result(
                'http://www.webofstories.com/play/%s' % video_number,
                'WebOfStories')
            for video_number in video_numbers
        ]

        title = self._search_regex(
            r'<div id="speakerName">\s*<span>([^<]+)</span>',
            webpage, 'speaker', default=None)
        if title:
            field = self._search_regex(
                r'<span id="primaryField">([^<]+)</span>',
                webpage, 'field', default=None)
            if field:
                title += ' (%s)' % field

        if not title:
            # Fall back to the document title when the speaker block is
            # missing from the page.
            title = self._search_regex(
                r'<title>Play\s+all\s+stories\s*-\s*([^<]+)\s*-\s*Web\s+of\s+Stories</title>',
                webpage, 'title')

        return self.playlist_result(entries, playlist_id, title)