import re
from .common import InfoExtractor
-from ..utils import int_or_none
+from ..utils import (
+ int_or_none,
+ orderedSet,
+)
class WebOfStoriesIE(InfoExtractor):
_VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/'
_GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/'
_USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/'
- _TESTS = [
- {
- 'url': 'http://www.webofstories.com/play/hans.bethe/71',
- 'md5': '373e4dd915f60cfe3116322642ddf364',
- 'info_dict': {
- 'id': '4536',
- 'ext': 'mp4',
- 'title': 'The temperature of the sun',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'description': 'Hans Bethe talks about calculating the temperature of the sun',
- 'duration': 238,
- }
+ _TESTS = [{
+ 'url': 'http://www.webofstories.com/play/hans.bethe/71',
+ 'md5': '373e4dd915f60cfe3116322642ddf364',
+ 'info_dict': {
+ 'id': '4536',
+ 'ext': 'mp4',
+ 'title': 'The temperature of the sun',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Hans Bethe talks about calculating the temperature of the sun',
+ 'duration': 238,
+ }
+ }, {
+ 'url': 'http://www.webofstories.com/play/55908',
+ 'md5': '2985a698e1fe3211022422c4b5ed962c',
+ 'info_dict': {
+ 'id': '55908',
+ 'ext': 'mp4',
+ 'title': 'The story of Gemmata obscuriglobus',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
+ 'duration': 169,
+ },
+ 'skip': 'notfound',
+ }, {
+ # og:title meta is malformed on this page; exercises the fallback to the "Title:" markup
+ 'url': 'http://www.webofstories.com/play/54215?o=MS',
+ 'info_dict': {
+ 'id': '54215',
+ 'ext': 'mp4',
+ 'title': '"A Leg to Stand On"',
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'description': 'Oliver Sacks talks about the death and resurrection of a limb',
+ 'duration': 97,
},
- {
- 'url': 'http://www.webofstories.com/play/55908',
- 'md5': '2985a698e1fe3211022422c4b5ed962c',
- 'info_dict': {
- 'id': '55908',
- 'ext': 'mp4',
- 'title': 'The story of Gemmata obscuriglobus',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus',
- 'duration': 169,
- }
+ 'params': {
+ 'skip_download': True,
},
- ]
+ }]
def _real_extract(self, url):
video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id)
- title = self._og_search_title(webpage)
+ # og:title meta is sometimes malformed; fall back to the <strong>Title:</strong> markup in the page
+ title = self._og_search_title(webpage, default=None) or self._html_search_regex(
+ r'(?s)<strong>Title:\s*</strong>(.+?)<', webpage, 'title')
description = self._html_search_meta('description', webpage)
thumbnail = self._og_search_thumbnail(webpage)
webpage = self._download_webpage(url, playlist_id)
entries = [
- self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories')
- for video_number in set(re.findall('href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage))
+ self.url_result(
+ 'http://www.webofstories.com/play/%s' % video_id,
+ 'WebOfStories', video_id=video_id)
+ for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage))
]
title = self._search_regex(