X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/38bb9b1b0a044cabaf5691553815e334cd2e9213..dd50658396011c8e98b7201f035408b7959d56e1:/youtube_dl/extractor/webofstories.py diff --git a/youtube_dl/extractor/webofstories.py b/youtube_dl/extractor/webofstories.py index 2037d9b..f2b8d19 100644 --- a/youtube_dl/extractor/webofstories.py +++ b/youtube_dl/extractor/webofstories.py @@ -4,7 +4,10 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..utils import int_or_none +from ..utils import ( + int_or_none, + orderedSet, +) class WebOfStoriesIE(InfoExtractor): @@ -12,38 +15,52 @@ class WebOfStoriesIE(InfoExtractor): _VIDEO_DOMAIN = 'http://eu-mobile.webofstories.com/' _GREAT_LIFE_STREAMER = 'rtmp://eu-cdn1.webofstories.com/cfx/st/' _USER_STREAMER = 'rtmp://eu-users.webofstories.com/cfx/st/' - _TESTS = [ - { - 'url': 'http://www.webofstories.com/play/hans.bethe/71', - 'md5': '373e4dd915f60cfe3116322642ddf364', - 'info_dict': { - 'id': '4536', - 'ext': 'mp4', - 'title': 'The temperature of the sun', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'Hans Bethe talks about calculating the temperature of the sun', - 'duration': 238, - } + _TESTS = [{ + 'url': 'http://www.webofstories.com/play/hans.bethe/71', + 'md5': '373e4dd915f60cfe3116322642ddf364', + 'info_dict': { + 'id': '4536', + 'ext': 'mp4', + 'title': 'The temperature of the sun', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'Hans Bethe talks about calculating the temperature of the sun', + 'duration': 238, + } + }, { + 'url': 'http://www.webofstories.com/play/55908', + 'md5': '2985a698e1fe3211022422c4b5ed962c', + 'info_dict': { + 'id': '55908', + 'ext': 'mp4', + 'title': 'The story of Gemmata obscuriglobus', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', + 'duration': 169, + }, + 'skip': 'notfound', + }, { + # malformed og:title meta + 'url': 'http://www.webofstories.com/play/54215?o=MS', + 'info_dict': { + 'id': '54215', + 'ext': 'mp4', + 'title': '"A Leg to Stand On"', + 'thumbnail': r're:^https?://.*\.jpg$', + 'description': 'Oliver Sacks talks about the death and resurrection of a limb', + 'duration': 97, }, - { - 'url': 'http://www.webofstories.com/play/55908', - 'md5': '2985a698e1fe3211022422c4b5ed962c', - 'info_dict': { - 'id': '55908', - 'ext': 'mp4', - 'title': 'The story of Gemmata obscuriglobus', - 'thumbnail': 're:^https?://.*\.jpg$', - 'description': 'Planctomycete talks about The story of Gemmata obscuriglobus', - 'duration': 169, - } + 'params': { + 'skip_download': True, }, - ] + }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - title = self._og_search_title(webpage) + # Sometimes og:title meta is malformed + title = self._og_search_title(webpage, default=None) or self._html_search_regex( + r'(?s)Title:\s*(.+?)<', webpage, 'title') description = self._html_search_meta('description', webpage) thumbnail = self._og_search_thumbnail(webpage) @@ -119,8 +136,10 @@ class WebOfStoriesPlaylistIE(InfoExtractor): webpage = self._download_webpage(url, playlist_id) entries = [ - self.url_result('http://www.webofstories.com/play/%s' % video_number, 'WebOfStories') - for video_number in set(re.findall('href="/playAll/%s\?sId=(\d+)"' % playlist_id, webpage)) + self.url_result( + 'http://www.webofstories.com/play/%s' % video_id, + 'WebOfStories', video_id=video_id) + for video_id in orderedSet(re.findall(r'\bid=["\']td_(\d+)', webpage)) ] title = self._search_regex(