X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/47d80ec0b18245caeb97018d4c1af18d0b5b972b..bc5a42ee389bd51b63eb8a6b914aa8139762bc10:/youtube_dl/extractor/discoverygo.py diff --git a/youtube_dl/extractor/discoverygo.py b/youtube_dl/extractor/discoverygo.py index c4e83b2..7cd5d42 100644 --- a/youtube_dl/extractor/discoverygo.py +++ b/youtube_dl/extractor/discoverygo.py @@ -1,18 +1,21 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..compat import compat_str from ..utils import ( extract_attributes, + ExtractorError, int_or_none, parse_age_limit, + remove_end, unescapeHTML, - ExtractorError, ) -class DiscoveryGoIE(InfoExtractor): - _VALID_URL = r'''(?x)https?://(?:www\.)?(?: +class DiscoveryGoBaseIE(InfoExtractor): + _VALID_URL_TEMPLATE = r'''(?x)https?://(?:www\.)?(?: discovery| investigationdiscovery| discoverylife| @@ -22,18 +25,23 @@ class DiscoveryGoIE(InfoExtractor): sciencechannel| tlc| velocitychannel - )go\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)''' + )go\.com/%s(?P<id>[^/?#&]+)''' + + +class DiscoveryGoIE(DiscoveryGoBaseIE): + _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % r'(?:[^/]+/)+' + _GEO_COUNTRIES = ['US'] _TEST = { - 'url': 'https://www.discoverygo.com/love-at-first-kiss/kiss-first-ask-questions-later/', + 'url': 'https://www.discoverygo.com/bering-sea-gold/reaper-madness/', 'info_dict': { - 'id': '57a33c536b66d1cd0345eeb1', + 'id': '58c167d86b66d12f2addeb01', 'ext': 'mp4', - 'title': 'Kiss First, Ask Questions Later!', - 'description': 'md5:fe923ba34050eae468bffae10831cb22', - 'duration': 2579, - 'series': 'Love at First Kiss', - 'season_number': 1, - 'episode_number': 1, + 'title': 'Reaper Madness', + 'description': 'md5:09f2c625c99afb8946ed4fb7865f6e78', + 'duration': 2519, + 'series': 'Bering Sea Gold', + 'season_number': 8, + 'episode_number': 6, 'age_limit': 14, }, } @@ -49,7 +57,7 @@ class DiscoveryGoIE(InfoExtractor): webpage, 'video container')) video = self._parse_json( - 
unescapeHTML(container.get('data-video') or container.get('data-json')), + container.get('data-video') or container.get('data-json'), display_id) title = video['name'] @@ -114,3 +122,46 @@ class DiscoveryGoIE(InfoExtractor): 'formats': formats, 'subtitles': subtitles, } + + +class DiscoveryGoPlaylistIE(DiscoveryGoBaseIE): + _VALID_URL = DiscoveryGoBaseIE._VALID_URL_TEMPLATE % '' + _TEST = { + 'url': 'https://www.discoverygo.com/bering-sea-gold/', + 'info_dict': { + 'id': 'bering-sea-gold', + 'title': 'Bering Sea Gold', + 'description': 'md5:cc5c6489835949043c0cc3ad66c2fa0e', + }, + 'playlist_mincount': 6, + } + + @classmethod + def suitable(cls, url): + return False if DiscoveryGoIE.suitable(url) else super( + DiscoveryGoPlaylistIE, cls).suitable(url) + + def _real_extract(self, url): + display_id = self._match_id(url) + + webpage = self._download_webpage(url, display_id) + + entries = [] + for mobj in re.finditer(r'data-json=(["\'])(?P<json>{.+?})\1', webpage): + data = self._parse_json( + mobj.group('json'), display_id, + transform_source=unescapeHTML, fatal=False) + if not isinstance(data, dict) or data.get('type') != 'episode': + continue + episode_url = data.get('socialUrl') + if not episode_url: + continue + entries.append(self.url_result( + episode_url, ie=DiscoveryGoIE.ie_key(), + video_id=data.get('id'))) + + return self.playlist_result( + entries, display_id, + remove_end(self._og_search_title( + webpage, fatal=False), ' | Discovery GO'), + self._og_search_description(webpage))