X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/fe979149c83b5a935f7d28baf75848a9137316fd..97a8fc3ae80fb363c69c2e6b8c29b5373ac72aea:/youtube_dl/extractor/safari.py?ds=sidebyside diff --git a/youtube_dl/extractor/safari.py b/youtube_dl/extractor/safari.py index 6ba91f2..c3aec1e 100644 --- a/youtube_dl/extractor/safari.py +++ b/youtube_dl/extractor/safari.py @@ -1,4 +1,4 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import re @@ -75,7 +75,7 @@ class SafariBaseIE(InfoExtractor): class SafariIE(SafariBaseIE): IE_NAME = 'safari' IE_DESC = 'safaribooksonline.com online video' - _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', @@ -92,6 +92,9 @@ class SafariIE(SafariBaseIE): # non-digits in course id 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 'only_matching': True, + }, { + 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', + 'only_matching': True, }] def _real_extract(self, url): @@ -100,13 +103,13 @@ class SafariIE(SafariBaseIE): webpage = self._download_webpage(url, video_id) reference_id = self._search_regex( - r'data-reference-id=(["\'])(?P<id>.+?)\1', + r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura reference id', group='id') partner_id = self._search_regex( - r'data-partner-id=(["\'])(?P<id>.+?)\1', + r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura widget id', group='id') ui_id = self._search_regex( - r'data-ui-id=(["\'])(?P<id>.+?)\1', + r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura uiconf id', group='id') query = { @@ -132,12 +135,15 @@ class SafariIE(SafariBaseIE): class 
SafariApiIE(SafariBaseIE): IE_NAME = 'safari:api' - _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html' + _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' - _TEST = { + _TESTS = [{ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 'only_matching': True, - } + }, { + 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) @@ -151,7 +157,14 @@ class SafariCourseIE(SafariBaseIE): IE_NAME = 'safari:course' IE_DESC = 'safaribooksonline.com online courses' - _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<course_id>[^/]+)/?(?:[#?]|$)' + _VALID_URL = r'''(?x) + https?:// + (?: + (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)| + techbus\.safaribooksonline\.com + ) + /(?P<course_id>[^/]+)/?(?:[#?]|$) + ''' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', @@ -164,6 +177,9 @@ class SafariCourseIE(SafariBaseIE): }, { 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', 'only_matching': True, + }, { + 'url': 'http://techbus.safaribooksonline.com/9780134426365', + 'only_matching': True, }] def _real_extract(self, url):