]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/safari.py
Merge tag 'upstream/2017.02.07'
[youtubedl] / youtube_dl / extractor / safari.py
index 6ba91f202baadbfd72160cc739efde868a60d421..c3aec1edde5e9d02efb377fa39941ae01d2f04b4 100644 (file)
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
 from __future__ import unicode_literals
 
 import re
 from __future__ import unicode_literals
 
 import re
@@ -75,7 +75,7 @@ class SafariBaseIE(InfoExtractor):
 class SafariIE(SafariBaseIE):
     IE_NAME = 'safari'
     IE_DESC = 'safaribooksonline.com online video'
 class SafariIE(SafariBaseIE):
     IE_NAME = 'safari'
     IE_DESC = 'safaribooksonline.com online video'
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html'
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
@@ -92,6 +92,9 @@ class SafariIE(SafariBaseIE):
         # non-digits in course id
         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
         'only_matching': True,
         # non-digits in course id
         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
@@ -100,13 +103,13 @@ class SafariIE(SafariBaseIE):
 
         webpage = self._download_webpage(url, video_id)
         reference_id = self._search_regex(
 
         webpage = self._download_webpage(url, video_id)
         reference_id = self._search_regex(
-            r'data-reference-id=(["\'])(?P<id>.+?)\1',
+            r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'kaltura reference id', group='id')
         partner_id = self._search_regex(
             webpage, 'kaltura reference id', group='id')
         partner_id = self._search_regex(
-            r'data-partner-id=(["\'])(?P<id>.+?)\1',
+            r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'kaltura widget id', group='id')
         ui_id = self._search_regex(
             webpage, 'kaltura widget id', group='id')
         ui_id = self._search_regex(
-            r'data-ui-id=(["\'])(?P<id>.+?)\1',
+            r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1',
             webpage, 'kaltura uiconf id', group='id')
 
         query = {
             webpage, 'kaltura uiconf id', group='id')
 
         query = {
@@ -132,12 +135,15 @@ class SafariIE(SafariBaseIE):
 
 class SafariApiIE(SafariBaseIE):
     IE_NAME = 'safari:api'
 
 class SafariApiIE(SafariBaseIE):
     IE_NAME = 'safari:api'
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>part\d+)\.html'
+    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html'
 
 
-    _TEST = {
+    _TESTS = [{
         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
         'only_matching': True,
         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
         'only_matching': True,
-    }
+    }, {
+        'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)
@@ -151,7 +157,14 @@ class SafariCourseIE(SafariBaseIE):
     IE_NAME = 'safari:course'
     IE_DESC = 'safaribooksonline.com online courses'
 
     IE_NAME = 'safari:course'
     IE_DESC = 'safaribooksonline.com online courses'
 
-    _VALID_URL = r'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)|
+                            techbus\.safaribooksonline\.com
+                        )
+                        /(?P<id>[^/]+)/?(?:[#?]|$)
+                    '''
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
 
     _TESTS = [{
         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
@@ -164,6 +177,9 @@ class SafariCourseIE(SafariBaseIE):
     }, {
         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
         'only_matching': True,
     }, {
         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
         'only_matching': True,
+    }, {
+        'url': 'http://techbus.safaribooksonline.com/9780134426365',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):