]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/lynda.py
New upstream version 2018.01.27
[youtubedl] / youtube_dl / extractor / lynda.py
index 1b6f5091d36eecb0f241b95e38739d4b967d6c34..f5c7abc13eb431605355c080ade55da197fc04e3 100644 (file)
@@ -94,7 +94,15 @@ class LyndaBaseIE(InfoExtractor):
 class LyndaIE(LyndaBaseIE):
     IE_NAME = 'lynda'
     IE_DESC = 'lynda.com videos'
-    _VALID_URL = r'https?://(?:www\.)?(?:lynda\.com|educourse\.ga)/(?:[^/]+/[^/]+/(?P<course_id>\d+)|player/embed)/(?P<id>\d+)'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:www\.)?(?:lynda\.com|educourse\.ga)/
+                        (?:
+                            (?:[^/]+/){2,3}(?P<course_id>\d+)|
+                            player/embed
+                        )/
+                        (?P<id>\d+)
+                    '''
 
     _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]'
 
@@ -113,6 +121,9 @@ class LyndaIE(LyndaBaseIE):
     }, {
         'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html',
         'only_matching': True,
+    }, {
+        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html',
+        'only_matching': True,
     }]
 
     def _raise_unavailable(self, video_id):
@@ -244,8 +255,9 @@ class LyndaIE(LyndaBaseIE):
     def _get_subtitles(self, video_id):
         url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
         subs = self._download_json(url, None, False)
-        if subs:
-            return {'en': [{'ext': 'srt', 'data': self._fix_subtitles(subs)}]}
+        fixed_subs = self._fix_subtitles(subs)
+        if fixed_subs:
+            return {'en': [{'ext': 'srt', 'data': fixed_subs}]}
         else:
             return {}
 
@@ -256,7 +268,15 @@ class LyndaCourseIE(LyndaBaseIE):
 
     # Course link equals to welcome/introduction video link of same course
     # We will recognize it as course link
-    _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html'
+    _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html'
+
+    _TESTS = [{
+        'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html',
+        'only_matching': True,
+    }]
 
     def _real_extract(self, url):
         mobj = re.match(self._VALID_URL, url)