Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/platzi.py
debian/copyright: use spaces rather than tabs to start continuation lines.
[youtubedl] / youtube_dl / extractor / platzi.py
index 557b2b5ade2fcd0269b8b2873fd644015f785c6c..23c8256b59dab4a92ae79ef48dc8e3b0adf0ff68 100644 (file)
@@ -18,43 +18,10 @@ from ..utils import (
 )
 
 
 )
 
 
-class PlatziIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            platzi\.com/clases|           # es version
-                            courses\.platzi\.com/classes  # en version
-                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
-                    '''
+class PlatziBaseIE(InfoExtractor):
     _LOGIN_URL = 'https://platzi.com/login/'
     _NETRC_MACHINE = 'platzi'
 
     _LOGIN_URL = 'https://platzi.com/login/'
     _NETRC_MACHINE = 'platzi'
 
-    _TESTS = [{
-        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
-        'md5': '8f56448241005b561c10f11a595b37e3',
-        'info_dict': {
-            'id': '12074',
-            'ext': 'mp4',
-            'title': 'Creando nuestra primera página',
-            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
-            'duration': 420,
-        },
-        'skip': 'Requires platzi account credentials',
-    }, {
-        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
-        'info_dict': {
-            'id': '13430',
-            'ext': 'mp4',
-            'title': 'Background',
-            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
-            'duration': 360,
-        },
-        'skip': 'Requires platzi account credentials',
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
     def _real_initialize(self):
         self._login()
 
     def _real_initialize(self):
         self._login()
 
@@ -79,7 +46,7 @@ class PlatziIE(InfoExtractor):
             headers={'Referer': self._LOGIN_URL})
 
         # login succeeded
             headers={'Referer': self._LOGIN_URL})
 
         # login succeeded
-        if 'platzi.com/login' not in compat_str(urlh.geturl()):
+        if 'platzi.com/login' not in urlh.geturl():
             return
 
         login_error = self._webpage_read_content(
             return
 
         login_error = self._webpage_read_content(
@@ -97,6 +64,42 @@ class PlatziIE(InfoExtractor):
                     'Unable to login: %s' % error, expected=True)
         raise ExtractorError('Unable to log in')
 
                     'Unable to login: %s' % error, expected=True)
         raise ExtractorError('Unable to log in')
 
+
+class PlatziIE(PlatziBaseIE):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            platzi\.com/clases|           # es version
+                            courses\.platzi\.com/classes  # en version
+                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
+                    '''
+
+    _TESTS = [{
+        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
+        'md5': '8f56448241005b561c10f11a595b37e3',
+        'info_dict': {
+            'id': '12074',
+            'ext': 'mp4',
+            'title': 'Creando nuestra primera página',
+            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
+            'duration': 420,
+        },
+        'skip': 'Requires platzi account credentials',
+    }, {
+        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
+        'info_dict': {
+            'id': '13430',
+            'ext': 'mp4',
+            'title': 'Background',
+            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
+            'duration': 360,
+        },
+        'skip': 'Requires platzi account credentials',
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
     def _real_extract(self, url):
         lecture_id = self._match_id(url)
 
     def _real_extract(self, url):
         lecture_id = self._match_id(url)
 
@@ -104,7 +107,11 @@ class PlatziIE(InfoExtractor):
 
         data = self._parse_json(
             self._search_regex(
 
         data = self._parse_json(
             self._search_regex(
-                r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'),
+                # client_data may contain "};" so that we have to try more
+                # strict regex first
+                (r'client_data\s*=\s*({.+?})\s*;\s*\n',
+                 r'client_data\s*=\s*({.+?})\s*;'),
+                webpage, 'client data'),
             lecture_id)
 
         material = data['initialState']['material']
             lecture_id)
 
         material = data['initialState']['material']
@@ -146,7 +153,7 @@ class PlatziIE(InfoExtractor):
         }
 
 
         }
 
 
-class PlatziCourseIE(InfoExtractor):
+class PlatziCourseIE(PlatziBaseIE):
     _VALID_URL = r'''(?x)
                     https?://
                         (?:
     _VALID_URL = r'''(?x)
                     https?://
                         (?: