New upstream version 2019.09.28

[youtubedl] / youtube_dl / extractor / platzi.py
diff --git a/youtube_dl/extractor/platzi.py b/youtube_dl/extractor/platzi.py

index 557b2b5ade2fcd0269b8b2873fd644015f785c6c..602207bebdd6a01d7f33dbf08302ab5a75ccf207 100644 (file)
--- a/youtube_dl/extractor/platzi.py
+++ b/youtube_dl/extractor/platzi.py
@@ -18,43 +18,10 @@ from ..utils import (
  )
  
  
-class PlatziIE(InfoExtractor):
-    _VALID_URL = r'''(?x)
-                    https?://
-                        (?:
-                            platzi\.com/clases|           # es version
-                            courses\.platzi\.com/classes  # en version
-                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
-                    '''
+class PlatziBaseIE(InfoExtractor):
      _LOGIN_URL = 'https://platzi.com/login/'
      _NETRC_MACHINE = 'platzi'
  
-    _TESTS = [{
-        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
-        'md5': '8f56448241005b561c10f11a595b37e3',
-        'info_dict': {
-            'id': '12074',
-            'ext': 'mp4',
-            'title': 'Creando nuestra primera página',
-            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
-            'duration': 420,
-        },
-        'skip': 'Requires platzi account credentials',
-    }, {
-        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
-        'info_dict': {
-            'id': '13430',
-            'ext': 'mp4',
-            'title': 'Background',
-            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
-            'duration': 360,
-        },
-        'skip': 'Requires platzi account credentials',
-        'params': {
-            'skip_download': True,
-        },
-    }]
-
      def _real_initialize(self):
          self._login()
  
@@ -97,6 +64,42 @@ class PlatziIE(InfoExtractor):
                      'Unable to login: %s' % error, expected=True)
          raise ExtractorError('Unable to log in')
  
+
+class PlatziIE(PlatziBaseIE):
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:
+                            platzi\.com/clases|           # es version
+                            courses\.platzi\.com/classes  # en version
+                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
+                    '''
+
+    _TESTS = [{
+        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
+        'md5': '8f56448241005b561c10f11a595b37e3',
+        'info_dict': {
+            'id': '12074',
+            'ext': 'mp4',
+            'title': 'Creando nuestra primera página',
+            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
+            'duration': 420,
+        },
+        'skip': 'Requires platzi account credentials',
+    }, {
+        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
+        'info_dict': {
+            'id': '13430',
+            'ext': 'mp4',
+            'title': 'Background',
+            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
+            'duration': 360,
+        },
+        'skip': 'Requires platzi account credentials',
+        'params': {
+            'skip_download': True,
+        },
+    }]
+
      def _real_extract(self, url):
          lecture_id = self._match_id(url)
  
@@ -104,7 +107,11 @@ class PlatziIE(InfoExtractor):
  
          data = self._parse_json(
              self._search_regex(
-                r'client_data\s*=\s*({.+?})\s*;', webpage, 'client data'),
+                # client_data may contain "};" so that we have to try more
+                # strict regex first
+                (r'client_data\s*=\s*({.+?})\s*;\s*\n',
+                 r'client_data\s*=\s*({.+?})\s*;'),
+                webpage, 'client data'),
              lecture_id)
  
          material = data['initialState']['material']
@@ -146,7 +153,7 @@ class PlatziIE(InfoExtractor):
          }
  
  
-class PlatziCourseIE(InfoExtractor):
+class PlatziCourseIE(PlatziBaseIE):
      _VALID_URL = r'''(?x)
                      https?://
                          (?: