debian/patches/remove-autoupdate-mechanism.patch: Remove fewer things to avoid future...

[youtubedl] / youtube_dl / extractor / udemy.py
diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py

index 207c4a6a7ee8131c3e2e5d5823aefb336ad47c47..6d6c0a98fa64e9e2afc68ce2ad569f5a91d5c24b 100644 (file)
--- a/youtube_dl/extractor/udemy.py
+++ b/youtube_dl/extractor/udemy.py
@@ -5,6 +5,7 @@ import re
  from .common import InfoExtractor
  from ..compat import (
      compat_HTTPError,
  from .common import InfoExtractor
  from ..compat import (
      compat_HTTPError,
+    compat_kwargs,
      compat_str,
      compat_urllib_request,
      compat_urlparse,
      compat_str,
      compat_urllib_request,
      compat_urlparse,
@@ -62,11 +63,11 @@ class UdemyIE(InfoExtractor):
      def _extract_course_info(self, webpage, video_id):
          course = self._parse_json(
              unescapeHTML(self._search_regex(
      def _extract_course_info(self, webpage, video_id):
          course = self._parse_json(
              unescapeHTML(self._search_regex(
-                r'ng-init=["\'].*\bcourse=({.+?});', webpage, 'course', default='{}')),
+                r'ng-init=["\'].*\bcourse=({.+?})[;"\']',
+                webpage, 'course', default='{}')),
              video_id, fatal=False) or {}
          course_id = course.get('id') or self._search_regex(
              video_id, fatal=False) or {}
          course_id = course.get('id') or self._search_regex(
-            (r'&quot;id&quot;\s*:\s*(\d+)', r'data-course-id=["\'](\d+)'),
-            webpage, 'course id')
+            r'data-course-id=["\'](\d+)', webpage, 'course id')
          return course_id, course.get('title')
  
      def _enroll_course(self, base_url, webpage, course_id):
          return course_id, course.get('title')
  
      def _enroll_course(self, base_url, webpage, course_id):
@@ -114,6 +115,11 @@ class UdemyIE(InfoExtractor):
                  error_str += ' - %s' % error_data.get('formErrors')
              raise ExtractorError(error_str, expected=True)
  
                  error_str += ' - %s' % error_data.get('formErrors')
              raise ExtractorError(error_str, expected=True)
  
+    def _download_webpage(self, *args, **kwargs):
+        kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4'
+        return super(UdemyIE, self)._download_webpage(
+            *args, **compat_kwargs(kwargs))
+
      def _download_json(self, url_or_request, *args, **kwargs):
          headers = {
              'X-Udemy-Snail-Case': 'true',
      def _download_json(self, url_or_request, *args, **kwargs):
          headers = {
              'X-Udemy-Snail-Case': 'true',
@@ -164,7 +170,7 @@ class UdemyIE(InfoExtractor):
          })
  
          response = self._download_webpage(
          })
  
          response = self._download_webpage(
-            self._LOGIN_URL, None, 'Logging in as %s' % username,
+            self._LOGIN_URL, None, 'Logging in',
              data=urlencode_postdata(login_form),
              headers={
                  'Referer': self._ORIGIN_URL,
              data=urlencode_postdata(login_form),
              headers={
                  'Referer': self._ORIGIN_URL,
@@ -257,6 +263,11 @@ class UdemyIE(InfoExtractor):
                  video_url = source.get('file') or source.get('src')
                  if not video_url or not isinstance(video_url, compat_str):
                      continue
                  video_url = source.get('file') or source.get('src')
                  if not video_url or not isinstance(video_url, compat_str):
                      continue
+                if source.get('type') == 'application/x-mpegURL' or determine_ext(video_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id='hls', fatal=False))
+                    continue
                  format_id = source.get('label')
                  f = {
                      'url': video_url,
                  format_id = source.get('label')
                  f = {
                      'url': video_url,