1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
  18 class UdemyIE(InfoExtractor
): 
  20     _VALID_URL 
= r
'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)' 
  21     _LOGIN_URL 
= 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1' 
  22     _ORIGIN_URL 
= 'https://www.udemy.com' 
  23     _NETRC_MACHINE 
= 'udemy' 
  26         'url': 'https://www.udemy.com/java-tutorial/#/lecture/172757', 
  27         'md5': '98eda5b657e752cf945d8445e261b5c5', 
  31             'title': 'Introduction and Installation', 
  32             'description': 'md5:c0d51f6f21ef4ec65f091055a5eef876', 
  35         'skip': 'Requires udemy account credentials', 
  38     def _enroll_course(self
, webpage
, course_id
): 
  39         checkout_url 
= unescapeHTML(self
._search
_regex
( 
  40             r
'href=(["\'])(?P
<url
>https?
://(?
:www\
.)?udemy\
.com
/payment
/checkout
/.+?
)\
1', 
  41             webpage, 'checkout url
', group='url
', default=None)) 
  44                 'Course 
%s is not free
. You have to pay 
for it before you can download
. ' 
  45                 'Use this URL to confirm purchase
: %s' % (course_id, checkout_url), expected=True) 
  47         enroll_url = unescapeHTML(self._search_regex( 
  48             r'href
=(["\'])(?P<url>https?://(?:www\.)?udemy\.com/course/subscribe/.+?)\1', 
  49             webpage, 'enroll url', group='url', default=None)) 
  51             webpage = self._download_webpage(enroll_url, course_id, 'Enrolling in the course') 
  52             if '>You have enrolled in' in webpage: 
  53                 self.to_screen('%s: Successfully enrolled in the course' % course_id) 
  55     def _download_lecture(self, course_id, lecture_id): 
  56         return self._download_json( 
  57             'https://www.udemy.com/api-2.0/users/me/subscribed-courses/%s/lectures/%s?%s' % ( 
  58                 course_id, lecture_id, compat_urllib_parse.urlencode({ 
  61                     'fields[lecture]': 'title,description,asset', 
  62                     'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data', 
  63                     'instructorPreviewMode': 'False', 
  65             lecture_id, 'Downloading lecture JSON') 
  67     def _handle_error(self, response): 
  68         if not isinstance(response, dict): 
  70         error = response.get('error') 
  72             error_str = 'Udemy returned error #%s: %s' % (error.get('code'), error.get('message')) 
  73             error_data = error.get('data') 
  75                 error_str += ' - %s' % error_data.get('formErrors') 
  76             raise ExtractorError(error_str, expected=True) 
  78     def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata'): 
  80             'X-Udemy-Snail-Case': 'true', 
  81             'X-Requested-With': 'XMLHttpRequest', 
  83         for cookie in self._downloader.cookiejar: 
  84             if cookie.name == 'client_id': 
  85                 headers['X-Udemy-Client-Id'] = cookie.value 
  86             elif cookie.name == 'access_token': 
  87                 headers['X-Udemy-Bearer-Token'] = cookie.value 
  88                 headers['X-Udemy-Authorization'] = 'Bearer %s' % cookie.value 
  90         if isinstance(url_or_request, compat_urllib_request.Request): 
  91             for header, value in headers.items(): 
  92                 url_or_request.add_header(header, value) 
  94             url_or_request = sanitized_Request(url_or_request, headers=headers) 
  96         response = super(UdemyIE, self)._download_json(url_or_request, video_id, note) 
  97         self._handle_error(response) 
 100     def _real_initialize(self): 
 104         (username, password) = self._get_login_info() 
 108         login_popup = self._download_webpage( 
 109             self._LOGIN_URL, None, 'Downloading login popup') 
 111         def is_logged(webpage): 
 112             return any(p in webpage for p in ['href="https
://www
.udemy
.com
/user
/logout
/', '>Logout
<']) 
 115         if is_logged(login_popup): 
 118         login_form = self._form_hidden_inputs('login
-form
', login_popup) 
 121             'email
': username.encode('utf
-8'), 
 122             'password
': password.encode('utf
-8'), 
 125         request = sanitized_Request( 
 126             self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf
-8')) 
 127         request.add_header('Referer
', self._ORIGIN_URL) 
 128         request.add_header('Origin
', self._ORIGIN_URL) 
 130         response = self._download_webpage( 
 131             request, None, 'Logging 
in as %s' % username) 
 133         if not is_logged(response): 
 134             error = self._html_search_regex( 
 135                 r'(?s
)<div
[^
>]+class="form-errors[^"]*">(.+?)</div>', 
 136                 response, 'error message', default=None) 
 138                 raise ExtractorError('Unable to login: %s' % error, expected=True) 
 139             raise ExtractorError('Unable to log in') 
 141     def _real_extract(self, url): 
 142         lecture_id = self._match_id(url) 
 144         webpage = self._download_webpage(url, lecture_id) 
 146         course_id = self._search_regex( 
 147             r'data-course-id=["\'](\d
+)', webpage, 'course 
id') 
 150             lecture = self._download_lecture(course_id, lecture_id) 
 151         except ExtractorError as e: 
 152             # Error could possibly mean we are not enrolled in the course 
 153             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403: 
 154                 self._enroll_course(webpage, course_id) 
 155                 lecture = self._download_lecture(course_id, lecture_id) 
 159         title = lecture['title
'] 
 160         description = lecture.get('description
') 
 162         asset = lecture['asset
'] 
 164         asset_type = asset.get('assetType
') or asset.get('asset_type
') 
 165         if asset_type != 'Video
': 
 166             raise ExtractorError( 
 167                 'Lecture 
%s is not a video
' % lecture_id, expected=True) 
 169         stream_url = asset.get('streamUrl
') or asset.get('stream_url
') 
 171             youtube_url = self._search_regex( 
 172                 r'(https?
://www\
.youtube\
.com
/watch
\?v
=.*)', stream_url, 'youtube URL
', default=None) 
 174                 return self.url_result(youtube_url, 'Youtube
') 
 176         video_id = asset['id'] 
 177         thumbnail = asset.get('thumbnailUrl
') or asset.get('thumbnail_url
') 
 178         duration = float_or_none(asset.get('data
', {}).get('duration
')) 
 179         outputs = asset.get('data
', {}).get('outputs
', {}) 
 182         for format_ in asset.get('download_urls
', {}).get('Video
', []): 
 183             video_url = format_.get('file') 
 186             format_id = format_.get('label
') 
 188                 'url
': format_['file'], 
 189                 'height
': int_or_none(format_id), 
 192                 # Some videos contain additional metadata (e.g. 
 193                 # https://www.udemy.com/ios9-swift/learn/#/lecture/3383208) 
 194                 output = outputs.get(format_id) 
 195                 if isinstance(output, dict): 
 197                         'format_id
': '%sp
' % (output.get('label
') or format_id), 
 198                         'width
': int_or_none(output.get('width
')), 
 199                         'height
': int_or_none(output.get('height
')), 
 200                         'vbr
': int_or_none(output.get('video_bitrate_in_kbps
')), 
 201                         'vcodec
': output.get('video_codec
'), 
 202                         'fps
': int_or_none(output.get('frame_rate
')), 
 203                         'abr
': int_or_none(output.get('audio_bitrate_in_kbps
')), 
 204                         'acodec
': output.get('audio_codec
'), 
 205                         'asr
': int_or_none(output.get('audio_sample_rate
')), 
 206                         'tbr
': int_or_none(output.get('total_bitrate_in_kbps
')), 
 207                         'filesize
': int_or_none(output.get('file_size_in_bytes
')), 
 210                     f['format_id
'] = '%sp
' % format_id 
 213         self._sort_formats(formats) 
 218             'description
': description, 
 219             'thumbnail
': thumbnail, 
 220             'duration
': duration, 
 225 class UdemyCourseIE(UdemyIE): 
 226     IE_NAME = 'udemy
:course
' 
 227     _VALID_URL = r'https?
://www\
.udemy\
.com
/(?P
<id>[\da
-z
-]+)' 
 231     def suitable(cls, url): 
 232         return False if UdemyIE.suitable(url) else super(UdemyCourseIE, cls).suitable(url) 
 234     def _real_extract(self, url): 
 235         course_path = self._match_id(url) 
 237         webpage = self._download_webpage(url, course_path) 
 239         response = self._download_json( 
 240             'https
://www
.udemy
.com
/api
-1.1/courses
/%s' % course_path, 
 241             course_path, 'Downloading course JSON
') 
 243         course_id = response['id'] 
 244         course_title = response.get('title
') 
 246         self._enroll_course(webpage, course_id) 
 248         response = self._download_json( 
 249             'https
://www
.udemy
.com
/api
-1.1/courses
/%s/curriculum
' % course_id, 
 250             course_id, 'Downloading course curriculum
') 
 253         chapter, chapter_number = None, None 
 254         for asset in response: 
 255             asset_type = asset.get('assetType
') or asset.get('asset_type
') 
 256             if asset_type == 'Video
': 
 257                 asset_id = asset.get('id') 
 260                         '_type
': 'url_transparent
', 
 261                         'url
': 'https
://www
.udemy
.com
/%s/#/lecture/%s' % (course_path, asset['id']), 
 262                         'ie_key': UdemyIE
.ie_key(), 
 265                         entry
['chapter_number'] = chapter_number
 
 267                         entry
['chapter'] = chapter
 
 268                     entries
.append(entry
) 
 269             elif asset
.get('type') == 'chapter': 
 270                 chapter_number 
= asset
.get('index') or asset
.get('object_index') 
 271                 chapter 
= asset
.get('title') 
 273         return self
.playlist_result(entries
, course_id
, course_title
)