X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/b4a0c9f9de9d715538a1718922d6ab01a40f7ce3..4d073ced87f1fff7fd6fdfd82462444769b9f90b:/youtube_dl/extractor/udemy.py?ds=sidebyside diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 195f5ce..a719699 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -5,6 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_HTTPError, + compat_kwargs, compat_str, compat_urllib_request, compat_urlparse, @@ -17,6 +18,7 @@ from ..utils import ( int_or_none, js_to_json, sanitized_Request, + try_get, unescapeHTML, urlencode_postdata, ) @@ -57,6 +59,10 @@ class UdemyIE(InfoExtractor): # no url in outputs format entry 'url': 'https://www.udemy.com/learn-web-development-complete-step-by-step-guide-to-success/learn/v4/t/lecture/4125812', 'only_matching': True, + }, { + # only outputs rendition + 'url': 'https://www.udemy.com/how-you-can-help-your-local-community-5-amazing-examples/learn/v4/t/lecture/3225750?start=0', + 'only_matching': True, }] def _extract_course_info(self, webpage, video_id): @@ -100,7 +106,7 @@ class UdemyIE(InfoExtractor): % (course_id, lecture_id), lecture_id, 'Downloading lecture JSON', query={ 'fields[lecture]': 'title,description,view_html,asset', - 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,data', + 'fields[asset]': 'asset_type,stream_url,thumbnail_url,download_urls,stream_urls,captions,data', }) def _handle_error(self, response): @@ -114,6 +120,11 @@ class UdemyIE(InfoExtractor): error_str += ' - %s' % error_data.get('formErrors') raise ExtractorError(error_str, expected=True) + def _download_webpage_handle(self, *args, **kwargs): + kwargs.setdefault('headers', {})['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4' + return super(UdemyIE, self)._download_webpage_handle( + *args, **compat_kwargs(kwargs)) + def _download_json(self, url_or_request, *args, **kwargs): headers = { 'X-Udemy-Snail-Case': 'true', @@ -140,7 +151,7 @@ class UdemyIE(InfoExtractor): self._login() def _login(self): - (username, password) = self._get_login_info() + username, password = self._get_login_info() if username is None: return @@ -293,9 +304,25 @@ class UdemyIE(InfoExtractor): 'url': src, }) - download_urls = asset.get('download_urls') - if isinstance(download_urls, dict): - extract_formats(download_urls.get('Video')) + for url_kind in ('download', 'stream'): + urls = asset.get('%s_urls' % url_kind) + if isinstance(urls, dict): + extract_formats(urls.get('Video')) + + captions = asset.get('captions') + if isinstance(captions, list): + for cc in captions: + if not isinstance(cc, dict): + continue + cc_url = cc.get('url') + if not cc_url or not isinstance(cc_url, compat_str): + continue + lang = try_get(cc, lambda x: x['locale']['locale'], compat_str) + sub_dict = (automatic_captions if cc.get('source') == 'auto' + else subtitles) + sub_dict.setdefault(lang or 'en', []).append({ + 'url': cc_url, + }) view_html = lecture.get('view_html') if view_html: @@ -351,6 +378,12 @@ class UdemyIE(InfoExtractor): fatal=False) extract_subtitles(text_tracks) + if not formats and outputs: + for format_id, output in outputs.items(): + f = extract_output_format(output, format_id) + if f.get('url'): + formats.append(f) + self._sort_formats(formats, field_preference=('height', 'width', 'tbr', 'format_id')) return {