X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/fe979149c83b5a935f7d28baf75848a9137316fd..9865587cf7b579793ba274067c3e44065d5ff77d:/youtube_dl/extractor/lynda.py diff --git a/youtube_dl/extractor/lynda.py b/youtube_dl/extractor/lynda.py index 2d50400..3084c6d 100644 --- a/youtube_dl/extractor/lynda.py +++ b/youtube_dl/extractor/lynda.py @@ -4,7 +4,6 @@ import re from .common import InfoExtractor from ..compat import ( - compat_HTTPError, compat_str, compat_urlparse, ) @@ -16,7 +15,7 @@ from ..utils import ( class LyndaBaseIE(InfoExtractor): - _SIGNIN_URL = 'https://www.lynda.com/signin' + _SIGNIN_URL = 'https://www.lynda.com/signin/lynda' _PASSWORD_URL = 'https://www.lynda.com/signin/password' _USER_URL = 'https://www.lynda.com/signin/user' _ACCOUNT_CREDENTIALS_HINT = 'Use --username and --password options to provide lynda.com account credentials.' @@ -44,21 +43,15 @@ class LyndaBaseIE(InfoExtractor): form_data = self._hidden_inputs(form_html) form_data.update(extra_form_data) - try: - response = self._download_json( - action_url, None, note, - data=urlencode_postdata(form_data), - headers={ - 'Referer': referrer_url, - 'X-Requested-With': 'XMLHttpRequest', - }) - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError) and e.cause.code == 500: - response = self._parse_json(e.cause.read().decode('utf-8'), None) - self._check_error(response, ('email', 'password')) - raise - - self._check_error(response, 'ErrorMessage') + response = self._download_json( + action_url, None, note, + data=urlencode_postdata(form_data), + headers={ + 'Referer': referrer_url, + 'X-Requested-With': 'XMLHttpRequest', + }, expected_status=(418, 500, )) + + self._check_error(response, ('email', 'password', 'ErrorMessage')) return response, action_url @@ -73,7 +66,7 @@ class LyndaBaseIE(InfoExtractor): # Already logged in if any(re.search(p, signin_page) for p in ( - 'isLoggedIn\s*:\s*true', r'logout\.aspx', r'>Log out<')): + r'isLoggedIn\s*:\s*true', r'logout\.aspx', 
r'>Log out<')): return # Step 2: submit email @@ -94,13 +87,21 @@ class LyndaBaseIE(InfoExtractor): class LyndaIE(LyndaBaseIE): IE_NAME = 'lynda' IE_DESC = 'lynda.com videos' - _VALID_URL = r'https?://www\.lynda\.com/(?:[^/]+/[^/]+/\d+|player/embed)/(?P<id>\d+)' + _VALID_URL = r'''(?x) + https?:// + (?:www\.)?(?:lynda\.com|educourse\.ga)/ + (?: + (?:[^/]+/){2,3}(?P<course_id>\d+)| + player/embed + )/ + (?P<id>\d+) + ''' _TIMECODE_REGEX = r'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' _TESTS = [{ - 'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', - 'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', + 'url': 'https://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', + # md5 is unstable 'info_dict': { 'id': '114408', 'ext': 'mp4', @@ -110,21 +111,79 @@ class LyndaIE(LyndaBaseIE): }, { 'url': 'https://www.lynda.com/player/embed/133770?tr=foo=1;bar=g;fizz=rt&fs=0', 'only_matching': True, + }, { + 'url': 'https://educourse.ga/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', + 'only_matching': True, + }, { + 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Willkommen-Grundlagen-guten-Gestaltung/393570/393572-4.html', + 'only_matching': True, }] + def _raise_unavailable(self, video_id): + self.raise_login_required( + 'Video %s is only available for members' % video_id) + def _real_extract(self, url): - video_id = self._match_id(url) + mobj = re.match(self._VALID_URL, url) + video_id = mobj.group('id') + course_id = mobj.group('course_id') + + query = { + 'videoId': video_id, + 'type': 'video', + } video = self._download_json( - 'http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id, - video_id, 'Downloading video JSON') + 'https://www.lynda.com/ajax/player', video_id, + 'Downloading video JSON', fatal=False, query=query) + + # Fallback scenario + if not video: + query['courseId'] = course_id + + play = self._download_json( + 'https://www.lynda.com/ajax/course/%s/%s/play' + % (course_id, video_id), video_id, 
'Downloading play JSON') + + if not play: + self._raise_unavailable(video_id) + + formats = [] + for formats_dict in play: + urls = formats_dict.get('urls') + if not isinstance(urls, dict): + continue + cdn = formats_dict.get('name') + for format_id, format_url in urls.items(): + if not format_url: + continue + formats.append({ + 'url': format_url, + 'format_id': '%s-%s' % (cdn, format_id) if cdn else format_id, + 'height': int_or_none(format_id), + }) + self._sort_formats(formats) + + conviva = self._download_json( + 'https://www.lynda.com/ajax/player/conviva', video_id, + 'Downloading conviva JSON', query=query) + + return { + 'id': video_id, + 'title': conviva['VideoTitle'], + 'description': conviva.get('VideoDescription'), + 'release_year': int_or_none(conviva.get('ReleaseYear')), + 'duration': int_or_none(conviva.get('Duration')), + 'creator': conviva.get('Author'), + 'formats': formats, + } if 'Status' in video: raise ExtractorError( 'lynda returned error: %s' % video['Message'], expected=True) if video.get('HasAccess') is False: - self.raise_login_required('Video %s is only available for members' % video_id) + self._raise_unavailable(video_id) video_id = compat_str(video.get('ID') or video_id) duration = int_or_none(video.get('DurationInSeconds')) @@ -148,7 +207,7 @@ class LyndaIE(LyndaBaseIE): for prioritized_stream_id, prioritized_stream in prioritized_streams.items(): formats.extend([{ 'url': video_url, - 'width': int_or_none(format_id), + 'height': int_or_none(format_id), 'format_id': '%s-%s' % (prioritized_stream_id, format_id), } for format_id, video_url in prioritized_stream.items()]) @@ -187,10 +246,11 @@ class LyndaIE(LyndaBaseIE): return srt def _get_subtitles(self, video_id): - url = 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id + url = 'https://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id subs = self._download_json(url, None, False) - if subs: - return {'en': [{'ext': 'srt', 'data': 
self._fix_subtitles(subs)}]} + fixed_subs = self._fix_subtitles(subs) + if fixed_subs: + return {'en': [{'ext': 'srt', 'data': fixed_subs}]} else: return {} @@ -201,16 +261,39 @@ class LyndaCourseIE(LyndaBaseIE): # Course link equals to welcome/introduction video link of same course # We will recognize it as course link - _VALID_URL = r'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html' + _VALID_URL = r'https?://(?:www|m)\.(?:lynda\.com|educourse\.ga)/(?P<coursepath>(?:[^/]+/){2,3}(?P<courseid>\d+))-2\.html' + + _TESTS = [{ + 'url': 'https://www.lynda.com/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html', + 'only_matching': True, + }, { + 'url': 'https://www.lynda.com/de/Graphic-Design-tutorials/Grundlagen-guten-Gestaltung/393570-2.html', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) course_path = mobj.group('coursepath') course_id = mobj.group('courseid') + item_template = 'https://www.lynda.com/%s/%%s-4.html' % course_path + course = self._download_json( - 'http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, - course_id, 'Downloading course JSON') + 'https://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id, + course_id, 'Downloading course JSON', fatal=False) + + if not course: + webpage = self._download_webpage(url, course_id) + entries = [ + self.url_result( + item_template % video_id, ie=LyndaIE.ie_key(), + video_id=video_id) + for video_id in re.findall( + r'data-video-id=["\'](\d+)', webpage)] + return self.playlist_result( + entries, course_id, + self._og_search_title(webpage, fatal=False), + self._og_search_description(webpage)) if course.get('Status') == 'NotFound': raise ExtractorError( @@ -231,7 +314,7 @@ class LyndaCourseIE(LyndaBaseIE): if video_id: entries.append({ '_type': 'url_transparent', - 'url': 'http://www.lynda.com/%s/%s-4.html' % (course_path, video_id), + 'url': item_template % video_id, 'ie_key': LyndaIE.ie_key(), 'chapter': chapter.get('Title'), 
'chapter_number': int_or_none(chapter.get('ChapterIndex')),