X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/d9d7cd0e85dc712461d9185db9df9d6c900a573b..00368b4c3a5d4e909e1b7ecfc4030bf28da020f3:/youtube_dl/extractor/egghead.py diff --git a/youtube_dl/extractor/egghead.py b/youtube_dl/extractor/egghead.py index db92146..e4a3046 100644 --- a/youtube_dl/extractor/egghead.py +++ b/youtube_dl/extractor/egghead.py @@ -1,15 +1,18 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor +from ..utils import ( + int_or_none, + try_get, + unified_timestamp, +) class EggheadCourseIE(InfoExtractor): IE_DESC = 'egghead.io course' IE_NAME = 'egghead:course' - _VALID_URL = r'https://egghead\.io/courses/(?P<id>[a-zA-Z_0-9-]+)' + _VALID_URL = r'https://egghead\.io/courses/(?P<id>[^/?#&]+)' _TEST = { 'url': 'https://egghead.io/courses/professor-frisby-introduces-composable-functional-javascript', 'playlist_count': 29, @@ -22,18 +25,60 @@ class EggheadCourseIE(InfoExtractor): def _real_extract(self, url): playlist_id = self._match_id(url) - webpage = self._download_webpage(url, playlist_id) - title = self._html_search_regex(r'

([^<]+)

', webpage, 'title') - ul = self._search_regex(r'(?s)', webpage, 'session list') + course = self._download_json( + 'https://egghead.io/api/v1/series/%s' % playlist_id, playlist_id) + + entries = [ + self.url_result( + 'wistia:%s' % lesson['wistia_id'], ie='Wistia', + video_id=lesson['wistia_id'], video_title=lesson.get('title')) + for lesson in course['lessons'] if lesson.get('wistia_id')] + + return self.playlist_result( + entries, playlist_id, course.get('title'), + course.get('description')) + + +class EggheadLessonIE(InfoExtractor): + IE_DESC = 'egghead.io lesson' + IE_NAME = 'egghead:lesson' + _VALID_URL = r'https://egghead\.io/lessons/(?P<id>[^/?#&]+)' + _TEST = { + 'url': 'https://egghead.io/lessons/javascript-linear-data-flow-with-container-style-types-box', + 'info_dict': { + 'id': 'fv5yotjxcg', + 'ext': 'mp4', + 'title': 'Create linear data flow with container style types (Box)', + 'description': 'md5:9aa2cdb6f9878ed4c39ec09e85a8150e', + 'thumbnail': r're:^https?:.*\.jpg$', + 'timestamp': 1481296768, + 'upload_date': '20161209', + 'duration': 304, + 'view_count': 0, + 'tags': ['javascript', 'free'], + }, + 'params': { + 'skip_download': True, + }, + } + + def _real_extract(self, url): + lesson_id = self._match_id(url) - found = re.findall(r'(?s)\s*