1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
6 from .wistia
import WistiaIE
7 from ..compat
import compat_str
17 class UpskillBaseIE(InfoExtractor
):
18 _LOGIN_URL
= 'http://upskillcourses.com/sign_in'
19 _NETRC_MACHINE
= 'upskill'
21 def _real_initialize(self
):
25 username
, password
= self
._get
_login
_info
()
29 login_page
, urlh
= self
._download
_webpage
_handle
(
30 self
._LOGIN
_URL
, None, 'Downloading login page')
32 login_url
= compat_str(urlh
.geturl())
34 login_form
= self
._hidden
_inputs
(login_page
)
37 'user[email]': username
,
38 'user[password]': password
,
41 post_url
= self
._search
_regex
(
42 r
'<form[^>]+action=(["\'])(?P
<url
>(?
:(?
!\
1).)+)\
1', login_page,
43 'post url
', default=login_url, group='url
')
45 if not post_url.startswith('http
'):
46 post_url = urljoin(login_url, post_url)
48 response = self._download_webpage(
49 post_url, None, 'Logging
in',
50 data=urlencode_postdata(login_form),
52 'Content
-Type
': 'application
/x
-www
-form
-urlencoded
',
57 if any(re.search(p, response) for p in (
58 r'class=["\']user-signout',
59 r'<a[^>]+\bhref=["\']/sign_out
',
63 message = get_element_by_class('alert
', response)
64 if message is not None:
66 'Unable to login
: %s' % clean_html(message), expected=True)
68 raise ExtractorError('Unable to log
in')
71 class UpskillIE(UpskillBaseIE):
72 _VALID_URL = r'https?
://(?
:www\
.)?upskillcourses\
.com
/courses
/[^
/]+/lectures
/(?P
<id>\d
+)'
75 'url
': 'http
://upskillcourses
.com
/courses
/essential
-web
-developer
-course
/lectures
/1747100',
79 'title
': 'Welcome to the Course
!',
80 'description
': 'md5
:8d66c13403783370af62ca97a7357bdd
',
82 'timestamp
': 1479846621,
83 'upload_date
': '20161122',
86 'skip_download
': True,
89 'url
': 'http
://upskillcourses
.com
/courses
/119763/lectures
/1747100',
90 'only_matching
': True,
93 def _real_extract(self, url):
94 video_id = self._match_id(url)
96 webpage = self._download_webpage(url, video_id)
98 wistia_url = WistiaIE._extract_url(webpage)
100 if any(re.search(p, webpage) for p in (
101 r'class=["\']lecture-contents-locked',
102 r'>\s*Lecture contents locked',
103 r'id=["\']lecture
-locked
')):
104 self.raise_login_required('Lecture contents locked
')
106 title = self._og_search_title(webpage, default=None)
109 '_type
': 'url_transparent
',
111 'ie_key
': WistiaIE.ie_key(),
116 class UpskillCourseIE(UpskillBaseIE):
117 _VALID_URL = r'https?
://(?
:www\
.)?upskillcourses\
.com
/courses
/(?
:enrolled
/)?
(?P
<id>[^
/?
#&]+)'
119 'url': 'http://upskillcourses.com/courses/essential-web-developer-course/',
122 'title': 'The Essential Web Developer Course (Free)',
124 'playlist_count': 192,
126 'url': 'http://upskillcourses.com/courses/119763/',
127 'only_matching': True,
129 'url': 'http://upskillcourses.com/courses/enrolled/119763',
130 'only_matching': True,
134 def suitable(cls
, url
):
135 return False if UpskillIE
.suitable(url
) else super(
136 UpskillCourseIE
, cls
).suitable(url
)
138 def _real_extract(self
, url
):
139 course_id
= self
._match
_id
(url
)
141 webpage
= self
._download
_webpage
(url
, course_id
)
143 course_id
= self
._search
_regex
(
144 r
'data-course-id=["\'](\d
+)', webpage, 'course
id',
149 for mobj in re.finditer(
150 r'(?s
)(?P
<li
><li
[^
>]+class=(["\'])(?:(?!\2).)*?section-item[^>]+>.+?</li>)',
152 li = mobj.group('li')
153 if 'fa-youtube-play' not in li:
155 lecture_url = self._search_regex(
156 r'<a[^>]+href=(["\'])(?P
<url
>(?
:(?
!\
1).)+)\
1', li,
157 'lecture url
', default=None, group='url
')
160 lecture_id = self._search_regex(
161 r'/lectures
/(\d
+)', lecture_url, 'lecture
id', default=None)
162 title = self._html_search_regex(
163 r'<span
[^
>]+class=["\']lecture-name[^>]+>([^<]+)', li,
164 'title', default=None)
167 urljoin('http://upskillcourses.com/', lecture_url),
168 ie=UpskillIE.ie_key(), video_id=lecture_id,
169 video_title=clean_html(title)))
171 course_title = self._html_search_regex(
172 (r'(?s)<img[^>]+class=["\']course
-image
[^
>]+>\s
*<h\d
>(.+?
)</h
',
173 r'(?s
)<h\d
[^
>]+class=["\']course-title[^>]+>(.+?)</h'),
174 webpage, 'course title', fatal=False)
176 return self.playlist_result(entries, course_id, course_title)