]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/platzi.py
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
21 class PlatziBaseIE(InfoExtractor
):
22 _LOGIN_URL
= 'https://platzi.com/login/'
23 _NETRC_MACHINE
= 'platzi'
25 def _real_initialize(self
):
29 username
, password
= self
._get
_login
_info
()
33 login_page
= self
._download
_webpage
(
34 self
._LOGIN
_URL
, None, 'Downloading login page')
36 login_form
= self
._hidden
_inputs
(login_page
)
43 urlh
= self
._request
_webpage
(
44 self
._LOGIN
_URL
, None, 'Logging in',
45 data
=urlencode_postdata(login_form
),
46 headers
={'Referer': self
._LOGIN
_URL
})
49 if 'platzi.com/login' not in compat_str(urlh
.geturl()):
52 login_error
= self
._webpage
_read
_content
(
53 urlh
, self
._LOGIN
_URL
, None, 'Downloading login error page')
55 login
= self
._parse
_json
(
57 r
'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error
, 'login'),
60 for kind
in ('error', 'password', 'nonFields'):
61 error
= str_or_none(login
.get('%sError' % kind
))
64 'Unable to login: %s' % error
, expected
=True)
65 raise ExtractorError('Unable to log in')
68 class PlatziIE(PlatziBaseIE
):
72 platzi\.com/clases| # es version
73 courses\.platzi\.com/classes # en version
74 )/[^/]+/(?P<id>\d+)-[^/?\#&]+
78 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
79 'md5': '8f56448241005b561c10f11a595b37e3',
83 'title': 'Creando nuestra primera página',
84 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
87 'skip': 'Requires platzi account credentials',
89 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
93 'title': 'Background',
94 'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
97 'skip': 'Requires platzi account credentials',
99 'skip_download': True,
103 def _real_extract(self
, url
):
104 lecture_id
= self
._match
_id
(url
)
106 webpage
= self
._download
_webpage
(url
, lecture_id
)
108 data
= self
._parse
_json
(
110 # client_data may contain "};" so that we have to try more
112 (r
'client_data\s*=\s*({.+?})\s*;\s*\n',
113 r
'client_data\s*=\s*({.+?})\s*;'),
114 webpage
, 'client data'),
117 material
= data
['initialState']['material']
118 desc
= material
['description']
119 title
= desc
['title']
122 for server_id
, server
in material
['videos'].items():
123 if not isinstance(server
, dict):
125 for format_id
in ('hls', 'dash'):
126 format_url
= url_or_none(server
.get(format_id
))
129 if format_id
== 'hls':
130 formats
.extend(self
._extract
_m
3u8_formats
(
131 format_url
, lecture_id
, 'mp4',
132 entry_protocol
='m3u8_native', m3u8_id
=format_id
,
133 note
='Downloading %s m3u8 information' % server_id
,
135 elif format_id
== 'dash':
136 formats
.extend(self
._extract
_mpd
_formats
(
137 format_url
, lecture_id
, mpd_id
=format_id
,
138 note
='Downloading %s MPD manifest' % server_id
,
140 self
._sort
_formats
(formats
)
142 content
= str_or_none(desc
.get('content'))
143 description
= (clean_html(compat_b64decode(content
).decode('utf-8'))
144 if content
else None)
145 duration
= int_or_none(material
.get('duration'), invscale
=60)
150 'description': description
,
151 'duration': duration
,
156 class PlatziCourseIE(PlatziBaseIE
):
157 _VALID_URL
= r
'''(?x)
160 platzi\.com/clases| # es version
161 courses\.platzi\.com/classes # en version
165 'url': 'https://platzi.com/clases/next-js/',
168 'title': 'Curso de Next.js',
170 'playlist_count': 22,
172 'url': 'https://courses.platzi.com/classes/communication-codestream/',
175 'title': 'Codestream Course',
177 'playlist_count': 14,
181 def suitable(cls
, url
):
182 return False if PlatziIE
.suitable(url
) else super(PlatziCourseIE
, cls
).suitable(url
)
184 def _real_extract(self
, url
):
185 course_name
= self
._match
_id
(url
)
187 webpage
= self
._download
_webpage
(url
, course_name
)
189 props
= self
._parse
_json
(
190 self
._search
_regex
(r
'data\s*=\s*({.+?})\s*;', webpage
, 'data'),
191 course_name
)['initialProps']
194 for chapter_num
, chapter
in enumerate(props
['concepts'], 1):
195 if not isinstance(chapter
, dict):
197 materials
= chapter
.get('materials')
198 if not materials
or not isinstance(materials
, list):
200 chapter_title
= chapter
.get('title')
201 chapter_id
= str_or_none(chapter
.get('id'))
202 for material
in materials
:
203 if not isinstance(material
, dict):
205 if material
.get('material_type') != 'video':
207 video_url
= urljoin(url
, material
.get('url'))
211 '_type': 'url_transparent',
213 'title': str_or_none(material
.get('name')),
214 'id': str_or_none(material
.get('id')),
215 'ie_key': PlatziIE
.ie_key(),
216 'chapter': chapter_title
,
217 'chapter_number': chapter_num
,
218 'chapter_id': chapter_id
,
221 course_id
= compat_str(try_get(props
, lambda x
: x
['course']['id']))
222 course_title
= try_get(props
, lambda x
: x
['course']['name'], compat_str
)
224 return self
.playlist_result(entries
, course_id
, course_title
)