]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/platzi.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  21 class PlatziIE(InfoExtractor
): 
  25                             platzi\.com/clases|           # es version 
  26                             courses\.platzi\.com/classes  # en version 
  27                         )/[^/]+/(?P<id>\d+)-[^/?\#&]+ 
  29     _LOGIN_URL 
= 'https://platzi.com/login/' 
  30     _NETRC_MACHINE 
= 'platzi' 
  33         'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/', 
  34         'md5': '8f56448241005b561c10f11a595b37e3', 
  38             'title': 'Creando nuestra primera página', 
  39             'description': 'md5:4c866e45034fc76412fbf6e60ae008bc', 
  42         'skip': 'Requires platzi account credentials', 
  44         'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/', 
  48             'title': 'Background', 
  49             'description': 'md5:49c83c09404b15e6e71defaf87f6b305', 
  52         'skip': 'Requires platzi account credentials', 
  54             'skip_download': True, 
  58     def _real_initialize(self
): 
  62         username
, password 
= self
._get
_login
_info
() 
  66         login_page 
= self
._download
_webpage
( 
  67             self
._LOGIN
_URL
, None, 'Downloading login page') 
  69         login_form 
= self
._hidden
_inputs
(login_page
) 
  76         urlh 
= self
._request
_webpage
( 
  77             self
._LOGIN
_URL
, None, 'Logging in', 
  78             data
=urlencode_postdata(login_form
), 
  79             headers
={'Referer': self
._LOGIN
_URL
}) 
  82         if 'platzi.com/login' not in compat_str(urlh
.geturl()): 
  85         login_error 
= self
._webpage
_read
_content
( 
  86             urlh
, self
._LOGIN
_URL
, None, 'Downloading login error page') 
  88         login 
= self
._parse
_json
( 
  90                 r
'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error
, 'login'), 
  93         for kind 
in ('error', 'password', 'nonFields'): 
  94             error 
= str_or_none(login
.get('%sError' % kind
)) 
  97                     'Unable to login: %s' % error
, expected
=True) 
  98         raise ExtractorError('Unable to log in') 
 100     def _real_extract(self
, url
): 
 101         lecture_id 
= self
._match
_id
(url
) 
 103         webpage 
= self
._download
_webpage
(url
, lecture_id
) 
 105         data 
= self
._parse
_json
( 
 107                 r
'client_data\s*=\s*({.+?})\s*;', webpage
, 'client data'), 
 110         material 
= data
['initialState']['material'] 
 111         desc 
= material
['description'] 
 112         title 
= desc
['title'] 
 115         for server_id
, server 
in material
['videos'].items(): 
 116             if not isinstance(server
, dict): 
 118             for format_id 
in ('hls', 'dash'): 
 119                 format_url 
= url_or_none(server
.get(format_id
)) 
 122                 if format_id 
== 'hls': 
 123                     formats
.extend(self
._extract
_m
3u8_formats
( 
 124                         format_url
, lecture_id
, 'mp4', 
 125                         entry_protocol
='m3u8_native', m3u8_id
=format_id
, 
 126                         note
='Downloading %s m3u8 information' % server_id
, 
 128                 elif format_id 
== 'dash': 
 129                     formats
.extend(self
._extract
_mpd
_formats
( 
 130                         format_url
, lecture_id
, mpd_id
=format_id
, 
 131                         note
='Downloading %s MPD manifest' % server_id
, 
 133         self
._sort
_formats
(formats
) 
 135         content 
= str_or_none(desc
.get('content')) 
 136         description 
= (clean_html(compat_b64decode(content
).decode('utf-8')) 
 137                        if content 
else None) 
 138         duration 
= int_or_none(material
.get('duration'), invscale
=60) 
 143             'description': description
, 
 144             'duration': duration
, 
 149 class PlatziCourseIE(InfoExtractor
): 
 150     _VALID_URL 
= r
'''(?x) 
 153                             platzi\.com/clases|           # es version 
 154                             courses\.platzi\.com/classes  # en version 
 158         'url': 'https://platzi.com/clases/next-js/', 
 161             'title': 'Curso de Next.js', 
 163         'playlist_count': 22, 
 165         'url': 'https://courses.platzi.com/classes/communication-codestream/', 
 168             'title': 'Codestream Course', 
 170         'playlist_count': 14, 
 174     def suitable(cls
, url
): 
 175         return False if PlatziIE
.suitable(url
) else super(PlatziCourseIE
, cls
).suitable(url
) 
 177     def _real_extract(self
, url
): 
 178         course_name 
= self
._match
_id
(url
) 
 180         webpage 
= self
._download
_webpage
(url
, course_name
) 
 182         props 
= self
._parse
_json
( 
 183             self
._search
_regex
(r
'data\s*=\s*({.+?})\s*;', webpage
, 'data'), 
 184             course_name
)['initialProps'] 
 187         for chapter_num
, chapter 
in enumerate(props
['concepts'], 1): 
 188             if not isinstance(chapter
, dict): 
 190             materials 
= chapter
.get('materials') 
 191             if not materials 
or not isinstance(materials
, list): 
 193             chapter_title 
= chapter
.get('title') 
 194             chapter_id 
= str_or_none(chapter
.get('id')) 
 195             for material 
in materials
: 
 196                 if not isinstance(material
, dict): 
 198                 if material
.get('material_type') != 'video': 
 200                 video_url 
= urljoin(url
, material
.get('url')) 
 204                     '_type': 'url_transparent', 
 206                     'title': str_or_none(material
.get('name')), 
 207                     'id': str_or_none(material
.get('id')), 
 208                     'ie_key': PlatziIE
.ie_key(), 
 209                     'chapter': chapter_title
, 
 210                     'chapter_number': chapter_num
, 
 211                     'chapter_id': chapter_id
, 
 214         course_id 
= compat_str(try_get(props
, lambda x
: x
['course']['id'])) 
 215         course_title 
= try_get(props
, lambda x
: x
['course']['name'], compat_str
) 
 217         return self
.playlist_result(entries
, course_id
, course_title
)