]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/lecturio.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_str
 
  21 class LecturioBaseIE(InfoExtractor
): 
  22     _API_BASE_URL 
= 'https://app.lecturio.com/api/en/latest/html5/' 
  23     _LOGIN_URL 
= 'https://app.lecturio.com/en/login' 
  24     _NETRC_MACHINE 
= 'lecturio' 
  26     def _real_initialize(self
): 
  30         username
, password 
= self
._get
_login
_info
() 
  35         _
, urlh 
= self
._download
_webpage
_handle
( 
  36             self
._LOGIN
_URL
, None, 'Downloading login popup') 
  38         def is_logged(url_handle
): 
  39             return self
._LOGIN
_URL 
not in compat_str(url_handle
.geturl()) 
  46             'signin[email]': username
, 
  47             'signin[password]': password
, 
  48             'signin[remember]': 'on', 
  51         response
, urlh 
= self
._download
_webpage
_handle
( 
  52             self
._LOGIN
_URL
, None, 'Logging in', 
  53             data
=urlencode_postdata(login_form
)) 
  55         # Logged in successfully 
  59         errors 
= self
._html
_search
_regex
( 
  60             r
'(?s)<ul[^>]+class=["\']error_list
[^
>]+>(.+?
)</ul
>', response, 
  61             'errors
', default=None) 
  63             raise ExtractorError('Unable to login
: %s' % errors, expected=True) 
  64         raise ExtractorError('Unable to log 
in') 
  67 class LecturioIE(LecturioBaseIE): 
  71                             app\.lecturio\.com/([^/]+/(?P<nt>[^/?#&]+)\.lecture|(?:\#/)?lecture/c/\d+/(?P<id>\d+))| 
  72                             (?:www\.)?lecturio\.de/[^/]+/(?P<nt_de>[^/?#&]+)\.vortrag 
  76         'url
': 'https
://app
.lecturio
.com
/medical
-courses
/important
-concepts
-and-terms
-introduction
-to
-microbiology
.lecture
#tab/videos', 
  77         'md5': '9a42cf1d8282a6311bf7211bbde26fde', 
  81             'title': 'Important Concepts and Terms — Introduction to Microbiology', 
  83         'skip': 'Requires lecturio account credentials', 
  85         'url': 'https://www.lecturio.de/jura/oeffentliches-recht-staatsexamen.vortrag', 
  86         'only_matching': True, 
  88         'url': 'https://app.lecturio.com/#/lecture/c/6434/39634', 
  89         'only_matching': True, 
 106     def _real_extract(self
, url
): 
 107         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 108         nt 
= mobj
.group('nt') or mobj
.group('nt_de') 
 109         lecture_id 
= mobj
.group('id') 
 110         display_id 
= nt 
or lecture_id
 
 111         api_path 
= 'lectures/' + lecture_id 
if lecture_id 
else 'lecture/' + nt 
+ '.json' 
 112         video 
= self
._download
_json
( 
 113             self
._API
_BASE
_URL 
+ api_path
, display_id
) 
 114         title 
= video
['title'].strip() 
 116             pid 
= video
.get('productId') or video
.get('uid') 
 118                 spid 
= pid
.split('_') 
 119                 if spid 
and len(spid
) == 2: 
 123         for format_ 
in video
['content']['media']: 
 124             if not isinstance(format_
, dict): 
 126             file_ 
= format_
.get('file') 
 129             ext 
= determine_ext(file_
) 
 131                 # smil contains only broken RTMP formats anyway 
 133             file_url 
= url_or_none(file_
) 
 136             label 
= str_or_none(format_
.get('label')) 
 137             filesize 
= int_or_none(format_
.get('fileSize')) 
 141                 'filesize': float_or_none(filesize
, invscale
=1000) 
 144                 mobj 
= re
.match(r
'(\d+)p\s*\(([^)]+)\)', label
) 
 147                         'format_id': mobj
.group(2), 
 148                         'height': int(mobj
.group(1)), 
 151         self
._sort
_formats
(formats
) 
 154         automatic_captions 
= {} 
 155         captions 
= video
.get('captions') or [] 
 157             cc_url 
= cc
.get('url') 
 160             cc_label 
= cc
.get('translatedCode') 
 161             lang 
= cc
.get('languageCode') or self
._search
_regex
( 
 162                 r
'/([a-z]{2})_', cc_url
, 'lang', 
 163                 default
=cc_label
.split()[0] if cc_label 
else 'en') 
 164             original_lang 
= self
._search
_regex
( 
 165                 r
'/[a-z]{2}_([a-z]{2})_', cc_url
, 'original lang', 
 167             sub_dict 
= (automatic_captions
 
 168                         if 'auto-translated' in cc_label 
or original_lang
 
 170             sub_dict
.setdefault(self
._CC
_LANGS
.get(lang
, lang
), []).append({ 
 175             'id': lecture_id 
or nt
, 
 178             'subtitles': subtitles
, 
 179             'automatic_captions': automatic_captions
, 
 183 class LecturioCourseIE(LecturioBaseIE
): 
 184     _VALID_URL 
= r
'https://app\.lecturio\.com/(?:[^/]+/(?P<nt>[^/?#&]+)\.course|(?:#/)?course/c/(?P<id>\d+))' 
 186         'url': 'https://app.lecturio.com/medical-courses/microbiology-introduction.course#/', 
 188             'id': 'microbiology-introduction', 
 189             'title': 'Microbiology: Introduction', 
 190             'description': 'md5:13da8500c25880c6016ae1e6d78c386a', 
 192         'playlist_count': 45, 
 193         'skip': 'Requires lecturio account credentials', 
 195         'url': 'https://app.lecturio.com/#/course/c/6434', 
 196         'only_matching': True, 
 199     def _real_extract(self
, url
): 
 200         nt
, course_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
 201         display_id 
= nt 
or course_id
 
 202         api_path 
= 'courses/' + course_id 
if course_id 
else 'course/content/' + nt 
+ '.json' 
 203         course 
= self
._download
_json
( 
 204             self
._API
_BASE
_URL 
+ api_path
, display_id
) 
 206         for lecture 
in course
.get('lectures', []): 
 207             lecture_id 
= str_or_none(lecture
.get('id')) 
 208             lecture_url 
= lecture
.get('url') 
 210                 lecture_url 
= urljoin(url
, lecture_url
) 
 212                 lecture_url 
= 'https://app.lecturio.com/#/lecture/c/%s/%s' % (course_id
, lecture_id
) 
 213             entries
.append(self
.url_result( 
 214                 lecture_url
, ie
=LecturioIE
.ie_key(), video_id
=lecture_id
)) 
 215         return self
.playlist_result( 
 216             entries
, display_id
, course
.get('title'), 
 217             clean_html(course
.get('description'))) 
 220 class LecturioDeCourseIE(LecturioBaseIE
): 
 221     _VALID_URL 
= r
'https://(?:www\.)?lecturio\.de/[^/]+/(?P<id>[^/?#&]+)\.kurs' 
 223         'url': 'https://www.lecturio.de/jura/grundrechte.kurs', 
 224         'only_matching': True, 
 227     def _real_extract(self
, url
): 
 228         display_id 
= self
._match
_id
(url
) 
 230         webpage 
= self
._download
_webpage
(url
, display_id
) 
 233         for mobj 
in re
.finditer( 
 234                 r
'(?s)<td[^>]+\bdata-lecture-id=["\'](?P
<id>\d
+).+?
\bhref
=(["\'])(?P<url>(?:(?!\2).)+\.vortrag)\b[^>]+>', 
 236             lecture_url = urljoin(url, mobj.group('url')) 
 237             lecture_id = mobj.group('id') 
 238             entries.append(self.url_result( 
 239                 lecture_url, ie=LecturioIE.ie_key(), video_id=lecture_id)) 
 241         title = self._search_regex( 
 242             r'<h1[^>]*>([^<]+)', webpage, 'title', default=None) 
 244         return self.playlist_result(entries, display_id, title)