]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/lynda.py
2160d6cb08ae5b71584ffdc0d76982e2a9bdf0c0
   1 from __future__ 
import unicode_literals
 
   6 from .subtitles 
import SubtitlesInfoExtractor
 
   7 from .common 
import InfoExtractor
 
  10     compat_urllib_request
, 
  17 class LyndaIE(SubtitlesInfoExtractor
): 
  19     IE_DESC 
= 'lynda.com videos' 
  20     _VALID_URL 
= r
'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' 
  21     _LOGIN_URL 
= 'https://www.lynda.com/login/login.aspx' 
  22     _NETRC_MACHINE 
= 'lynda' 
  24     _SUCCESSFUL_LOGIN_REGEX 
= r
'isLoggedIn: true' 
  25     _TIMECODE_REGEX 
= r
'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' 
  27     ACCOUNT_CREDENTIALS_HINT 
= 'Use --username and --password options to provide lynda.com account credentials.' 
  30         'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', 
  31         'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', 
  35             'title': 'Using the exercise files', 
  40     def _real_initialize(self
): 
  43     def _real_extract(self
, url
): 
  44         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  45         video_id 
= mobj
.group(1) 
  47         page 
= self
._download
_webpage
('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id
, video_id
, 
  48                                       'Downloading video JSON') 
  49         video_json 
= json
.loads(page
) 
  51         if 'Status' in video_json
: 
  52             raise ExtractorError('lynda returned error: %s' % video_json
['Message'], expected
=True) 
  54         if video_json
['HasAccess'] is False: 
  56                 'Video %s is only available for members. ' % video_id 
+ self
.ACCOUNT_CREDENTIALS_HINT
, expected
=True) 
  58         video_id 
= compat_str(video_json
['ID']) 
  59         duration 
= video_json
['DurationInSeconds'] 
  60         title 
= video_json
['Title'] 
  64         fmts 
= video_json
.get('Formats') 
  69                     'ext': fmt
['Extension'], 
  70                     'width': fmt
['Width'], 
  71                     'height': fmt
['Height'], 
  72                     'filesize': fmt
['FileSize'], 
  73                     'format_id': str(fmt
['Resolution']) 
  76         prioritized_streams 
= video_json
.get('PrioritizedStreams') 
  77         if prioritized_streams
: 
  81                     'width': int_or_none(format_id
), 
  82                     'format_id': format_id
, 
  83                 } for format_id
, video_url 
in prioritized_streams
['0'].items() 
  86         self
._sort
_formats
(formats
) 
  88         if self
._downloader
.params
.get('listsubtitles', False): 
  89             self
._list
_available
_subtitles
(video_id
, page
) 
  92         subtitles 
= self
._fix
_subtitles
(self
.extract_subtitles(video_id
, page
)) 
  98             'subtitles': subtitles
, 
 103         (username
, password
) = self
._get
_login
_info
() 
 108             'username': username
, 
 109             'password': password
, 
 113         request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(login_form
)) 
 114         login_page 
= self
._download
_webpage
(request
, None, 'Logging in as %s' % username
) 
 116         # Not (yet) logged in 
 117         m 
= re
.search(r
'loginResultJson = \'(?P
<json
>[^
\']+)\';', login_page) 
 119             response = m.group('json
') 
 120             response_json = json.loads(response) 
 121             state = response_json['state
'] 
 123             if state == 'notlogged
': 
 124                 raise ExtractorError('Unable to login
, incorrect username 
and/or password
', expected=True) 
 126             # This is when we get popup: 
 127             # > You're already logged in to lynda.com on two devices. 
 128             # > If you log in here, we'll log you out of another device. 
 129             # So, we need to confirm this. 
 130             if state 
== 'conflicted': 
 138                 request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(confirm_form
)) 
 139                 login_page 
= self
._download
_webpage
(request
, None, 'Confirming log in and log out from another device') 
 141         if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None: 
 142             raise ExtractorError('Unable to log in') 
 144     def _fix_subtitles(self
, subtitles
): 
 145         if subtitles 
is None: 
 146             return subtitles  
# subtitles not requested 
 149         for k
, v 
in subtitles
.items(): 
 154             for pos 
in range(0, len(subs
) - 1): 
 155                 seq_current 
= subs
[pos
] 
 156                 m_current 
= re
.match(self
._TIMECODE
_REGEX
, seq_current
['Timecode']) 
 157                 if m_current 
is None: 
 159                 seq_next 
= subs
[pos 
+ 1] 
 160                 m_next 
= re
.match(self
._TIMECODE
_REGEX
, seq_next
['Timecode']) 
 163                 appear_time 
= m_current
.group('timecode') 
 164                 disappear_time 
= m_next
.group('timecode') 
 165                 text 
= seq_current
['Caption'] 
 166                 srt 
+= '%s\r\n%s --> %s\r\n%s' % (str(pos
), appear_time
, disappear_time
, text
) 
 168                 fixed_subtitles
[k
] = srt
 
 169         return fixed_subtitles
 
 171     def _get_available_subtitles(self
, video_id
, webpage
): 
 172         url 
= 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
 
 173         sub 
= self
._download
_webpage
(url
, None, False) 
 174         sub_json 
= json
.loads(sub
) 
 175         return {'en': url
} if len(sub_json
) > 0 else {} 
 178 class LyndaCourseIE(InfoExtractor
): 
 179     IE_NAME 
= 'lynda:course' 
 180     IE_DESC 
= 'lynda.com online courses' 
 182     # A course link is identical to the link of that course's welcome/introduction video, 
 183     # so a link of that form is recognized as a course link 
 184     _VALID_URL 
= r
'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html' 
 186     def _real_extract(self
, url
): 
 187         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 188         course_path 
= mobj
.group('coursepath') 
 189         course_id 
= mobj
.group('courseid') 
 191         page 
= self
._download
_webpage
('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id
, 
 192                                       course_id
, 'Downloading course JSON') 
 193         course_json 
= json
.loads(page
) 
 195         if 'Status' in course_json 
and course_json
['Status'] == 'NotFound': 
 196             raise ExtractorError('Course %s does not exist' % course_id
, expected
=True) 
 198         unaccessible_videos 
= 0 
 200         (username
, _
) = self
._get
_login
_info
() 
 202         # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided 
 203         # by single video API anymore 
 205         for chapter 
in course_json
['Chapters']: 
 206             for video 
in chapter
['Videos']: 
 207                 if username 
is None and video
['HasAccess'] is False: 
 208                     unaccessible_videos 
+= 1 
 210                 videos
.append(video
['ID']) 
 212         if unaccessible_videos 
> 0: 
 213             self
._downloader
.report_warning('%s videos are only available for members and will not be downloaded. ' 
 214                                             % unaccessible_videos 
+ LyndaIE
.ACCOUNT_CREDENTIALS_HINT
) 
 217             self
.url_result('http://www.lynda.com/%s/%s-4.html' % 
 218                             (course_path
, video_id
), 
 220             for video_id 
in videos
] 
 222         course_title 
= course_json
['Title'] 
 224         return self
.playlist_result(entries
, course_id
, course_title
)