]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/lynda.py
   1 from __future__ 
import unicode_literals
 
   6 from .subtitles 
import SubtitlesInfoExtractor
 
   7 from .common 
import InfoExtractor
 
  10     compat_urllib_request
, 
  15 class LyndaIE(SubtitlesInfoExtractor
): 
  17     IE_DESC 
= 'lynda.com videos' 
  18     _VALID_URL 
= r
'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' 
  19     _LOGIN_URL 
= 'https://www.lynda.com/login/login.aspx' 
  20     _NETRC_MACHINE 
= 'lynda' 
  22     _SUCCESSFUL_LOGIN_REGEX 
= r
'<a href="https://www.lynda.com/home/userAccount/ChangeContactInfo.aspx" data-qa="eyebrow_account_menu">My account' 
  23     _TIMECODE_REGEX 
= r
'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' 
  25     ACCOUNT_CREDENTIALS_HINT 
= 'Use --username and --password options to provide lynda.com account credentials.' 
  28         'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', 
  30         'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', 
  32             'title': 'Using the exercise files', 
  37     def _real_initialize(self
): 
  40     def _real_extract(self
, url
): 
  41         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  42         video_id 
= mobj
.group(1) 
  44         page 
= self
._download
_webpage
('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id
, 
  45                                       video_id
, 'Downloading video JSON') 
  46         video_json 
= json
.loads(page
) 
  48         if 'Status' in video_json
: 
  49             raise ExtractorError('lynda returned error: %s' % video_json
['Message'], expected
=True) 
  51         if video_json
['HasAccess'] is False: 
  52             raise ExtractorError('Video %s is only available for members. ' % video_id 
+ self
.ACCOUNT_CREDENTIALS_HINT
, expected
=True) 
  54         video_id 
= video_json
['ID'] 
  55         duration 
= video_json
['DurationInSeconds'] 
  56         title 
= video_json
['Title'] 
  58         formats 
= [{'url': fmt
['Url'], 
  59                     'ext': fmt
['Extension'], 
  60                     'width': fmt
['Width'], 
  61                     'height': fmt
['Height'], 
  62                     'filesize': fmt
['FileSize'], 
  63                     'format_id': str(fmt
['Resolution']) 
  64                     } for fmt 
in video_json
['Formats']] 
  66         self
._sort
_formats
(formats
) 
  68         if self
._downloader
.params
.get('listsubtitles', False): 
  69             self
._list
_available
_subtitles
(video_id
, page
) 
  72         subtitles 
= self
._fix
_subtitles
(self
.extract_subtitles(video_id
, page
)) 
  78             'subtitles': subtitles
, 
  83         (username
, password
) = self
._get
_login
_info
() 
  93         request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(login_form
)) 
  94         login_page 
= self
._download
_webpage
(request
, None, note
='Logging in as %s' % username
) 
  97         m 
= re
.search(r
'loginResultJson = \'(?P
<json
>[^
\']+)\';', login_page) 
  99             response = m.group('json
') 
 100             response_json = json.loads(response)             
 101             state = response_json['state
'] 
 103             if state == 'notlogged
': 
 104                 raise ExtractorError('Unable to login
, incorrect username 
and/or password
', expected=True) 
 106             # This is when we get popup: 
 107             # > You're already logged 
in to lynda
.com on two devices
. 
 108             # > If you log in here, we'll log you out of another device. 
 109             # So, we need to confirm this. 
 110             if state 
== 'conflicted': 
 118                 request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(confirm_form
)) 
 119                 login_page 
= self
._download
_webpage
(request
, None, note
='Confirming log in and log out from another device') 
 121         if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None: 
 122             raise ExtractorError('Unable to log in') 
 124     def _fix_subtitles(self
, subtitles
): 
 125         if subtitles 
is None: 
 126             return subtitles  
# subtitles not requested 
 129         for k
, v 
in subtitles
.items(): 
 134             for pos 
in range(0, len(subs
) - 1): 
 135                 seq_current 
= subs
[pos
] 
 136                 m_current 
= re
.match(self
._TIMECODE
_REGEX
, seq_current
['Timecode']) 
 137                 if m_current 
is None: 
 139                 seq_next 
= subs
[pos 
+ 1] 
 140                 m_next 
= re
.match(self
._TIMECODE
_REGEX
, seq_next
['Timecode']) 
 143                 appear_time 
= m_current
.group('timecode') 
 144                 disappear_time 
= m_next
.group('timecode') 
 145                 text 
= seq_current
['Caption'] 
 146                 srt 
+= '%s\r\n%s --> %s\r\n%s' % (str(pos
), appear_time
, disappear_time
, text
) 
 148                 fixed_subtitles
[k
] = srt
 
 149         return fixed_subtitles
 
 151     def _get_available_subtitles(self
, video_id
, webpage
): 
 152         url 
= 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
 
 153         sub 
= self
._download
_webpage
(url
, None, note
=False) 
 154         sub_json 
= json
.loads(sub
) 
 155         return {'en': url
} if len(sub_json
) > 0 else {} 
 158 class LyndaCourseIE(InfoExtractor
): 
 159     IE_NAME 
= 'lynda:course' 
 160     IE_DESC 
= 'lynda.com online courses' 
 162     # A course link is the same as the link to that course's welcome/introduction video, 
 163     # so such a URL is recognized as a course link. 
 164     _VALID_URL 
= r
'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html' 
 166     def _real_extract(self
, url
): 
 167         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 168         course_path 
= mobj
.group('coursepath') 
 169         course_id 
= mobj
.group('courseid') 
 171         page 
= self
._download
_webpage
('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id
, 
 172                                       course_id
, 'Downloading course JSON') 
 173         course_json 
= json
.loads(page
) 
 175         if 'Status' in course_json 
and course_json
['Status'] == 'NotFound': 
 176             raise ExtractorError('Course %s does not exist' % course_id
, expected
=True) 
 178         unaccessible_videos 
= 0 
 180         (username
, _
) = self
._get
_login
_info
() 
 182         for chapter 
in course_json
['Chapters']: 
 183             for video 
in chapter
['Videos']: 
 184                 if username 
is None and video
['HasAccess'] is False: 
 185                     unaccessible_videos 
+= 1 
 187                 videos
.append(video
['ID']) 
 189         if unaccessible_videos 
> 0: 
 190             self
._downloader
.report_warning('%s videos are only available for members and will not be downloaded. ' 
 191                                             % unaccessible_videos 
+ LyndaIE
.ACCOUNT_CREDENTIALS_HINT
) 
 194             self
.url_result('http://www.lynda.com/%s/%s-4.html' % 
 195                             (course_path
, video_id
), 
 197             for video_id 
in videos
] 
 199         course_title 
= course_json
['Title'] 
 201         return self
.playlist_result(entries
, course_id
, course_title
)