]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/lynda.py
   1 from __future__ 
import unicode_literals
 
   6 from .subtitles 
import SubtitlesInfoExtractor
 
   7 from .common 
import InfoExtractor
 
  11     compat_urllib_request
, 
  19 class LyndaIE(SubtitlesInfoExtractor
): 
  21     IE_DESC 
= 'lynda.com videos' 
  22     _VALID_URL 
= r
'https?://www\.lynda\.com/[^/]+/[^/]+/\d+/(\d+)-\d\.html' 
  23     _LOGIN_URL 
= 'https://www.lynda.com/login/login.aspx' 
  24     _NETRC_MACHINE 
= 'lynda' 
  26     _SUCCESSFUL_LOGIN_REGEX 
= r
'isLoggedIn: true' 
  27     _TIMECODE_REGEX 
= r
'\[(?P<timecode>\d+:\d+:\d+[\.,]\d+)\]' 
  29     ACCOUNT_CREDENTIALS_HINT 
= 'Use --username and --password options to provide lynda.com account credentials.' 
  32         'url': 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html', 
  33         'md5': 'ecfc6862da89489161fb9cd5f5a6fac1', 
  37             'title': 'Using the exercise files', 
  42     def _real_initialize(self
): 
  45     def _real_extract(self
, url
): 
  46         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  47         video_id 
= mobj
.group(1) 
  49         page 
= self
._download
_webpage
('http://www.lynda.com/ajax/player?videoId=%s&type=video' % video_id
, video_id
, 
  50                                       'Downloading video JSON') 
  51         video_json 
= json
.loads(page
) 
  53         if 'Status' in video_json
: 
  54             raise ExtractorError('lynda returned error: %s' % video_json
['Message'], expected
=True) 
  56         if video_json
['HasAccess'] is False: 
  58                 'Video %s is only available for members. ' % video_id 
+ self
.ACCOUNT_CREDENTIALS_HINT
, expected
=True) 
  60         video_id 
= compat_str(video_json
['ID']) 
  61         duration 
= video_json
['DurationInSeconds'] 
  62         title 
= video_json
['Title'] 
  66         fmts 
= video_json
.get('Formats') 
  71                     'ext': fmt
['Extension'], 
  72                     'width': fmt
['Width'], 
  73                     'height': fmt
['Height'], 
  74                     'filesize': fmt
['FileSize'], 
  75                     'format_id': str(fmt
['Resolution']) 
  78         prioritized_streams 
= video_json
.get('PrioritizedStreams') 
  79         if prioritized_streams
: 
  83                     'width': int_or_none(format_id
), 
  84                     'format_id': format_id
, 
  85                 } for format_id
, video_url 
in prioritized_streams
['0'].items() 
  88         self
._check
_formats
(formats
, video_id
) 
  89         self
._sort
_formats
(formats
) 
  91         if self
._downloader
.params
.get('listsubtitles', False): 
  92             self
._list
_available
_subtitles
(video_id
, page
) 
  95         subtitles 
= self
._fix
_subtitles
(self
.extract_subtitles(video_id
, page
)) 
 100             'duration': duration
, 
 101             'subtitles': subtitles
, 
 106         (username
, password
) = self
._get
_login
_info
() 
 111             'username': username
, 
 112             'password': password
, 
 116         request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(login_form
)) 
 117         login_page 
= self
._download
_webpage
(request
, None, 'Logging in as %s' % username
) 
 119         # Not (yet) logged in 
 120         m 
= re
.search(r
'loginResultJson = \'(?P
<json
>[^
\']+)\';', login_page) 
 122             response = m.group('json
') 
 123             response_json = json.loads(response) 
 124             state = response_json['state
'] 
 126             if state == 'notlogged
': 
 127                 raise ExtractorError('Unable to login
, incorrect username 
and/or password
', expected=True) 
 129             # This is when we get popup: 
 130             # > You're already logged in to lynda.com on two devices.
 131             # > If you log in here, we'll log you out of another device. 
 132             # So, we need to confirm this. 
 133             if state 
== 'conflicted': 
 141                 request 
= compat_urllib_request
.Request(self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(confirm_form
)) 
 142                 login_page 
= self
._download
_webpage
(request
, None, 'Confirming log in and log out from another device') 
 144         if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None: 
 145             raise ExtractorError('Unable to log in') 
 147     def _fix_subtitles(self
, subtitles
): 
 148         if subtitles 
is None: 
 149             return subtitles  
# subtitles not requested 
 152         for k
, v 
in subtitles
.items(): 
 157             for pos 
in range(0, len(subs
) - 1): 
 158                 seq_current 
= subs
[pos
] 
 159                 m_current 
= re
.match(self
._TIMECODE
_REGEX
, seq_current
['Timecode']) 
 160                 if m_current 
is None: 
 162                 seq_next 
= subs
[pos 
+ 1] 
 163                 m_next 
= re
.match(self
._TIMECODE
_REGEX
, seq_next
['Timecode']) 
 166                 appear_time 
= m_current
.group('timecode') 
 167                 disappear_time 
= m_next
.group('timecode') 
 168                 text 
= seq_current
['Caption'] 
 169                 srt 
+= '%s\r\n%s --> %s\r\n%s' % (str(pos
), appear_time
, disappear_time
, text
) 
 171                 fixed_subtitles
[k
] = srt
 
 172         return fixed_subtitles
 
 174     def _get_available_subtitles(self
, video_id
, webpage
): 
 175         url 
= 'http://www.lynda.com/ajax/player?videoId=%s&type=transcript' % video_id
 
 176         sub 
= self
._download
_webpage
(url
, None, False) 
 177         sub_json 
= json
.loads(sub
) 
 178         return {'en': url
} if len(sub_json
) > 0 else {} 
 181 class LyndaCourseIE(InfoExtractor
): 
 182     IE_NAME 
= 'lynda:course' 
 183     IE_DESC 
= 'lynda.com online courses' 
 185     # A course link is identical to the link of that course's welcome/introduction video. 
 186     # Such a link is recognized here as a course link. 
 187     _VALID_URL 
= r
'https?://(?:www|m)\.lynda\.com/(?P<coursepath>[^/]+/[^/]+/(?P<courseid>\d+))-\d\.html' 
 189     def _real_extract(self
, url
): 
 190         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 191         course_path 
= mobj
.group('coursepath') 
 192         course_id 
= mobj
.group('courseid') 
 194         page 
= self
._download
_webpage
('http://www.lynda.com/ajax/player?courseId=%s&type=course' % course_id
, 
 195                                       course_id
, 'Downloading course JSON') 
 196         course_json 
= json
.loads(page
) 
 198         if 'Status' in course_json 
and course_json
['Status'] == 'NotFound': 
 199             raise ExtractorError('Course %s does not exist' % course_id
, expected
=True) 
 201         unaccessible_videos 
= 0 
 203         (username
, _
) = self
._get
_login
_info
() 
 205         # Might want to extract videos right here from video['Formats'] as it seems 'Formats' is not provided 
 206         # by single video API anymore 
 208         for chapter 
in course_json
['Chapters']: 
 209             for video 
in chapter
['Videos']: 
 210                 if username 
is None and video
['HasAccess'] is False: 
 211                     unaccessible_videos 
+= 1 
 213                 videos
.append(video
['ID']) 
 215         if unaccessible_videos 
> 0: 
 216             self
._downloader
.report_warning('%s videos are only available for members and will not be downloaded. ' 
 217                                             % unaccessible_videos 
+ LyndaIE
.ACCOUNT_CREDENTIALS_HINT
) 
 220             self
.url_result('http://www.lynda.com/%s/%s-4.html' % 
 221                             (course_path
, video_id
), 
 223             for video_id 
in videos
] 
 225         course_title 
= course_json
['Title'] 
 227         return self
.playlist_result(entries
, course_id
, course_title
)