]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/safari.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from .brightcove 
import BrightcoveIE
 
  11     compat_urllib_request
, 
  20 class SafariBaseIE(InfoExtractor
): 
  21     _LOGIN_URL 
= 'https://www.safaribooksonline.com/accounts/login/' 
  22     _SUCCESSFUL_LOGIN_REGEX 
= r
'<a href="/accounts/logout/"[^>]*>Sign Out</a>' 
  23     _NETRC_MACHINE 
= 'safari' 
  25     _API_BASE 
= 'https://www.safaribooksonline.com/api/v1/book' 
  30     def _real_initialize(self
): 
  31         # We only need to log in once for courses or individual videos 
  32         if not self
.LOGGED_IN
: 
  34             SafariBaseIE
.LOGGED_IN 
= True 
  37         (username
, password
) = self
._get
_login
_info
() 
  39             self
.raise_login_required('safaribooksonline.com account is required') 
  42         if 'Referer' not in headers
: 
  43             headers
['Referer'] = self
._LOGIN
_URL
 
  45         login_page 
= self
._download
_webpage
( 
  46             self
._LOGIN
_URL
, None, 
  47             'Downloading login form') 
  49         csrf 
= self
._html
_search
_regex
( 
  50             r
"name='csrfmiddlewaretoken'\s+value='([^']+)'", 
  51             login_page
, 'csrf token') 
  54             'csrfmiddlewaretoken': csrf
, 
  56             'password1': password
, 
  61         request 
= compat_urllib_request
.Request( 
  62             self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(login_form
), headers
=headers
) 
  63         login_page 
= self
._download
_webpage
( 
  64             request
, None, 'Logging in as %s' % username
) 
  66         if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None: 
  68                 'Login failed; make sure your credentials are correct and try again.', 
  71         self
.to_screen('Login successful') 
  74 class SafariIE(SafariBaseIE
): 
  76     IE_DESC 
= 'safaribooksonline.com online video' 
  77     _VALID_URL 
= r
'''(?x)https?:// 
  78                             (?:www\.)?safaribooksonline\.com/ 
  84                                     (?:chapter(?:-content)?/)? 
  85                                 (?P<part>part\d+)\.html 
  89         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 
  90         'md5': '5b0c4cc1b3c1ba15dda7344085aa5592', 
  92             'id': '2842601850001', 
  94             'title': 'Introduction', 
  96         'skip': 'Requires safaribooksonline account credentials', 
  98         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 
  99         'only_matching': True, 
 101         # non-digits in course id 
 102         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 
 103         'only_matching': True, 
 106     def _real_extract(self
, url
): 
 107         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 108         course_id 
= mobj
.group('course_id') 
 109         part 
= mobj
.group('part') 
 111         webpage 
= self
._download
_webpage
( 
 112             '%s/%s/chapter-content/%s.html' % (self
._API
_BASE
, course_id
, part
), 
 115         bc_url 
= BrightcoveIE
._extract
_brightcove
_url
(webpage
) 
 117             raise ExtractorError('Could not extract Brightcove URL from %s' % url
, expected
=True) 
 119         return self
.url_result(smuggle_url(bc_url
, {'Referer': url
}), 'Brightcove') 
 122 class SafariCourseIE(SafariBaseIE
): 
 123     IE_NAME 
= 'safari:course' 
 124     IE_DESC 
= 'safaribooksonline.com online courses' 
 126     _VALID_URL 
= r
'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)' 
 129         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 
 131             'id': '9780133392838', 
 132             'title': 'Hadoop Fundamentals LiveLessons', 
 134         'playlist_count': 22, 
 135         'skip': 'Requires safaribooksonline account credentials', 
 137         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', 
 138         'only_matching': True, 
 141     def _real_extract(self
, url
): 
 142         course_id 
= self
._match
_id
(url
) 
 144         course_json 
= self
._download
_json
( 
 145             '%s/%s/?override_format=%s' % (self
._API
_BASE
, course_id
, self
._API
_FORMAT
), 
 146             course_id
, 'Downloading course JSON') 
 148         if 'chapters' not in course_json
: 
 149             raise ExtractorError( 
 150                 'No chapters found for course %s' % course_id
, expected
=True) 
 153             self
.url_result(chapter
, 'Safari') 
 154             for chapter 
in course_json
['chapters']] 
 156         course_title 
= course_json
['title'] 
 158         return self
.playlist_result(entries
, course_id
, course_title
)