]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/safari.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from .brightcove 
import BrightcoveLegacyIE
 
  18 class SafariBaseIE(InfoExtractor
): 
  19     _LOGIN_URL 
= 'https://www.safaribooksonline.com/accounts/login/' 
  20     _SUCCESSFUL_LOGIN_REGEX 
= r
'<a href="/accounts/logout/"[^>]*>Sign Out</a>' 
  21     _NETRC_MACHINE 
= 'safari' 
  23     _API_BASE 
= 'https://www.safaribooksonline.com/api/v1/book' 
  28     def _real_initialize(self
): 
  29         # We only need to log in once for courses or individual videos 
  30         if not self
.LOGGED_IN
: 
  32             SafariBaseIE
.LOGGED_IN 
= True 
  35         (username
, password
) = self
._get
_login
_info
() 
  37             self
.raise_login_required('safaribooksonline.com account is required') 
  40         if 'Referer' not in headers
: 
  41             headers
['Referer'] = self
._LOGIN
_URL
 
  43         login_page 
= self
._download
_webpage
( 
  44             self
._LOGIN
_URL
, None, 
  45             'Downloading login form') 
  47         csrf 
= self
._html
_search
_regex
( 
  48             r
"name='csrfmiddlewaretoken'\s+value='([^']+)'", 
  49             login_page
, 'csrf token') 
  52             'csrfmiddlewaretoken': csrf
, 
  54             'password1': password
, 
  59         request 
= sanitized_Request( 
  60             self
._LOGIN
_URL
, urlencode_postdata(login_form
), headers
=headers
) 
  61         login_page 
= self
._download
_webpage
( 
  62             request
, None, 'Logging in as %s' % username
) 
  64         if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None: 
  66                 'Login failed; make sure your credentials are correct and try again.', 
  69         self
.to_screen('Login successful') 
  72 class SafariIE(SafariBaseIE
): 
  74     IE_DESC 
= 'safaribooksonline.com online video' 
  75     _VALID_URL 
= r
'''(?x)https?:// 
  76                             (?:www\.)?safaribooksonline\.com/ 
  82                                     (?:chapter(?:-content)?/)? 
  83                                 (?P<part>part\d+)\.html 
  87         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 
  88         'md5': '5b0c4cc1b3c1ba15dda7344085aa5592', 
  90             'id': '2842601850001', 
  92             'title': 'Introduction', 
  94         'skip': 'Requires safaribooksonline account credentials', 
  96         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 
  97         'only_matching': True, 
  99         # non-digits in course id 
 100         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 
 101         'only_matching': True, 
 104     def _real_extract(self
, url
): 
 105         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 106         course_id 
= mobj
.group('course_id') 
 107         part 
= mobj
.group('part') 
 109         webpage 
= self
._download
_webpage
( 
 110             '%s/%s/chapter-content/%s.html' % (self
._API
_BASE
, course_id
, part
), 
 113         bc_url 
= BrightcoveLegacyIE
._extract
_brightcove
_url
(webpage
) 
 115             raise ExtractorError('Could not extract Brightcove URL from %s' % url
, expected
=True) 
 117         return self
.url_result(smuggle_url(bc_url
, {'Referer': url
}), 'BrightcoveLegacy') 
 120 class SafariCourseIE(SafariBaseIE
): 
 121     IE_NAME 
= 'safari:course' 
 122     IE_DESC 
= 'safaribooksonline.com online courses' 
 124     _VALID_URL 
= r
'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)' 
 127         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 
 129             'id': '9780133392838', 
 130             'title': 'Hadoop Fundamentals LiveLessons', 
 132         'playlist_count': 22, 
 133         'skip': 'Requires safaribooksonline account credentials', 
 135         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', 
 136         'only_matching': True, 
 139     def _real_extract(self
, url
): 
 140         course_id 
= self
._match
_id
(url
) 
 142         course_json 
= self
._download
_json
( 
 143             '%s/%s/?override_format=%s' % (self
._API
_BASE
, course_id
, self
._API
_FORMAT
), 
 144             course_id
, 'Downloading course JSON') 
 146         if 'chapters' not in course_json
: 
 147             raise ExtractorError( 
 148                 'No chapters found for course %s' % course_id
, expected
=True) 
 151             self
.url_result(chapter
, 'Safari') 
 152             for chapter 
in course_json
['chapters']] 
 154         course_title 
= course_json
['title'] 
 156         return self
.playlist_result(entries
, course_id
, course_title
)