2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from .brightcove 
import BrightcoveIE
 
  11     compat_urllib_request
, 
  20 class SafariBaseIE(InfoExtractor
): 
  21     _LOGIN_URL 
= 'https://www.safaribooksonline.com/accounts/login/' 
  22     _SUCCESSFUL_LOGIN_REGEX 
= r
'<a href="/accounts/logout/"[^>]*>Sign Out</a>' 
  23     _ACCOUNT_CREDENTIALS_HINT 
= 'Use --username and --password options to supply credentials for safaribooksonline.com' 
  24     _NETRC_MACHINE 
= 'safari' 
  26     _API_BASE 
= 'https://www.safaribooksonline.com/api/v1/book' 
  31     def _real_initialize(self
): 
  32         # We only need to log in once for courses or individual videos 
  33         if not self
.LOGGED_IN
: 
  35             SafariBaseIE
.LOGGED_IN 
= True 
  38         (username
, password
) = self
._get
_login
_info
() 
  41                 self
._ACCOUNT
_CREDENTIALS
_HINT
, 
  45         if 'Referer' not in headers
: 
  46             headers
['Referer'] = self
._LOGIN
_URL
 
  48         login_page 
= self
._download
_webpage
( 
  49             self
._LOGIN
_URL
, None, 
  50             'Downloading login form') 
  52         csrf 
= self
._html
_search
_regex
( 
  53             r
"name='csrfmiddlewaretoken'\s+value='([^']+)'", 
  54             login_page
, 'csrf token') 
  57             'csrfmiddlewaretoken': csrf
, 
  59             'password1': password
, 
  64         request 
= compat_urllib_request
.Request( 
  65             self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(login_form
), headers
=headers
) 
  66         login_page 
= self
._download
_webpage
( 
  67             request
, None, 'Logging in as %s' % username
) 
  69         if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None: 
  71                 'Login failed; make sure your credentials are correct and try again.', 
  74         self
.to_screen('Login successful') 
  77 class SafariIE(SafariBaseIE
): 
  79     IE_DESC 
= 'safaribooksonline.com online video' 
  80     _VALID_URL 
= r
'''(?x)https?:// 
  81                             (?:www\.)?safaribooksonline\.com/ 
  87                                     (?:chapter(?:-content)?/)? 
  88                                 (?P<part>part\d+)\.html 
  92         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 
  93         'md5': '5b0c4cc1b3c1ba15dda7344085aa5592', 
  95             'id': '2842601850001', 
  97             'title': 'Introduction', 
  99         'skip': 'Requires safaribooksonline account credentials', 
 101         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 
 102         'only_matching': True, 
 105     def _real_extract(self
, url
): 
 106         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 107         course_id 
= mobj
.group('course_id') 
 108         part 
= mobj
.group('part') 
 110         webpage 
= self
._download
_webpage
( 
 111             '%s/%s/chapter-content/%s.html' % (self
._API
_BASE
, course_id
, part
), 
 114         bc_url 
= BrightcoveIE
._extract
_brightcove
_url
(webpage
) 
 116             raise ExtractorError('Could not extract Brightcove URL from %s' % url
, expected
=True) 
 118         return self
.url_result(smuggle_url(bc_url
, {'Referer': url
}), 'Brightcove') 
 121 class SafariCourseIE(SafariBaseIE
): 
 122     IE_NAME 
= 'safari:course' 
 123     IE_DESC 
= 'safaribooksonline.com online courses' 
 125     _VALID_URL 
= r
'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)' 
 128         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 
 130             'id': '9780133392838', 
 131             'title': 'Hadoop Fundamentals LiveLessons', 
 133         'playlist_count': 22, 
 134         'skip': 'Requires safaribooksonline account credentials', 
 136         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', 
 137         'only_matching': True, 
 140     def _real_extract(self
, url
): 
 141         course_id 
= self
._match
_id
(url
) 
 143         course_json 
= self
._download
_json
( 
 144             '%s/%s/?override_format=%s' % (self
._API
_BASE
, course_id
, self
._API
_FORMAT
), 
 145             course_id
, 'Downloading course JSON') 
 147         if 'chapters' not in course_json
: 
 148             raise ExtractorError( 
 149                 'No chapters found for course %s' % course_id
, expected
=True) 
 152             self
.url_result(chapter
, 'Safari') 
 153             for chapter 
in course_json
['chapters']] 
 155         course_title 
= course_json
['title'] 
 157         return self
.playlist_result(entries
, course_id
, course_title
)