2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from .brightcove
import BrightcoveIE
11 compat_urllib_request
,
20 class SafariBaseIE(InfoExtractor
):
21 _LOGIN_URL
= 'https://www.safaribooksonline.com/accounts/login/'
22 _SUCCESSFUL_LOGIN_REGEX
= r
'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
23 _ACCOUNT_CREDENTIALS_HINT
= 'Use --username and --password options to supply credentials for safaribooksonline.com'
24 _NETRC_MACHINE
= 'safari'
26 _API_BASE
= 'https://www.safaribooksonline.com/api/v1/book'
31 def _real_initialize(self
):
32 # We only need to log in once for courses or individual videos
33 if not self
.LOGGED_IN
:
35 SafariBaseIE
.LOGGED_IN
= True
38 (username
, password
) = self
._get
_login
_info
()
41 self
._ACCOUNT
_CREDENTIALS
_HINT
,
45 if 'Referer' not in headers
:
46 headers
['Referer'] = self
._LOGIN
_URL
48 login_page
= self
._download
_webpage
(
49 self
._LOGIN
_URL
, None,
50 'Downloading login form')
52 csrf
= self
._html
_search
_regex
(
53 r
"name='csrfmiddlewaretoken'\s+value='([^']+)'",
54 login_page
, 'csrf token')
57 'csrfmiddlewaretoken': csrf
,
59 'password1': password
,
64 request
= compat_urllib_request
.Request(
65 self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(login_form
), headers
=headers
)
66 login_page
= self
._download
_webpage
(
67 request
, None, 'Logging in as %s' % username
)
69 if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None:
71 'Login failed; make sure your credentials are correct and try again.',
74 self
.to_screen('Login successful')
77 class SafariIE(SafariBaseIE
):
79 IE_DESC
= 'safaribooksonline.com online video'
80 _VALID_URL
= r
'''(?x)https?://
81 (?:www\.)?safaribooksonline\.com/
87 (?:chapter(?:-content)?/)?
88 (?P<part>part\d+)\.html
92 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
93 'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
95 'id': '2842601850001',
97 'title': 'Introduction',
99 'skip': 'Requires safaribooksonline account credentials',
101 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
102 'only_matching': True,
105 def _real_extract(self
, url
):
106 mobj
= re
.match(self
._VALID
_URL
, url
)
107 course_id
= mobj
.group('course_id')
108 part
= mobj
.group('part')
110 webpage
= self
._download
_webpage
(
111 '%s/%s/chapter-content/%s.html' % (self
._API
_BASE
, course_id
, part
),
114 bc_url
= BrightcoveIE
._extract
_brightcove
_url
(webpage
)
116 raise ExtractorError('Could not extract Brightcove URL from %s' % url
, expected
=True)
118 return self
.url_result(smuggle_url(bc_url
, {'Referer': url
}), 'Brightcove')
121 class SafariCourseIE(SafariBaseIE
):
122 IE_NAME
= 'safari:course'
123 IE_DESC
= 'safaribooksonline.com online courses'
125 _VALID_URL
= r
'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>\d+)/?(?:[#?]|$)'
128 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
130 'id': '9780133392838',
131 'title': 'Hadoop Fundamentals LiveLessons',
133 'playlist_count': 22,
134 'skip': 'Requires safaribooksonline account credentials',
136 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
137 'only_matching': True,
140 def _real_extract(self
, url
):
141 course_id
= self
._match
_id
(url
)
143 course_json
= self
._download
_json
(
144 '%s/%s/?override_format=%s' % (self
._API
_BASE
, course_id
, self
._API
_FORMAT
),
145 course_id
, 'Downloading course JSON')
147 if 'chapters' not in course_json
:
148 raise ExtractorError(
149 'No chapters found for course %s' % course_id
, expected
=True)
152 self
.url_result(chapter
, 'Safari')
153 for chapter
in course_json
['chapters']]
155 course_title
= course_json
['title']
157 return self
.playlist_result(entries
, course_id
, course_title
)