]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/safari.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from .brightcove
import BrightcoveIE
11 compat_urllib_request
,
20 class SafariBaseIE(InfoExtractor
):
21 _LOGIN_URL
= 'https://www.safaribooksonline.com/accounts/login/'
22 _SUCCESSFUL_LOGIN_REGEX
= r
'<a href="/accounts/logout/"[^>]*>Sign Out</a>'
23 _NETRC_MACHINE
= 'safari'
25 _API_BASE
= 'https://www.safaribooksonline.com/api/v1/book'
30 def _real_initialize(self
):
31 # We only need to log in once for courses or individual videos
32 if not self
.LOGGED_IN
:
34 SafariBaseIE
.LOGGED_IN
= True
37 (username
, password
) = self
._get
_login
_info
()
39 self
.raise_login_required('safaribooksonline.com account is required')
42 if 'Referer' not in headers
:
43 headers
['Referer'] = self
._LOGIN
_URL
45 login_page
= self
._download
_webpage
(
46 self
._LOGIN
_URL
, None,
47 'Downloading login form')
49 csrf
= self
._html
_search
_regex
(
50 r
"name='csrfmiddlewaretoken'\s+value='([^']+)'",
51 login_page
, 'csrf token')
54 'csrfmiddlewaretoken': csrf
,
56 'password1': password
,
61 request
= compat_urllib_request
.Request(
62 self
._LOGIN
_URL
, compat_urllib_parse
.urlencode(login_form
), headers
=headers
)
63 login_page
= self
._download
_webpage
(
64 request
, None, 'Logging in as %s' % username
)
66 if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None:
68 'Login failed; make sure your credentials are correct and try again.',
71 self
.to_screen('Login successful')
74 class SafariIE(SafariBaseIE
):
76 IE_DESC
= 'safaribooksonline.com online video'
77 _VALID_URL
= r
'''(?x)https?://
78 (?:www\.)?safaribooksonline\.com/
84 (?:chapter(?:-content)?/)?
85 (?P<part>part\d+)\.html
89 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html',
90 'md5': '5b0c4cc1b3c1ba15dda7344085aa5592',
92 'id': '2842601850001',
94 'title': 'Introduction',
96 'skip': 'Requires safaribooksonline account credentials',
98 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html',
99 'only_matching': True,
101 # non-digits in course id
102 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html',
103 'only_matching': True,
106 def _real_extract(self
, url
):
107 mobj
= re
.match(self
._VALID
_URL
, url
)
108 course_id
= mobj
.group('course_id')
109 part
= mobj
.group('part')
111 webpage
= self
._download
_webpage
(
112 '%s/%s/chapter-content/%s.html' % (self
._API
_BASE
, course_id
, part
),
115 bc_url
= BrightcoveIE
._extract
_brightcove
_url
(webpage
)
117 raise ExtractorError('Could not extract Brightcove URL from %s' % url
, expected
=True)
119 return self
.url_result(smuggle_url(bc_url
, {'Referer': url
}), 'Brightcove')
122 class SafariCourseIE(SafariBaseIE
):
123 IE_NAME
= 'safari:course'
124 IE_DESC
= 'safaribooksonline.com online courses'
126 _VALID_URL
= r
'https?://(?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)/(?P<id>[^/]+)/?(?:[#?]|$)'
129 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/',
131 'id': '9780133392838',
132 'title': 'Hadoop Fundamentals LiveLessons',
134 'playlist_count': 22,
135 'skip': 'Requires safaribooksonline account credentials',
137 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json',
138 'only_matching': True,
141 def _real_extract(self
, url
):
142 course_id
= self
._match
_id
(url
)
144 course_json
= self
._download
_json
(
145 '%s/%s/?override_format=%s' % (self
._API
_BASE
, course_id
, self
._API
_FORMAT
),
146 course_id
, 'Downloading course JSON')
148 if 'chapters' not in course_json
:
149 raise ExtractorError(
150 'No chapters found for course %s' % course_id
, expected
=True)
153 self
.url_result(chapter
, 'Safari')
154 for chapter
in course_json
['chapters']]
156 course_title
= course_json
['title']
158 return self
.playlist_result(entries
, course_id
, course_title
)