2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  17 class SafariBaseIE(InfoExtractor
): 
  18     _LOGIN_URL 
= 'https://www.safaribooksonline.com/accounts/login/' 
  19     _SUCCESSFUL_LOGIN_REGEX 
= r
'<a href="/accounts/logout/"[^>]*>Sign Out</a>' 
  20     _NETRC_MACHINE 
= 'safari' 
  22     _API_BASE 
= 'https://www.safaribooksonline.com/api/v1' 
  27     def _real_initialize(self
): 
  31         # We only need to log in once for courses or individual videos 
  35         (username
, password
) = self
._get
_login
_info
() 
  39         headers 
= std_headers
.copy() 
  40         if 'Referer' not in headers
: 
  41             headers
['Referer'] = self
._LOGIN
_URL
 
  42         login_page_request 
= sanitized_Request(self
._LOGIN
_URL
, headers
=headers
) 
  44         login_page 
= self
._download
_webpage
( 
  45             login_page_request
, None, 
  46             'Downloading login form') 
  48         csrf 
= self
._html
_search
_regex
( 
  49             r
"name='csrfmiddlewaretoken'\s+value='([^']+)'", 
  50             login_page
, 'csrf token') 
  53             'csrfmiddlewaretoken': csrf
, 
  55             'password1': password
, 
  60         request 
= sanitized_Request( 
  61             self
._LOGIN
_URL
, urlencode_postdata(login_form
), headers
=headers
) 
  62         login_page 
= self
._download
_webpage
( 
  63             request
, None, 'Logging in as %s' % username
) 
  65         if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None: 
  67                 'Login failed; make sure your credentials are correct and try again.', 
  70         SafariBaseIE
.LOGGED_IN 
= True 
  72         self
.to_screen('Login successful') 
  75 class SafariIE(SafariBaseIE
): 
  77     IE_DESC 
= 'safaribooksonline.com online video' 
  78     _VALID_URL 
= r
'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?#&]+)\.html' 
  81         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 
  82         'md5': 'dcc5a425e79f2564148652616af1f2a3', 
  86             'title': 'Introduction to Hadoop Fundamentals LiveLessons', 
  87             'timestamp': 1437758058, 
  88             'upload_date': '20150724', 
  89             'uploader_id': 'stork', 
  92         # non-digits in course id 
  93         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 
  94         'only_matching': True, 
  96         'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', 
  97         'only_matching': True, 
 100     def _real_extract(self
, url
): 
 101         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 102         video_id 
= '%s/%s' % (mobj
.group('course_id'), mobj
.group('part')) 
 104         webpage 
= self
._download
_webpage
(url
, video_id
) 
 105         reference_id 
= self
._search
_regex
( 
 106             r
'data-reference-id=(["\'])(?P
<id>(?
:(?
!\
1).)+)\
1', 
 107             webpage, 'kaltura reference 
id', group='id') 
 108         partner_id = self._search_regex( 
 109             r'data
-partner
-id=(["\'])(?P<id>(?:(?!\1).)+)\1', 
 110             webpage, 'kaltura widget id', group='id') 
 111         ui_id = self._search_regex( 
 112             r'data-ui-id=(["\'])(?P
<id>(?
:(?
!\
1).)+)\
1', 
 113             webpage, 'kaltura uiconf 
id', group='id') 
 116             'wid
': '_
%s' % partner_id, 
 118             'flashvars
[referenceId
]': reference_id, 
 122             kaltura_session = self._download_json( 
 123                 '%s/player
/kaltura_session
/?reference_id
=%s' % (self._API_BASE, reference_id), 
 124                 video_id, 'Downloading kaltura session JSON
', 
 125                 'Unable to download kaltura session JSON
', fatal=False) 
 127                 session = kaltura_session.get('session
') 
 129                     query['flashvars
[ks
]'] = session 
 131         return self.url_result(update_url_query( 
 132             'https
://cdnapisec
.kaltura
.com
/html5
/html5lib
/v2
.37
.1/mwEmbedFrame
.php
', query), 
 136 class SafariApiIE(SafariBaseIE): 
 137     IE_NAME = 'safari
:api
' 
 138     _VALID_URL = r'https?
://(?
:www\
.)?safaribooksonline\
.com
/api
/v1
/book
/(?P
<course_id
>[^
/]+)/chapter(?
:-content
)?
/(?P
<part
>[^
/?
#&]+)\.html' 
 141         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 
 142         'only_matching': True, 
 144         'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', 
 145         'only_matching': True, 
 148     def _real_extract(self
, url
): 
 149         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 150         part 
= self
._download
_json
( 
 151             url
, '%s/%s' % (mobj
.group('course_id'), mobj
.group('part')), 
 152             'Downloading part JSON') 
 153         return self
.url_result(part
['web_url'], SafariIE
.ie_key()) 
 156 class SafariCourseIE(SafariBaseIE
): 
 157     IE_NAME 
= 'safari:course' 
 158     IE_DESC 
= 'safaribooksonline.com online courses' 
 160     _VALID_URL 
= r
'''(?x) 
 163                             (?:www\.)?safaribooksonline\.com/(?:library/view/[^/]+|api/v1/book)| 
 164                             techbus\.safaribooksonline\.com 
 166                         /(?P<id>[^/]+)/?(?:[#?]|$) 
 170         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 
 172             'id': '9780133392838', 
 173             'title': 'Hadoop Fundamentals LiveLessons', 
 175         'playlist_count': 22, 
 176         'skip': 'Requires safaribooksonline account credentials', 
 178         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', 
 179         'only_matching': True, 
 181         'url': 'http://techbus.safaribooksonline.com/9780134426365', 
 182         'only_matching': True, 
 185     def _real_extract(self
, url
): 
 186         course_id 
= self
._match
_id
(url
) 
 188         course_json 
= self
._download
_json
( 
 189             '%s/book/%s/?override_format=%s' % (self
._API
_BASE
, course_id
, self
._API
_FORMAT
), 
 190             course_id
, 'Downloading course JSON') 
 192         if 'chapters' not in course_json
: 
 193             raise ExtractorError( 
 194                 'No chapters found for course %s' % course_id
, expected
=True) 
 197             self
.url_result(chapter
, SafariApiIE
.ie_key()) 
 198             for chapter 
in course_json
['chapters']] 
 200         course_title 
= course_json
['title'] 
 202         return self
.playlist_result(entries
, course_id
, course_title
)