2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  # Base class for the Safari Books Online extractors: holds the login URL,
  # the regex used to recognise a logged-in page, the netrc machine name and
  # the API base URL, and contains the login sequence.
  # NOTE(review): the listing numbers embedded in these lines skip
  # (27 -> 31 -> 35, 50 -> 53 -> 55 -> 60, 65 -> 67 -> 70), so some original
  # lines are not visible here. Statements whose opening line is missing are
  # flagged below instead of being guessed at.
  17 class SafariBaseIE(InfoExtractor
): 
  18     _LOGIN_URL 
= 'https://www.safaribooksonline.com/accounts/login/' 
  19     _SUCCESSFUL_LOGIN_REGEX 
= r
'<a href="/accounts/logout/"[^>]*>Sign Out</a>' 
  20     _NETRC_MACHINE 
= 'safari' 
  22     _API_BASE 
= 'https://www.safaribooksonline.com/api/v1' 
  # Login sequence: fetch the login page, extract the CSRF token, POST the
  # form to _LOGIN_URL and check the response against _SUCCESSFUL_LOGIN_REGEX.
  # NOTE(review): listing lines 28-30 are not visible, so it cannot be
  # confirmed from here that the statements below sit directly in
  # _real_initialize rather than in a helper defined on a missing line.
  27     def _real_initialize(self
): 
  31         # We only need to log in once for courses or individual videos 
  35         (username
, password
) = self
._get
_login
_info
() 
  # Start from a copy of std_headers and add a Referer pointing at the login
  # URL only when none is set already.
  39         headers 
= std_headers
.copy() 
  40         if 'Referer' not in headers
: 
  41             headers
['Referer'] = self
._LOGIN
_URL
 
  42         login_page_request 
= sanitized_Request(self
._LOGIN
_URL
, headers
=headers
) 
  44         login_page 
= self
._download
_webpage
( 
  45             login_page_request
, None, 
  46             'Downloading login form') 
  48         csrf 
= self
._html
_search
_regex
( 
  49             r
"name='csrfmiddlewaretoken'\s+value='([^']+)'", 
  50             login_page
, 'csrf token') 
  # NOTE(review): the two entries below presumably belong to the `login_form`
  # mapping posted further down; its opening line and any other fields are
  # not visible in this listing - confirm against the full file.
  53             'csrfmiddlewaretoken': csrf
, 
  55             'password1': password
, 
  60         request 
= sanitized_Request( 
  61             self
._LOGIN
_URL
, urlencode_postdata(login_form
), headers
=headers
) 
  62         login_page 
= self
._download
_webpage
( 
  63             request
, None, 'Logging in as %s' % username
) 
  # The login counts as failed when the response does not contain the
  # "Sign Out" link. NOTE(review): the call that wraps the message on listing
  # line 67 is not visible here.
  65         if re
.search(self
._SUCCESSFUL
_LOGIN
_REGEX
, login_page
) is None: 
  67                 'Login failed; make sure your credentials are correct and try again.', 
  # The flag is stored on SafariBaseIE itself rather than on the instance.
  70         SafariBaseIE
.LOGGED_IN 
= True 
  72         self
.to_screen('Login successful') 
  # Extractor for a single video page of the form
  # /library/view/<slug>/<course_id>/<part>.html on safaribooksonline.com.
  # NOTE(review): the listing numbers embedded in these lines skip
  # (78 -> 81, 82 -> 86, 110 -> 113 -> 115 -> 119, 122 -> 124 -> 126 -> 128),
  # so the openers of the test-case dicts and of the `query` dict, and the
  # conditions around the session lookup, are not visible here.
  75 class SafariIE(SafariBaseIE
): 
  77     IE_DESC 
= 'safaribooksonline.com online video' 
  78     _VALID_URL 
= r
'https?://(?:www\.)?safaribooksonline\.com/library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>part\d+)\.html' 
  81         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 
  82         'md5': 'dcc5a425e79f2564148652616af1f2a3', 
  86             'title': 'Introduction to Hadoop Fundamentals LiveLessons', 
  87             'timestamp': 1437758058, 
  88             'upload_date': '20150724', 
  89             'uploader_id': 'stork', 
  92         # non-digits in course id 
  93         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 
  94         'only_matching': True, 
  # Builds a "<course_id>/<part>" id from the URL, downloads the page, reads
  # the Kaltura reference, partner and uiconf ids from its data-* attributes
  # and returns a url_result for the Kaltura mwEmbedFrame.php URL with
  # `query` applied through update_url_query.
  97     def _real_extract(self
, url
): 
  98         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  99         video_id 
= '%s/%s' % (mobj
.group('course_id'), mobj
.group('part')) 
 101         webpage 
= self
._download
_webpage
(url
, video_id
) 
 102         reference_id 
= self
._search
_regex
( 
 103             r
'data-reference-id=(["\'])(?P
<id>.+?
)\
1', 
 104             webpage, 'kaltura reference 
id', group='id') 
 105         partner_id = self._search_regex( 
 106             r'data
-partner
-id=(["\'])(?P<id>.+?)\1', 
 107             webpage, 'kaltura widget id', group='id') 
 108         ui_id = self._search_regex( 
 109             r'data-ui-id=(["\'])(?P
<id>.+?
)\
1', 
 110             webpage, 'kaltura uiconf 
id', group='id') 
 # NOTE(review): the entries below presumably belong to the `query` mapping
 # used further down; its opening line is not visible in this listing.
 113             'wid
': '_
%s' % partner_id, 
 115             'flashvars
[referenceId
]': reference_id, 
 # Requests a Kaltura session for this reference id from the site API and
 # stores its 'session' value under flashvars[ks].
 # NOTE(review): fatal=False is passed, so a failed download is presumably
 # tolerated - the guarding conditions are on lines missing from this view.
 119             kaltura_session = self._download_json( 
 120                 '%s/player
/kaltura_session
/?reference_id
=%s' % (self._API_BASE, reference_id), 
 121                 video_id, 'Downloading kaltura session JSON
', 
 122                 'Unable to download kaltura session JSON
', fatal=False) 
 124                 session = kaltura_session.get('session
') 
 126                     query['flashvars
[ks
]'] = session 
 128         return self.url_result(update_url_query( 
 129             'https
://cdnapisec
.kaltura
.com
/html5
/html5lib
/v2
.37
.1/mwEmbedFrame
.php
', query), 
 # Extractor for API chapter URLs of the form
 # /api/v1/book/<course_id>/chapter/<part>.html (also "chapter-content").
 # NOTE(review): the listing numbers skip from 135 to 138, so the statement
 # that opens the test-case entries ('url', 'only_matching') is not visible.
 133 class SafariApiIE(SafariBaseIE): 
 134     IE_NAME = 'safari
:api
' 
 135     _VALID_URL = r'https?
://(?
:www\
.)?safaribooksonline\
.com
/api
/v1
/book
/(?P
<course_id
>[^
/]+)/chapter(?
:-content
)?
/(?P
<part
>part\d
+)\
.html
' 
 138         'url
': 'https
://www
.safaribooksonline
.com
/api
/v1
/book
/9780133392838/chapter
/part00
.html
', 
 139         'only_matching
': True, 
 def _real_extract(self, url):
     """Resolve an API chapter URL to its web page and delegate to SafariIE.

     Downloads the JSON document behind ``url`` and returns a ``url_result``
     for its ``web_url`` field, tagged with ``SafariIE``'s key.
     """
     match = re.match(self._VALID_URL, url)
     # Second argument handed to _download_json: "<course_id>/<part>".
     display_id = '%s/%s' % (match.group('course_id'), match.group('part'))
     part_info = self._download_json(
         url, display_id, 'Downloading part JSON')
     web_url = part_info['web_url']
     return self.url_result(web_url, SafariIE.ie_key())
 # Extractor for whole courses. _VALID_URL accepts either
 # /library/view/<slug>/<id> or /api/v1/book/<id>, with an optional trailing
 # slash, followed by '#', '?' or the end of the URL.
 # NOTE(review): the listing numbers skip (154 -> 157 -> 159, 160 -> 162,
 # 163 -> 165), so the openers of the test-case list and dicts are not
 # visible in this view.
 150 class SafariCourseIE(SafariBaseIE): 
 151     IE_NAME = 'safari
:course
' 
 152     IE_DESC = 'safaribooksonline
.com online courses
' 
 154     _VALID_URL = r'https?
://(?
:www\
.)?safaribooksonline\
.com
/(?
:library
/view
/[^
/]+|api
/v1
/book
)/(?P
<id>[^
/]+)/?
(?
:[#?]|$)' 
 157         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 
 159             'id': '9780133392838', 
 160             'title': 'Hadoop Fundamentals LiveLessons', 
 162         'playlist_count': 22, 
 163         'skip': 'Requires safaribooksonline account credentials', 
 165         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', 
 166         'only_matching': True, 
 def _real_extract(self, url):
     """Return a playlist with one entry per chapter of a course.

     Downloads the course JSON from the API and wraps every item of its
     ``chapters`` list in a ``url_result`` tagged with ``SafariApiIE``'s key.
     The playlist uses the course id and the JSON ``title`` field.

     Raises:
         ExtractorError: (``expected=True``) when the course JSON has no
             ``chapters`` key.
     """
     course_id = self._match_id(url)

     # NOTE(review): _API_FORMAT is read from the class; its definition is
     # not visible in this listing.
     course_json = self._download_json(
         '%s/book/%s/?override_format=%s' % (
             self._API_BASE, course_id, self._API_FORMAT),
         course_id, 'Downloading course JSON')

     if 'chapters' not in course_json:
         raise ExtractorError(
             'No chapters found for course %s' % course_id, expected=True)

     # The listing closed this comprehension with "]" but had lost the line
     # that opens it and binds `entries`; restored here.
     entries = [
         self.url_result(chapter, SafariApiIE.ie_key())
         for chapter in course_json['chapters']]

     course_title = course_json['title']

     return self.playlist_result(entries, course_id, course_title)