2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  19 class SafariBaseIE(InfoExtractor
): 
  20     _LOGIN_URL 
= 'https://learning.oreilly.com/accounts/login/' 
  21     _NETRC_MACHINE 
= 'safari' 
  23     _API_BASE 
= 'https://learning.oreilly.com/api/v1' 
  28     def _real_initialize(self
): 
  32         username
, password 
= self
._get
_login
_info
() 
  36         _
, urlh 
= self
._download
_webpage
_handle
( 
  37             'https://learning.oreilly.com/accounts/login-check/', None, 
  38             'Downloading login page') 
  41             return 'learning.oreilly.com/home/' in urlh
.geturl() 
  47         redirect_url 
= urlh
.geturl() 
  48         parsed_url 
= compat_urlparse
.urlparse(redirect_url
) 
  49         qs 
= compat_parse_qs(parsed_url
.query
) 
  50         next_uri 
= compat_urlparse
.urljoin( 
  51             'https://api.oreilly.com', qs
['next'][0]) 
  53         auth
, urlh 
= self
._download
_json
_handle
( 
  54             'https://www.oreilly.com/member/auth/login/', None, 'Logging in', 
  58                 'redirect_uri': next_uri
, 
  59             }).encode(), headers
={ 
  60                 'Content-Type': 'application/json', 
  61                 'Referer': redirect_url
, 
  62             }, expected_status
=400) 
  64         credentials 
= auth
.get('credentials') 
  65         if (not auth
.get('logged_in') and not auth
.get('redirect_uri') 
  68                 'Unable to login: %s' % credentials
, expected
=True) 
  70         # oreilly serves two same instances of the following cookies 
  71         # in Set-Cookie header and expects first one to be actually set 
  72         for cookie 
in ('groot_sessionid', 'orm-jwt', 'orm-rt'): 
  73             self
._apply
_first
_set
_cookie
_header
(urlh
, cookie
) 
  75         _
, urlh 
= self
._download
_webpage
_handle
( 
  76             auth
.get('redirect_uri') or next_uri
, None, 'Completing login',) 
  82         raise ExtractorError('Unable to log in') 
  85 class SafariIE(SafariBaseIE
): 
  87     IE_DESC 
= 'safaribooksonline.com online video' 
  90                             (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ 
  92                                 library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html| 
  93                                 videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+) 
  98         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 
  99         'md5': 'dcc5a425e79f2564148652616af1f2a3', 
 103             'title': 'Introduction to Hadoop Fundamentals LiveLessons', 
 104             'timestamp': 1437758058, 
 105             'upload_date': '20150724', 
 106             'uploader_id': 'stork', 
 109         # non-digits in course id 
 110         'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 
 111         'only_matching': True, 
 113         'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', 
 114         'only_matching': True, 
 116         'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00', 
 117         'only_matching': True, 
 119         'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro', 
 120         'only_matching': True, 
 122         'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html', 
 123         'only_matching': True, 
 126     _PARTNER_ID 
= '1926081' 
 127     _UICONF_ID 
= '29375172' 
 129     def _real_extract(self
, url
): 
 130         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 132         reference_id 
= mobj
.group('reference_id') 
 134             video_id 
= reference_id
 
 135             partner_id 
= self
._PARTNER
_ID
 
 136             ui_id 
= self
._UICONF
_ID
 
 138             video_id 
= '%s-%s' % (mobj
.group('course_id'), mobj
.group('part')) 
 140             webpage
, urlh 
= self
._download
_webpage
_handle
(url
, video_id
) 
 142             mobj 
= re
.match(self
._VALID
_URL
, urlh
.geturl()) 
 143             reference_id 
= mobj
.group('reference_id') 
 145                 reference_id 
= self
._search
_regex
( 
 146                     r
'data-reference-id=(["\'])(?P
<id>(?
:(?
!\
1).)+)\
1', 
 147                     webpage, 'kaltura reference 
id', group='id') 
 148             partner_id = self._search_regex( 
 149                 r'data
-partner
-id=(["\'])(?P<id>(?:(?!\1).)+)\1', 
 150                 webpage, 'kaltura widget id', default=self._PARTNER_ID, 
 152             ui_id = self._search_regex( 
 153                 r'data-ui-id=(["\'])(?P
<id>(?
:(?
!\
1).)+)\
1', 
 154                 webpage, 'kaltura uiconf 
id', default=self._UICONF_ID, 
 158             'wid
': '_
%s' % partner_id, 
 160             'flashvars
[referenceId
]': reference_id, 
 164             kaltura_session = self._download_json( 
 165                 '%s/player
/kaltura_session
/?reference_id
=%s' % (self._API_BASE, reference_id), 
 166                 video_id, 'Downloading kaltura session JSON
', 
 167                 'Unable to download kaltura session JSON
', fatal=False, 
 168                 headers={'Accept
': 'application
/json
'}) 
 170                 session = kaltura_session.get('session
') 
 172                     query['flashvars
[ks
]'] = session 
 174         return self.url_result(update_url_query( 
 175             'https
://cdnapisec
.kaltura
.com
/html5
/html5lib
/v2
.37
.1/mwEmbedFrame
.php
', query), 
 179 class SafariApiIE(SafariBaseIE): 
 180     IE_NAME = 'safari
:api
' 
 181     _VALID_URL = r'https?
://(?
:www\
.)?
(?
:safaribooksonline|
(?
:learning\
.)?oreilly
)\
.com
/api
/v1
/book
/(?P
<course_id
>[^
/]+)/chapter(?
:-content
)?
/(?P
<part
>[^
/?
#&]+)\.html' 
 184         'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 
 185         'only_matching': True, 
 187         'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', 
 188         'only_matching': True, 
 191     def _real_extract(self
, url
): 
 192         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 193         part 
= self
._download
_json
( 
 194             url
, '%s/%s' % (mobj
.group('course_id'), mobj
.group('part')), 
 195             'Downloading part JSON') 
 196         return self
.url_result(part
['web_url'], SafariIE
.ie_key()) 
 199 class SafariCourseIE(SafariBaseIE
): 
 200     IE_NAME 
= 'safari:course' 
 201     IE_DESC 
= 'safaribooksonline.com online courses' 
 203     _VALID_URL 
= r
'''(?x) 
 206                             (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ 
 212                             techbus\.safaribooksonline\.com 
 218         'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 
 220             'id': '9780133392838', 
 221             'title': 'Hadoop Fundamentals LiveLessons', 
 223         'playlist_count': 22, 
 224         'skip': 'Requires safaribooksonline account credentials', 
 226         'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', 
 227         'only_matching': True, 
 229         'url': 'http://techbus.safaribooksonline.com/9780134426365', 
 230         'only_matching': True, 
 232         'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314', 
 233         'only_matching': True, 
 235         'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838', 
 236         'only_matching': True, 
 238         'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 
 239         'only_matching': True, 
 243     def suitable(cls
, url
): 
 244         return (False if SafariIE
.suitable(url
) or SafariApiIE
.suitable(url
) 
 245                 else super(SafariCourseIE
, cls
).suitable(url
)) 
 247     def _real_extract(self
, url
): 
 248         course_id 
= self
._match
_id
(url
) 
 250         course_json 
= self
._download
_json
( 
 251             '%s/book/%s/?override_format=%s' % (self
._API
_BASE
, course_id
, self
._API
_FORMAT
), 
 252             course_id
, 'Downloading course JSON') 
 254         if 'chapters' not in course_json
: 
 255             raise ExtractorError( 
 256                 'No chapters found for course %s' % course_id
, expected
=True) 
 259             self
.url_result(chapter
, SafariApiIE
.ie_key()) 
 260             for chapter 
in course_json
['chapters']] 
 262         course_title 
= course_json
['title'] 
 264         return self
.playlist_result(entries
, course_id
, course_title
)