1 from __future__ 
import unicode_literals
 
   9 from .common 
import InfoExtractor
 
  10 from ..compat 
import ( 
  21     srt_subtitles_timecode
, 
  28 class PluralsightBaseIE(InfoExtractor
): 
  29     _API_BASE 
= 'https://app.pluralsight.com' 
  31     _GRAPHQL_EP 
= '%s/player/api/graphql' % _API_BASE
 
  33         'Content-Type': 'application/json;charset=UTF-8', 
  35     _GRAPHQL_COURSE_TMPL 
= ''' 
  36 query BootstrapPlayer { 
  49       course(courseId: "%s") { 
  53         translationLanguages { 
  57         supportsWideScreenVideoFormats 
  85     def _download_course(self
, course_id
, url
, display_id
): 
  87             return self
._download
_course
_rpc
(course_id
, url
, display_id
) 
  88         except ExtractorError
: 
  90             return self
._download
_json
( 
  91                 'https://app.pluralsight.com/player/user/api/v1/player/payload', 
  92                 display_id
, data
=urlencode_postdata({'courseId': course_id
}), 
  93                 headers
={'Referer': url
}) 
  95     def _download_course_rpc(self
, course_id
, url
, display_id
): 
  96         response 
= self
._download
_json
( 
  97             self
._GRAPHQL
_EP
, display_id
, data
=json
.dumps({ 
  98                 'query': self
._GRAPHQL
_COURSE
_TMPL 
% course_id
, 
 100             }).encode('utf-8'), headers
=self
._GRAPHQL
_HEADERS
) 
 103             response
, lambda x
: x
['data']['rpc']['bootstrapPlayer']['course'], 
 108         raise ExtractorError( 
 109             '%s said: %s' % (self
.IE_NAME
, response
['error']['message']), 
 113 class PluralsightIE(PluralsightBaseIE
): 
 114     IE_NAME 
= 'pluralsight' 
 115     _VALID_URL 
= r
'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:training/)?player\?' 
 116     _LOGIN_URL 
= 'https://app.pluralsight.com/id/' 
 118     _NETRC_MACHINE 
= 'pluralsight' 
 121         'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas', 
 122         'md5': '4d458cf5cf4c593788672419a8dd4cf8', 
 124             'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', 
 126             'title': 'Demo Monitoring', 
 129         'skip': 'Requires pluralsight account credentials', 
 131         'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live', 
 132         'only_matching': True, 
 134         # available without pluralsight account 
 135         'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started', 
 136         'only_matching': True, 
 138         'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0', 
 139         'only_matching': True, 
 142     GRAPHQL_VIEWCLIP_TMPL 
= ''' 
 145     author: "%(author)s", 
 146     clipIndex: %(clipIndex)d, 
 147     courseName: "%(courseName)s", 
 148     includeCaptions: %(includeCaptions)s, 
 149     locale: "%(locale)s", 
 150     mediaType: "%(mediaType)s", 
 151     moduleName: "%(moduleName)s", 
 152     quality: "%(quality)s" 
 164     def _real_initialize(self
): 
 168         username
, password 
= self
._get
_login
_info
() 
 172         login_page 
= self
._download
_webpage
( 
 173             self
._LOGIN
_URL
, None, 'Downloading login page') 
 175         login_form 
= self
._hidden
_inputs
(login_page
) 
 178             'Username': username
, 
 179             'Password': password
, 
 182         post_url 
= self
._search
_regex
( 
 183             r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', login_page, 
 184             'post url
', default=self._LOGIN_URL, group='url
') 
 186         if not post_url.startswith('http
'): 
 187             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) 
 189         response = self._download_webpage( 
 190             post_url, None, 'Logging 
in', 
 191             data=urlencode_postdata(login_form), 
 192             headers={'Content
-Type
': 'application
/x
-www
-form
-urlencoded
'}) 
 194         error = self._search_regex( 
 195             r'<span
[^
>]+class="field-validation-error"[^
>]*>([^
<]+)</span
>', 
 196             response, 'error message
', default=None) 
 198             raise ExtractorError('Unable to login
: %s' % error, expected=True) 
 200         if all(not re.search(p, response) for p in ( 
 201                 r'__INITIAL_STATE__
', r'["\']currentUser["\']', 
 203                 r'>\s
*Sign out\s
*<')): 
 204             BLOCKED = 'Your account has been blocked due to suspicious activity
' 
 205             if BLOCKED in response: 
 206                 raise ExtractorError( 
 207                     'Unable to login
: %s' % BLOCKED, expected=True) 
 208             MUST_AGREE = 'To 
continue using Pluralsight
, you must agree to
' 
 209             if any(p in response for p in (MUST_AGREE, '>Disagree
<', '>Agree
<')): 
 210                 raise ExtractorError( 
 211                     'Unable to login
: %s some documents
. Go to pluralsight
.com
, ' 
 212                     'log 
in and agree 
with what Pluralsight requires
.' 
 213                     % MUST_AGREE, expected=True) 
 215             raise ExtractorError('Unable to log 
in') 
 217     def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id): 
 220             captions = self._download_json( 
 221                 '%s/transcript
/api
/v1
/caption
/json
/%s/%s' 
 222                 % (self._API_BASE, clip_id, lang), video_id, 
 223                 'Downloading captions JSON
', 'Unable to download captions JSON
', 
 232             captions = self._download_json( 
 233                 '%s/player
/retrieve
-captions
' % self._API_BASE, video_id, 
 234                 'Downloading captions JSON
', 'Unable to download captions JSON
', 
 235                 fatal=False, data=json.dumps(captions_post).encode('utf
-8'), 
 236                 headers={'Content
-Type
': 'application
/json
;charset
=utf
-8'}) 
 241                     'data
': json.dumps(captions), 
 244                     'data
': self._convert_subtitles(duration, captions), 
 249     def _convert_subtitles(duration, subs): 
 251         TIME_OFFSET_KEYS = ('displayTimeOffset
', 'DisplayTimeOffset
') 
 252         TEXT_KEYS = ('text
', 'Text
') 
 253         for num, current in enumerate(subs): 
 256                 float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)), 
 257                 dict_get(current, TEXT_KEYS)) 
 258             if start is None or text is None: 
 260             end = duration if num == len(subs) - 1 else float_or_none( 
 261                 dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False)) 
 264             srt += os.linesep.join( 
 268                         srt_subtitles_timecode(start), 
 269                         srt_subtitles_timecode(end)), 
 275     def _real_extract(self, url): 
 276         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) 
 278         author = qs.get('author
', [None])[0] 
 279         name = qs.get('name
', [None])[0] 
 280         clip_idx = qs.get('clip
', [None])[0] 
 281         course_name = qs.get('course
', [None])[0] 
 283         if any(not f for f in (author, name, clip_idx, course_name,)): 
 284             raise ExtractorError('Invalid URL
', expected=True) 
 286         display_id = '%s-%s' % (name, clip_idx) 
 288         course = self._download_course(course_name, url, display_id) 
 290         collection = course['modules
'] 
 294         for module_ in collection: 
 295             if name in (module_.get('moduleName
'), module_.get('name
')): 
 296                 for clip_ in module_.get('clips
', []): 
 297                     clip_index = clip_.get('clipIndex
') 
 298                     if clip_index is None: 
 299                         clip_index = clip_.get('index
') 
 300                     if clip_index is None: 
 302                     if compat_str(clip_index) == clip_idx: 
 307             raise ExtractorError('Unable to resolve clip
') 
 309         title = clip['title
'] 
 310         clip_id = clip.get('clipName
') or clip.get('name
') or clip['clipId
'] 
 313             'low
': {'width
': 640, 'height
': 480}, 
 314             'medium
': {'width
': 848, 'height
': 640}, 
 315             'high
': {'width
': 1024, 'height
': 768}, 
 316             'high
-widescreen
': {'width
': 1280, 'height
': 720}, 
 319         QUALITIES_PREFERENCE = ('low
', 'medium
', 'high
', 'high
-widescreen
',) 
 320         quality_key = qualities(QUALITIES_PREFERENCE) 
 322         AllowedQuality = collections.namedtuple('AllowedQuality
', ['ext
', 'qualities
']) 
 324         ALLOWED_QUALITIES = ( 
 325             AllowedQuality('webm
', ['high
', ]), 
 326             AllowedQuality('mp4
', ['low
', 'medium
', 'high
', ]), 
 329         # Some courses also offer widescreen resolution for high quality (see 
 330         # https://github.com/ytdl-org/youtube-dl/issues/7766) 
 331         widescreen = course.get('supportsWideScreenVideoFormats
') is True 
 332         best_quality = 'high
-widescreen
' if widescreen else 'high
' 
 334             for allowed_quality in ALLOWED_QUALITIES: 
 335                 allowed_quality.qualities.append(best_quality) 
 337         # In order to minimize the number of calls to the ViewClip API and reduce
 338         # the probability of being throttled or banned by Pluralsight we will request
 339         # only a single format unless format listing was explicitly requested.
 340         if self._downloader.params.get('listformats
', False): 
 341             allowed_qualities = ALLOWED_QUALITIES 
 343             def guess_allowed_qualities(): 
 344                 req_format = self._downloader.params.get('format
') or 'best
' 
 345                 req_format_split = req_format.split('-', 1) 
 346                 if len(req_format_split) > 1: 
 347                     req_ext, req_quality = req_format_split 
 348                     req_quality = '-'.join(req_quality.split('-')[:2]) 
 349                     for allowed_quality in ALLOWED_QUALITIES: 
 350                         if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities: 
 351                             return (AllowedQuality(req_ext, (req_quality, )), ) 
 352                 req_ext = 'webm
' if self._downloader.params.get('prefer_free_formats
') else 'mp4
' 
 353                 return (AllowedQuality(req_ext, (best_quality, )), ) 
 354             allowed_qualities = guess_allowed_qualities() 
 357         for ext, qualities_ in allowed_qualities: 
 358             for quality in qualities_: 
 359                 f = QUALITIES[quality].copy() 
 362                     'includeCaptions
': 'false
', 
 363                     'clipIndex
': int(clip_idx), 
 364                     'courseName
': course_name, 
 368                     'quality
': '%dx%d' % (f['width
'], f['height
']), 
 370                 format_id = '%s-%s' % (ext, quality) 
 373                     viewclip = self._download_json( 
 374                         self._GRAPHQL_EP, display_id, 
 375                         'Downloading 
%s viewclip graphql
' % format_id, 
 377                             'query
': self.GRAPHQL_VIEWCLIP_TMPL % clip_post, 
 380                         headers=self._GRAPHQL_HEADERS)['data
']['viewClip
'] 
 381                 except ExtractorError: 
 382                     # Still works but will most likely go away soon
 383                     viewclip = self._download_json( 
 384                         '%s/video
/clips
/viewclip
' % self._API_BASE, display_id, 
 385                         'Downloading 
%s viewclip JSON
' % format_id, fatal=False, 
 386                         data=json.dumps(clip_post).encode('utf
-8'), 
 387                         headers={'Content
-Type
': 'application
/json
;charset
=utf
-8'}) 
 389                 # Pluralsight tracks multiple sequential calls to the ViewClip API and starts
 390                 # to return 429 HTTP errors after some time (see
 391                 # https://github.com/ytdl-org/youtube-dl/pull/6989). Moreover, it may even lead
 392                 # to an account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842).
 393                 # To somewhat reduce the probability of these consequences
 394                 # we will sleep a random amount of time before each call to ViewClip.
 396                     random.randint(2, 5), display_id, 
 397                     '%(video_id)s: Waiting 
for %(timeout)s seconds to avoid throttling
') 
 402                 clip_urls = viewclip.get('urls
') 
 403                 if not isinstance(clip_urls, list): 
 406                 for clip_url_data in clip_urls: 
 407                     clip_url = clip_url_data.get('url
') 
 410                     cdn = clip_url_data.get('cdn
') 
 415                         'format_id
': '%s-%s' % (format_id, cdn) if cdn else format_id, 
 416                         'quality
': quality_key(quality), 
 417                         'source_preference
': int_or_none(clip_url_data.get('rank
')), 
 419                     formats.append(clip_f) 
 421         self._sort_formats(formats) 
 423         duration = int_or_none( 
 424             clip.get('duration
')) or parse_duration(clip.get('formattedDuration
')) 
 426         # TODO: other languages? 
 427         subtitles = self.extract_subtitles( 
 428             author, clip_idx, clip.get('clipId
'), 'en
', name, duration, display_id) 
 433             'duration
': duration, 
 436             'subtitles
': subtitles, 
 440 class PluralsightCourseIE(PluralsightBaseIE): 
 441     IE_NAME = 'pluralsight
:course
' 
 442     _VALID_URL = r'https?
://(?
:(?
:www|app
)\
.)?pluralsight\
.com
/(?
:library
/)?courses
/(?P
<id>[^
/]+)' 
 444         # Free course from Pluralsight Starter Subscription for Microsoft TechNet 
 445         # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz 
 446         'url
': 'http
://www
.pluralsight
.com
/courses
/hosting
-sql
-server
-windows
-azure
-iaas
', 
 448             'id': 'hosting
-sql
-server
-windows
-azure
-iaas
', 
 449             'title
': 'Hosting SQL Server 
in Microsoft Azure IaaS Fundamentals
', 
 450             'description
': 'md5
:61b37e60f21c4b2f91dc621a977d0986
', 
 452         'playlist_count
': 31, 
 454         # available without pluralsight account 
 455         'url
': 'https
://www
.pluralsight
.com
/courses
/angularjs
-get
-started
', 
 456         'only_matching
': True, 
 458         'url
': 'https
://app
.pluralsight
.com
/library
/courses
/understanding
-microsoft
-azure
-amazon
-aws
/table
-of
-contents
', 
 459         'only_matching
': True, 
 462     def _real_extract(self, url): 
 463         course_id = self._match_id(url) 
 467         course = self._download_course(course_id, url, course_id) 
 469         title = course['title
'] 
 470         course_name = course['name
'] 
 471         course_data = course['modules
'] 
 472         description = course.get('description
') or course.get('shortDescription
') 
 475         for num, module in enumerate(course_data, 1): 
 476             author = module.get('author
') 
 477             module_name = module.get('name
') 
 478             if not author or not module_name: 
 480             for clip in module.get('clips
', []): 
 481                 clip_index = int_or_none(clip.get('index
')) 
 482                 if clip_index is None: 
 484                 clip_url = update_url_query( 
 485                     '%s/player
' % self._API_BASE, query={ 
 487                         'course
': course_name, 
 493                     '_type
': 'url_transparent
', 
 495                     'ie_key
': PluralsightIE.ie_key(), 
 496                     'chapter
': module.get('title
'), 
 497                     'chapter_number
': num, 
 498                     'chapter_id
': module.get('moduleRef
'), 
 501         return self.playlist_result(entries, course_id, title, description)