1 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  23 class PluralsightBaseIE(InfoExtractor
): 
  24     _API_BASE 
= 'http://app.pluralsight.com' 
  27 class PluralsightIE(PluralsightBaseIE
): 
  28     IE_NAME 
= 'pluralsight' 
  29     _VALID_URL 
= r
'https?://(?:(?:www|app)\.)?pluralsight\.com/training/player\?' 
  30     _LOGIN_URL 
= 'https://app.pluralsight.com/id/' 
  32     _NETRC_MACHINE 
= 'pluralsight' 
  35         'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas', 
  36         'md5': '4d458cf5cf4c593788672419a8dd4cf8', 
  38             'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', 
  40             'title': 'Management of SQL Server - Demo Monitoring', 
  43         'skip': 'Requires pluralsight account credentials', 
  45         'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live', 
  46         'only_matching': True, 
  48         # available without pluralsight account 
  49         'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started', 
  50         'only_matching': True, 
  53     def _real_initialize(self
): 
  57         (username
, password
) = self
._get
_login
_info
() 
  61         login_page 
= self
._download
_webpage
( 
  62             self
._LOGIN
_URL
, None, 'Downloading login page') 
  64         login_form 
= self
._hidden
_inputs
(login_page
) 
  67             'Username': username
.encode('utf-8'), 
  68             'Password': password
.encode('utf-8'), 
  71         post_url 
= self
._search
_regex
( 
  72             r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', login_page, 
  73             'post url
', default=self._LOGIN_URL, group='url
') 
  75         if not post_url.startswith('http
'): 
  76             post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url) 
  78         request = sanitized_Request( 
  79             post_url, compat_urllib_parse.urlencode(login_form).encode('utf
-8')) 
  80         request.add_header('Content
-Type
', 'application
/x
-www
-form
-urlencoded
') 
  82         response = self._download_webpage( 
  83             request, None, 'Logging 
in as %s' % username) 
  85         error = self._search_regex( 
  86             r'<span
[^
>]+class="field-validation-error"[^
>]*>([^
<]+)</span
>', 
  87             response, 'error message
', default=None) 
  89             raise ExtractorError('Unable to login
: %s' % error, expected=True) 
  91         if all(p not in response for p in ('__INITIAL_STATE__
', '"currentUser"')): 
  92             raise ExtractorError('Unable to log 
in') 
  94     def _real_extract(self, url): 
  95         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) 
  97         author = qs.get('author
', [None])[0] 
  98         name = qs.get('name
', [None])[0] 
  99         clip_id = qs.get('clip
', [None])[0] 
 100         course = qs.get('course
', [None])[0] 
 102         if any(not f for f in (author, name, clip_id, course,)): 
 103             raise ExtractorError('Invalid URL
', expected=True) 
 105         display_id = '%s-%s' % (name, clip_id) 
 107         webpage = self._download_webpage(url, display_id) 
 109         modules = self._search_regex( 
 110             r'moduleCollection\s
*:\s
*new\s
+ModuleCollection\
((\
[.+?\
])\s
*,\s
*\$rootScope\
)', 
 111             webpage, 'modules
', default=None) 
 114             collection = self._parse_json(modules, display_id) 
 116             # Webpage may be served in different layout (see 
 117             # https://github.com/rg3/youtube-dl/issues/7607) 
 118             collection = self._parse_json( 
 120                     r'var\s
+initialState\s
*=\s
*({.+?
});\n', webpage, 'initial state
'), 
 121                 display_id)['course
']['modules
'] 
 123         module, clip = None, None 
 125         for module_ in collection: 
 126             if name in (module_.get('moduleName
'), module_.get('name
')): 
 128                 for clip_ in module_.get('clips
', []): 
 129                     clip_index = clip_.get('clipIndex
') 
 130                     if clip_index is None: 
 131                         clip_index = clip_.get('index
') 
 132                     if clip_index is None: 
 134                     if compat_str(clip_index) == clip_id: 
 139             raise ExtractorError('Unable to resolve clip
') 
 142             'low
': {'width
': 640, 'height
': 480}, 
 143             'medium
': {'width
': 848, 'height
': 640}, 
 144             'high
': {'width
': 1024, 'height
': 768}, 
 145             'high
-widescreen
': {'width
': 1280, 'height
': 720}, 
 148         QUALITIES_PREFERENCE = ('low
', 'medium
', 'high
', 'high
-widescreen
',) 
 149         quality_key = qualities(QUALITIES_PREFERENCE) 
 151         AllowedQuality = collections.namedtuple('AllowedQuality
', ['ext
', 'qualities
']) 
 153         ALLOWED_QUALITIES = ( 
 154             AllowedQuality('webm
', ['high
', ]), 
 155             AllowedQuality('mp4
', ['low
', 'medium
', 'high
', ]), 
 158         # Some courses also offer widescreen resolution for high quality (see 
 159         # https://github.com/rg3/youtube-dl/issues/7766) 
 160         widescreen = True if re.search( 
 161             r'courseSupportsWidescreenVideoFormats\s
*:\s
*true
', webpage) else False 
 162         best_quality = 'high
-widescreen
' if widescreen else 'high
' 
 164             for allowed_quality in ALLOWED_QUALITIES: 
 165                 allowed_quality.qualities.append(best_quality) 
  167         # In order to minimize the number of calls to the ViewClip API and reduce 
  168         # the probability of being throttled or banned by Pluralsight, we request 
  169         # only a single format unless a formats listing was explicitly requested. 
 170         if self._downloader.params.get('listformats
', False): 
 171             allowed_qualities = ALLOWED_QUALITIES 
 173             def guess_allowed_qualities(): 
 174                 req_format = self._downloader.params.get('format
') or 'best
' 
 175                 req_format_split = req_format.split('-', 1) 
 176                 if len(req_format_split) > 1: 
 177                     req_ext, req_quality = req_format_split 
 178                     for allowed_quality in ALLOWED_QUALITIES: 
 179                         if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities: 
 180                             return (AllowedQuality(req_ext, (req_quality, )), ) 
 181                 req_ext = 'webm
' if self._downloader.params.get('prefer_free_formats
') else 'mp4
' 
 182                 return (AllowedQuality(req_ext, (best_quality, )), ) 
 183             allowed_qualities = guess_allowed_qualities() 
 186         for ext, qualities_ in allowed_qualities: 
 187             for quality in qualities_: 
 188                 f = QUALITIES[quality].copy() 
 197                     'q
': '%dx%d' % (f['width
'], f['height
']), 
 199                 request = sanitized_Request( 
 200                     '%s/training
/Player
/ViewClip
' % self._API_BASE, 
 201                     json.dumps(clip_post).encode('utf
-8')) 
 202                 request.add_header('Content
-Type
', 'application
/json
;charset
=utf
-8') 
 203                 format_id = '%s-%s' % (ext, quality) 
 204                 clip_url = self._download_webpage( 
 205                     request, display_id, 'Downloading 
%s URL
' % format_id, fatal=False) 
  207                 # Pluralsight tracks multiple sequential calls to the ViewClip API and starts 
  208                 # to return 429 HTTP errors after some time (see 
  209                 # https://github.com/rg3/youtube-dl/pull/6989). Moreover, it may even lead 
  210                 # to an account ban (see https://github.com/rg3/youtube-dl/issues/6842). 
  211                 # To somewhat reduce the probability of these consequences, 
  212                 # we sleep a random amount of time before each call to ViewClip. 
 214                     random.randint(2, 5), display_id, 
 215                     '%(video_id)s: Waiting 
for %(timeout)s seconds to avoid throttling
') 
 222                     'format_id
': format_id, 
 223                     'quality
': quality_key(quality), 
 226         self._sort_formats(formats) 
 229         # http://www.pluralsight.com/training/Player/ViewClip + cap = true 
 231         # http://www.pluralsight.com/training/Player/Captions 
 232         # { a = author, cn = clip_id, lc = end, m = name } 
 235             'id': clip.get('clipName
') or clip['name
'], 
 236             'title
': '%s - %s' % (module['title
'], clip['title
']), 
 237             'duration
': int_or_none(clip.get('duration
')) or parse_duration(clip.get('formattedDuration
')), 
 243 class PluralsightCourseIE(PluralsightBaseIE): 
 244     IE_NAME = 'pluralsight
:course
' 
 245     _VALID_URL = r'https?
://(?
:(?
:www|app
)\
.)?pluralsight\
.com
/(?
:library
/)?courses
/(?P
<id>[^
/]+)' 
 247         # Free course from Pluralsight Starter Subscription for Microsoft TechNet 
 248         # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz 
 249         'url
': 'http
://www
.pluralsight
.com
/courses
/hosting
-sql
-server
-windows
-azure
-iaas
', 
 251             'id': 'hosting
-sql
-server
-windows
-azure
-iaas
', 
 252             'title
': 'Hosting SQL Server 
in Microsoft Azure IaaS Fundamentals
', 
 253             'description
': 'md5
:61b37e60f21c4b2f91dc621a977d0986
', 
 255         'playlist_count
': 31, 
 257         # available without pluralsight account 
 258         'url
': 'https
://www
.pluralsight
.com
/courses
/angularjs
-get
-started
', 
 259         'only_matching
': True, 
 261         'url
': 'https
://app
.pluralsight
.com
/library
/courses
/understanding
-microsoft
-azure
-amazon
-aws
/table
-of
-contents
', 
 262         'only_matching
': True, 
 265     def _real_extract(self, url): 
 266         course_id = self._match_id(url) 
 270         course = self._download_json( 
 271             '%s/data
/course
/%s' % (self._API_BASE, course_id), 
 272             course_id, 'Downloading course JSON
') 
 274         title = course['title
'] 
 275         description = course.get('description
') or course.get('shortDescription
') 
 277         course_data = self._download_json( 
 278             '%s/data
/course
/content
/%s' % (self._API_BASE, course_id), 
 279             course_id, 'Downloading course data JSON
') 
 282         for module in course_data: 
 283             for clip in module.get('clips
', []): 
 284                 player_parameters = clip.get('playerParameters
') 
 285                 if not player_parameters: 
 287                 entries.append(self.url_result( 
 288                     '%s/training
/player?
%s' % (self._API_BASE, player_parameters), 
 291         return self.playlist_result(entries, course_id, title, description)