]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/linkedin.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  16 class LinkedInLearningBaseIE(InfoExtractor
): 
  17     _NETRC_MACHINE 
= 'linkedin' 
  18     _LOGIN_URL 
= 'https://www.linkedin.com/uas/login?trk=learning' 
  20     def _call_api(self
, course_slug
, fields
, video_slug
=None, resolution
=None): 
  22             'courseSlug': course_slug
, 
  29                 'videoSlug': video_slug
, 
  30                 'resolution': '_%s' % resolution
, 
  32             sub 
= ' %dp' % resolution
 
  33         api_url 
= 'https://www.linkedin.com/learning-api/detailedCourses' 
  34         return self
._download
_json
( 
  35             api_url
, video_slug
, 'Downloading%s JSON metadata' % sub
, headers
={ 
  36                 'Csrf-Token': self
._get
_cookies
(api_url
)['JSESSIONID'].value
, 
  37             }, query
=query
)['elements'][0] 
  39     def _get_urn_id(self
, video_data
): 
  40         urn 
= video_data
.get('urn') 
  42             mobj 
= re
.search(r
'urn:li:lyndaCourse:\d+,(\d+)', urn
) 
  46     def _get_video_id(self
, video_data
, course_slug
, video_slug
): 
  47         return self
._get
_urn
_id
(video_data
) or '%s/%s' % (course_slug
, video_slug
) 
  49     def _real_initialize(self
): 
  50         email
, password 
= self
._get
_login
_info
() 
  54         login_page 
= self
._download
_webpage
( 
  55             self
._LOGIN
_URL
, None, 'Downloading login page') 
  56         action_url 
= urljoin(self
._LOGIN
_URL
, self
._search
_regex
( 
  57             r
'<form[^>]+action=(["\'])(?P
<url
>.+?
)\
1', login_page, 'post url
', 
  58             default='https
://www
.linkedin
.com
/uas
/login
-submit
', group='url
')) 
  59         data = self._hidden_inputs(login_page) 
  62             'session_password
': password, 
  64         login_submit_page = self._download_webpage( 
  65             action_url, None, 'Logging 
in', 
  66             data=urlencode_postdata(data)) 
  67         error = self._search_regex( 
  68             r'<span
[^
>]+class="error"[^
>]*>\s
*(.+?
)\s
*</span
>', 
  69             login_submit_page, 'error
', default=None) 
  71             raise ExtractorError(error, expected=True) 
  74 class LinkedInLearningIE(LinkedInLearningBaseIE): 
  75     IE_NAME = 'linkedin
:learning
' 
  76     _VALID_URL = r'https?
://(?
:www\
.)?linkedin\
.com
/learning
/(?P
<course_slug
>[^
/]+)/(?P
<id>[^
/?
#]+)' 
  78         'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals/welcome?autoplay=true', 
  79         'md5': 'a1d74422ff0d5e66a792deb996693167', 
  84             'timestamp': 1430396150.82, 
  85             'upload_date': '20150430', 
  89     def _real_extract(self
, url
): 
  90         course_slug
, video_slug 
= re
.match(self
._VALID
_URL
, url
).groups() 
  94         for width
, height 
in ((640, 360), (960, 540), (1280, 720)): 
  95             video_data 
= self
._call
_api
( 
  96                 course_slug
, 'selectedVideo', video_slug
, height
)['selectedVideo'] 
  98             video_url_data 
= video_data
.get('url') or {} 
  99             progressive_url 
= video_url_data
.get('progressiveUrl') 
 102                     'format_id': 'progressive-%dp' % height
, 
 103                     'url': progressive_url
, 
 106                     'source_preference': 1, 
 109         title 
= video_data
['title'] 
 111         audio_url 
= video_data
.get('audio', {}).get('progressiveUrl') 
 116                 'format_id': 'audio', 
 121         streaming_url 
= video_url_data
.get('streamingUrl') 
 123             formats
.extend(self
._extract
_m
3u8_formats
( 
 124                 streaming_url
, video_slug
, 'mp4', 
 125                 'm3u8_native', m3u8_id
='hls', fatal
=False)) 
 127         self
._sort
_formats
(formats
, ('width', 'height', 'source_preference', 'tbr', 'abr')) 
 130             'id': self
._get
_video
_id
(video_data
, course_slug
, video_slug
), 
 133             'thumbnail': video_data
.get('defaultThumbnail'), 
 134             'timestamp': float_or_none(video_data
.get('publishedOn'), 1000), 
 135             'duration': int_or_none(video_data
.get('durationInSeconds')), 
 139 class LinkedInLearningCourseIE(LinkedInLearningBaseIE
): 
 140     IE_NAME 
= 'linkedin:learning:course' 
 141     _VALID_URL 
= r
'https?://(?:www\.)?linkedin\.com/learning/(?P<id>[^/?#]+)' 
 143         'url': 'https://www.linkedin.com/learning/programming-foundations-fundamentals', 
 145             'id': 'programming-foundations-fundamentals', 
 146             'title': 'Programming Foundations: Fundamentals', 
 147             'description': 'md5:76e580b017694eb89dc8e8923fff5c86', 
 149         'playlist_mincount': 61, 
 153     def suitable(cls
, url
): 
 154         return False if LinkedInLearningIE
.suitable(url
) else super(LinkedInLearningCourseIE
, cls
).suitable(url
) 
 156     def _real_extract(self
, url
): 
 157         course_slug 
= self
._match
_id
(url
) 
 158         course_data 
= self
._call
_api
(course_slug
, 'chapters,description,title') 
 161         for chapter_number
, chapter 
in enumerate(course_data
.get('chapters', []), 1): 
 162             chapter_title 
= chapter
.get('title') 
 163             chapter_id 
= self
._get
_urn
_id
(chapter
) 
 164             for video 
in chapter
.get('videos', []): 
 165                 video_slug 
= video
.get('slug') 
 169                     '_type': 'url_transparent', 
 170                     'id': self
._get
_video
_id
(video
, course_slug
, video_slug
), 
 171                     'title': video
.get('title'), 
 172                     'url': 'https://www.linkedin.com/learning/%s/%s' % (course_slug
, video_slug
), 
 173                     'chapter': chapter_title
, 
 174                     'chapter_number': chapter_number
, 
 175                     'chapter_id': chapter_id
, 
 176                     'ie_key': LinkedInLearningIE
.ie_key(), 
 179         return self
.playlist_result( 
 180             entries
, course_slug
, 
 181             course_data
.get('title'), 
 182             course_data
.get('description'))