]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/teamtreehouse.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    clean_html,
    determine_ext,
    ExtractorError,
    float_or_none,
    get_element_by_class,
    get_element_by_id,
    parse_duration,
    remove_end,
    urlencode_postdata,
    urljoin,
)


class TeamTreeHouseIE(InfoExtractor):
    """Extractor for teamtreehouse.com library pages.

    A library URL resolves either to a single video (when the page embeds
    HTML5 media) or to a playlist of ``url_transparent`` entries collected
    from the page's workshop list or from its syllabus stages page.
    """

    # NOTE(review): this listing was recovered from a scraped HTML view that
    # dropped several statements (guards, dict wrappers, branch keywords).
    # They were restored from the surviving fragments and the gaps in the
    # original line numbering -- confirm against the upstream extractor.

    _VALID_URL = r'https?://(?:www\.)?teamtreehouse\.com/library/(?P<id>[^/]+)'
    _TESTS = [{
        # Playlist with at least 24 entries
        'url': 'https://teamtreehouse.com/library/introduction-to-user-authentication-in-php',
        'info_dict': {
            'id': 'introduction-to-user-authentication-in-php',
            'title': 'Introduction to User Authentication in PHP',
            'description': 'md5:405d7b4287a159b27ddf30ca72b5b053',
        },
        'playlist_mincount': 24,
    }, {
        # Playlist with at least 4 entries
        'url': 'https://teamtreehouse.com/library/deploying-a-react-app',
        'info_dict': {
            'id': 'deploying-a-react-app',
            'title': 'Deploying a React App',
            'description': 'md5:10a82e3ddff18c14ac13581c9b8e5921',
        },
        'playlist_mincount': 4,
    }, {
        # Single video; the preview warning is expected
        'url': 'https://teamtreehouse.com/library/application-overview-2',
        'info_dict': {
            'id': 'application-overview-2',
            # NOTE(review): 'ext' was on a dropped line -- confirm the value.
            'ext': 'mp4',
            'title': 'Application Overview',
            'description': 'md5:4b0a234385c27140a4378de5f1e15127',
        },
        'expected_warnings': ['This is just a preview'],
    }]
    _NETRC_MACHINE = 'teamtreehouse'

    def _real_initialize(self):
        """Sign in with the configured credentials, if any were supplied.

        Raises:
            ExtractorError: when the response to the sign-in request contains
                an element with the ``error-message`` class; the cleaned text
                of that element becomes the error message.
        """
        email, password = self._get_login_info()
        if email is None:
            # No credentials available: skip the sign-in round trip.
            return

        signin_page = self._download_webpage(
            'https://teamtreehouse.com/signin',
            None, 'Downloading signin page')
        # Start from the hidden inputs of the sign-in form and add the
        # credentials on top of them.
        data = self._form_hidden_inputs('new_user_session', signin_page)
        data.update({
            'user_session[email]': email,
            'user_session[password]': password,
        })
        login_response = self._download_webpage(
            'https://teamtreehouse.com/person_session',
            None, 'Logging in', data=urlencode_postdata(data))
        error_message = get_element_by_class('error-message', login_response)
        if error_message:
            raise ExtractorError(clean_html(error_message), expected=True)

    def _real_extract(self, url):
        """Extract a single video or a playlist from a library page.

        Args:
            url: a URL matching ``_VALID_URL``.

        Returns:
            The first HTML5 media entry found on the page, updated with id,
            title, description and duration; or, when the page has no HTML5
            media, a playlist result built from the links of the workshop
            list or of the syllabus stages page.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
        description = self._html_search_meta(
            ['description', 'og:description', 'twitter:description'], webpage)
        entries = self._parse_html5_media_entries(url, webpage, display_id)

        if entries:
            info = entries[0]

            # Fill in a subtitle extension from each subtitle URL, using
            # 'srt' as the fallback passed to determine_ext.
            for subtitles in info.get('subtitles', {}).values():
                for subtitle in subtitles:
                    subtitle['ext'] = determine_ext(subtitle['url'], 'srt')

            is_preview = 'data-preview="true"' in webpage
            if is_preview:
                self.report_warning(
                    'This is just a preview. You need to be signed in with a Basic account to download the entire video.', display_id)
                # NOTE(review): the preview duration was on a dropped line;
                # 30 (seconds) is restored from memory -- confirm.
                duration = 30
            else:
                # data-duration is divided by 1000 (presumably milliseconds
                # to seconds -- confirm).
                duration = float_or_none(self._search_regex(
                    r'data-duration="(\d+)"', webpage, 'duration'), 1000)
                if not duration:
                    duration = parse_duration(get_element_by_id(
                        'video-duration', webpage))

            info.update({
                'id': display_id,
                'title': title,
                'description': description,
                'duration': duration,
            })
            return info

        # No HTML5 media on the page: treat it as a playlist. ``entries`` is
        # empty at this point and is filled in place by extract_urls().

        def extract_urls(html, extract_info=None):
            """Append one url_transparent entry per ``<a href>`` in *html*.

            *extract_info*, when given, is merged into every entry.
            """
            for path in re.findall(r'<a[^>]+href="([^"]+)"', html):
                page_url = urljoin(url, path)
                entry = {
                    '_type': 'url_transparent',
                    'id': self._match_id(page_url),
                    'url': page_url,
                    # Was 'id_key', a key the downloader never reads; the
                    # extractor hint for url results is 'ie_key'.
                    'ie_key': self.ie_key(),
                }
                if extract_info:
                    entry.update(extract_info)
                entries.append(entry)

        workshop_videos = self._search_regex(
            r'(?s)<ul[^>]+id="workshop-videos"[^>]*>(.+?)</ul>',
            webpage, 'workshop videos', default=None)
        if workshop_videos:
            extract_urls(workshop_videos)
        else:
            stages_path = self._search_regex(
                r'(?s)<div[^>]+id="syllabus-stages"[^>]+data-url="([^"]+)"',
                webpage, 'stages path')
            if stages_path:
                stages_page = self._download_webpage(
                    urljoin(url, stages_path), display_id, 'Downloading stages page')
                # Each <h2> heading followed by a <ul> is one chapter; the
                # links inside the list are that chapter's steps.
                stages = re.findall(
                    r'(?s)<h2[^>]*>\s*(.+?)\s*</h2>.+?<ul[^>]*>(.+?)</ul>',
                    stages_page)
                for chapter_number, (chapter, steps_list) in enumerate(stages, 1):
                    extract_urls(steps_list, {
                        'chapter': chapter,
                        'chapter_number': chapter_number,
                    })
                title = remove_end(title, ' Course')

        return self.playlist_result(
            entries, display_id, title, description)