2 from __future__
import unicode_literals
7 from .common
import InfoExtractor
9 compat_urllib_parse_urlencode
,
class CamdemyIE(InfoExtractor):
    """Extract a single media item from a camdemy.com ``/media/<id>`` page.

    A media page either embeds a video hosted on Camdemy itself or merely
    links to an external source; in the latter case extraction is delegated
    to whichever extractor handles the linked URL.
    """

    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://www.camdemy.com/media/5181/',
        'md5': '5a5562b6a98b37873119102e052e311b',
        'info_dict': {
            'id': '5181',
            'ext': 'mp4',
            'title': 'Ch1-1 Introduction, Signals (02-23-2012)',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'ss11spring',
            'upload_date': '20130114',
            'timestamp': 1358154556,
        },
    }, {
        # With non-empty description
        'url': 'http://www.camdemy.com/media/13885',
        'md5': '4576a3bb2581f86c61044822adbd1249',
        'info_dict': {
            'id': '13885',
            'ext': 'mp4',
            'title': 'EverCam + Camdemy QuickStart',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:050b62f71ed62928f8a35f1a41e186c9',
            'upload_date': '20140620',
            'timestamp': 1403271569,
        },
    }, {
        # Media hosted elsewhere: the result comes from the delegated extractor
        'url': 'http://www.camdemy.com/media/14842',
        'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7',
        'info_dict': {
            'id': '2vsYQzNIsJo',
            'ext': 'mp4',
            'upload_date': '20130211',
            'uploader': 'Hun Kim',
            'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection',
            'uploader_id': 'hunkimtutorials',
            'title': 'Excel 2013 Tutorial - How to add Password Protection',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the media page at *url*.

        If the page names an external source, a ``url_result`` pointing at
        that source is returned instead and nothing else is downloaded.
        Otherwise the video URL is derived from the oEmbed thumbnail
        location and the ``fileList.xml`` document stored next to it.
        """
        video_id = self._match_id(url)
        page = self._download_webpage(url, video_id)

        src_from = self._html_search_regex(
            r"<div class='srcFrom'>Source: <a title='([^']+)'", page,
            'external source', default=None)
        # Only delegate when an external source link is actually present;
        # otherwise fall through to the native Camdemy extraction below.
        if src_from:
            return self.url_result(src_from)

        # The page URL is passed as a query parameter, so it has to be
        # URL-encoded (it may itself contain '?' or '&').
        oembed_query = compat_urllib_parse_urlencode(
            [('format', 'json'), ('url', url)])
        oembed_obj = self._download_json(
            'http://www.camdemy.com/oembed/?' + oembed_query, video_id)

        thumb_url = oembed_obj['thumbnail_url']
        # The video files live in a "video/" folder next to the thumbnail.
        video_folder = compat_urlparse.urljoin(thumb_url, 'video/')
        file_list_doc = self._download_xml(
            compat_urlparse.urljoin(video_folder, 'fileList.xml'),
            video_id, 'Filelist XML')
        file_name = file_list_doc.find('./video/item/fileName').text
        video_url = compat_urlparse.urljoin(video_folder, file_name)

        # The "Posted" value is parsed with a space between date and time
        # and a fixed UTC+8 offset.
        timestamp = parse_iso8601(
            self._html_search_regex(
                r"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<",
                page, 'creation time', fatal=False),
            delimiter=' ', timezone=datetime.timedelta(hours=8))
        view_count = str_to_int(self._html_search_regex(
            r"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<",
            page, 'view count', fatal=False))

        return {
            'id': video_id,
            'url': video_url,
            'title': oembed_obj['title'],
            'thumbnail': thumb_url,
            'description': self._html_search_meta('description', page),
            'creator': oembed_obj['author_name'],
            'duration': oembed_obj['duration'],
            'timestamp': timestamp,
            'view_count': view_count,
        }
class CamdemyFolderIE(InfoExtractor):
    """Extract every media item listed in a camdemy.com ``/folder/<id>`` page
    as a playlist."""

    # Dots are escaped so they match literally; "www." is optional, matching
    # the pattern used for single media pages.
    _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)'
    _TESTS = [{
        # links with trailing slash
        'url': 'http://www.camdemy.com/folder/450',
        'info_dict': {
            'id': '450',
            'title': '信號與系統 2012 & 2011 (Signals and Systems)',
        },
        'playlist_mincount': 145
    }, {
        # links without trailing slash
        'url': 'http://www.camdemy.com/folder/853',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }, {
        # with displayMode parameter. For testing the codes to add parameters
        'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg',
        'info_dict': {
            'id': '853',
            'title': '科學計算 - 使用 Matlab'
        },
        'playlist_mincount': 20
    }]

    def _real_extract(self, url):
        """Return a playlist result with one entry per media link found on
        the folder page at *url*.

        The playlist id is the folder id from the URL and its title is the
        page's ``keywords`` meta value.
        """
        folder_id = self._match_id(url)

        # Force displayMode=list so that all links are displayed in a single
        # page; any displayMode already present in the URL is overridden and
        # other query parameters are kept.
        url_parts = list(compat_urlparse.urlparse(url))
        query = dict(compat_urlparse.parse_qsl(url_parts[4]))
        query['displayMode'] = 'list'
        url_parts[4] = compat_urllib_parse_urlencode(query)
        list_url = compat_urlparse.urlunparse(url_parts)

        page = self._download_webpage(list_url, folder_id)

        # Media links are site-relative and may or may not end with a slash.
        entries = [
            self.url_result('http://www.camdemy.com' + media_path)
            for media_path in re.findall(r"href='(/media/\d+/?)'", page)]

        folder_title = self._html_search_meta('keywords', page)

        return self.playlist_result(entries, folder_id, folder_title)