# coding: utf-8
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse_urlencode,
    compat_urlparse,
)
from ..utils import (
    clean_html,
    parse_duration,
    str_to_int,
    unified_strdate,
)


  19 class CamdemyIE(InfoExtractor
): 
  20     _VALID_URL 
= r
'https?://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' 
  23         'url': 'http://www.camdemy.com/media/5181/', 
  24         'md5': '5a5562b6a98b37873119102e052e311b', 
  28             'title': 'Ch1-1 Introduction, Signals (02-23-2012)', 
  29             'thumbnail': r
're:^https?://.*\.jpg$', 
  30             'creator': 'ss11spring', 
  32             'upload_date': '20130114', 
  36         # With non-empty description 
  37         # webpage returns "No permission or not login" 
  38         'url': 'http://www.camdemy.com/media/13885', 
  39         'md5': '4576a3bb2581f86c61044822adbd1249', 
  43             'title': 'EverCam + Camdemy QuickStart', 
  44             'thumbnail': r
're:^https?://.*\.jpg$', 
  45             'description': 'md5:2a9f989c2b153a2342acee579c6e7db6', 
  50         # External source (YouTube) 
  51         'url': 'http://www.camdemy.com/media/14842', 
  55             'title': 'Excel 2013 Tutorial - How to add Password Protection', 
  56             'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', 
  57             'upload_date': '20130211', 
  58             'uploader': 'Hun Kim', 
  59             'uploader_id': 'hunkimtutorials', 
  62             'skip_download': True, 
  66     def _real_extract(self
, url
): 
  67         video_id 
= self
._match
_id
(url
) 
  69         webpage 
= self
._download
_webpage
(url
, video_id
) 
  71         src_from 
= self
._html
_search
_regex
( 
  72             r
"class=['\"]srcFrom
['\"][^>]*>Sources?(?:\s+from)?\s*:\s*<a[^>]+(?:href|title)=(['\"])(?P
<url
>(?
:(?
!\
1).)+)\
1", 
  73             webpage, 'external source', default=None, group='url') 
  75             return self.url_result(src_from) 
  77         oembed_obj = self._download_json( 
  78             'http://www.camdemy.com/oembed/?format=json&url=' + url, video_id) 
  80         title = oembed_obj['title'] 
  81         thumb_url = oembed_obj['thumbnail_url'] 
  82         video_folder = compat_urlparse.urljoin(thumb_url, 'video/') 
  83         file_list_doc = self._download_xml( 
  84             compat_urlparse.urljoin(video_folder, 'fileList.xml'), 
  85             video_id, 'Downloading filelist XML') 
  86         file_name = file_list_doc.find('./video/item/fileName').text 
  87         video_url = compat_urlparse.urljoin(video_folder, file_name) 
  89         # Some URLs return "No permission 
or not login
" in a webpage despite being 
  90         # freely available via oembed JSON URL (e.g. http://www.camdemy.com/media/13885) 
  91         upload_date = unified_strdate(self._search_regex( 
  92             r'>published on ([^<]+)<', webpage, 
  93             'upload date', default=None)) 
  94         view_count = str_to_int(self._search_regex( 
  95             r'role=["\']viewCnt
["\'][^>]*>([\d,.]+) views', 
  96             webpage, 'view count', default=None)) 
  97         description = self._html_search_meta( 
  98             'description', webpage, default=None) or clean_html( 
  99             oembed_obj.get('description')) 
 105             'thumbnail': thumb_url, 
 106             'description': description, 
 107             'creator': oembed_obj.get('author_name'), 
 108             'duration': parse_duration(oembed_obj.get('duration')), 
 109             'upload_date': upload_date, 
 110             'view_count': view_count, 
 114 class CamdemyFolderIE(InfoExtractor): 
 115     _VALID_URL = r'https?://(?:www\.)?camdemy\.com/folder/(?P<id>\d+)' 
 117         # links with trailing slash 
 118         'url': 'http://www.camdemy.com/folder/450', 
 121             'title': '信號與系統 2012 & 2011 (Signals and Systems)', 
 123         'playlist_mincount': 145 
 125         # links without trailing slash 
 127         'url': 'http://www.camdemy.com/folder/853', 
 130             'title': '科學計算 - 使用 Matlab' 
 132         'playlist_mincount': 20 
 134         # with displayMode parameter. For testing the codes to add parameters 
 135         'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', 
 138             'title': '科學計算 - 使用 Matlab' 
 140         'playlist_mincount': 20 
 143     def _real_extract(self, url): 
 144         folder_id = self._match_id(url) 
 146         # Add displayMode=list so that all links are displayed in a single page 
 147         parsed_url = list(compat_urlparse.urlparse(url)) 
 148         query = dict(compat_urlparse.parse_qsl(parsed_url[4])) 
 149         query.update({'displayMode': 'list'}) 
 150         parsed_url[4] = compat_urllib_parse_urlencode(query) 
 151         final_url = compat_urlparse.urlunparse(parsed_url) 
 153         page = self._download_webpage(final_url, folder_id) 
 154         matches = re.findall(r"href
='(/media/\d+/?)'", page) 
 156         entries = [self.url_result('http://www.camdemy.com' + media_path) 
 157                    for media_path in matches] 
 159         folder_title = self._html_search_meta('keywords', page) 
 161         return self.playlist_result(entries, folder_id, folder_title)