]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/camdemy.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  18 class CamdemyIE(InfoExtractor
): 
  19     _VALID_URL 
= r
'http://(?:www\.)?camdemy\.com/media/(?P<id>\d+)' 
  22         'url': 'http://www.camdemy.com/media/5181/', 
  23         'md5': '5a5562b6a98b37873119102e052e311b', 
  27             'title': 'Ch1-1 Introduction, Signals (02-23-2012)', 
  28             'thumbnail': 're:^https?://.*\.jpg$', 
  30             'creator': 'ss11spring', 
  31             'upload_date': '20130114', 
  32             'timestamp': 1358154556, 
  36         # With non-empty description 
  37         'url': 'http://www.camdemy.com/media/13885', 
  38         'md5': '4576a3bb2581f86c61044822adbd1249', 
  42             'title': 'EverCam + Camdemy QuickStart', 
  43             'thumbnail': 're:^https?://.*\.jpg$', 
  44             'description': 'md5:050b62f71ed62928f8a35f1a41e186c9', 
  46             'upload_date': '20140620', 
  47             'timestamp': 1403271569, 
  51         'url': 'http://www.camdemy.com/media/14842', 
  52         'md5': '50e1c3c3aa233d3d7b7daa2fa10b1cf7', 
  56             'upload_date': '20130211', 
  57             'uploader': 'Hun Kim', 
  58             'description': 'Excel 2013 Tutorial for Beginners - How to add Password Protection', 
  59             'uploader_id': 'hunkimtutorials', 
  60             'title': 'Excel 2013 Tutorial - How to add Password Protection', 
  64     def _real_extract(self
, url
): 
  65         video_id 
= self
._match
_id
(url
) 
  66         page 
= self
._download
_webpage
(url
, video_id
) 
  68         src_from 
= self
._html
_search
_regex
( 
  69             r
"<div class='srcFrom'>Source: <a title='([^']+)'", page
, 
  70             'external source', default
=None) 
  72             return self
.url_result(src_from
) 
  74         oembed_obj 
= self
._download
_json
( 
  75             'http://www.camdemy.com/oembed/?format=json&url=' + url
, video_id
) 
  77         thumb_url 
= oembed_obj
['thumbnail_url'] 
  78         video_folder 
= compat_urlparse
.urljoin(thumb_url
, 'video/') 
  79         file_list_doc 
= self
._download
_xml
( 
  80             compat_urlparse
.urljoin(video_folder
, 'fileList.xml'), 
  81             video_id
, 'Filelist XML') 
  82         file_name 
= file_list_doc
.find('./video/item/fileName').text
 
  83         video_url 
= compat_urlparse
.urljoin(video_folder
, file_name
) 
  85         timestamp 
= parse_iso8601(self
._html
_search
_regex
( 
  86             r
"<div class='title'>Posted\s*:</div>\s*<div class='value'>([^<>]+)<", 
  87             page
, 'creation time', fatal
=False), 
  88             delimiter
=' ', timezone
=datetime
.timedelta(hours
=8)) 
  89         view_count 
= str_to_int(self
._html
_search
_regex
( 
  90             r
"<div class='title'>Views\s*:</div>\s*<div class='value'>([^<>]+)<", 
  91             page
, 'view count', fatal
=False)) 
  96             'title': oembed_obj
['title'], 
  97             'thumbnail': thumb_url
, 
  98             'description': self
._html
_search
_meta
('description', page
), 
  99             'creator': oembed_obj
['author_name'], 
 100             'duration': oembed_obj
['duration'], 
 101             'timestamp': timestamp
, 
 102             'view_count': view_count
, 
 106 class CamdemyFolderIE(InfoExtractor
): 
 107     _VALID_URL 
= r
'http://www.camdemy.com/folder/(?P<id>\d+)' 
 109         # links with trailing slash 
 110         'url': 'http://www.camdemy.com/folder/450', 
 113             'title': '信號與系統 2012 & 2011 (Signals and Systems)', 
 115         'playlist_mincount': 145 
 117         # links without trailing slash 
 119         'url': 'http://www.camdemy.com/folder/853', 
 122             'title': '科學計算 - 使用 Matlab' 
 124         'playlist_mincount': 20 
 126         # with displayMode parameter. For testing the codes to add parameters 
 127         'url': 'http://www.camdemy.com/folder/853/?displayMode=defaultOrderByOrg', 
 130             'title': '科學計算 - 使用 Matlab' 
 132         'playlist_mincount': 20 
 135     def _real_extract(self
, url
): 
 136         folder_id 
= self
._match
_id
(url
) 
 138         # Add displayMode=list so that all links are displayed in a single page 
 139         parsed_url 
= list(compat_urlparse
.urlparse(url
)) 
 140         query 
= dict(compat_urlparse
.parse_qsl(parsed_url
[4])) 
 141         query
.update({'displayMode': 'list'}) 
 142         parsed_url
[4] = compat_urllib_parse
.urlencode(query
) 
 143         final_url 
= compat_urlparse
.urlunparse(parsed_url
) 
 145         page 
= self
._download
_webpage
(final_url
, folder_id
) 
 146         matches 
= re
.findall(r
"href='(/media/\d+/?)'", page
) 
 148         entries 
= [self
.url_result('http://www.camdemy.com' + media_path
) 
 149                    for media_path 
in matches
] 
 151         folder_title 
= self
._html
_search
_meta
('keywords', page
) 
 153         return self
.playlist_result(entries
, folder_id
, folder_title
)