-import re
+from __future__ import unicode_literals
from .common import InfoExtractor
+from ..utils import parse_iso8601
class HowcastIE(InfoExtractor):
- _VALID_URL = r'(?:https?://)?(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
+ _VALID_URL = r'https?://(?:www\.)?howcast\.com/videos/(?P<id>\d+)'
+ _TEST = {
+ 'url': 'http://www.howcast.com/videos/390161-How-to-Tie-a-Square-Knot-Properly',
+ 'md5': '8b743df908c42f60cf6496586c7f12c3',
+ 'info_dict': {
+ 'id': '390161',
+ 'ext': 'mp4',
+ 'title': 'How to Tie a Square Knot Properly',
+ 'description': 'md5:dbe792e5f6f1489027027bf2eba188a3',
+ 'timestamp': 1276081287,
+ 'upload_date': '20100609',
+ 'duration': 56.823,
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
-
- video_id = mobj.group('id')
- webpage_url = 'http://www.howcast.com/videos/' + video_id
- webpage = self._download_webpage(webpage_url, video_id)
-
- self.report_extraction(video_id)
-
- video_url = self._search_regex(r'\'?file\'?: "(http://mobile-media\.howcast\.com/[0-9]+\.mp4)',
- webpage, u'video URL')
-
- video_title = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') property=\'og:title\'',
- webpage, u'title')
-
- video_description = self._html_search_regex(r'<meta content=(?:"([^"]+)"|\'([^\']+)\') name=\'description\'',
- webpage, u'description', fatal=False)
-
- thumbnail = self._html_search_regex(r'<meta content=\'(.+?)\' property=\'og:image\'',
- webpage, u'thumbnail', fatal=False)
-
- return [{
- 'id': video_id,
- 'url': video_url,
- 'ext': 'mp4',
- 'title': video_title,
- 'description': video_description,
- 'thumbnail': thumbnail,
- }]
+ video_id = self._match_id(url)
+
+ webpage = self._download_webpage(url, video_id)
+
+ embed_code = self._search_regex(
+ r'<iframe[^>]+src="[^"]+\bembed_code=([^\b]+)\b',
+ webpage, 'ooyala embed code')
+
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': 'Ooyala',
+ 'url': 'ooyala:%s' % embed_code,
+ 'id': video_id,
+ 'timestamp': parse_iso8601(self._html_search_meta(
+ 'article:published_time', webpage, 'timestamp')),
+ }