X-Git-Url: https://git.rapsys.eu/.gitweb.cgi/youtubedl/blobdiff_plain/63a6927374492ef47c8fd6de67d0760ace4dd0ed..af014acd27e0b471d5903630847eabb26437b46c:/youtube_dl/extractor/aparat.py diff --git a/youtube_dl/extractor/aparat.py b/youtube_dl/extractor/aparat.py index 7e93bc4..e394cb6 100644 --- a/youtube_dl/extractor/aparat.py +++ b/youtube_dl/extractor/aparat.py @@ -1,56 +1,69 @@ -#coding: utf-8 - -import re +# coding: utf-8 +from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( - ExtractorError, - HEADRequest, + int_or_none, + mimetype2ext, ) class AparatIE(InfoExtractor): - _VALID_URL = r'^https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' + _VALID_URL = r'https?://(?:www\.)?aparat\.com/(?:v/|video/video/embed/videohash/)(?P[a-zA-Z0-9]+)' _TEST = { - u'url': u'http://www.aparat.com/v/wP8On', - u'file': u'wP8On.mp4', - u'md5': u'6714e0af7e0d875c5a39c4dc4ab46ad1', - u'info_dict': { - u"title": u"تیم گلکسی 11 - زومیت", + 'url': 'http://www.aparat.com/v/wP8On', + 'md5': '131aca2e14fe7c4dcb3c4877ba300c89', + 'info_dict': { + 'id': 'wP8On', + 'ext': 'mp4', + 'title': 'تیم گلکسی 11 - زومیت', + 'age_limit': 0, }, - #u'skip': u'Extremely unreliable', + # 'skip': 'Extremely unreliable', } def _real_extract(self, url): - m = re.match(self._VALID_URL, url) - video_id = m.group('id') + video_id = self._match_id(url) # Note: There is an easier-to-parse configuration at # http://www.aparat.com/video/video/config/videohash/%video_id # but the URL in there does not work - embed_url = (u'http://www.aparat.com/video/video/embed/videohash/' + - video_id + u'/vt/frame') - webpage = self._download_webpage(embed_url, video_id) - - video_urls = re.findall(r'fileList\[[0-9]+\]\s*=\s*"([^"]+)"', webpage) - for i, video_url in enumerate(video_urls): - req = HEADRequest(video_url) - res = self._request_webpage( - req, video_id, note=u'Testing video URL %d' % i, errnote=False) - if res: - break - else: - raise ExtractorError(u'No working video URLs found') - - title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, u'title') + webpage = self._download_webpage( + 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, + video_id) + + title = self._search_regex(r'\s+title:\s*"([^"]+)"', webpage, 'title') + + file_list = self._parse_json( + self._search_regex( + r'fileList\s*=\s*JSON\.parse\(\'([^\']+)\'\)', webpage, + 'file list'), + video_id) + + formats = [] + for item in file_list[0]: + file_url = item.get('file') + if not file_url: + continue + ext = mimetype2ext(item.get('type')) + label = item.get('label') + formats.append({ + 'url': file_url, + 'ext': ext, + 'format_id': label or ext, + 'height': int_or_none(self._search_regex( + r'(\d+)[pP]', label or '', 'height', default=None)), + }) + self._sort_formats(formats) + thumbnail = self._search_regex( - r'\s+image:\s*"([^"]+)"', webpage, u'thumbnail', fatal=False) + r'image:\s*"([^"]+)"', webpage, 'thumbnail', fatal=False) return { 'id': video_id, 'title': title, - 'url': video_url, - 'ext': 'mp4', 'thumbnail': thumbnail, + 'age_limit': self._family_friendly_search(webpage), + 'formats': formats, }