X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/3ae74f711947d73bf6627bf312edeec41cec85c3..d018d3313032e12968a6add6800e51d412e2f602:/youtube_dl/extractor/c56.py?ds=sidebyside diff --git a/youtube_dl/extractor/c56.py b/youtube_dl/extractor/c56.py index dc3a8d4..cb96c38 100644 --- a/youtube_dl/extractor/c56.py +++ b/youtube_dl/extractor/c56.py @@ -1,36 +1,47 @@ # coding: utf-8 +from __future__ import unicode_literals import re -import json from .common import InfoExtractor -from ..utils import determine_ext + class C56IE(InfoExtractor): - _VALID_URL = r'https?://((www|player)\.)?56\.com/(.+?/)?(v_|(play_album.+-))(?P<textid>.+?)\.(html|swf)' - IE_NAME = u'56.com' - - _TEST ={ - u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html', - u'file': u'93440716.flv', - u'md5': u'e59995ac63d0457783ea05f93f12a866', - u'info_dict': { - u'title': u'网事知多少 第32期:车怒', + _VALID_URL = r'https?://(?:(?:www|player)\.)?56\.com/(?:.+?/)?(?:v_|(?:play_album.+-))(?P<textid>.+?)\.(?:html|swf)' + IE_NAME = '56.com' + _TEST = { + 'url': 'http://www.56.com/u39/v_OTM0NDA3MTY.html', + 'md5': 'e59995ac63d0457783ea05f93f12a866', + 'info_dict': { + 'id': '93440716', + 'ext': 'flv', + 'title': '网事知多少 第32期:车怒', + 'duration': 283.813, }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) text_id = mobj.group('textid') - info_page = self._download_webpage('http://vxml.56.com/json/%s/' % text_id, - text_id, u'Downloading video info') - info = json.loads(info_page)['info'] - best_format = sorted(info['rfiles'], key=lambda f: int(f['filesize']))[-1] - video_url = best_format['url'] - - return {'id': info['vid'], - 'title': info['Subject'], - 'url': video_url, - 'ext': determine_ext(video_url), - 'thumbnail': info.get('bimg') or info.get('img'), - } + + page = self._download_json( + 'http://vxml.56.com/json/%s/' % text_id, text_id, 'Downloading video info') + + info = page['info'] + + formats = [ + { + 'format_id': f['type'], + 'filesize': int(f['filesize']), + 'url': f['url'] + 
} for f in info['rfiles'] + ] + self._sort_formats(formats) + + return { + 'id': info['vid'], + 'title': info['Subject'], + 'duration': int(info['duration']) / 1000.0, + 'formats': formats, + 'thumbnail': info.get('bimg') or info.get('img'), + }