X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/3ae74f711947d73bf6627bf312edeec41cec85c3..2e86127b07f6a63b7ce457331d600d9486b22b44:/youtube_dl/extractor/sohu.py diff --git a/youtube_dl/extractor/sohu.py b/youtube_dl/extractor/sohu.py index 77bb0a8..07f514a 100644 --- a/youtube_dl/extractor/sohu.py +++ b/youtube_dl/extractor/sohu.py @@ -1,4 +1,5 @@ # encoding: utf-8 +from __future__ import unicode_literals import json import re @@ -8,50 +9,56 @@ from ..utils import ExtractorError class SohuIE(InfoExtractor): - _VALID_URL = r'https?://tv\.sohu\.com/\d+?/n(?P<id>\d+)\.shtml.*?' + _VALID_URL = r'https?://(?P<mytv>my\.)?tv\.sohu\.com/.+?/(?(mytv)|n)(?P<id>\d+)\.shtml.*?' _TEST = { - u'url': u'http://tv.sohu.com/20130724/n382479172.shtml#super', - u'file': u'382479172.mp4', - u'md5': u'bde8d9a6ffd82c63a1eefaef4eeefec7', - u'info_dict': { - u'title': u'MV:Far East Movement《The Illest》', + 'url': 'http://tv.sohu.com/20130724/n382479172.shtml#super', + 'md5': 'bde8d9a6ffd82c63a1eefaef4eeefec7', + 'info_dict': { + 'id': '382479172', + 'ext': 'mp4', + 'title': 'MV:Far East Movement《The Illest》', }, + 'skip': 'Only available from China', } def _real_extract(self, url): - def _fetch_data(vid_id): - base_data_url = u'http://hot.vrs.sohu.com/vrs_flash.action?vid=' + def _fetch_data(vid_id, mytv=False): + if mytv: + base_data_url = 'http://my.tv.sohu.com/play/videonew.do?vid=' + else: + base_data_url = 'http://hot.vrs.sohu.com/vrs_flash.action?vid=' data_url = base_data_url + str(vid_id) data_json = self._download_webpage( data_url, video_id, - note=u'Downloading JSON data for ' + str(vid_id)) + note='Downloading JSON data for ' + str(vid_id)) return json.loads(data_json) mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + mytv = mobj.group('mytv') is not None webpage = self._download_webpage(url, video_id) raw_title = self._html_search_regex(r'(?s)<title>(.+?)</title>', - webpage, u'video title') + webpage, 'video title') title = raw_title.partition('-')[0].strip() - vid = 
self._html_search_regex(r'var vid="(\d+)"', webpage, - u'video path') - data = _fetch_data(vid) + vid = self._html_search_regex(r'var vid ?= ?["\'](\d+)["\']', webpage, + 'video path') + data = _fetch_data(vid, mytv) QUALITIES = ('ori', 'super', 'high', 'nor') vid_ids = [data['data'][q + 'Vid'] for q in QUALITIES if data['data'][q + 'Vid'] != 0] if not vid_ids: - raise ExtractorError(u'No formats available for this video') + raise ExtractorError('No formats available for this video') # For now, we just pick the highest available quality vid_id = vid_ids[-1] - format_data = data if vid == vid_id else _fetch_data(vid_id) + format_data = data if vid == vid_id else _fetch_data(vid_id, mytv) part_count = format_data['data']['totalBlocks'] allot = format_data['allot'] prot = format_data['prot'] @@ -64,7 +71,7 @@ class SohuIE(InfoExtractor): (allot, prot, clipsURL[i], su[i])) part_str = self._download_webpage( part_url, video_id, - note=u'Downloading part %d of %d' % (i+1, part_count)) + note='Downloading part %d of %d' % (i + 1, part_count)) part_info = part_str.split('|') video_url = '%s%s?key=%s' % (part_info[0], su[i], part_info[3])