X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/e76f531201cd41dfc0ce00be28bcc5c575c7acc5..d1d8237037fb87a57bd1d073c3b9e642d4a75016:/youtube_dl/extractor/crunchyroll.py diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 6e5999c..90a6430 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -11,8 +11,7 @@ from math import pow, sqrt, floor from .common import InfoExtractor from ..compat import ( compat_etree_fromstring, - compat_urllib_parse, - compat_urllib_parse_unquote, + compat_urllib_parse_urlencode, compat_urllib_request, compat_urlparse, ) @@ -23,9 +22,11 @@ from ..utils import ( int_or_none, lowercase_escape, remove_end, + sanitized_Request, unified_strdate, urlencode_postdata, xpath_text, + extract_attributes, ) from ..aes import ( aes_cbc_decrypt, @@ -46,16 +47,16 @@ class CrunchyrollBaseIE(InfoExtractor): 'name': username, 'password': password, }) - login_request = compat_urllib_request.Request(login_url, data) + login_request = sanitized_Request(login_url, data) login_request.add_header('Content-Type', 'application/x-www-form-urlencoded') self._download_webpage(login_request, None, False, 'Wrong login info') def _real_initialize(self): self._login() - def _download_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True, tries=1, timeout=5, encoding=None): + def _download_webpage(self, url_or_request, *args, **kwargs): request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request) - else compat_urllib_request.Request(url_or_request)) + else sanitized_Request(url_or_request)) # Accept-Language must be set explicitly to accept any language to avoid issues # similar to https://github.com/rg3/youtube-dl/issues/6797. # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction @@ -64,8 +65,7 @@ class CrunchyrollBaseIE(InfoExtractor): # Crunchyroll to not work in georestriction cases in some browsers that don't place # the locale lang first in header. However allowing any language seems to workaround the issue. request.add_header('Accept-Language', '*') - return super(CrunchyrollBaseIE, self)._download_webpage( - request, video_id, note, errnote, fatal, tries, timeout, encoding) + return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs) @staticmethod def _add_skip_wall(url): @@ -78,7 +78,7 @@ class CrunchyrollBaseIE(InfoExtractor): # See https://github.com/rg3/youtube-dl/issues/7202. qs['skip_wall'] = ['1'] return compat_urlparse.urlunparse( - parsed_url._replace(query=compat_urllib_parse.urlencode(qs, True))) + parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True))) class CrunchyrollIE(CrunchyrollBaseIE): @@ -179,40 +179,40 @@ class CrunchyrollIE(CrunchyrollBaseIE): return assvalue output = '[Script Info]\n' - output += 'Title: %s\n' % sub_root.attrib["title"] + output += 'Title: %s\n' % sub_root.attrib['title'] output += 'ScriptType: v4.00+\n' - output += 'WrapStyle: %s\n' % sub_root.attrib["wrap_style"] - output += 'PlayResX: %s\n' % sub_root.attrib["play_res_x"] - output += 'PlayResY: %s\n' % sub_root.attrib["play_res_y"] + output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style'] + output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x'] + output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y'] output += """ScaledBorderAndShadow: yes [V4+ Styles] Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding """ for style in sub_root.findall('./styles/style'): - output += 'Style: ' + style.attrib["name"] - output += ',' + style.attrib["font_name"] - output += ',' + style.attrib["font_size"] - output += ',' + style.attrib["primary_colour"] - output += ',' + style.attrib["secondary_colour"] - output += ',' + style.attrib["outline_colour"] - output += ',' + style.attrib["back_colour"] - output += ',' + ass_bool(style.attrib["bold"]) - output += ',' + ass_bool(style.attrib["italic"]) - output += ',' + ass_bool(style.attrib["underline"]) - output += ',' + ass_bool(style.attrib["strikeout"]) - output += ',' + style.attrib["scale_x"] - output += ',' + style.attrib["scale_y"] - output += ',' + style.attrib["spacing"] - output += ',' + style.attrib["angle"] - output += ',' + style.attrib["border_style"] - output += ',' + style.attrib["outline"] - output += ',' + style.attrib["shadow"] - output += ',' + style.attrib["alignment"] - output += ',' + style.attrib["margin_l"] - output += ',' + style.attrib["margin_r"] - output += ',' + style.attrib["margin_v"] - output += ',' + style.attrib["encoding"] + output += 'Style: ' + style.attrib['name'] + output += ',' + style.attrib['font_name'] + output += ',' + style.attrib['font_size'] + output += ',' + style.attrib['primary_colour'] + output += ',' + style.attrib['secondary_colour'] + output += ',' + style.attrib['outline_colour'] + output += ',' + style.attrib['back_colour'] + output += ',' + ass_bool(style.attrib['bold']) + output += ',' + ass_bool(style.attrib['italic']) + output += ',' + ass_bool(style.attrib['underline']) + output += ',' + ass_bool(style.attrib['strikeout']) + output += ',' + style.attrib['scale_x'] + output += ',' + style.attrib['scale_y'] + output += ',' + style.attrib['spacing'] + output += ',' + style.attrib['angle'] + output += ',' + style.attrib['border_style'] + output += ',' + style.attrib['outline'] + output += ',' + style.attrib['shadow'] + output += ',' + style.attrib['alignment'] + output += ',' + style.attrib['margin_l'] + output += ',' + style.attrib['margin_r'] + output += ',' + style.attrib['margin_v'] + output += ',' + style.attrib['encoding'] output += '\n' output += """ @@ -221,15 +221,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text """ for event in sub_root.findall('./events/event'): output += 'Dialogue: 0' - output += ',' + event.attrib["start"] - output += ',' + event.attrib["end"] - output += ',' + event.attrib["style"] - output += ',' + event.attrib["name"] - output += ',' + event.attrib["margin_l"] - output += ',' + event.attrib["margin_r"] - output += ',' + event.attrib["margin_v"] - output += ',' + event.attrib["effect"] - output += ',' + event.attrib["text"] + output += ',' + event.attrib['start'] + output += ',' + event.attrib['end'] + output += ',' + event.attrib['style'] + output += ',' + event.attrib['name'] + output += ',' + event.attrib['margin_l'] + output += ',' + event.attrib['margin_r'] + output += ',' + event.attrib['margin_v'] + output += ',' + event.attrib['effect'] + output += ',' + event.attrib['text'] output += '\n' return output @@ -306,30 +306,40 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text r']+href="/publisher/[^"]+"[^>]*>([^<]+)', webpage, 'video_uploader', fatal=False) - playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) - playerdata_req = compat_urllib_request.Request(playerdata_url) - playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) - playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') - playerdata = self._download_webpage(playerdata_req, video_id, note='Downloading media info') - - stream_id = self._search_regex(r'([^<]+)', playerdata, 'stream_id') - video_thumbnail = self._search_regex(r'([^<]+)', playerdata, 'thumbnail', fatal=False) - + available_fmts = [] + for a, fmt in re.findall(r'(]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage): + attrs = extract_attributes(a) + href = attrs.get('href') + if href and '/freetrial' in href: + continue + available_fmts.append(fmt) + if not available_fmts: + for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'): + available_fmts = re.findall(p, webpage) + if available_fmts: + break + video_encode_ids = [] formats = [] - for fmt in re.findall(r'showmedia\.([0-9]{3,4})p', webpage): + for fmt in available_fmts: stream_quality, stream_format = self._FORMAT_IDS[fmt] video_format = fmt + 'p' - streamdata_req = compat_urllib_request.Request( + streamdata_req = sanitized_Request( 'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s' - % (stream_id, stream_format, stream_quality), - compat_urllib_parse.urlencode({'current_page': url}).encode('utf-8')) + % (video_id, stream_format, stream_quality), + compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8')) streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') streamdata = self._download_xml( streamdata_req, video_id, note='Downloading media info for %s' % video_format) stream_info = streamdata.find('./{default}preload/stream_info') - video_url = stream_info.find('./host').text - video_play_path = stream_info.find('./file').text + video_encode_id = xpath_text(stream_info, './video_encode_id') + if video_encode_id in video_encode_ids: + continue + video_encode_ids.append(video_encode_id) + video_url = xpath_text(stream_info, './host') + video_play_path = xpath_text(stream_info, './file') + if not video_url or not video_play_path: + continue metadata = stream_info.find('./metadata') format_info = { 'format': video_format, @@ -357,6 +367,14 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'ext': 'flv', }) formats.append(format_info) + self._sort_formats(formats) + + metadata = self._download_xml( + 'http://www.crunchyroll.com/xml', video_id, + note='Downloading media info', query={ + 'req': 'RpcApiVideoPlayer_GetMediaMetadata', + 'media_id': video_id, + }) subtitles = self.extract_subtitles(video_id, webpage) @@ -364,16 +382,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text 'id': video_id, 'title': video_title, 'description': video_description, - 'thumbnail': video_thumbnail, + 'thumbnail': xpath_text(metadata, 'episode_image_url'), 'uploader': video_uploader, 'upload_date': video_upload_date, + 'series': xpath_text(metadata, 'series_title'), + 'episode': xpath_text(metadata, 'episode_title'), + 'episode_number': int_or_none(xpath_text(metadata, 'episode_number')), 'subtitles': subtitles, 'formats': formats, } class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE): - IE_NAME = "crunchyroll:playlist" + IE_NAME = 'crunchyroll:playlist' _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P[\w\-]+))/?(?:\?|$)' _TESTS = [{