X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/87a0165ca7e39af4dacb7ec637063b2cd35ae40b..20205e79eb4216762c923790b27dbe38f945293a:/youtube_dl/extractor/crunchyroll.py
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 8d5b69f..bc2d1fa 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -3,14 +3,17 @@ from __future__ import unicode_literals
import re
import json
-import base64
import zlib
from hashlib import sha1
from math import pow, sqrt, floor
from .common import InfoExtractor
+from .vrv import VRVIE
from ..compat import (
+ compat_b64decode,
+ compat_etree_Element,
compat_etree_fromstring,
+ compat_str,
compat_urllib_parse_urlencode,
compat_urllib_request,
compat_urlparse,
@@ -18,15 +21,16 @@ from ..compat import (
from ..utils import (
ExtractorError,
bytes_to_intlist,
+ extract_attributes,
+ float_or_none,
intlist_to_bytes,
int_or_none,
lowercase_escape,
+ merge_dicts,
remove_end,
sanitized_Request,
- unified_strdate,
urlencode_postdata,
xpath_text,
- extract_attributes,
)
from ..aes import (
aes_cbc_decrypt,
@@ -38,8 +42,18 @@ class CrunchyrollBaseIE(InfoExtractor):
_LOGIN_FORM = 'login_form'
_NETRC_MACHINE = 'crunchyroll'
+ def _call_rpc_api(self, method, video_id, note=None, data=None):
+ data = data or {}
+ data['req'] = 'RpcApi' + method
+ data = compat_urllib_parse_urlencode(data).encode('utf-8')
+ return self._download_xml(
+ 'https://www.crunchyroll.com/xml/',
+ video_id, note, fatal=False, data=data, headers={
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ })
+
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
@@ -47,7 +61,7 @@ class CrunchyrollBaseIE(InfoExtractor):
self._LOGIN_URL, None, 'Downloading login page')
def is_logged(webpage):
- return '
Redirecting' in webpage
+ return 'href="/logout"' in webpage
# Already logged in
if is_logged(login_page):
@@ -90,19 +104,6 @@ class CrunchyrollBaseIE(InfoExtractor):
def _real_initialize(self):
self._login()
- def _download_webpage(self, url_or_request, *args, **kwargs):
- request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
- else sanitized_Request(url_or_request))
- # Accept-Language must be set explicitly to accept any language to avoid issues
- # similar to https://github.com/rg3/youtube-dl/issues/6797.
- # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
- # should be imposed or not (from what I can see it just takes the first language
- # ignoring the priority and requires it to correspond the IP). By the way this causes
- # Crunchyroll to not work in georestriction cases in some browsers that don't place
- # the locale lang first in header. However allowing any language seems to workaround the issue.
- request.add_header('Accept-Language', '*')
- return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
-
@staticmethod
def _add_skip_wall(url):
parsed_url = compat_urlparse.urlparse(url)
@@ -111,22 +112,23 @@ class CrunchyrollBaseIE(InfoExtractor):
# > This content may be inappropriate for some people.
# > Are you sure you want to continue?
# since it's not disabled by default in crunchyroll account's settings.
- # See https://github.com/rg3/youtube-dl/issues/7202.
+ # See https://github.com/ytdl-org/youtube-dl/issues/7202.
qs['skip_wall'] = ['1']
return compat_urlparse.urlunparse(
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
-class CrunchyrollIE(CrunchyrollBaseIE):
- _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)'
+class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
+ IE_NAME = 'crunchyroll'
+ _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
'id': '645513',
- 'ext': 'flv',
+ 'ext': 'mp4',
'title': 'Wanna be the Strongest in the World Episode 1 â An Idol-Wrestler is Born!',
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
- 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
'upload_date': '20131013',
'url': 're:(?!.*&)',
@@ -135,6 +137,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# rtmp
'skip_download': True,
},
+ 'skip': 'Video gone',
}, {
'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
'info_dict': {
@@ -142,7 +145,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'ext': 'flv',
'title': 'Culture Japan Episode 1 â Rebuilding Japan after the 3.11',
'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
- 'thumbnail': 're:^https?://.*\.jpg$',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Danny Choo Network',
'upload_date': '20120213',
},
@@ -156,11 +159,33 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'info_dict': {
'id': '702409',
'ext': 'mp4',
- 'title': 'Re:ZERO -Starting Life in Another World- Episode 5 â The Morning of Our Promise Is Still Distant',
- 'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
- 'thumbnail': 're:^https?://.*\.jpg$',
- 'uploader': 'TV TOKYO',
- 'upload_date': '20160508',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Re:Zero Partners',
+ 'timestamp': 1462098900,
+ 'upload_date': '20160501',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
+ 'info_dict': {
+ 'id': '727589',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'thumbnail': r're:^https?://.*\.jpg$',
+ 'uploader': 'Kadokawa Pictures Inc.',
+ 'timestamp': 1484130900,
+ 'upload_date': '20170111',
+ 'series': compat_str,
+ 'season': "KONOSUBA -God's blessing on this wonderful world! 2",
+ 'season_number': 2,
+ 'episode': 'Give Me Deliverance From This Judicial Injustice!',
+ 'episode_number': 1,
},
'params': {
# m3u8 download
@@ -173,6 +198,62 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# geo-restricted (US), 18+ maturity wall, non-premium available
'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
'only_matching': True,
+ }, {
+ # A description with double quotes
+ 'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
+ 'info_dict': {
+ 'id': '535080',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'uploader': 'Marvelous AQL Inc.',
+ 'timestamp': 1255512600,
+ 'upload_date': '20091014',
+ },
+ 'params': {
+ # Just test metadata extraction
+ 'skip_download': True,
+ },
+ }, {
+ # make sure we can extract an uploader name that's not a link
+ 'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
+ 'info_dict': {
+ 'id': '606899',
+ 'ext': 'mp4',
+ 'title': 'Hakuoki Reimeiroku Episode 1 â Dawn of the Divine Warriors',
+ 'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
+ 'uploader': 'Geneon Entertainment',
+ 'upload_date': '20120717',
+ },
+ 'params': {
+ # just test metadata extraction
+ 'skip_download': True,
+ },
+ 'skip': 'Video gone',
+ }, {
+ # A video with a vastly different season name compared to the series name
+ 'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
+ 'info_dict': {
+ 'id': '590532',
+ 'ext': 'mp4',
+ 'title': compat_str,
+ 'description': compat_str,
+ 'uploader': 'TV TOKYO',
+ 'timestamp': 1330956000,
+ 'upload_date': '20120305',
+ 'series': 'Nyarko-san: Another Crawling Chaos',
+ 'season': 'Haiyoru! Nyaruani (ONA)',
+ },
+ 'params': {
+ # Just test metadata extraction
+ 'skip_download': True,
+ },
+ }, {
+ 'url': 'http://www.crunchyroll.com/media-723735',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
+ 'only_matching': True,
}]
_FORMAT_IDS = {
@@ -182,9 +263,22 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'1080': ('80', '108'),
}
+ def _download_webpage(self, url_or_request, *args, **kwargs):
+ request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
+ else sanitized_Request(url_or_request))
+ # Accept-Language must be set explicitly to accept any language to avoid issues
+ # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
+ # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
+ # should be imposed or not (from what I can see it just takes the first language
+ # ignoring the priority and requires it to correspond the IP). By the way this causes
+ # Crunchyroll to not work in georestriction cases in some browsers that don't place
+ # the locale lang first in header. However allowing any language seems to workaround the issue.
+ request.add_header('Accept-Language', '*')
+ return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
+
def _decrypt_subtitles(self, data, iv, id):
- data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
- iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
+ data = bytes_to_intlist(compat_b64decode(data))
+ iv = bytes_to_intlist(compat_b64decode(iv))
id = int(id)
def obfuscate_key_aux(count, modulo, start):
@@ -236,8 +330,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
- output += """ScaledBorderAndShadow: no
-
+ output += """
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
"""
@@ -299,15 +392,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
def _get_subtitles(self, video_id, webpage):
subtitles = {}
for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
- sub_page = self._download_webpage(
- 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
- video_id, note='Downloading subtitles for ' + sub_name)
- id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
- iv = self._search_regex(r'([^<]+)', sub_page, 'subtitle_iv', fatal=False)
- data = self._search_regex(r'([^<]+)', sub_page, 'subtitle_data', fatal=False)
- if not id or not iv or not data:
+ sub_doc = self._call_rpc_api(
+ 'Subtitle_GetXml', video_id,
+ 'Downloading subtitles for ' + sub_name, data={
+ 'subtitle_script_id': sub_id,
+ })
+ if not isinstance(sub_doc, compat_etree_Element):
continue
- subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+ sid = sub_doc.get('id')
+ iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
+ data = xpath_text(sub_doc, 'data', 'subtitle data')
+ if not sid or not iv or not data:
+ continue
+ subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
@@ -324,7 +421,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
else:
webpage_url = 'http://www.' + mobj.group('url')
- webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
+ webpage = self._download_webpage(
+ self._add_skip_wall(webpage_url), video_id,
+ headers=self.geo_verification_headers())
note_m = self._html_search_regex(
r'(.+?)
',
webpage, 'trailer-notice', default='')
@@ -340,123 +439,205 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
if 'To view this, please log in to verify you are 18 or older.' in webpage:
self.raise_login_required()
+ media = self._parse_json(self._search_regex(
+ r'vilos\.config\.media\s*=\s*({.+?});',
+ webpage, 'vilos media', default='{}'), video_id)
+ media_metadata = media.get('metadata') or {}
+
+ language = self._search_regex(
+ r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P(?:(?!\1).)+)\1',
+ webpage, 'language', default=None, group='lang')
+
video_title = self._html_search_regex(
- r'(?s)]*>((?:(?!]+itemprop=["\']title["\'][^>]*>(?:(?!',
- webpage, 'video_title')
+ (r'(?s)]*>((?:(?!]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!',
+ r'(.+?),\s+-\s+.+? Crunchyroll'),
+ webpage, 'video_title', default=None)
+ if not video_title:
+ video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
video_title = re.sub(r' {2,}', ' ', video_title)
- video_description = self._html_search_regex(
- r'