X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/af014acd27e0b471d5903630847eabb26437b46c..4fbf6829491780534e93bd27e5a749e608c97b46:/youtube_dl/extractor/crunchyroll.py?ds=sidebyside
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py
index 8bdaf0c..85a9a57 100644
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -3,13 +3,15 @@ from __future__ import unicode_literals
import re
import json
-import base64
import zlib
from hashlib import sha1
from math import pow, sqrt, floor
from .common import InfoExtractor
+from .vrv import VRVIE
from ..compat import (
+ compat_b64decode,
+ compat_etree_Element,
compat_etree_fromstring,
compat_urllib_parse_urlencode,
compat_urllib_request,
@@ -18,6 +20,8 @@ from ..compat import (
from ..utils import (
ExtractorError,
bytes_to_intlist,
+ extract_attributes,
+ float_or_none,
intlist_to_bytes,
int_or_none,
lowercase_escape,
@@ -26,7 +30,6 @@ from ..utils import (
unified_strdate,
urlencode_postdata,
xpath_text,
- extract_attributes,
)
from ..aes import (
aes_cbc_decrypt,
@@ -38,8 +41,18 @@ class CrunchyrollBaseIE(InfoExtractor):
_LOGIN_FORM = 'login_form'
_NETRC_MACHINE = 'crunchyroll'
+    def _call_rpc_api(self, method, video_id, note=None, data=None):
+        """Call a Crunchyroll ``RpcApi<method>`` endpoint and return the XML.
+
+        method   -- suffix appended to ``'RpcApi'`` to build the ``req`` field
+                    (e.g. ``'Subtitle_GetXml'``).
+        video_id -- id forwarded to ``_download_xml``.
+        note     -- optional progress message forwarded to ``_download_xml``.
+        data     -- optional dict of extra form fields; it is not modified.
+
+        Returns whatever ``_download_xml`` returns.  The download is issued
+        with ``fatal=False``, so a failed request does not raise here and the
+        caller should type-check the result before using it as an element.
+        """
+        # Work on a copy: adding 'req' to the caller's dict would leak the
+        # field into any later reuse of that dict.
+        payload = dict(data or {})
+        payload['req'] = 'RpcApi' + method
+        return self._download_xml(
+            'https://www.crunchyroll.com/xml/',
+            video_id, note, fatal=False,
+            data=compat_urllib_parse_urlencode(payload).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+            })
+
def _login(self):
- (username, password) = self._get_login_info()
+ username, password = self._get_login_info()
if username is None:
return
@@ -47,7 +60,7 @@ class CrunchyrollBaseIE(InfoExtractor):
self._LOGIN_URL, None, 'Downloading login page')
def is_logged(webpage):
- return '
Redirecting' in webpage
+ return 'href="/logout"' in webpage
# Already logged in
if is_logged(login_page):
@@ -90,19 +103,6 @@ class CrunchyrollBaseIE(InfoExtractor):
def _real_initialize(self):
# Delegates to _login(), which returns early when no username is configured.
self._login()
- def _download_webpage(self, url_or_request, *args, **kwargs):
- request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
- else sanitized_Request(url_or_request))
- # Accept-Language must be set explicitly to accept any language to avoid issues
- # similar to https://github.com/rg3/youtube-dl/issues/6797.
- # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
- # should be imposed or not (from what I can see it just takes the first language
- # ignoring the priority and requires it to correspond the IP). By the way this causes
- # Crunchyroll to not work in georestriction cases in some browsers that don't place
- # the locale lang first in header. However allowing any language seems to workaround the issue.
- request.add_header('Accept-Language', '*')
- return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
-
@staticmethod
def _add_skip_wall(url):
parsed_url = compat_urlparse.urlparse(url)
@@ -111,14 +111,15 @@ class CrunchyrollBaseIE(InfoExtractor):
# > This content may be inappropriate for some people.
# > Are you sure you want to continue?
# since it's not disabled by default in crunchyroll account's settings.
- # See https://github.com/rg3/youtube-dl/issues/7202.
+ # See https://github.com/ytdl-org/youtube-dl/issues/7202.
qs['skip_wall'] = ['1']
return compat_urlparse.urlunparse(
parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
-class CrunchyrollIE(CrunchyrollBaseIE):
- _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)'
+class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
+ IE_NAME = 'crunchyroll'
+ _VALID_URL = r'https?://(?:(?Pwww|m)\.)?(?Pcrunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P[0-9]+))(?:[/?&]|$)'
_TESTS = [{
'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
'info_dict': {
@@ -126,7 +127,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'ext': 'mp4',
'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
'description': 'md5:2d17137920c64f2f49981a7797d275ef',
- 'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+ 'thumbnail': r're:^https?://.*\.jpg$',
'uploader': 'Yomiuri Telecasting Corporation (YTV)',
'upload_date': '20131013',
'url': 're:(?!.*&)',
@@ -199,7 +200,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'info_dict': {
'id': '535080',
'ext': 'mp4',
- 'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
+ 'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
'uploader': 'Marvelous AQL Inc.',
'upload_date': '20091021',
@@ -240,6 +241,12 @@ class CrunchyrollIE(CrunchyrollBaseIE):
# Just test metadata extraction
'skip_download': True,
},
+ }, {
+ 'url': 'http://www.crunchyroll.com/media-723735',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
+ 'only_matching': True,
}]
_FORMAT_IDS = {
@@ -249,9 +256,22 @@ class CrunchyrollIE(CrunchyrollBaseIE):
'1080': ('80', '108'),
}
+    def _download_webpage(self, url_or_request, *args, **kwargs):
+        """Download a webpage with ``Accept-Language: *`` set on the request.
+
+        url_or_request -- a URL string or an existing
+                          ``compat_urllib_request.Request``; a Request that
+                          is passed in gets the header added to it in place.
+        *args/**kwargs -- forwarded unchanged to the parent implementation.
+
+        Returns the result of the parent class's ``_download_webpage``.
+        """
+        if isinstance(url_or_request, compat_urllib_request.Request):
+            request = url_or_request
+        else:
+            request = sanitized_Request(url_or_request)
+        # Accept-Language must be set explicitly to accept any language to avoid issues
+        # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
+        # Along with the IP address, Crunchyroll uses Accept-Language to guess whether
+        # georestriction should be imposed (from what I can see it just takes the first
+        # language, ignoring the priority, and requires it to correspond to the IP).
+        # This also makes Crunchyroll fail in georestricted cases in browsers that don't
+        # place the locale language first in the header. Allowing any language seems to
+        # work around the issue.
+        request.add_header('Accept-Language', '*')
+        # This method is defined on CrunchyrollIE, so start the MRO lookup from
+        # CrunchyrollIE; naming CrunchyrollBaseIE here would skip any override
+        # that class defines.
+        return super(CrunchyrollIE, self)._download_webpage(request, *args, **kwargs)
+
def _decrypt_subtitles(self, data, iv, id):
- data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
- iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
+ data = bytes_to_intlist(compat_b64decode(data))
+ iv = bytes_to_intlist(compat_b64decode(iv))
id = int(id)
def obfuscate_key_aux(count, modulo, start):
@@ -365,15 +385,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
def _get_subtitles(self, video_id, webpage):
subtitles = {}
for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
- sub_page = self._download_webpage(
- 'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
- video_id, note='Downloading subtitles for ' + sub_name)
- id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
- iv = self._search_regex(r'([^<]+)', sub_page, 'subtitle_iv', fatal=False)
- data = self._search_regex(r'([^<]+)', sub_page, 'subtitle_data', fatal=False)
- if not id or not iv or not data:
+ sub_doc = self._call_rpc_api(
+ 'Subtitle_GetXml', video_id,
+ 'Downloading subtitles for ' + sub_name, data={
+ 'subtitle_script_id': sub_id,
+ })
+ if not isinstance(sub_doc, compat_etree_Element):
+ continue
+ sid = sub_doc.get('id')
+ iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
+ data = xpath_text(sub_doc, 'data', 'subtitle data')
+ if not sid or not iv or not data:
continue
- subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+ subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
if not lang_code:
continue
@@ -408,13 +432,22 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
if 'To view this, please log in to verify you are 18 or older.' in webpage:
self.raise_login_required()
+ media = self._parse_json(self._search_regex(
+ r'vilos\.config\.media\s*=\s*({.+?});',
+ webpage, 'vilos media', default='{}'), video_id)
+ media_metadata = media.get('metadata') or {}
+
+ language = self._search_regex(
+ r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
+ webpage, 'language', default=None, group='lang')
+
video_title = self._html_search_regex(
r'(?s)]*>((?:(?!]+itemprop=["\']title["\'][^>]*>(?:(?!',
webpage, 'video_title')
video_title = re.sub(r' {2,}', ' ', video_title)
- video_description = self._parse_json(self._html_search_regex(
+ video_description = (self._parse_json(self._html_search_regex(
r'