]> Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/crunchyroll.py
Update upstream source from tag 'upstream/2020.06.16'
[youtubedl] / youtube_dl / extractor / crunchyroll.py
index b92f2544799769abc9690a04d4d1090af435ca79..bc2d1fa8b041e3ec1bbc4d6d1b5f055ac31ee140 100644 (file)
@@ -3,14 +3,17 @@ from __future__ import unicode_literals
 
 import re
 import json
 
 import re
 import json
-import base64
 import zlib
 
 from hashlib import sha1
 from math import pow, sqrt, floor
 from .common import InfoExtractor
 import zlib
 
 from hashlib import sha1
 from math import pow, sqrt, floor
 from .common import InfoExtractor
+from .vrv import VRVIE
 from ..compat import (
 from ..compat import (
+    compat_b64decode,
+    compat_etree_Element,
     compat_etree_fromstring,
     compat_etree_fromstring,
+    compat_str,
     compat_urllib_parse_urlencode,
     compat_urllib_request,
     compat_urlparse,
     compat_urllib_parse_urlencode,
     compat_urllib_request,
     compat_urlparse,
@@ -18,15 +21,16 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     bytes_to_intlist,
 from ..utils import (
     ExtractorError,
     bytes_to_intlist,
+    extract_attributes,
+    float_or_none,
     intlist_to_bytes,
     int_or_none,
     lowercase_escape,
     intlist_to_bytes,
     int_or_none,
     lowercase_escape,
+    merge_dicts,
     remove_end,
     sanitized_Request,
     remove_end,
     sanitized_Request,
-    unified_strdate,
     urlencode_postdata,
     xpath_text,
     urlencode_postdata,
     xpath_text,
-    extract_attributes,
 )
 from ..aes import (
     aes_cbc_decrypt,
 )
 from ..aes import (
     aes_cbc_decrypt,
@@ -43,32 +47,21 @@ class CrunchyrollBaseIE(InfoExtractor):
         data['req'] = 'RpcApi' + method
         data = compat_urllib_parse_urlencode(data).encode('utf-8')
         return self._download_xml(
         data['req'] = 'RpcApi' + method
         data = compat_urllib_parse_urlencode(data).encode('utf-8')
         return self._download_xml(
-            'http://www.crunchyroll.com/xml/',
+            'https://www.crunchyroll.com/xml/',
             video_id, note, fatal=False, data=data, headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
             })
 
     def _login(self):
             video_id, note, fatal=False, data=data, headers={
                 'Content-Type': 'application/x-www-form-urlencoded',
             })
 
     def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
         if username is None:
             return
 
         if username is None:
             return
 
-        self._download_webpage(
-            'https://www.crunchyroll.com/?a=formhandler',
-            None, 'Logging in', 'Wrong login info',
-            data=urlencode_postdata({
-                'formname': 'RpcApiUser_Login',
-                'next_url': 'https://www.crunchyroll.com/acct/membership',
-                'name': username,
-                'password': password,
-            }))
-
-        '''
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
 
         def is_logged(webpage):
         login_page = self._download_webpage(
             self._LOGIN_URL, None, 'Downloading login page')
 
         def is_logged(webpage):
-            return '<title>Redirecting' in webpage
+            return 'href="/logout"' in webpage
 
         # Already logged in
         if is_logged(login_page):
 
         # Already logged in
         if is_logged(login_page):
@@ -107,24 +100,10 @@ class CrunchyrollBaseIE(InfoExtractor):
             raise ExtractorError('Unable to login: %s' % error, expected=True)
 
         raise ExtractorError('Unable to log in')
             raise ExtractorError('Unable to login: %s' % error, expected=True)
 
         raise ExtractorError('Unable to log in')
-        '''
 
     def _real_initialize(self):
         self._login()
 
 
     def _real_initialize(self):
         self._login()
 
-    def _download_webpage(self, url_or_request, *args, **kwargs):
-        request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
-                   else sanitized_Request(url_or_request))
-        # Accept-Language must be set explicitly to accept any language to avoid issues
-        # similar to https://github.com/rg3/youtube-dl/issues/6797.
-        # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
-        # should be imposed or not (from what I can see it just takes the first language
-        # ignoring the priority and requires it to correspond the IP). By the way this causes
-        # Crunchyroll to not work in georestriction cases in some browsers that don't place
-        # the locale lang first in header. However allowing any language seems to workaround the issue.
-        request.add_header('Accept-Language', '*')
-        return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
-
     @staticmethod
     def _add_skip_wall(url):
         parsed_url = compat_urlparse.urlparse(url)
     @staticmethod
     def _add_skip_wall(url):
         parsed_url = compat_urlparse.urlparse(url)
@@ -133,14 +112,15 @@ class CrunchyrollBaseIE(InfoExtractor):
         # > This content may be inappropriate for some people.
         # > Are you sure you want to continue?
         # since it's not disabled by default in crunchyroll account's settings.
         # > This content may be inappropriate for some people.
         # > Are you sure you want to continue?
         # since it's not disabled by default in crunchyroll account's settings.
-        # See https://github.com/rg3/youtube-dl/issues/7202.
+        # See https://github.com/ytdl-org/youtube-dl/issues/7202.
         qs['skip_wall'] = ['1']
         return compat_urlparse.urlunparse(
             parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
 
         qs['skip_wall'] = ['1']
         return compat_urlparse.urlunparse(
             parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
 
 
-class CrunchyrollIE(CrunchyrollBaseIE):
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
+class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
+    IE_NAME = 'crunchyroll'
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|(?:[^/]*/){1,2}[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
         'info_dict': {
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
         'info_dict': {
@@ -148,7 +128,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             'ext': 'mp4',
             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
             'description': 'md5:2d17137920c64f2f49981a7797d275ef',
             'ext': 'mp4',
             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
             'description': 'md5:2d17137920c64f2f49981a7797d275ef',
-            'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+            'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'Yomiuri Telecasting Corporation (YTV)',
             'upload_date': '20131013',
             'url': 're:(?!.*&amp)',
             'uploader': 'Yomiuri Telecasting Corporation (YTV)',
             'upload_date': '20131013',
             'url': 're:(?!.*&amp)',
@@ -157,6 +137,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # rtmp
             'skip_download': True,
         },
             # rtmp
             'skip_download': True,
         },
+        'skip': 'Video gone',
     }, {
         'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
         'info_dict': {
     }, {
         'url': 'http://www.crunchyroll.com/media-589804/culture-japan-1',
         'info_dict': {
@@ -178,11 +159,12 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         'info_dict': {
             'id': '702409',
             'ext': 'mp4',
         'info_dict': {
             'id': '702409',
             'ext': 'mp4',
-            'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
-            'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
+            'title': compat_str,
+            'description': compat_str,
             'thumbnail': r're:^https?://.*\.jpg$',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'uploader': 'TV TOKYO',
-            'upload_date': '20160508',
+            'uploader': 'Re:Zero Partners',
+            'timestamp': 1462098900,
+            'upload_date': '20160501',
         },
         'params': {
             # m3u8 download
         },
         'params': {
             # m3u8 download
@@ -193,12 +175,13 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         'info_dict': {
             'id': '727589',
             'ext': 'mp4',
         'info_dict': {
             'id': '727589',
             'ext': 'mp4',
-            'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
-            'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
+            'title': compat_str,
+            'description': compat_str,
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'Kadokawa Pictures Inc.',
             'thumbnail': r're:^https?://.*\.jpg$',
             'uploader': 'Kadokawa Pictures Inc.',
-            'upload_date': '20170118',
-            'series': "KONOSUBA -God's blessing on this wonderful world!",
+            'timestamp': 1484130900,
+            'upload_date': '20170111',
+            'series': compat_str,
             'season': "KONOSUBA -God's blessing on this wonderful world! 2",
             'season_number': 2,
             'episode': 'Give Me Deliverance From This Judicial Injustice!',
             'season': "KONOSUBA -God's blessing on this wonderful world! 2",
             'season_number': 2,
             'episode': 'Give Me Deliverance From This Judicial Injustice!',
@@ -221,10 +204,11 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         'info_dict': {
             'id': '535080',
             'ext': 'mp4',
         'info_dict': {
             'id': '535080',
             'ext': 'mp4',
-            'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
-            'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
+            'title': compat_str,
+            'description': compat_str,
             'uploader': 'Marvelous AQL Inc.',
             'uploader': 'Marvelous AQL Inc.',
-            'upload_date': '20091021',
+            'timestamp': 1255512600,
+            'upload_date': '20091014',
         },
         'params': {
             # Just test metadata extraction
         },
         'params': {
             # Just test metadata extraction
@@ -245,15 +229,17 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # just test metadata extraction
             'skip_download': True,
         },
             # just test metadata extraction
             'skip_download': True,
         },
+        'skip': 'Video gone',
     }, {
         # A video with a vastly different season name compared to the series name
         'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
         'info_dict': {
             'id': '590532',
             'ext': 'mp4',
     }, {
         # A video with a vastly different season name compared to the series name
         'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
         'info_dict': {
             'id': '590532',
             'ext': 'mp4',
-            'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
-            'description': 'Mahiro and Nyaruko talk about official certification.',
+            'title': compat_str,
+            'description': compat_str,
             'uploader': 'TV TOKYO',
             'uploader': 'TV TOKYO',
+            'timestamp': 1330956000,
             'upload_date': '20120305',
             'series': 'Nyarko-san: Another Crawling Chaos',
             'season': 'Haiyoru! Nyaruani (ONA)',
             'upload_date': '20120305',
             'series': 'Nyarko-san: Another Crawling Chaos',
             'season': 'Haiyoru! Nyaruani (ONA)',
@@ -262,6 +248,12 @@ class CrunchyrollIE(CrunchyrollBaseIE):
             # Just test metadata extraction
             'skip_download': True,
         },
             # Just test metadata extraction
             'skip_download': True,
         },
+    }, {
+        'url': 'http://www.crunchyroll.com/media-723735',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.crunchyroll.com/en-gb/mob-psycho-100/episode-2-urban-legends-encountering-rumors-780921',
+        'only_matching': True,
     }]
 
     _FORMAT_IDS = {
     }]
 
     _FORMAT_IDS = {
@@ -271,9 +263,22 @@ class CrunchyrollIE(CrunchyrollBaseIE):
         '1080': ('80', '108'),
     }
 
         '1080': ('80', '108'),
     }
 
+    def _download_webpage(self, url_or_request, *args, **kwargs):
+        request = (url_or_request if isinstance(url_or_request, compat_urllib_request.Request)
+                   else sanitized_Request(url_or_request))
+        # Accept-Language must be set explicitly to accept any language to avoid issues
+        # similar to https://github.com/ytdl-org/youtube-dl/issues/6797.
+        # Along with IP address Crunchyroll uses Accept-Language to guess whether georestriction
+        # should be imposed or not (from what I can see it just takes the first language
+        # ignoring the priority and requires it to correspond the IP). By the way this causes
+        # Crunchyroll to not work in georestriction cases in some browsers that don't place
+        # the locale lang first in header. However allowing any language seems to workaround the issue.
+        request.add_header('Accept-Language', '*')
+        return super(CrunchyrollBaseIE, self)._download_webpage(request, *args, **kwargs)
+
     def _decrypt_subtitles(self, data, iv, id):
     def _decrypt_subtitles(self, data, iv, id):
-        data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
-        iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
+        data = bytes_to_intlist(compat_b64decode(data))
+        iv = bytes_to_intlist(compat_b64decode(iv))
         id = int(id)
 
         def obfuscate_key_aux(count, modulo, start):
         id = int(id)
 
         def obfuscate_key_aux(count, modulo, start):
@@ -392,7 +397,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'Downloading subtitles for ' + sub_name, data={
                     'subtitle_script_id': sub_id,
                 })
                 'Downloading subtitles for ' + sub_name, data={
                     'subtitle_script_id': sub_id,
                 })
-            if sub_doc is None:
+            if not isinstance(sub_doc, compat_etree_Element):
                 continue
             sid = sub_doc.get('id')
             iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
                 continue
             sid = sub_doc.get('id')
             iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
@@ -434,111 +439,139 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
         if 'To view this, please log in to verify you are 18 or older.' in webpage:
             self.raise_login_required()
 
         if 'To view this, please log in to verify you are 18 or older.' in webpage:
             self.raise_login_required()
 
+        media = self._parse_json(self._search_regex(
+            r'vilos\.config\.media\s*=\s*({.+?});',
+            webpage, 'vilos media', default='{}'), video_id)
+        media_metadata = media.get('metadata') or {}
+
+        language = self._search_regex(
+            r'(?:vilos\.config\.player\.language|LOCALE)\s*=\s*(["\'])(?P<lang>(?:(?!\1).)+)\1',
+            webpage, 'language', default=None, group='lang')
+
         video_title = self._html_search_regex(
         video_title = self._html_search_regex(
-            r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
-            webpage, 'video_title')
+            (r'(?s)<h1[^>]*>((?:(?!<h1).)*?<(?:span[^>]+itemprop=["\']title["\']|meta[^>]+itemprop=["\']position["\'])[^>]*>(?:(?!<h1).)+?)</h1>',
+             r'<title>(.+?),\s+-\s+.+? Crunchyroll'),
+            webpage, 'video_title', default=None)
+        if not video_title:
+            video_title = re.sub(r'^Watch\s+', '', self._og_search_description(webpage))
         video_title = re.sub(r' {2,}', ' ', video_title)
         video_title = re.sub(r' {2,}', ' ', video_title)
-        video_description = self._parse_json(self._html_search_regex(
+        video_description = (self._parse_json(self._html_search_regex(
             r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
             r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
-            webpage, 'description', default='{}'), video_id).get('description')
+            webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
         if video_description:
             video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
         if video_description:
             video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
-        video_upload_date = self._html_search_regex(
-            [r'<div>Availability for free users:(.+?)</div>', r'<div>[^<>]+<span>\s*(.+?\d{4})\s*</span></div>'],
-            webpage, 'video_upload_date', fatal=False, flags=re.DOTALL)
-        if video_upload_date:
-            video_upload_date = unified_strdate(video_upload_date)
         video_uploader = self._html_search_regex(
             # try looking for both an uploader that's a link and one that's not
             [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
         video_uploader = self._html_search_regex(
             # try looking for both an uploader that's a link and one that's not
             [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
-            webpage, 'video_uploader', fatal=False)
+            webpage, 'video_uploader', default=False)
 
 
-        available_fmts = []
-        for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
-            attrs = extract_attributes(a)
-            href = attrs.get('href')
-            if href and '/freetrial' in href:
-                continue
-            available_fmts.append(fmt)
-        if not available_fmts:
-            for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
-                available_fmts = re.findall(p, webpage)
-                if available_fmts:
-                    break
-        video_encode_ids = []
         formats = []
         formats = []
-        for fmt in available_fmts:
-            stream_quality, stream_format = self._FORMAT_IDS[fmt]
-            video_format = fmt + 'p'
-            stream_infos = []
-            streamdata = self._call_rpc_api(
-                'VideoPlayer_GetStandardConfig', video_id,
-                'Downloading media info for %s' % video_format, data={
-                    'media_id': video_id,
-                    'video_format': stream_format,
-                    'video_quality': stream_quality,
-                    'current_page': url,
-                })
-            if streamdata is not None:
-                stream_info = streamdata.find('./{default}preload/stream_info')
-                if stream_info is not None:
-                    stream_infos.append(stream_info)
-            stream_info = self._call_rpc_api(
-                'VideoEncode_GetStreamInfo', video_id,
-                'Downloading stream info for %s' % video_format, data={
-                    'media_id': video_id,
-                    'video_format': stream_format,
-                    'video_encode_quality': stream_quality,
-                })
-            if stream_info is not None:
-                stream_infos.append(stream_info)
-            for stream_info in stream_infos:
-                video_encode_id = xpath_text(stream_info, './video_encode_id')
-                if video_encode_id in video_encode_ids:
-                    continue
-                video_encode_ids.append(video_encode_id)
-
-                video_file = xpath_text(stream_info, './file')
-                if not video_file:
-                    continue
-                if video_file.startswith('http'):
-                    formats.extend(self._extract_m3u8_formats(
-                        video_file, video_id, 'mp4', entry_protocol='m3u8_native',
-                        m3u8_id='hls', fatal=False))
+        for stream in media.get('streams', []):
+            audio_lang = stream.get('audio_lang')
+            hardsub_lang = stream.get('hardsub_lang')
+            vrv_formats = self._extract_vrv_formats(
+                stream.get('url'), video_id, stream.get('format'),
+                audio_lang, hardsub_lang)
+            for f in vrv_formats:
+                if not hardsub_lang:
+                    f['preference'] = 1
+                language_preference = 0
+                if audio_lang == language:
+                    language_preference += 1
+                if hardsub_lang == language:
+                    language_preference += 1
+                if language_preference:
+                    f['language_preference'] = language_preference
+            formats.extend(vrv_formats)
+        if not formats:
+            available_fmts = []
+            for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
+                attrs = extract_attributes(a)
+                href = attrs.get('href')
+                if href and '/freetrial' in href:
                     continue
                     continue
+                available_fmts.append(fmt)
+            if not available_fmts:
+                for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
+                    available_fmts = re.findall(p, webpage)
+                    if available_fmts:
+                        break
+            if not available_fmts:
+                available_fmts = self._FORMAT_IDS.keys()
+            video_encode_ids = []
+
+            for fmt in available_fmts:
+                stream_quality, stream_format = self._FORMAT_IDS[fmt]
+                video_format = fmt + 'p'
+                stream_infos = []
+                streamdata = self._call_rpc_api(
+                    'VideoPlayer_GetStandardConfig', video_id,
+                    'Downloading media info for %s' % video_format, data={
+                        'media_id': video_id,
+                        'video_format': stream_format,
+                        'video_quality': stream_quality,
+                        'current_page': url,
+                    })
+                if isinstance(streamdata, compat_etree_Element):
+                    stream_info = streamdata.find('./{default}preload/stream_info')
+                    if stream_info is not None:
+                        stream_infos.append(stream_info)
+                stream_info = self._call_rpc_api(
+                    'VideoEncode_GetStreamInfo', video_id,
+                    'Downloading stream info for %s' % video_format, data={
+                        'media_id': video_id,
+                        'video_format': stream_format,
+                        'video_encode_quality': stream_quality,
+                    })
+                if isinstance(stream_info, compat_etree_Element):
+                    stream_infos.append(stream_info)
+                for stream_info in stream_infos:
+                    video_encode_id = xpath_text(stream_info, './video_encode_id')
+                    if video_encode_id in video_encode_ids:
+                        continue
+                    video_encode_ids.append(video_encode_id)
 
 
-                video_url = xpath_text(stream_info, './host')
-                if not video_url:
-                    continue
-                metadata = stream_info.find('./metadata')
-                format_info = {
-                    'format': video_format,
-                    'height': int_or_none(xpath_text(metadata, './height')),
-                    'width': int_or_none(xpath_text(metadata, './width')),
-                }
-
-                if '.fplive.net/' in video_url:
-                    video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
-                    parsed_video_url = compat_urlparse.urlparse(video_url)
-                    direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
-                        netloc='v.lvlt.crcdn.net',
-                        path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
-                    if self._is_valid_url(direct_video_url, video_id, video_format):
-                        format_info.update({
-                            'format_id': 'http-' + video_format,
-                            'url': direct_video_url,
-                        })
-                        formats.append(format_info)
+                    video_file = xpath_text(stream_info, './file')
+                    if not video_file:
+                        continue
+                    if video_file.startswith('http'):
+                        formats.extend(self._extract_m3u8_formats(
+                            video_file, video_id, 'mp4', entry_protocol='m3u8_native',
+                            m3u8_id='hls', fatal=False))
                         continue
 
                         continue
 
-                format_info.update({
-                    'format_id': 'rtmp-' + video_format,
-                    'url': video_url,
-                    'play_path': video_file,
-                    'ext': 'flv',
-                })
-                formats.append(format_info)
-        self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
+                    video_url = xpath_text(stream_info, './host')
+                    if not video_url:
+                        continue
+                    metadata = stream_info.find('./metadata')
+                    format_info = {
+                        'format': video_format,
+                        'height': int_or_none(xpath_text(metadata, './height')),
+                        'width': int_or_none(xpath_text(metadata, './width')),
+                    }
+
+                    if '.fplive.net/' in video_url:
+                        video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
+                        parsed_video_url = compat_urlparse.urlparse(video_url)
+                        direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
+                            netloc='v.lvlt.crcdn.net',
+                            path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
+                        if self._is_valid_url(direct_video_url, video_id, video_format):
+                            format_info.update({
+                                'format_id': 'http-' + video_format,
+                                'url': direct_video_url,
+                            })
+                            formats.append(format_info)
+                            continue
+
+                    format_info.update({
+                        'format_id': 'rtmp-' + video_format,
+                        'url': video_url,
+                        'play_path': video_file,
+                        'ext': 'flv',
+                    })
+                    formats.append(format_info)
+        self._sort_formats(formats, ('preference', 'language_preference', 'height', 'width', 'tbr', 'fps'))
 
         metadata = self._call_rpc_api(
             'VideoPlayer_GetMediaMetadata', video_id,
 
         metadata = self._call_rpc_api(
             'VideoPlayer_GetMediaMetadata', video_id,
@@ -546,28 +579,52 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
                 'media_id': video_id,
             })
 
                 'media_id': video_id,
             })
 
-        subtitles = self.extract_subtitles(video_id, webpage)
+        subtitles = {}
+        for subtitle in media.get('subtitles', []):
+            subtitle_url = subtitle.get('url')
+            if not subtitle_url:
+                continue
+            subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
+                'url': subtitle_url,
+                'ext': subtitle.get('format', 'ass'),
+            })
+        if not subtitles:
+            subtitles = self.extract_subtitles(video_id, webpage)
 
         # webpage provide more accurate data than series_title from XML
         series = self._html_search_regex(
             r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
             webpage, 'series', fatal=False)
 
         # webpage provide more accurate data than series_title from XML
         series = self._html_search_regex(
             r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
             webpage, 'series', fatal=False)
-        season = xpath_text(metadata, 'series_title')
 
 
-        episode = xpath_text(metadata, 'episode_title')
-        episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
+        season = episode = episode_number = duration = thumbnail = None
+
+        if isinstance(metadata, compat_etree_Element):
+            season = xpath_text(metadata, 'series_title')
+            episode = xpath_text(metadata, 'episode_title')
+            episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
+            duration = float_or_none(media_metadata.get('duration'), 1000)
+            thumbnail = xpath_text(metadata, 'episode_image_url')
+
+        if not episode:
+            episode = media_metadata.get('title')
+        if not episode_number:
+            episode_number = int_or_none(media_metadata.get('episode_number'))
+        if not thumbnail:
+            thumbnail = media_metadata.get('thumbnail', {}).get('url')
 
         season_number = int_or_none(self._search_regex(
             r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
             webpage, 'season number', default=None))
 
 
         season_number = int_or_none(self._search_regex(
             r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
             webpage, 'season number', default=None))
 
-        return {
+        info = self._search_json_ld(webpage, video_id, default={})
+
+        return merge_dicts({
             'id': video_id,
             'title': video_title,
             'description': video_description,
             'id': video_id,
             'title': video_title,
             'description': video_description,
-            'thumbnail': xpath_text(metadata, 'episode_image_url'),
+            'duration': duration,
+            'thumbnail': thumbnail,
             'uploader': video_uploader,
             'uploader': video_uploader,
-            'upload_date': video_upload_date,
             'series': series,
             'season': season,
             'season_number': season_number,
             'series': series,
             'season': season,
             'season_number': season_number,
@@ -575,12 +632,12 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
             'episode_number': episode_number,
             'subtitles': subtitles,
             'formats': formats,
             'episode_number': episode_number,
             'subtitles': subtitles,
             'formats': formats,
-        }
+        }, info)
 
 
 class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
     IE_NAME = 'crunchyroll:playlist'
 
 
 class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
     IE_NAME = 'crunchyroll:playlist'
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
 
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
 
     _TESTS = [{
         'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
@@ -610,9 +667,8 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
         webpage = self._download_webpage(
             self._add_skip_wall(url), show_id,
             headers=self.geo_verification_headers())
         webpage = self._download_webpage(
             self._add_skip_wall(url), show_id,
             headers=self.geo_verification_headers())
-        title = self._html_search_regex(
-            r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
-            webpage, 'title')
+        title = self._html_search_meta('name', webpage, default=None)
+
         episode_paths = re.findall(
             r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
             webpage)
         episode_paths = re.findall(
             r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
             webpage)