debian/control: Update list of extractors in long description.

[youtubedl] / youtube_dl / extractor / crunchyroll.py
diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py

index 6d3abb52f735de655e11ce88c70a4b6b9cc287d6..ba8b9fa7eff3105fb657cf322fdacab17da632eb 100644 (file)
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -1,15 +1,16 @@
-# encoding: utf-8
+# coding: utf-8
  from __future__ import unicode_literals
  
  import re
  import json
  from __future__ import unicode_literals
  
  import re
  import json
-import base64
  import zlib
  
  from hashlib import sha1
  from math import pow, sqrt, floor
  from .common import InfoExtractor
  import zlib
  
  from hashlib import sha1
  from math import pow, sqrt, floor
  from .common import InfoExtractor
+from .vrv import VRVIE
  from ..compat import (
  from ..compat import (
+    compat_b64decode,
      compat_etree_fromstring,
      compat_urllib_parse_urlencode,
      compat_urllib_request,
      compat_etree_fromstring,
      compat_urllib_parse_urlencode,
      compat_urllib_request,
@@ -18,6 +19,8 @@ from ..compat import (
  from ..utils import (
      ExtractorError,
      bytes_to_intlist,
  from ..utils import (
      ExtractorError,
      bytes_to_intlist,
+    extract_attributes,
+    float_or_none,
      intlist_to_bytes,
      int_or_none,
      lowercase_escape,
      intlist_to_bytes,
      int_or_none,
      lowercase_escape,
@@ -26,7 +29,6 @@ from ..utils import (
      unified_strdate,
      urlencode_postdata,
      xpath_text,
      unified_strdate,
      urlencode_postdata,
      xpath_text,
-    extract_attributes,
  )
  from ..aes import (
      aes_cbc_decrypt,
  )
  from ..aes import (
      aes_cbc_decrypt,
@@ -34,22 +36,80 @@ from ..aes import (
  
  
  class CrunchyrollBaseIE(InfoExtractor):
  
  
  class CrunchyrollBaseIE(InfoExtractor):
+    _LOGIN_URL = 'https://www.crunchyroll.com/login'
+    _LOGIN_FORM = 'login_form'
      _NETRC_MACHINE = 'crunchyroll'
  
      _NETRC_MACHINE = 'crunchyroll'
  
+    def _call_rpc_api(self, method, video_id, note=None, data=None):
+        data = data or {}
+        data['req'] = 'RpcApi' + method
+        data = compat_urllib_parse_urlencode(data).encode('utf-8')
+        return self._download_xml(
+            'http://www.crunchyroll.com/xml/',
+            video_id, note, fatal=False, data=data, headers={
+                'Content-Type': 'application/x-www-form-urlencoded',
+            })
+
      def _login(self):
      def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
          if username is None:
              return
          if username is None:
              return
-        self.report_login()
-        login_url = 'https://www.crunchyroll.com/?a=formhandler'
-        data = urlencode_postdata({
-            'formname': 'RpcApiUser_Login',
-            'name': username,
-            'password': password,
+
+        self._download_webpage(
+            'https://www.crunchyroll.com/?a=formhandler',
+            None, 'Logging in', 'Wrong login info',
+            data=urlencode_postdata({
+                'formname': 'RpcApiUser_Login',
+                'next_url': 'https://www.crunchyroll.com/acct/membership',
+                'name': username,
+                'password': password,
+            }))
+
+        '''
+        login_page = self._download_webpage(
+            self._LOGIN_URL, None, 'Downloading login page')
+
+        def is_logged(webpage):
+            return '<title>Redirecting' in webpage
+
+        # Already logged in
+        if is_logged(login_page):
+            return
+
+        login_form_str = self._search_regex(
+            r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LOGIN_FORM,
+            login_page, 'login form', group='form')
+
+        post_url = extract_attributes(login_form_str).get('action')
+        if not post_url:
+            post_url = self._LOGIN_URL
+        elif not post_url.startswith('http'):
+            post_url = compat_urlparse.urljoin(self._LOGIN_URL, post_url)
+
+        login_form = self._form_hidden_inputs(self._LOGIN_FORM, login_page)
+
+        login_form.update({
+            'login_form[name]': username,
+            'login_form[password]': password,
          })
          })
-        login_request = sanitized_Request(login_url, data)
-        login_request.add_header('Content-Type', 'application/x-www-form-urlencoded')
-        self._download_webpage(login_request, None, False, 'Wrong login info')
+
+        response = self._download_webpage(
+            post_url, None, 'Logging in', 'Wrong login info',
+            data=urlencode_postdata(login_form),
+            headers={'Content-Type': 'application/x-www-form-urlencoded'})
+
+        # Successful login
+        if is_logged(response):
+            return
+
+        error = self._html_search_regex(
+            '(?s)<ul[^>]+class=["\']messages["\'][^>]*>(.+?)</ul>',
+            response, 'error message', default=None)
+        if error:
+            raise ExtractorError('Unable to login: %s' % error, expected=True)
+
+        raise ExtractorError('Unable to log in')
+        '''
  
      def _real_initialize(self):
          self._login()
  
      def _real_initialize(self):
          self._login()
@@ -81,16 +141,17 @@ class CrunchyrollBaseIE(InfoExtractor):
              parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
  
  
              parsed_url._replace(query=compat_urllib_parse_urlencode(qs, True)))
  
  
-class CrunchyrollIE(CrunchyrollBaseIE):
+class CrunchyrollIE(CrunchyrollBaseIE, VRVIE):
+    IE_NAME = 'crunchyroll'
      _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
      _TESTS = [{
          'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
          'info_dict': {
              'id': '645513',
      _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.(?:com|fr)/(?:media(?:-|/\?id=)|[^/]*/[^/?&]*?)(?P<video_id>[0-9]+))(?:[/?&]|$)'
      _TESTS = [{
          'url': 'http://www.crunchyroll.com/wanna-be-the-strongest-in-the-world/episode-1-an-idol-wrestler-is-born-645513',
          'info_dict': {
              'id': '645513',
-            'ext': 'flv',
+            'ext': 'mp4',
              'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
              'description': 'md5:2d17137920c64f2f49981a7797d275ef',
              'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
              'description': 'md5:2d17137920c64f2f49981a7797d275ef',
-            'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
+            'thumbnail': r're:^https?://.*\.jpg$',
              'uploader': 'Yomiuri Telecasting Corporation (YTV)',
              'upload_date': '20131013',
              'url': 're:(?!.*&amp)',
              'uploader': 'Yomiuri Telecasting Corporation (YTV)',
              'upload_date': '20131013',
              'url': 're:(?!.*&amp)',
@@ -106,7 +167,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
              'ext': 'flv',
              'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
              'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
              'ext': 'flv',
              'title': 'Culture Japan Episode 1 – Rebuilding Japan after the 3.11',
              'description': 'md5:2fbc01f90b87e8e9137296f37b461c12',
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg$',
              'uploader': 'Danny Choo Network',
              'upload_date': '20120213',
          },
              'uploader': 'Danny Choo Network',
              'upload_date': '20120213',
          },
@@ -114,6 +175,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
              # rtmp
              'skip_download': True,
          },
              # rtmp
              'skip_download': True,
          },
+        'skip': 'Video gone',
      }, {
          'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
          'info_dict': {
      }, {
          'url': 'http://www.crunchyroll.com/rezero-starting-life-in-another-world-/episode-5-the-morning-of-our-promise-is-still-distant-702409',
          'info_dict': {
@@ -121,7 +183,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
              'ext': 'mp4',
              'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
              'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
              'ext': 'mp4',
              'title': 'Re:ZERO -Starting Life in Another World- Episode 5 – The Morning of Our Promise Is Still Distant',
              'description': 'md5:97664de1ab24bbf77a9c01918cb7dca9',
-            'thumbnail': 're:^https?://.*\.jpg$',
+            'thumbnail': r're:^https?://.*\.jpg$',
              'uploader': 'TV TOKYO',
              'upload_date': '20160508',
          },
              'uploader': 'TV TOKYO',
              'upload_date': '20160508',
          },
@@ -129,6 +191,26 @@ class CrunchyrollIE(CrunchyrollBaseIE):
              # m3u8 download
              'skip_download': True,
          },
              # m3u8 download
              'skip_download': True,
          },
+    }, {
+        'url': 'http://www.crunchyroll.com/konosuba-gods-blessing-on-this-wonderful-world/episode-1-give-me-deliverance-from-this-judicial-injustice-727589',
+        'info_dict': {
+            'id': '727589',
+            'ext': 'mp4',
+            'title': "KONOSUBA -God's blessing on this wonderful world! 2 Episode 1 – Give Me Deliverance From This Judicial Injustice!",
+            'description': 'md5:cbcf05e528124b0f3a0a419fc805ea7d',
+            'thumbnail': r're:^https?://.*\.jpg$',
+            'uploader': 'Kadokawa Pictures Inc.',
+            'upload_date': '20170118',
+            'series': "KONOSUBA -God's blessing on this wonderful world!",
+            'season': "KONOSUBA -God's blessing on this wonderful world! 2",
+            'season_number': 2,
+            'episode': 'Give Me Deliverance From This Judicial Injustice!',
+            'episode_number': 1,
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
      }, {
          'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
          'only_matching': True,
      }, {
          'url': 'http://www.crunchyroll.fr/girl-friend-beta/episode-11-goodbye-la-mode-661697',
          'only_matching': True,
@@ -136,6 +218,56 @@ class CrunchyrollIE(CrunchyrollBaseIE):
          # geo-restricted (US), 18+ maturity wall, non-premium available
          'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
          'only_matching': True,
          # geo-restricted (US), 18+ maturity wall, non-premium available
          'url': 'http://www.crunchyroll.com/cosplay-complex-ova/episode-1-the-birth-of-the-cosplay-club-565617',
          'only_matching': True,
+    }, {
+        # A description with double quotes
+        'url': 'http://www.crunchyroll.com/11eyes/episode-1-piros-jszaka-red-night-535080',
+        'info_dict': {
+            'id': '535080',
+            'ext': 'mp4',
+            'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
+            'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
+            'uploader': 'Marvelous AQL Inc.',
+            'upload_date': '20091021',
+        },
+        'params': {
+            # Just test metadata extraction
+            'skip_download': True,
+        },
+    }, {
+        # make sure we can extract an uploader name that's not a link
+        'url': 'http://www.crunchyroll.com/hakuoki-reimeiroku/episode-1-dawn-of-the-divine-warriors-606899',
+        'info_dict': {
+            'id': '606899',
+            'ext': 'mp4',
+            'title': 'Hakuoki Reimeiroku Episode 1 – Dawn of the Divine Warriors',
+            'description': 'Ryunosuke was left to die, but Serizawa-san asked him a simple question "Do you want to live?"',
+            'uploader': 'Geneon Entertainment',
+            'upload_date': '20120717',
+        },
+        'params': {
+            # just test metadata extraction
+            'skip_download': True,
+        },
+    }, {
+        # A video with a vastly different season name compared to the series name
+        'url': 'http://www.crunchyroll.com/nyarko-san-another-crawling-chaos/episode-1-test-590532',
+        'info_dict': {
+            'id': '590532',
+            'ext': 'mp4',
+            'title': 'Haiyoru! Nyaruani (ONA) Episode 1 – Test',
+            'description': 'Mahiro and Nyaruko talk about official certification.',
+            'uploader': 'TV TOKYO',
+            'upload_date': '20120305',
+            'series': 'Nyarko-san: Another Crawling Chaos',
+            'season': 'Haiyoru! Nyaruani (ONA)',
+        },
+        'params': {
+            # Just test metadata extraction
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.crunchyroll.com/media-723735',
+        'only_matching': True,
      }]
  
      _FORMAT_IDS = {
      }]
  
      _FORMAT_IDS = {
@@ -146,8 +278,8 @@ class CrunchyrollIE(CrunchyrollBaseIE):
      }
  
      def _decrypt_subtitles(self, data, iv, id):
      }
  
      def _decrypt_subtitles(self, data, iv, id):
-        data = bytes_to_intlist(base64.b64decode(data.encode('utf-8')))
-        iv = bytes_to_intlist(base64.b64decode(iv.encode('utf-8')))
+        data = bytes_to_intlist(compat_b64decode(data))
+        iv = bytes_to_intlist(compat_b64decode(iv))
          id = int(id)
  
          def obfuscate_key_aux(count, modulo, start):
          id = int(id)
  
          def obfuscate_key_aux(count, modulo, start):
@@ -199,8 +331,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
          output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
          output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
          output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
          output += 'WrapStyle: %s\n' % sub_root.attrib['wrap_style']
          output += 'PlayResX: %s\n' % sub_root.attrib['play_res_x']
          output += 'PlayResY: %s\n' % sub_root.attrib['play_res_y']
-        output += """ScaledBorderAndShadow: yes
-
+        output += """
  [V4+ Styles]
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
  """
  [V4+ Styles]
  Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
  """
@@ -262,15 +393,19 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
      def _get_subtitles(self, video_id, webpage):
          subtitles = {}
          for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
      def _get_subtitles(self, video_id, webpage):
          subtitles = {}
          for sub_id, sub_name in re.findall(r'\bssid=([0-9]+)"[^>]+?\btitle="([^"]+)', webpage):
-            sub_page = self._download_webpage(
-                'http://www.crunchyroll.com/xml/?req=RpcApiSubtitle_GetXml&subtitle_script_id=' + sub_id,
-                video_id, note='Downloading subtitles for ' + sub_name)
-            id = self._search_regex(r'id=\'([0-9]+)', sub_page, 'subtitle_id', fatal=False)
-            iv = self._search_regex(r'<iv>([^<]+)', sub_page, 'subtitle_iv', fatal=False)
-            data = self._search_regex(r'<data>([^<]+)', sub_page, 'subtitle_data', fatal=False)
-            if not id or not iv or not data:
+            sub_doc = self._call_rpc_api(
+                'Subtitle_GetXml', video_id,
+                'Downloading subtitles for ' + sub_name, data={
+                    'subtitle_script_id': sub_id,
+                })
+            if sub_doc is None:
                  continue
                  continue
-            subtitle = self._decrypt_subtitles(data, iv, id).decode('utf-8')
+            sid = sub_doc.get('id')
+            iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
+            data = xpath_text(sub_doc, 'data', 'subtitle data')
+            if not sid or not iv or not data:
+                continue
+            subtitle = self._decrypt_subtitles(data, iv, sid).decode('utf-8')
              lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
              if not lang_code:
                  continue
              lang_code = self._search_regex(r'lang_code=["\']([^"\']+)', subtitle, 'subtitle_lang_code', fatal=False)
              if not lang_code:
                  continue
@@ -287,7 +422,9 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
          else:
              webpage_url = 'http://www.' + mobj.group('url')
  
          else:
              webpage_url = 'http://www.' + mobj.group('url')
  
-        webpage = self._download_webpage(self._add_skip_wall(webpage_url), video_id, 'Downloading webpage')
+        webpage = self._download_webpage(
+            self._add_skip_wall(webpage_url), video_id,
+            headers=self.geo_verification_headers())
          note_m = self._html_search_regex(
              r'<div class="showmedia-trailer-notice">(.+?)</div>',
              webpage, 'trailer-notice', default='')
          note_m = self._html_search_regex(
              r'<div class="showmedia-trailer-notice">(.+?)</div>',
              webpage, 'trailer-notice', default='')
@@ -303,13 +440,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
          if 'To view this, please log in to verify you are 18 or older.' in webpage:
              self.raise_login_required()
  
          if 'To view this, please log in to verify you are 18 or older.' in webpage:
              self.raise_login_required()
  
+        media = self._parse_json(self._search_regex(
+            r'vilos\.config\.media\s*=\s*({.+?});',
+            webpage, 'vilos media', default='{}'), video_id)
+        media_metadata = media.get('metadata') or {}
+
          video_title = self._html_search_regex(
              r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
              webpage, 'video_title')
          video_title = re.sub(r' {2,}', ' ', video_title)
          video_title = self._html_search_regex(
              r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
              webpage, 'video_title')
          video_title = re.sub(r' {2,}', ' ', video_title)
-        video_description = self._html_search_regex(
-            r'<script[^>]*>\s*.+?\[media_id=%s\].+?"description"\s*:\s*"([^"]+)' % video_id,
-            webpage, 'description', default=None)
+        video_description = (self._parse_json(self._html_search_regex(
+            r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
+            webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
          if video_description:
              video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
          video_upload_date = self._html_search_regex(
          if video_description:
              video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
          video_upload_date = self._html_search_regex(
@@ -318,100 +460,149 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
          if video_upload_date:
              video_upload_date = unified_strdate(video_upload_date)
          video_uploader = self._html_search_regex(
          if video_upload_date:
              video_upload_date = unified_strdate(video_upload_date)
          video_uploader = self._html_search_regex(
-            r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', webpage,
-            'video_uploader', fatal=False)
-
-        available_fmts = []
-        for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
-            attrs = extract_attributes(a)
-            href = attrs.get('href')
-            if href and '/freetrial' in href:
-                continue
-            available_fmts.append(fmt)
-        if not available_fmts:
-            for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
-                available_fmts = re.findall(p, webpage)
-                if available_fmts:
-                    break
-        video_encode_ids = []
-        formats = []
-        for fmt in available_fmts:
-            stream_quality, stream_format = self._FORMAT_IDS[fmt]
-            video_format = fmt + 'p'
-            streamdata_req = sanitized_Request(
-                'http://www.crunchyroll.com/xml/?req=RpcApiVideoPlayer_GetStandardConfig&media_id=%s&video_format=%s&video_quality=%s'
-                % (video_id, stream_format, stream_quality),
-                compat_urllib_parse_urlencode({'current_page': url}).encode('utf-8'))
-            streamdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded')
-            streamdata = self._download_xml(
-                streamdata_req, video_id,
-                note='Downloading media info for %s' % video_format)
-            stream_info = streamdata.find('./{default}preload/stream_info')
-            video_encode_id = xpath_text(stream_info, './video_encode_id')
-            if video_encode_id in video_encode_ids:
-                continue
-            video_encode_ids.append(video_encode_id)
+            # try looking for both an uploader that's a link and one that's not
+            [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
+            webpage, 'video_uploader', fatal=False)
  
  
-            video_file = xpath_text(stream_info, './file')
-            if not video_file:
-                continue
-            if video_file.startswith('http'):
-                formats.extend(self._extract_m3u8_formats(
-                    video_file, video_id, 'mp4', entry_protocol='m3u8_native',
-                    m3u8_id='hls', fatal=False))
-                continue
+        formats = []
+        for stream in media.get('streams', []):
+            formats.extend(self._extract_vrv_formats(
+                stream.get('url'), video_id, stream.get('format'),
+                stream.get('audio_lang'), stream.get('hardsub_lang')))
+        if not formats:
+            available_fmts = []
+            for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
+                attrs = extract_attributes(a)
+                href = attrs.get('href')
+                if href and '/freetrial' in href:
+                    continue
+                available_fmts.append(fmt)
+            if not available_fmts:
+                for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
+                    available_fmts = re.findall(p, webpage)
+                    if available_fmts:
+                        break
+            if not available_fmts:
+                available_fmts = self._FORMAT_IDS.keys()
+            video_encode_ids = []
+
+            for fmt in available_fmts:
+                stream_quality, stream_format = self._FORMAT_IDS[fmt]
+                video_format = fmt + 'p'
+                stream_infos = []
+                streamdata = self._call_rpc_api(
+                    'VideoPlayer_GetStandardConfig', video_id,
+                    'Downloading media info for %s' % video_format, data={
+                        'media_id': video_id,
+                        'video_format': stream_format,
+                        'video_quality': stream_quality,
+                        'current_page': url,
+                    })
+                if streamdata is not None:
+                    stream_info = streamdata.find('./{default}preload/stream_info')
+                    if stream_info is not None:
+                        stream_infos.append(stream_info)
+                stream_info = self._call_rpc_api(
+                    'VideoEncode_GetStreamInfo', video_id,
+                    'Downloading stream info for %s' % video_format, data={
+                        'media_id': video_id,
+                        'video_format': stream_format,
+                        'video_encode_quality': stream_quality,
+                    })
+                if stream_info is not None:
+                    stream_infos.append(stream_info)
+                for stream_info in stream_infos:
+                    video_encode_id = xpath_text(stream_info, './video_encode_id')
+                    if video_encode_id in video_encode_ids:
+                        continue
+                    video_encode_ids.append(video_encode_id)
+
+                    video_file = xpath_text(stream_info, './file')
+                    if not video_file:
+                        continue
+                    if video_file.startswith('http'):
+                        formats.extend(self._extract_m3u8_formats(
+                            video_file, video_id, 'mp4', entry_protocol='m3u8_native',
+                            m3u8_id='hls', fatal=False))
+                        continue
+
+                    video_url = xpath_text(stream_info, './host')
+                    if not video_url:
+                        continue
+                    metadata = stream_info.find('./metadata')
+                    format_info = {
+                        'format': video_format,
+                        'height': int_or_none(xpath_text(metadata, './height')),
+                        'width': int_or_none(xpath_text(metadata, './width')),
+                    }
+
+                    if '.fplive.net/' in video_url:
+                        video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
+                        parsed_video_url = compat_urlparse.urlparse(video_url)
+                        direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
+                            netloc='v.lvlt.crcdn.net',
+                            path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
+                        if self._is_valid_url(direct_video_url, video_id, video_format):
+                            format_info.update({
+                                'format_id': 'http-' + video_format,
+                                'url': direct_video_url,
+                            })
+                            formats.append(format_info)
+                            continue
  
  
-            video_url = xpath_text(stream_info, './host')
-            if not video_url:
-                continue
-            metadata = stream_info.find('./metadata')
-            format_info = {
-                'format': video_format,
-                'format_id': video_format,
-                'height': int_or_none(xpath_text(metadata, './height')),
-                'width': int_or_none(xpath_text(metadata, './width')),
-            }
-
-            if '.fplive.net/' in video_url:
-                video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
-                parsed_video_url = compat_urlparse.urlparse(video_url)
-                direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
-                    netloc='v.lvlt.crcdn.net',
-                    path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
-                if self._is_valid_url(direct_video_url, video_id, video_format):
                      format_info.update({
                      format_info.update({
-                        'url': direct_video_url,
+                        'format_id': 'rtmp-' + video_format,
+                        'url': video_url,
+                        'play_path': video_file,
+                        'ext': 'flv',
                      })
                      formats.append(format_info)
                      })
                      formats.append(format_info)
-                    continue
+        self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
  
  
-            format_info.update({
-                'url': video_url,
-                'play_path': video_file,
-                'ext': 'flv',
+        metadata = self._call_rpc_api(
+            'VideoPlayer_GetMediaMetadata', video_id,
+            note='Downloading media info', data={
+                'media_id': video_id,
              })
              })
-            formats.append(format_info)
-        self._sort_formats(formats)
  
  
-        metadata = self._download_xml(
-            'http://www.crunchyroll.com/xml', video_id,
-            note='Downloading media info', query={
-                'req': 'RpcApiVideoPlayer_GetMediaMetadata',
-                'media_id': video_id,
+        subtitles = {}
+        for subtitle in media.get('subtitles', []):
+            subtitle_url = subtitle.get('url')
+            if not subtitle_url:
+                continue
+            subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
+                'url': subtitle_url,
+                'ext': subtitle.get('format', 'ass'),
              })
              })
+        if not subtitles:
+            subtitles = self.extract_subtitles(video_id, webpage)
+
+        # webpage provide more accurate data than series_title from XML
+        series = self._html_search_regex(
+            r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
+            webpage, 'series', fatal=False)
+        season = xpath_text(metadata, 'series_title')
+
+        episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
+        episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
  
  
-        subtitles = self.extract_subtitles(video_id, webpage)
+        season_number = int_or_none(self._search_regex(
+            r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
+            webpage, 'season number', default=None))
  
          return {
              'id': video_id,
              'title': video_title,
              'description': video_description,
  
          return {
              'id': video_id,
              'title': video_title,
              'description': video_description,
-            'thumbnail': xpath_text(metadata, 'episode_image_url'),
+            'duration': float_or_none(media_metadata.get('duration'), 1000),
+            'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
              'uploader': video_uploader,
              'upload_date': video_upload_date,
              'uploader': video_uploader,
              'upload_date': video_upload_date,
-            'series': xpath_text(metadata, 'series_title'),
-            'episode': xpath_text(metadata, 'episode_title'),
-            'episode_number': int_or_none(xpath_text(metadata, 'episode_number')),
+            'series': series,
+            'season': season,
+            'season_number': season_number,
+            'episode': episode,
+            'episode_number': episode_number,
              'subtitles': subtitles,
              'formats': formats,
          }
              'subtitles': subtitles,
              'formats': formats,
          }
@@ -419,7 +610,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
  
  class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
      IE_NAME = 'crunchyroll:playlist'
  
  class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
      IE_NAME = 'crunchyroll:playlist'
-    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login))(?P<id>[\w\-]+))/?(?:\?|$)'
+    _VALID_URL = r'https?://(?:(?P<prefix>www|m)\.)?(?P<url>crunchyroll\.com/(?!(?:news|anime-news|library|forum|launchcalendar|lineup|store|comics|freetrial|login|media-\d+))(?P<id>[\w\-]+))/?(?:\?|$)'
  
      _TESTS = [{
          'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
  
      _TESTS = [{
          'url': 'http://www.crunchyroll.com/a-bridge-to-the-starry-skies-hoshizora-e-kakaru-hashi',
@@ -446,16 +637,18 @@ class CrunchyrollShowPlaylistIE(CrunchyrollBaseIE):
      def _real_extract(self, url):
          show_id = self._match_id(url)
  
      def _real_extract(self, url):
          show_id = self._match_id(url)
  
-        webpage = self._download_webpage(self._add_skip_wall(url), show_id)
+        webpage = self._download_webpage(
+            self._add_skip_wall(url), show_id,
+            headers=self.geo_verification_headers())
          title = self._html_search_regex(
              r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
              webpage, 'title')
          episode_paths = re.findall(
          title = self._html_search_regex(
              r'(?s)<h1[^>]*>\s*<span itemprop="name">(.*?)</span>',
              webpage, 'title')
          episode_paths = re.findall(
-            r'(?s)<li id="showview_videos_media_[0-9]+"[^>]+>.*?<a href="([^"]+)"',
+            r'(?s)<li id="showview_videos_media_(\d+)"[^>]+>.*?<a href="([^"]+)"',
              webpage)
          entries = [
              webpage)
          entries = [
-            self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll')
-            for ep in episode_paths
+            self.url_result('http://www.crunchyroll.com' + ep, 'Crunchyroll', ep_id)
+            for ep_id, ep in episode_paths
          ]
          entries.reverse()
  
          ]
          entries.reverse()