Initiate new release.

[youtubedl] / youtube_dl / extractor / common.py
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py

index 05c51fac9b0b4162fb126cb79a79d871b591ead8..4252d682563f9831a33225ff3dc906aa27f0eb01 100644 (file)
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -6,6 +6,7 @@ import hashlib
  import json
  import netrc
  import os
+import random
  import re
  import socket
  import sys
@@ -39,7 +40,10 @@ from ..utils import (
      ExtractorError,
      fix_xml_ampersands,
      float_or_none,
+    GeoRestrictedError,
+    GeoUtils,
      int_or_none,
+    js_to_json,
      parse_iso8601,
      RegexNotFoundError,
      sanitize_filename,
@@ -59,6 +63,7 @@ from ..utils import (
      parse_m3u8_attributes,
      extract_attributes,
      parse_codecs,
+    urljoin,
  )
  
  
@@ -120,9 +125,19 @@ class InfoExtractor(object):
                                   download, lower-case.
                                   "http", "https", "rtsp", "rtmp", "rtmpe",
                                   "m3u8", "m3u8_native" or "http_dash_segments".
-                    * fragments  A list of fragments of the fragmented media,
-                                 with the following entries:
-                                 * "url" (mandatory) - fragment's URL
+                    * fragment_base_url
+                                 Base URL for fragments. Each fragment's path
+                                 value (if present) will be relative to
+                                 this URL.
+                    * fragments  A list of fragments of a fragmented media.
+                                 Each fragment entry must contain either an url
+                                 or a path. If an url is present it should be
+                                 considered by a client. Otherwise both path and
+                                 fragment_base_url must be present. Here is
+                                 the list of all potential fields:
+                                 * "url" - fragment's URL
+                                 * "path" - fragment's path relative to
+                                            fragment_base_url
                                   * "duration" (optional, int or float)
                                   * "filesize" (optional, int)
                      * preference Order number of this format. If this field is
@@ -188,9 +203,10 @@ class InfoExtractor(object):
      uploader_url:   Full URL to a personal webpage of the video uploader.
      location:       Physical location where the video was filmed.
      subtitles:      The available subtitles as a dictionary in the format
-                    {language: subformats}. "subformats" is a list sorted from
-                    lower to higher preference, each element is a dictionary
-                    with the "ext" entry and one of:
+                    {tag: subformats}. "tag" is usually a language code, and
+                    "subformats" is a list sorted from lower to higher
+                    preference, each element is a dictionary with the "ext"
+                    entry and one of:
                          * "data": The subtitles file contents
                          * "url": A URL pointing to the subtitles file
                      "ext" will be calculated from URL if missing
@@ -307,17 +323,34 @@ class InfoExtractor(object):
      _real_extract() methods and define a _VALID_URL regexp.
      Probably, they should also be added to the list of extractors.
  
+    _GEO_BYPASS attribute may be set to False in order to disable
+    geo restriction bypass mechanisms for a particular extractor.
+    Though it won't disable explicit geo restriction bypass based on
+    country code provided with geo_bypass_country. (experimental)
+
+    _GEO_COUNTRIES attribute may contain a list of presumably geo unrestricted
+    countries for this extractor. One of these countries will be used by
+    geo restriction bypass mechanism right away in order to bypass
+    geo restriction, of course, if the mechanism is not disabled. (experimental)
+
+    NB: both these geo attributes are experimental and may change in future
+    or be completely removed.
+
      Finally, the _WORKING attribute should be set to False for broken IEs
      in order to warn the users and skip the tests.
      """
  
      _ready = False
      _downloader = None
+    _x_forwarded_for_ip = None
+    _GEO_BYPASS = True
+    _GEO_COUNTRIES = None
      _WORKING = True
  
      def __init__(self, downloader=None):
          """Constructor. Receives an optional downloader."""
          self._ready = False
+        self._x_forwarded_for_ip = None
          self.set_downloader(downloader)
  
      @classmethod
@@ -346,15 +379,59 @@ class InfoExtractor(object):
  
      def initialize(self):
          """Initializes an instance (authentication, etc)."""
+        self._initialize_geo_bypass(self._GEO_COUNTRIES)
          if not self._ready:
              self._real_initialize()
              self._ready = True
  
+    def _initialize_geo_bypass(self, countries):
+        """
+        Initialize geo restriction bypass mechanism.
+
+        This method is used to initialize geo bypass mechanism based on faking
+        X-Forwarded-For HTTP header. A random country from provided country list
+        is selected and a random IP belonging to this country is generated. This
+        IP will be passed as X-Forwarded-For HTTP header in all subsequent
+        HTTP requests.
+
+        This method will be used for initial geo bypass mechanism initialization
+        during the instance initialization with _GEO_COUNTRIES.
+
+        You may also manually call it from extractor's code if geo countries
+        information is not available beforehand (e.g. obtained during
+        extraction) or due to some another reason.
+        """
+        if not self._x_forwarded_for_ip:
+            country_code = self._downloader.params.get('geo_bypass_country', None)
+            # If there is no explicit country for geo bypass specified and
+            # the extractor is known to be geo restricted let's fake IP
+            # as X-Forwarded-For right away.
+            if (not country_code and
+                    self._GEO_BYPASS and
+                    self._downloader.params.get('geo_bypass', True) and
+                    countries):
+                country_code = random.choice(countries)
+            if country_code:
+                self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+                if self._downloader.params.get('verbose', False):
+                    self._downloader.to_stdout(
+                        '[debug] Using fake IP %s (%s) as X-Forwarded-For.'
+                        % (self._x_forwarded_for_ip, country_code.upper()))
+
      def extract(self, url):
          """Extracts URL information and returns it in list of dicts."""
          try:
-            self.initialize()
-            return self._real_extract(url)
+            for _ in range(2):
+                try:
+                    self.initialize()
+                    ie_result = self._real_extract(url)
+                    if self._x_forwarded_for_ip:
+                        ie_result['__x_forwarded_for_ip'] = self._x_forwarded_for_ip
+                    return ie_result
+                except GeoRestrictedError as e:
+                    if self.__maybe_fake_ip_and_retry(e.countries):
+                        continue
+                    raise
          except ExtractorError:
              raise
          except compat_http_client.IncompleteRead as e:
@@ -362,6 +439,21 @@ class InfoExtractor(object):
          except (KeyError, StopIteration) as e:
              raise ExtractorError('An extractor error has occurred.', cause=e)
  
+    def __maybe_fake_ip_and_retry(self, countries):
+        if (not self._downloader.params.get('geo_bypass_country', None) and
+                self._GEO_BYPASS and
+                self._downloader.params.get('geo_bypass', True) and
+                not self._x_forwarded_for_ip and
+                countries):
+            country_code = random.choice(countries)
+            self._x_forwarded_for_ip = GeoUtils.random_ipv4(country_code)
+            if self._x_forwarded_for_ip:
+                self.report_warning(
+                    'Video is geo restricted. Retrying extraction with fake IP %s (%s) as X-Forwarded-For.'
+                    % (self._x_forwarded_for_ip, country_code.upper()))
+                return True
+        return False
+
      def set_downloader(self, downloader):
          """Sets the downloader for this IE."""
          self._downloader = downloader
@@ -421,6 +513,15 @@ class InfoExtractor(object):
          if isinstance(url_or_request, (compat_str, str)):
              url_or_request = url_or_request.partition('#')[0]
  
+        # Some sites check X-Forwarded-For HTTP header in order to figure out
+        # the origin of the client behind proxy. This allows bypassing geo
+        # restriction by faking this header's value to IP that belongs to some
+        # geo unrestricted country. We will do so once we encounter any
+        # geo restriction error.
+        if self._x_forwarded_for_ip:
+            if 'X-Forwarded-For' not in headers:
+                headers['X-Forwarded-For'] = self._x_forwarded_for_ip
+
          urlh = self._request_webpage(url_or_request, video_id, note, errnote, fatal, data=data, headers=headers, query=query)
          if urlh is False:
              assert not fatal
@@ -596,10 +697,8 @@ class InfoExtractor(object):
              expected=True)
  
      @staticmethod
-    def raise_geo_restricted(msg='This video is not available from your location due to geo restriction'):
-        raise ExtractorError(
-            '%s. You might want to use --proxy to workaround.' % msg,
-            expected=True)
+    def raise_geo_restricted(msg='This video is not available from your location due to geo restriction', countries=None):
+        raise GeoRestrictedError(msg, countries=countries)
  
      # Methods for following #608
      @staticmethod
@@ -1013,13 +1112,13 @@ class InfoExtractor(object):
                  unique_formats.append(f)
          formats[:] = unique_formats
  
-    def _is_valid_url(self, url, video_id, item='video'):
+    def _is_valid_url(self, url, video_id, item='video', headers={}):
          url = self._proto_relative_url(url, scheme='http:')
          # For now assume non HTTP(S) URLs always valid
          if not (url.startswith('http://') or url.startswith('https://')):
              return True
          try:
-            self._request_webpage(url, video_id, 'Checking %s URL' % item)
+            self._request_webpage(url, video_id, 'Checking %s URL' % item, headers=headers)
              return True
          except ExtractorError as e:
              if isinstance(e.cause, compat_urllib_error.URLError):
@@ -1196,6 +1295,9 @@ class InfoExtractor(object):
          m3u8_doc, urlh = res
          m3u8_url = urlh.geturl()
  
+        if '#EXT-X-FAXS-CM:' in m3u8_doc:  # Adobe Flash Access
+            return []
+
          formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
  
          format_url = lambda u: (
@@ -1224,6 +1326,7 @@ class InfoExtractor(object):
                  'protocol': entry_protocol,
                  'preference': preference,
              }]
+        audio_in_video_stream = {}
          last_info = {}
          last_media = {}
          for line in m3u8_doc.splitlines():
@@ -1233,25 +1336,32 @@ class InfoExtractor(object):
                  media = parse_m3u8_attributes(line)
                  media_type = media.get('TYPE')
                  if media_type in ('VIDEO', 'AUDIO'):
+                    group_id = media.get('GROUP-ID')
                      media_url = media.get('URI')
                      if media_url:
                          format_id = []
-                        for v in (media.get('GROUP-ID'), media.get('NAME')):
+                        for v in (group_id, media.get('NAME')):
                              if v:
                                  format_id.append(v)
-                        formats.append({
+                        f = {
                              'format_id': '-'.join(format_id),
                              'url': format_url(media_url),
                              'language': media.get('LANGUAGE'),
-                            'vcodec': 'none' if media_type == 'AUDIO' else None,
                              'ext': ext,
                              'protocol': entry_protocol,
                              'preference': preference,
-                        })
+                        }
+                        if media_type == 'AUDIO':
+                            f['vcodec'] = 'none'
+                            if group_id and not audio_in_video_stream.get(group_id):
+                                audio_in_video_stream[group_id] = False
+                        formats.append(f)
                      else:
                          # When there is no URI in EXT-X-MEDIA let this tag's
                          # data be used by regular URI lines below
                          last_media = media
+                        if media_type == 'AUDIO' and group_id:
+                            audio_in_video_stream[group_id] = True
              elif line.startswith('#') or not line.strip():
                  continue
              else:
@@ -1295,6 +1405,9 @@ class InfoExtractor(object):
                          'abr': abr,
                      })
                  f.update(parse_codecs(last_info.get('CODECS')))
+                if audio_in_video_stream.get(last_info.get('AUDIO')) is False and f['vcodec'] != 'none':
+                    # TODO: update acodec for audio only formats with the same GROUP-ID
+                    f['acodec'] = 'none'
                  formats.append(f)
                  last_info = {}
                  last_media = {}
@@ -1614,21 +1727,16 @@ class InfoExtractor(object):
                  segment_template = element.find(_add_ns('SegmentTemplate'))
                  if segment_template is not None:
                      extract_common(segment_template)
-                    media_template = segment_template.get('media')
-                    if media_template:
-                        ms_info['media_template'] = media_template
+                    media = segment_template.get('media')
+                    if media:
+                        ms_info['media'] = media
                      initialization = segment_template.get('initialization')
                      if initialization:
-                        ms_info['initialization_url'] = initialization
+                        ms_info['initialization'] = initialization
                      else:
                          extract_Initialization(segment_template)
              return ms_info
  
-        def combine_url(base_url, target_url):
-            if re.match(r'^https?://', target_url):
-                return target_url
-            return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
-
          mpd_duration = parse_duration(mpd_doc.get('mediaPresentationDuration'))
          formats = []
          for period in mpd_doc.findall(_add_ns('Period')):
@@ -1668,6 +1776,7 @@ class InfoExtractor(object):
                          lang = representation_attrib.get('lang')
                          url_el = representation.find(_add_ns('BaseURL'))
                          filesize = int_or_none(url_el.attrib.get('{http://youtube.com/yt/2012/10/10}contentLength') if url_el is not None else None)
+                        bandwidth = int_or_none(representation_attrib.get('bandwidth'))
                          f = {
                              'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,
                              'url': base_url,
@@ -1675,23 +1784,41 @@ class InfoExtractor(object):
                              'ext': mimetype2ext(mime_type),
                              'width': int_or_none(representation_attrib.get('width')),
                              'height': int_or_none(representation_attrib.get('height')),
-                            'tbr': int_or_none(representation_attrib.get('bandwidth'), 1000),
+                            'tbr': int_or_none(bandwidth, 1000),
                              'asr': int_or_none(representation_attrib.get('audioSamplingRate')),
                              'fps': int_or_none(representation_attrib.get('frameRate')),
-                            'vcodec': 'none' if content_type == 'audio' else representation_attrib.get('codecs'),
-                            'acodec': 'none' if content_type == 'video' else representation_attrib.get('codecs'),
                              'language': lang if lang not in ('mul', 'und', 'zxx', 'mis') else None,
                              'format_note': 'DASH %s' % content_type,
                              'filesize': filesize,
                          }
+                        f.update(parse_codecs(representation_attrib.get('codecs')))
                          representation_ms_info = extract_multisegment_info(representation, adaption_set_ms_info)
-                        if 'segment_urls' not in representation_ms_info and 'media_template' in representation_ms_info:
  
-                            media_template = representation_ms_info['media_template']
-                            media_template = media_template.replace('$RepresentationID$', representation_id)
-                            media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template)
-                            media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)
-                            media_template.replace('$$', '$')
+                        def prepare_template(template_name, identifiers):
+                            t = representation_ms_info[template_name]
+                            t = t.replace('$RepresentationID$', representation_id)
+                            t = re.sub(r'\$(%s)\$' % '|'.join(identifiers), r'%(\1)d', t)
+                            t = re.sub(r'\$(%s)%%([^$]+)\$' % '|'.join(identifiers), r'%(\1)\2', t)
+                            t.replace('$$', '$')
+                            return t
+
+                        # @initialization is a regular template like @media one
+                        # so it should be handled just the same way (see
+                        # https://github.com/rg3/youtube-dl/issues/11605)
+                        if 'initialization' in representation_ms_info:
+                            initialization_template = prepare_template(
+                                'initialization',
+                                # As per [1, 5.3.9.4.2, Table 15, page 54] $Number$ and
+                                # $Time$ shall not be included for @initialization thus
+                                # only $Bandwidth$ remains
+                                ('Bandwidth', ))
+                            representation_ms_info['initialization_url'] = initialization_template % {
+                                'Bandwidth': bandwidth,
+                            }
+
+                        if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
+
+                            media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
  
                              # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
                              # can't be used at the same time
@@ -1703,7 +1830,7 @@ class InfoExtractor(object):
                                  representation_ms_info['fragments'] = [{
                                      'url': media_template % {
                                          'Number': segment_number,
-                                        'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
+                                        'Bandwidth': bandwidth,
                                      },
                                      'duration': segment_duration,
                                  } for segment_number in range(
@@ -1721,7 +1848,7 @@ class InfoExtractor(object):
                                  def add_segment_url():
                                      segment_url = media_template % {
                                          'Time': segment_time,
-                                        'Bandwidth': int_or_none(representation_attrib.get('bandwidth')),
+                                        'Bandwidth': bandwidth,
                                          'Number': segment_number,
                                      }
                                      representation_ms_info['fragments'].append({
@@ -1744,14 +1871,16 @@ class InfoExtractor(object):
                              # Example: https://www.youtube.com/watch?v=iXZV5uAYMJI
                              # or any YouTube dashsegments video
                              fragments = []
-                            s_num = 0
-                            for segment_url in representation_ms_info['segment_urls']:
-                                s = representation_ms_info['s'][s_num]
+                            segment_index = 0
+                            timescale = representation_ms_info['timescale']
+                            for s in representation_ms_info['s']:
+                                duration = float_or_none(s['d'], timescale)
                                  for r in range(s.get('r', 0) + 1):
                                      fragments.append({
-                                        'url': segment_url,
-                                        'duration': float_or_none(s['d'], representation_ms_info['timescale']),
+                                        'url': representation_ms_info['segment_urls'][segment_index],
+                                        'duration': duration,
                                      })
+                                    segment_index += 1
                              representation_ms_info['fragments'] = fragments
                          # NB: MPD manifest may contain direct URLs to unfragmented media.
                          # No fragments key is present in this case.
@@ -1761,13 +1890,13 @@ class InfoExtractor(object):
                                  'protocol': 'http_dash_segments',
                              })
                              if 'initialization_url' in representation_ms_info:
-                                initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id)
+                                initialization_url = representation_ms_info['initialization_url']
                                  if not f.get('url'):
                                      f['url'] = initialization_url
                                  f['fragments'].append({'url': initialization_url})
                              f['fragments'].extend(representation_ms_info['fragments'])
                              for fragment in f['fragments']:
-                                fragment['url'] = combine_url(base_url, fragment['url'])
+                                fragment['url'] = urljoin(base_url, fragment['url'])
                          try:
                              existing_format = next(
                                  fo for fo in formats
@@ -1881,7 +2010,7 @@ class InfoExtractor(object):
                  })
          return formats
  
-    def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8'):
+    def _parse_html5_media_entries(self, base_url, webpage, video_id, m3u8_id=None, m3u8_entry_protocol='m3u8', mpd_id=None):
          def absolute_url(video_url):
              return compat_urlparse.urljoin(base_url, video_url)
  
@@ -1898,11 +2027,16 @@ class InfoExtractor(object):
  
          def _media_formats(src, cur_media_type):
              full_url = absolute_url(src)
-            if determine_ext(full_url) == 'm3u8':
+            ext = determine_ext(full_url)
+            if ext == 'm3u8':
                  is_plain_url = False
                  formats = self._extract_m3u8_formats(
                      full_url, video_id, ext='mp4',
                      entry_protocol=m3u8_entry_protocol, m3u8_id=m3u8_id)
+            elif ext == 'mpd':
+                is_plain_url = False
+                formats = self._extract_mpd_formats(
+                    full_url, video_id, mpd_id=mpd_id)
              else:
                  is_plain_url = True
                  formats = [{
@@ -1915,7 +2049,12 @@ class InfoExtractor(object):
          media_tags = [(media_tag, media_type, '')
                        for media_tag, media_type
                        in re.findall(r'(?s)(<(video|audio)[^>]*/>)', webpage)]
-        media_tags.extend(re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage))
+        media_tags.extend(re.findall(
+            # We only allow video|audio followed by a whitespace or '>'.
+            # Allowing more characters may end up in significant slow down (see
+            # https://github.com/rg3/youtube-dl/issues/11979, example URL:
+            # http://www.porntrex.com/maps/videositemap.xml).
+            r'(?s)(<(?P<tag>video|audio)(?:\s+[^>]*)?>)(.*?)</(?P=tag)>', webpage))
          for media_tag, media_type, media_content in media_tags:
              media_info = {
                  'formats': [],
@@ -1955,10 +2094,13 @@ class InfoExtractor(object):
                  entries.append(media_info)
          return entries
  
-    def _extract_akamai_formats(self, manifest_url, video_id):
+    def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
          formats = []
          hdcore_sign = 'hdcore=3.7.0'
-        f4m_url = re.sub(r'(https?://.+?)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+        f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+        hds_host = hosts.get('hds')
+        if hds_host:
+            f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
          if 'hdcore=' not in f4m_url:
              f4m_url += ('&' if '?' in f4m_url else '?') + hdcore_sign
          f4m_formats = self._extract_f4m_formats(
@@ -1966,7 +2108,10 @@ class InfoExtractor(object):
          for entry in f4m_formats:
              entry.update({'extra_param_to_segment_url': hdcore_sign})
          formats.extend(f4m_formats)
-        m3u8_url = re.sub(r'(https?://.+?)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+        m3u8_url = re.sub(r'(https?://[^/]+)/z/', r'\1/i/', manifest_url).replace('/manifest.f4m', '/master.m3u8')
+        hls_host = hosts.get('hls')
+        if hls_host:
+            m3u8_url = re.sub(r'(https?://)[^/]+', r'\1' + hls_host, m3u8_url)
          formats.extend(self._extract_m3u8_formats(
              m3u8_url, video_id, 'mp4', 'm3u8_native',
              m3u8_id='hls', fatal=False))
@@ -2015,6 +2160,123 @@ class InfoExtractor(object):
                      })
          return formats
  
+    @staticmethod
+    def _find_jwplayer_data(webpage):
+        mobj = re.search(
+            r'jwplayer\((?P<quote>[\'"])[^\'" ]+(?P=quote)\)\.setup\s*\((?P<options>[^)]+)\)',
+            webpage)
+        if mobj:
+            return mobj.group('options')
+
+    def _extract_jwplayer_data(self, webpage, video_id, *args, **kwargs):
+        jwplayer_data = self._parse_json(
+            self._find_jwplayer_data(webpage), video_id,
+            transform_source=js_to_json)
+        return self._parse_jwplayer_data(
+            jwplayer_data, video_id, *args, **kwargs)
+
+    def _parse_jwplayer_data(self, jwplayer_data, video_id=None, require_title=True,
+                             m3u8_id=None, mpd_id=None, rtmp_params=None, base_url=None):
+        # JWPlayer backward compatibility: flattened playlists
+        # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/api/config.js#L81-L96
+        if 'playlist' not in jwplayer_data:
+            jwplayer_data = {'playlist': [jwplayer_data]}
+
+        entries = []
+
+        # JWPlayer backward compatibility: single playlist item
+        # https://github.com/jwplayer/jwplayer/blob/v7.7.0/src/js/playlist/playlist.js#L10
+        if not isinstance(jwplayer_data['playlist'], list):
+            jwplayer_data['playlist'] = [jwplayer_data['playlist']]
+
+        for video_data in jwplayer_data['playlist']:
+            # JWPlayer backward compatibility: flattened sources
+            # https://github.com/jwplayer/jwplayer/blob/v7.4.3/src/js/playlist/item.js#L29-L35
+            if 'sources' not in video_data:
+                video_data['sources'] = [video_data]
+
+            this_video_id = video_id or video_data['mediaid']
+
+            formats = []
+            for source in video_data['sources']:
+                source_url = self._proto_relative_url(source['file'])
+                if base_url:
+                    source_url = compat_urlparse.urljoin(base_url, source_url)
+                source_type = source.get('type') or ''
+                ext = mimetype2ext(source_type) or determine_ext(source_url)
+                if source_type == 'hls' or ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        source_url, this_video_id, 'mp4', 'm3u8_native', m3u8_id=m3u8_id, fatal=False))
+                elif ext == 'mpd':
+                    formats.extend(self._extract_mpd_formats(
+                        source_url, this_video_id, mpd_id=mpd_id, fatal=False))
+                # https://github.com/jwplayer/jwplayer/blob/master/src/js/providers/default.js#L67
+                elif source_type.startswith('audio') or ext in ('oga', 'aac', 'mp3', 'mpeg', 'vorbis'):
+                    formats.append({
+                        'url': source_url,
+                        'vcodec': 'none',
+                        'ext': ext,
+                    })
+                else:
+                    height = int_or_none(source.get('height'))
+                    if height is None:
+                        # Often no height is provided but there is a label in
+                        # format like 1080p.
+                        height = int_or_none(self._search_regex(
+                            r'^(\d{3,})[pP]$', source.get('label') or '',
+                            'height', default=None))
+                    a_format = {
+                        'url': source_url,
+                        'width': int_or_none(source.get('width')),
+                        'height': height,
+                        'ext': ext,
+                    }
+                    if source_url.startswith('rtmp'):
+                        a_format['ext'] = 'flv'
+
+                        # See com/longtailvideo/jwplayer/media/RTMPMediaProvider.as
+                        # of jwplayer.flash.swf
+                        rtmp_url_parts = re.split(
+                            r'((?:mp4|mp3|flv):)', source_url, 1)
+                        if len(rtmp_url_parts) == 3:
+                            rtmp_url, prefix, play_path = rtmp_url_parts
+                            a_format.update({
+                                'url': rtmp_url,
+                                'play_path': prefix + play_path,
+                            })
+                        if rtmp_params:
+                            a_format.update(rtmp_params)
+                    formats.append(a_format)
+            self._sort_formats(formats)
+
+            subtitles = {}
+            tracks = video_data.get('tracks')
+            if tracks and isinstance(tracks, list):
+                for track in tracks:
+                    if track.get('kind') != 'captions':
+                        continue
+                    track_url = urljoin(base_url, track.get('file'))
+                    if not track_url:
+                        continue
+                    subtitles.setdefault(track.get('label') or 'en', []).append({
+                        'url': self._proto_relative_url(track_url)
+                    })
+
+            entries.append({
+                'id': this_video_id,
+                'title': video_data['title'] if require_title else video_data.get('title'),
+                'description': video_data.get('description'),
+                'thumbnail': self._proto_relative_url(video_data.get('image')),
+                'timestamp': int_or_none(video_data.get('pubdate')),
+                'duration': float_or_none(jwplayer_data.get('duration') or video_data.get('duration')),
+                'subtitles': subtitles,
+                'formats': formats,
+            })
+        if len(entries) == 1:
+            return entries[0]
+        else:
+            return self.playlist_result(entries)
+
      def _live_title(self, name):
          """ Generate the title for a live video """
          now = datetime.datetime.now()