Update upstream source from tag 'upstream/2020.05.08'

[youtubedl] / youtube_dl / extractor / theplatform.py
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py

index de236bbba899837f87a748cb7aab6cb8182b77c4..07055513a1c7f91df0af1c12c1df5bfeaaeb115f 100644 (file)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -32,14 +32,24 @@ _x = lambda p: xpath_with_ns(p, {'smil': default_ns})
  
  
  class ThePlatformBaseIE(OnceIE):
  
  
  class ThePlatformBaseIE(OnceIE):
+    _TP_TLD = 'com'
+
      def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
          meta = self._download_xml(
              smil_url, video_id, note=note, query={'format': 'SMIL'},
              headers=self.geo_verification_headers())
          error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
      def _extract_theplatform_smil(self, smil_url, video_id, note='Downloading SMIL data'):
          meta = self._download_xml(
              smil_url, video_id, note=note, query={'format': 'SMIL'},
              headers=self.geo_verification_headers())
          error_element = find_xpath_attr(meta, _x('.//smil:ref'), 'src')
-        if error_element is not None and error_element.attrib['src'].startswith(
-                'http://link.theplatform.com/s/errorFiles/Unavailable.'):
-            raise ExtractorError(error_element.attrib['abstract'], expected=True)
+        if error_element is not None:
+            exception = find_xpath_attr(
+                error_element, _x('.//smil:param'), 'name', 'exception')
+            if exception is not None:
+                if exception.get('value') == 'GeoLocationBlocked':
+                    self.raise_geo_restricted(error_element.attrib['abstract'])
+                elif error_element.attrib['src'].startswith(
+                        'http://link.theplatform.%s/s/errorFiles/Unavailable.'
+                        % self._TP_TLD):
+                    raise ExtractorError(
+                        error_element.attrib['abstract'], expected=True)
  
          smil_formats = self._parse_smil_formats(
              meta, smil_url, video_id, namespace=default_ns,
  
          smil_formats = self._parse_smil_formats(
              meta, smil_url, video_id, namespace=default_ns,
@@ -66,7 +76,7 @@ class ThePlatformBaseIE(OnceIE):
          return formats, subtitles
  
      def _download_theplatform_metadata(self, path, video_id):
          return formats, subtitles
  
      def _download_theplatform_metadata(self, path, video_id):
-        info_url = 'http://link.theplatform.com/s/%s?format=preview' % path
+        info_url = 'http://link.theplatform.%s/s/%s?format=preview' % (self._TP_TLD, path)
          return self._download_json(info_url, video_id)
  
      def _parse_theplatform_metadata(self, info):
          return self._download_json(info_url, video_id)
  
      def _parse_theplatform_metadata(self, info):
@@ -199,7 +209,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
              return [m.group('url')]
  
          # Are whitespaces ignored in URLs?
              return [m.group('url')]
  
          # Are whitespaces ignored in URLs?
-        # https://github.com/rg3/youtube-dl/issues/12044
+        # https://github.com/ytdl-org/youtube-dl/issues/12044
          matches = re.findall(
              r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
          if matches:
          matches = re.findall(
              r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
          if matches:
@@ -216,7 +226,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
          def hex_to_bytes(hex):
              return binascii.a2b_hex(hex.encode('ascii'))
  
          def hex_to_bytes(hex):
              return binascii.a2b_hex(hex.encode('ascii'))
  
-        relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
+        relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
          clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
          checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
          sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
          clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
          checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
          sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
@@ -261,7 +271,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
  
          if smuggled_data.get('force_smil_url', False):
              smil_url = url
  
          if smuggled_data.get('force_smil_url', False):
              smil_url = url
-        # Explicitly specified SMIL (see https://github.com/rg3/youtube-dl/issues/7385)
+        # Explicitly specified SMIL (see https://github.com/ytdl-org/youtube-dl/issues/7385)
          elif '/guid/' in url:
              headers = {}
              source_url = smuggled_data.get('source_url')
          elif '/guid/' in url:
              headers = {}
              source_url = smuggled_data.get('source_url')
@@ -308,7 +318,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
  
  class ThePlatformFeedIE(ThePlatformBaseIE):
      _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
  
  class ThePlatformFeedIE(ThePlatformBaseIE):
      _URL_TEMPLATE = '%s//feed.theplatform.com/f/%s/%s?form=json&%s'
-    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[\w-]+))'
+    _VALID_URL = r'https?://feed\.theplatform\.com/f/(?P<provider_id>[^/]+)/(?P<feed_id>[^?/]+)\?(?:[^&]+&)*(?P<filter>by(?:Gui|I)d=(?P<id>[^&]+))'
      _TESTS = [{
          # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
          'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
      _TESTS = [{
          # From http://player.theplatform.com/p/7wvmTC/MSNBCEmbeddedOffSite?guid=n_hardball_5biden_140207
          'url': 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207',
@@ -325,12 +335,15 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
              'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
              'uploader': 'NBCU-NEWS',
          },
              'categories': ['MSNBC/Issues/Democrats', 'MSNBC/Issues/Elections/Election 2016'],
              'uploader': 'NBCU-NEWS',
          },
+    }, {
+        'url': 'http://feed.theplatform.com/f/2E2eJC/nnd_NBCNews?byGuid=nn_netcast_180306.Copy.01',
+        'only_matching': True,
      }]
  
      def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
          real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
          entry = self._download_json(real_url, video_id)['entries'][0]
      }]
  
      def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
          real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
          entry = self._download_json(real_url, video_id)['entries'][0]
-        main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
+        main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else entry.get('plmedia$publicUrl')
  
          formats = []
          subtitles = {}
  
          formats = []
          subtitles = {}
@@ -343,7 +356,8 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
              if first_video_id is None:
                  first_video_id = cur_video_id
                  duration = float_or_none(item.get('plfile$duration'))
              if first_video_id is None:
                  first_video_id = cur_video_id
                  duration = float_or_none(item.get('plfile$duration'))
-            for asset_type in item['plfile$assetTypes']:
+            file_asset_types = item.get('plfile$assetTypes') or compat_parse_qs(compat_urllib_parse_urlparse(smil_url).query)['assetTypes']
+            for asset_type in file_asset_types:
                  if asset_type in asset_types:
                      continue
                  asset_types.append(asset_type)
                  if asset_type in asset_types:
                      continue
                  asset_types.append(asset_type)