Adapt changes from Andreas Tille for porting to Salsa.

[youtubedl] / youtube_dl / extractor / theplatform.py
diff --git a/youtube_dl/extractor/theplatform.py b/youtube_dl/extractor/theplatform.py

index 192d8fa292e0a6f360929590274d06b4745fb8f6..b1a985ff6c12368347d98d95beed6a042e70093c 100644 (file)
--- a/youtube_dl/extractor/theplatform.py
+++ b/youtube_dl/extractor/theplatform.py
@@ -80,14 +80,33 @@ class ThePlatformBaseIE(OnceIE):
                      'url': src,
                  })
  
                      'url': src,
                  })
  
+        duration = info.get('duration')
+        tp_chapters = info.get('chapters', [])
+        chapters = []
+        if tp_chapters:
+            def _add_chapter(start_time, end_time):
+                start_time = float_or_none(start_time, 1000)
+                end_time = float_or_none(end_time, 1000)
+                if start_time is None or end_time is None:
+                    return
+                chapters.append({
+                    'start_time': start_time,
+                    'end_time': end_time,
+                })
+
+            for chapter in tp_chapters[:-1]:
+                _add_chapter(chapter.get('startTime'), chapter.get('endTime'))
+            _add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
+
          return {
              'title': info['title'],
              'subtitles': subtitles,
              'description': info['description'],
              'thumbnail': info['defaultThumbnailUrl'],
          return {
              'title': info['title'],
              'subtitles': subtitles,
              'description': info['description'],
              'thumbnail': info['defaultThumbnailUrl'],
-            'duration': int_or_none(info.get('duration'), 1000),
+            'duration': float_or_none(duration, 1000),
              'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
              'uploader': info.get('billingCode'),
              'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
              'uploader': info.get('billingCode'),
+            'chapters': chapters,
          }
  
      def _extract_theplatform_metadata(self, path, video_id):
          }
  
      def _extract_theplatform_metadata(self, path, video_id):
@@ -179,10 +198,12 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
          if m:
              return [m.group('url')]
  
          if m:
              return [m.group('url')]
  
+        # Are whitespaces ignored in URLs?
+        # https://github.com/rg3/youtube-dl/issues/12044
          matches = re.findall(
          matches = re.findall(
-            r'<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
+            r'(?s)<(?:iframe|script)[^>]+src=(["\'])((?:https?:)?//player\.theplatform\.com/p/.+?)\1', webpage)
          if matches:
          if matches:
-            return list(zip(*matches))[1]
+            return [re.sub(r'\s', '', list(zip(*matches))[1][0])]
  
      @staticmethod
      def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
  
      @staticmethod
      def _sign_url(url, sig_key, sig_secret, life=600, include_qs=False):
@@ -195,7 +216,7 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):
          def hex_to_bytes(hex):
              return binascii.a2b_hex(hex.encode('ascii'))
  
          def hex_to_bytes(hex):
              return binascii.a2b_hex(hex.encode('ascii'))
  
-        relative_path = re.match(r'https?://link.theplatform.com/s/([^?]+)', url).group(1)
+        relative_path = re.match(r'https?://link\.theplatform\.com/s/([^?]+)', url).group(1)
          clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
          checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
          sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
          clear_text = hex_to_bytes(flags + expiration_date + str_to_hex(relative_path))
          checksum = hmac.new(sig_key.encode('ascii'), clear_text, hashlib.sha1).hexdigest()
          sig = flags + expiration_date + checksum + str_to_hex(sig_secret)
@@ -306,9 +327,10 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
          },
      }]
  
          },
      }]
  
-    def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}):
+    def _extract_feed_info(self, provider_id, feed_id, filter_query, video_id, custom_fields=None, asset_types_query={}, account_id=None):
          real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
          entry = self._download_json(real_url, video_id)['entries'][0]
          real_url = self._URL_TEMPLATE % (self.http_scheme(), provider_id, feed_id, filter_query)
          entry = self._download_json(real_url, video_id)['entries'][0]
+        main_smil_url = 'http://link.theplatform.com/s/%s/media/guid/%d/%s' % (provider_id, account_id, entry['guid']) if account_id else None
  
          formats = []
          subtitles = {}
  
          formats = []
          subtitles = {}
@@ -333,7 +355,7 @@ class ThePlatformFeedIE(ThePlatformBaseIE):
                  if asset_type in asset_types_query:
                      query.update(asset_types_query[asset_type])
                  cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
                  if asset_type in asset_types_query:
                      query.update(asset_types_query[asset_type])
                  cur_formats, cur_subtitles = self._extract_theplatform_smil(update_url_query(
-                    smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
+                    main_smil_url or smil_url, query), video_id, 'Downloading SMIL data for %s' % asset_type)
                  formats.extend(cur_formats)
                  subtitles = self._merge_subtitles(subtitles, cur_subtitles)
  
                  formats.extend(cur_formats)
                  subtitles = self._merge_subtitles(subtitles, cur_subtitles)