Update upstream source from tag 'upstream/2019.09.28'

[youtubedl] / youtube_dl / extractor / wistia.py
diff --git a/youtube_dl/extractor/wistia.py b/youtube_dl/extractor/wistia.py

index c634b8decddf8fdb15649b05e8f49ad9efc36254..fa142b974ae873a7883ede4894421f4f8a010549 100644 (file)
--- a/youtube_dl/extractor/wistia.py
+++ b/youtube_dl/extractor/wistia.py
@@ -1,15 +1,18 @@
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  from ..utils import (
      ExtractorError,
      int_or_none,
      float_or_none,
+    unescapeHTML,
  )
  
  
  class WistiaIE(InfoExtractor):
-    _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.net/embed/iframe/)(?P<id>[a-z0-9]+)'
+    _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]+)'
      _API_URL = 'http://fast.wistia.com/embed/medias/%s.json'
      _IFRAME_URL = 'http://fast.wistia.net/embed/iframe/%s'
  
@@ -32,8 +35,33 @@ class WistiaIE(InfoExtractor):
          # with hls video
          'url': 'wistia:807fafadvk',
          'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
+        'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
+        'only_matching': True,
      }]
  
+    @staticmethod
+    def _extract_url(webpage):
+        match = re.search(
+            r'<(?:meta[^>]+?content|iframe[^>]+?src)=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/iframe/.+?)\1', webpage)
+        if match:
+            return unescapeHTML(match.group('url'))
+
+        match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
+        if match:
+            return 'wistia:%s' % match.group('id')
+
+        match = re.search(
+            r'''(?sx)
+                <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
+                <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
+            ''', webpage)
+        if match:
+            return 'wistia:%s' % match.group('id')
+
      def _real_extract(self, url):
          video_id = self._match_id(url)