Update upstream source from tag 'upstream/2020.05.08'

[youtubedl] / youtube_dl / extractor / vzaar.py
diff --git a/youtube_dl/extractor/vzaar.py b/youtube_dl/extractor/vzaar.py

index b270f08d1e8796889ac54e4885c2234e4e7eb46b..b7d02fca3ac426679b21f2ba7213e2ac6fdb701b 100644 (file)
--- a/youtube_dl/extractor/vzaar.py
+++ b/youtube_dl/extractor/vzaar.py
@@ -1,16 +1,22 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
  # coding: utf-8
  from __future__ import unicode_literals
  
+import re
+
  from .common import InfoExtractor
  from .common import InfoExtractor
+from ..compat import compat_str
  from ..utils import (
      int_or_none,
      float_or_none,
  from ..utils import (
      int_or_none,
      float_or_none,
+    unified_timestamp,
+    url_or_none,
  )
  
  
  class VzaarIE(InfoExtractor):
      _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
      _TESTS = [{
  )
  
  
  class VzaarIE(InfoExtractor):
      _VALID_URL = r'https?://(?:(?:www|view)\.)?vzaar\.com/(?:videos/)?(?P<id>\d+)'
      _TESTS = [{
+        # HTTP and HLS
          'url': 'https://vzaar.com/videos/1152805',
          'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
          'info_dict': {
          'url': 'https://vzaar.com/videos/1152805',
          'md5': 'bde5ddfeb104a6c56a93a06b04901dbf',
          'info_dict': {
@@ -26,30 +32,81 @@ class VzaarIE(InfoExtractor):
              'ext': 'mp3',
              'title': 'MP3',
          },
              'ext': 'mp3',
              'title': 'MP3',
          },
+    }, {
+        # hlsAes = true
+        'url': 'https://view.vzaar.com/11379930/player',
+        'info_dict': {
+            'id': '11379930',
+            'ext': 'mp4',
+            'title': 'Videoaula',
+        },
+        'params': {
+            # m3u8 download
+            'skip_download': True,
+        },
+    }, {
+        # with null videoTitle
+        'url': 'https://view.vzaar.com/20313539/download',
+        'only_matching': True,
      }]
  
      }]
  
+    @staticmethod
+    def _extract_urls(webpage):
+        return re.findall(
+            r'<iframe[^>]+src=["\']((?:https?:)?//(?:view\.vzaar\.com)/[0-9]+)',
+            webpage)
+
      def _real_extract(self, url):
          video_id = self._match_id(url)
          video_data = self._download_json(
              'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
      def _real_extract(self, url):
          video_id = self._match_id(url)
          video_data = self._download_json(
              'http://view.vzaar.com/v2/%s/video' % video_id, video_id)
-        source_url = video_data['sourceUrl']
  
  
-        info = {
+        title = video_data.get('videoTitle') or video_id
+
+        formats = []
+
+        source_url = url_or_none(video_data.get('sourceUrl'))
+        if source_url:
+            f = {
+                'url': source_url,
+                'format_id': 'http',
+                'preference': 1,
+            }
+            if 'audio' in source_url:
+                f.update({
+                    'vcodec': 'none',
+                    'ext': 'mp3',
+                })
+            else:
+                f.update({
+                    'width': int_or_none(video_data.get('width')),
+                    'height': int_or_none(video_data.get('height')),
+                    'ext': 'mp4',
+                    'fps': float_or_none(video_data.get('fps')),
+                })
+            formats.append(f)
+
+        video_guid = video_data.get('guid')
+        usp = video_data.get('usp')
+        if video_data.get('uspEnabled') and isinstance(video_guid, compat_str) and isinstance(usp, dict):
+            hls_aes = video_data.get('hlsAes')
+            qs = '&'.join('%s=%s' % (k, v) for k, v in usp.items())
+            url_templ = 'http://%%s.vzaar.com/v5/usp%s/%s/%s.ism%%s?' % ('aes' if hls_aes else '', video_guid, video_id)
+            m3u8_formats = self._extract_m3u8_formats(
+                url_templ % ('fable', '/.m3u8') + qs, video_id, 'mp4', 'm3u8_native',
+                m3u8_id='hls', fatal=False)
+            if hls_aes:
+                for f in m3u8_formats:
+                    f['_decryption_key_url'] = url_templ % ('goose', '') + qs
+            formats.extend(m3u8_formats)
+
+        self._sort_formats(formats)
+
+        return {
              'id': video_id,
              'id': video_id,
-            'title': video_data['videoTitle'],
-            'url': source_url,
+            'title': title,
              'thumbnail': self._proto_relative_url(video_data.get('poster')),
              'duration': float_or_none(video_data.get('videoDuration')),
              'thumbnail': self._proto_relative_url(video_data.get('poster')),
              'duration': float_or_none(video_data.get('videoDuration')),
+            'timestamp': unified_timestamp(video_data.get('ts')),
+            'formats': formats,
          }
          }
-        if 'audio' in source_url:
-            info.update({
-                'vcodec': 'none',
-                'ext': 'mp3',
-            })
-        else:
-            info.update({
-                'width': int_or_none(video_data.get('width')),
-                'height': int_or_none(video_data.get('height')),
-                'ext': 'mp4',
-            })
-        return info