New upstream version 2020.03.24

[youtubedl] / youtube_dl / extractor / peertube.py
diff --git a/youtube_dl/extractor/peertube.py b/youtube_dl/extractor/peertube.py

index d3a83ea2bb5215e34e72ad85bf99867697e2e1b2..48fb9541693c35878317f22ed9dd6e2da4412ced 100644 (file)
--- a/youtube_dl/extractor/peertube.py
+++ b/youtube_dl/extractor/peertube.py
@@ -8,6 +8,7 @@ from ..compat import compat_str
  from ..utils import (
      int_or_none,
      parse_resolution,
+    str_or_none,
      try_get,
      unified_timestamp,
      url_or_none,
@@ -415,6 +416,7 @@ class PeerTubeIE(InfoExtractor):
                              peertube\.cpy\.re
                          )'''
      _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+    _API_BASE = 'https://%s/api/v1/videos/%s/%s'
      _VALID_URL = r'''(?x)
                      (?:
                          peertube:(?P<host>[^:]+):|
@@ -423,26 +425,30 @@ class PeerTubeIE(InfoExtractor):
                      (?P<id>%s)
                      ''' % (_INSTANCES_RE, _UUID_RE)
      _TESTS = [{
-        'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
-        'md5': '80f24ff364cc9d333529506a263e7feb',
+        'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
+        'md5': '9bed8c0137913e17b86334e5885aacff',
          'info_dict': {
-            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
+            'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
              'ext': 'mp4',
-            'title': 'wow',
-            'description': 'wow such video, so gif',
+            'title': 'What is PeerTube?',
+            'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
              'thumbnail': r're:https?://.*\.(?:jpg|png)',
-            'timestamp': 1519297480,
-            'upload_date': '20180222',
-            'uploader': 'Luclu7',
-            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
-            'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7',
-            'license': 'Unknown',
-            'duration': 3,
+            'timestamp': 1538391166,
+            'upload_date': '20181001',
+            'uploader': 'Framasoft',
+            'uploader_id': '3',
+            'uploader_url': 'https://framatube.org/accounts/framasoft',
+            'channel': 'Les vidéos de Framasoft',
+            'channel_id': '2',
+            'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
+            'language': 'en',
+            'license': 'Attribution - Share Alike',
+            'duration': 113,
              'view_count': int,
              'like_count': int,
              'dislike_count': int,
-            'tags': list,
-            'categories': list,
+            'tags': ['framasoft', 'peertube'],
+            'categories': ['Science & Technology'],
          }
      }, {
          'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
@@ -484,13 +490,38 @@ class PeerTubeIE(InfoExtractor):
                  entries = [peertube_url]
          return entries
  
+    def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
+        return self._download_json(
+            self._API_BASE % (host, video_id, path), video_id,
+            note=note, errnote=errnote, fatal=fatal)
+
+    def _get_subtitles(self, host, video_id):
+        captions = self._call_api(
+            host, video_id, 'captions', note='Downloading captions JSON',
+            fatal=False)
+        if not isinstance(captions, dict):
+            return
+        data = captions.get('data')
+        if not isinstance(data, list):
+            return
+        subtitles = {}
+        for e in data:
+            language_id = try_get(e, lambda x: x['language']['id'], compat_str)
+            caption_url = urljoin('https://%s' % host, e.get('captionPath'))
+            if not caption_url:
+                continue
+            subtitles.setdefault(language_id or 'en', []).append({
+                'url': caption_url,
+            })
+        return subtitles
+
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
          host = mobj.group('host') or mobj.group('host_2')
          video_id = mobj.group('id')
  
-        video = self._download_json(
-            'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
+        video = self._call_api(
+            host, video_id, '', note='Downloading video JSON')
  
          title = video['name']
  
@@ -513,10 +544,28 @@ class PeerTubeIE(InfoExtractor):
              formats.append(f)
          self._sort_formats(formats)
  
-        def account_data(field):
-            return try_get(video, lambda x: x['account'][field], compat_str)
+        full_description = self._call_api(
+            host, video_id, 'description', note='Downloading description JSON',
+            fatal=False)
+
+        description = None
+        if isinstance(full_description, dict):
+            description = str_or_none(full_description.get('description'))
+        if not description:
+            description = video.get('description')
+
+        subtitles = self.extract_subtitles(host, video_id)
+
+        def data(section, field, type_):
+            return try_get(video, lambda x: x[section][field], type_)
+
+        def account_data(field, type_):
+            return data('account', field, type_)
+
+        def channel_data(field, type_):
+            return data('channel', field, type_)
  
-        category = try_get(video, lambda x: x['category']['label'], compat_str)
+        category = data('category', 'label', compat_str)
          categories = [category] if category else None
  
          nsfw = video.get('nsfw')
@@ -528,14 +577,17 @@ class PeerTubeIE(InfoExtractor):
          return {
              'id': video_id,
              'title': title,
-            'description': video.get('description'),
+            'description': description,
              'thumbnail': urljoin(url, video.get('thumbnailPath')),
              'timestamp': unified_timestamp(video.get('publishedAt')),
-            'uploader': account_data('displayName'),
-            'uploader_id': account_data('uuid'),
-            'uploder_url': account_data('url'),
-            'license': try_get(
-                video, lambda x: x['licence']['label'], compat_str),
+            'uploader': account_data('displayName', compat_str),
+            'uploader_id': str_or_none(account_data('id', int)),
+            'uploader_url': url_or_none(account_data('url', compat_str)),
+            'channel': channel_data('displayName', compat_str),
+            'channel_id': str_or_none(channel_data('id', int)),
+            'channel_url': url_or_none(channel_data('url', compat_str)),
+            'language': data('language', 'id', compat_str),
+            'license': data('licence', 'label', compat_str),
              'duration': int_or_none(video.get('duration')),
              'view_count': int_or_none(video.get('views')),
              'like_count': int_or_none(video.get('likes')),
@@ -544,4 +596,5 @@ class PeerTubeIE(InfoExtractor):
              'tags': try_get(video, lambda x: x['tags'], list),
              'categories': categories,
              'formats': formats,
+            'subtitles': subtitles
          }