from ..utils import (
int_or_none,
parse_resolution,
+ str_or_none,
try_get,
unified_timestamp,
url_or_none,
peertube\.cpy\.re
)'''
_UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
+ _API_BASE = 'https://%s/api/v1/videos/%s/%s'
_VALID_URL = r'''(?x)
(?:
peertube:(?P<host>[^:]+):|
(?P<id>%s)
''' % (_INSTANCES_RE, _UUID_RE)
_TESTS = [{
- 'url': 'https://peertube.cpy.re/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
- 'md5': '80f24ff364cc9d333529506a263e7feb',
+ 'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
+ 'md5': '9bed8c0137913e17b86334e5885aacff',
'info_dict': {
- 'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
+ 'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
'ext': 'mp4',
- 'title': 'wow',
- 'description': 'wow such video, so gif',
+ 'title': 'What is PeerTube?',
+ 'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
'thumbnail': r're:https?://.*\.(?:jpg|png)',
- 'timestamp': 1519297480,
- 'upload_date': '20180222',
- 'uploader': 'Luclu7',
- 'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
- 'uploder_url': 'https://peertube.nsa.ovh/accounts/luclu7',
- 'license': 'Unknown',
- 'duration': 3,
+ 'timestamp': 1538391166,
+ 'upload_date': '20181001',
+ 'uploader': 'Framasoft',
+ 'uploader_id': '3',
+ 'uploader_url': 'https://framatube.org/accounts/framasoft',
+ 'channel': 'Les vidéos de Framasoft',
+ 'channel_id': '2',
+ 'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
+ 'language': 'en',
+ 'license': 'Attribution - Share Alike',
+ 'duration': 113,
'view_count': int,
'like_count': int,
'dislike_count': int,
- 'tags': list,
- 'categories': list,
+ 'tags': ['framasoft', 'peertube'],
+ 'categories': ['Science & Technology'],
}
}, {
'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
entries = [peertube_url]
return entries
+ def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
+ return self._download_json(
+ self._API_BASE % (host, video_id, path), video_id,
+ note=note, errnote=errnote, fatal=fatal)
+
+ def _get_subtitles(self, host, video_id):
+ captions = self._call_api(
+ host, video_id, 'captions', note='Downloading captions JSON',
+ fatal=False)
+ if not isinstance(captions, dict):
+ return
+ data = captions.get('data')
+ if not isinstance(data, list):
+ return
+ subtitles = {}
+ for e in data:
+ language_id = try_get(e, lambda x: x['language']['id'], compat_str)
+ caption_url = urljoin('https://%s' % host, e.get('captionPath'))
+ if not caption_url:
+ continue
+ subtitles.setdefault(language_id or 'en', []).append({
+ 'url': caption_url,
+ })
+ return subtitles
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
host = mobj.group('host') or mobj.group('host_2')
video_id = mobj.group('id')
- video = self._download_json(
- 'https://%s/api/v1/videos/%s' % (host, video_id), video_id)
+ video = self._call_api(
+ host, video_id, '', note='Downloading video JSON')
title = video['name']
formats.append(f)
self._sort_formats(formats)
- def account_data(field):
- return try_get(video, lambda x: x['account'][field], compat_str)
+ full_description = self._call_api(
+ host, video_id, 'description', note='Downloading description JSON',
+ fatal=False)
+
+ description = None
+ if isinstance(full_description, dict):
+ description = str_or_none(full_description.get('description'))
+ if not description:
+ description = video.get('description')
+
+ subtitles = self.extract_subtitles(host, video_id)
+
+ def data(section, field, type_):
+ return try_get(video, lambda x: x[section][field], type_)
+
+ def account_data(field, type_):
+ return data('account', field, type_)
+
+ def channel_data(field, type_):
+ return data('channel', field, type_)
- category = try_get(video, lambda x: x['category']['label'], compat_str)
+ category = data('category', 'label', compat_str)
categories = [category] if category else None
nsfw = video.get('nsfw')
return {
'id': video_id,
'title': title,
- 'description': video.get('description'),
+ 'description': description,
'thumbnail': urljoin(url, video.get('thumbnailPath')),
'timestamp': unified_timestamp(video.get('publishedAt')),
- 'uploader': account_data('displayName'),
- 'uploader_id': account_data('uuid'),
- 'uploder_url': account_data('url'),
- 'license': try_get(
- video, lambda x: x['licence']['label'], compat_str),
+ 'uploader': account_data('displayName', compat_str),
+ 'uploader_id': str_or_none(account_data('id', int)),
+ 'uploader_url': url_or_none(account_data('url', compat_str)),
+ 'channel': channel_data('displayName', compat_str),
+ 'channel_id': str_or_none(channel_data('id', int)),
+ 'channel_url': url_or_none(channel_data('url', compat_str)),
+ 'language': data('language', 'id', compat_str),
+ 'license': data('licence', 'label', compat_str),
'duration': int_or_none(video.get('duration')),
'view_count': int_or_none(video.get('views')),
'like_count': int_or_none(video.get('likes')),
'tags': try_get(video, lambda x: x['tags'], list),
'categories': categories,
'formats': formats,
+ 'subtitles': subtitles
}