Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/peertube.py
Update upstream source from tag 'upstream/2019.09.28'
[youtubedl] / youtube_dl / extractor / peertube.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 int_or_none,
10 parse_resolution,
11 try_get,
12 unified_timestamp,
13 url_or_none,
14 urljoin,
15 )
16
17
class PeerTubeIE(InfoExtractor):
    """Information extractor for videos served by PeerTube instances.

    Two URL shapes are accepted (see ``_VALID_URL``):

    * ``http(s)://<instance>/videos/watch|embed/<uuid>`` and
      ``http(s)://<instance>/api/v<N>/videos/<uuid>`` where ``<instance>`` is
      one of the hosts listed in ``_INSTANCES_RE``;
    * the internal ``peertube:<host>:<uuid>`` form, which is not restricted to
      the known-instance list.
    """

    # Verbose-mode alternation of known instance host names. It is only ever
    # interpolated into patterns compiled with the (?x) flag, so the embedded
    # whitespace and the "#" comment line are ignored by the regex engine.
    _INSTANCES_RE = r'''(?:
                            # Taken from https://instances.joinpeertube.org/instances
                            tube\.openalgeria\.org|
                            peertube\.pointsecu\.fr|
                            peertube\.nogafa\.org|
                            peertube\.pl|
                            megatube\.lilomoino\.fr|
                            peertube\.tamanoir\.foucry\.net|
                            peertube\.inapurna\.org|
                            peertube\.netzspielplatz\.de|
                            video\.deadsuperhero\.com|
                            peertube\.devosi\.org|
                            peertube\.1312\.media|
                            tube\.worldofhauru\.xyz|
                            tube\.bootlicker\.party|
                            skeptikon\.fr|
                            peertube\.geekshell\.fr|
                            tube\.opportunis\.me|
                            peertube\.peshane\.net|
                            video\.blueline\.mg|
                            tube\.homecomputing\.fr|
                            videos\.cloudfrancois\.fr|
                            peertube\.viviers-fibre\.net|
                            tube\.ouahpiti\.info|
                            video\.tedomum\.net|
                            video\.g3l\.org|
                            fontube\.fr|
                            peertube\.gaialabs\.ch|
                            peertube\.extremely\.online|
                            peertube\.public-infrastructure\.eu|
                            tube\.kher\.nl|
                            peertube\.qtg\.fr|
                            tube\.22decembre\.eu|
                            facegirl\.me|
                            video\.migennes\.net|
                            janny\.moe|
                            tube\.p2p\.legal|
                            video\.atlanti\.se|
                            troll\.tv|
                            peertube\.geekael\.fr|
                            vid\.leotindall\.com|
                            video\.anormallostpod\.ovh|
                            p-tube\.h3z\.jp|
                            tube\.darfweb\.eu|
                            videos\.iut-orsay\.fr|
                            peertube\.solidev\.net|
                            videos\.symphonie-of-code\.fr|
                            testtube\.ortg\.de|
                            videos\.cemea\.org|
                            peertube\.gwendalavir\.eu|
                            video\.passageenseine\.fr|
                            videos\.festivalparminous\.org|
                            peertube\.touhoppai\.moe|
                            peertube\.duckdns\.org|
                            sikke\.fi|
                            peertube\.mastodon\.host|
                            firedragonvideos\.com|
                            vidz\.dou\.bet|
                            peertube\.koehn\.com|
                            peer\.hostux\.social|
                            share\.tube|
                            peertube\.walkingmountains\.fr|
                            medias\.libox\.fr|
                            peertube\.moe|
                            peertube\.xyz|
                            jp\.peertube\.network|
                            videos\.benpro\.fr|
                            tube\.otter\.sh|
                            peertube\.angristan\.xyz|
                            peertube\.parleur\.net|
                            peer\.ecutsa\.fr|
                            peertube\.heraut\.eu|
                            peertube\.tifox\.fr|
                            peertube\.maly\.io|
                            vod\.mochi\.academy|
                            exode\.me|
                            coste\.video|
                            tube\.aquilenet\.fr|
                            peertube\.gegeweb\.eu|
                            framatube\.org|
                            thinkerview\.video|
                            tube\.conferences-gesticulees\.net|
                            peertube\.datagueule\.tv|
                            video\.lqdn\.fr|
                            meilleurtube\.delire\.party|
                            tube\.mochi\.academy|
                            peertube\.dav\.li|
                            media\.zat\.im|
                            pytu\.be|
                            peertube\.valvin\.fr|
                            peertube\.nsa\.ovh|
                            video\.colibris-outilslibres\.org|
                            video\.hispagatos\.org|
                            tube\.svnet\.fr|
                            peertube\.video|
                            videos\.lecygnenoir\.info|
                            peertube3\.cpy\.re|
                            peertube2\.cpy\.re|
                            videos\.tcit\.fr|
                            peertube\.cpy\.re
                        )'''
    # Canonical 8-4-4-4-12 hexadecimal UUID, either letter case.
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
    # The host is captured as "host" for the internal peertube: form and as
    # "host_2" for regular http(s) URLs; _real_extract reads whichever matched.
    _VALID_URL = r'''(?x)
                    (?:
                        peertube:(?P<host>[^:]+):|
                        https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
                    )
                    (?P<id>%s)
                    ''' % (_INSTANCES_RE, _UUID_RE)
    _TESTS = [{
        'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
        'md5': '80f24ff364cc9d333529506a263e7feb',
        'info_dict': {
            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
            'ext': 'mp4',
            'title': 'wow',
            'description': 'wow such video, so gif',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'timestamp': 1519297480,
            'upload_date': '20180222',
            'uploader': 'Luclu7',
            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
            'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7',
            'license': 'Unknown',
            'duration': 3,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': list,
            'categories': list,
        }
    }, {
        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
        'only_matching': True,
    }, {
        # nsfw
        'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
        'only_matching': True,
    }, {
        'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
        'only_matching': True,
    }, {
        'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
        'only_matching': True,
    }, {
        'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_peertube_url(webpage, source_url):
        """Build an internal ``peertube:<host>:<uuid>`` URL for an unlisted instance.

        ``source_url`` must look like a PeerTube watch/embed URL and
        ``webpage`` must contain at least one of the PeerTube marker strings
        checked below. Returns the internal URL string, or None when either
        condition fails.
        """
        mobj = re.match(
            r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
            % PeerTubeIE._UUID_RE, source_url)
        # The URL shape alone is not proof of PeerTube, so additionally require
        # a marker string in the page body.
        if mobj and any(p in webpage for p in (
                '<title>PeerTube<',
                'There will be other non JS-based clients to access PeerTube',
                '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
            return 'peertube:%s:%s' % mobj.group('host', 'id')

    @staticmethod
    def _extract_urls(webpage, source_url):
        """Return a list of PeerTube video URLs found for ``webpage``.

        First collects ``<iframe>`` embed URLs that point at a known instance.
        If there are none, falls back to treating ``source_url`` itself as a
        PeerTube page (see ``_extract_peertube_url``). The list may be empty.
        """
        entries = re.findall(
            r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
            % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
        if not entries:
            peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
            if peertube_url:
                entries = [peertube_url]
        return entries

    def _real_extract(self, url):
        """Fetch the video's JSON description from the instance API and build the info dict.

        ``url`` must match ``_VALID_URL``. The metadata is read from
        ``https://<host>/api/v1/videos/<uuid>``. A missing ``name`` field
        raises ``KeyError``; every other field is optional and yields None
        (or an empty format list) when absent or of an unexpected type.
        """
        mobj = re.match(self._VALID_URL, url)
        # Exactly one of the two host groups is set, depending on the URL form.
        host = mobj.group('host') or mobj.group('host_2')
        video_id = mobj.group('id')

        video = self._download_json(
            'https://%s/api/v1/videos/%s' % (host, video_id), video_id)

        title = video['name']

        formats = []
        # Tolerate a missing or null "files" entry instead of failing with a
        # bare KeyError/TypeError; the format sorter then sees an empty list.
        for file_ in video.get('files') or []:
            if not isinstance(file_, dict):
                continue
            file_url = url_or_none(file_.get('fileUrl'))
            if not file_url:
                continue
            file_size = int_or_none(file_.get('size'))
            # The resolution label doubles as the format id and is also parsed
            # for dimension fields by parse_resolution.
            format_id = try_get(
                file_, lambda x: x['resolution']['label'], compat_str)
            f = parse_resolution(format_id)
            f.update({
                'url': file_url,
                'format_id': format_id,
                'filesize': file_size,
            })
            formats.append(f)
        self._sort_formats(formats)

        def account_data(field):
            # String-valued field of the uploading account, or None.
            return try_get(video, lambda x: x['account'][field], compat_str)

        category = try_get(video, lambda x: x['category']['label'], compat_str)
        categories = [category] if category else None

        nsfw = video.get('nsfw')
        # Only an explicit boolean flag is trusted; anything else leaves the
        # age limit unknown. (isinstance, not "is bool": the value is never the
        # bool type object itself.)
        if isinstance(nsfw, bool):
            age_limit = 18 if nsfw else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            # NOTE(review): `url` may be the internal peertube:<host>:<uuid>
            # form; confirm urljoin yields a usable thumbnail URL in that case.
            'thumbnail': urljoin(url, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'uploader': account_data('displayName'),
            'uploader_id': account_data('uuid'),
            'uploader_url': account_data('url'),
            'license': try_get(
                video, lambda x: x['licence']['label'], compat_str),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likes')),
            'dislike_count': int_or_none(video.get('dislikes')),
            'age_limit': age_limit,
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
            'formats': formats,
        }