Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/peertube.py
debian/copyright: use spaces rather than tabs to start continuation lines.
[youtubedl] / youtube_dl / extractor / peertube.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 int_or_none,
10 parse_resolution,
11 str_or_none,
12 try_get,
13 unified_timestamp,
14 url_or_none,
15 urljoin,
16 )
17
18
class PeerTubeIE(InfoExtractor):
    """Extractor for videos served by PeerTube instances.

    Two URL forms are accepted (see ``_VALID_URL``):

    * ``http(s)://<known instance>/videos/watch|embed/<uuid>`` or the
      matching ``/api/v<N>/videos/<uuid>`` endpoint, where the host must be
      listed in ``_INSTANCES_RE``;
    * the internal pseudo-URL ``peertube:<host>:<uuid>``, which carries an
      arbitrary host and is produced by ``_extract_peertube_url``.
    """

    # Alternation of known instance host names. This is a *fragment* that
    # contains layout whitespace and a `#` comment, so it must only be
    # interpolated into patterns compiled in verbose mode, i.e. `(?x)`.
    _INSTANCES_RE = r'''(?:
            # Taken from https://instances.joinpeertube.org/instances
            peertube\.rainbowswingers\.net|
            tube\.stanisic\.nl|
            peer\.suiri\.us|
            medias\.libox\.fr|
            videomensoif\.ynh\.fr|
            peertube\.travelpandas\.eu|
            peertube\.rachetjay\.fr|
            peertube\.montecsys\.fr|
            tube\.eskuero\.me|
            peer\.tube|
            peertube\.umeahackerspace\.se|
            tube\.nx-pod\.de|
            video\.monsieurbidouille\.fr|
            tube\.openalgeria\.org|
            vid\.lelux\.fi|
            video\.anormallostpod\.ovh|
            tube\.crapaud-fou\.org|
            peertube\.stemy\.me|
            lostpod\.space|
            exode\.me|
            peertube\.snargol\.com|
            vis\.ion\.ovh|
            videosdulib\.re|
            v\.mbius\.io|
            videos\.judrey\.eu|
            peertube\.osureplayviewer\.xyz|
            peertube\.mathieufamily\.ovh|
            www\.videos-libr\.es|
            fightforinfo\.com|
            peertube\.fediverse\.ru|
            peertube\.oiseauroch\.fr|
            video\.nesven\.eu|
            v\.bearvideo\.win|
            video\.qoto\.org|
            justporn\.cc|
            video\.vny\.fr|
            peervideo\.club|
            tube\.taker\.fr|
            peertube\.chantierlibre\.org|
            tube\.ipfixe\.info|
            tube\.kicou\.info|
            tube\.dodsorf\.as|
            videobit\.cc|
            video\.yukari\.moe|
            videos\.elbinario\.net|
            hkvideo\.live|
            pt\.tux\.tf|
            www\.hkvideo\.live|
            FIGHTFORINFO\.com|
            pt\.765racing\.com|
            peertube\.gnumeria\.eu\.org|
            nordenmedia\.com|
            peertube\.co\.uk|
            tube\.darfweb\.eu|
            tube\.kalah-france\.org|
            0ch\.in|
            vod\.mochi\.academy|
            film\.node9\.org|
            peertube\.hatthieves\.es|
            video\.fitchfamily\.org|
            peertube\.ddns\.net|
            video\.ifuncle\.kr|
            video\.fdlibre\.eu|
            tube\.22decembre\.eu|
            peertube\.harmoniescreatives\.com|
            tube\.fabrigli\.fr|
            video\.thedwyers\.co|
            video\.bruitbruit\.com|
            peertube\.foxfam\.club|
            peer\.philoxweb\.be|
            videos\.bugs\.social|
            peertube\.malbert\.xyz|
            peertube\.bilange\.ca|
            libretube\.net|
            diytelevision\.com|
            peertube\.fedilab\.app|
            libre\.video|
            video\.mstddntfdn\.online|
            us\.tv|
            peertube\.sl-network\.fr|
            peertube\.dynlinux\.io|
            peertube\.david\.durieux\.family|
            peertube\.linuxrocks\.online|
            peerwatch\.xyz|
            v\.kretschmann\.social|
            tube\.otter\.sh|
            yt\.is\.nota\.live|
            tube\.dragonpsi\.xyz|
            peertube\.boneheadmedia\.com|
            videos\.funkwhale\.audio|
            watch\.44con\.com|
            peertube\.gcaillaut\.fr|
            peertube\.icu|
            pony\.tube|
            spacepub\.space|
            tube\.stbr\.io|
            v\.mom-gay\.faith|
            tube\.port0\.xyz|
            peertube\.simounet\.net|
            play\.jergefelt\.se|
            peertube\.zeteo\.me|
            tube\.danq\.me|
            peertube\.kerenon\.com|
            tube\.fab-l3\.org|
            tube\.calculate\.social|
            peertube\.mckillop\.org|
            tube\.netzspielplatz\.de|
            vod\.ksite\.de|
            peertube\.laas\.fr|
            tube\.govital\.net|
            peertube\.stephenson\.cc|
            bistule\.nohost\.me|
            peertube\.kajalinifi\.de|
            video\.ploud\.jp|
            video\.omniatv\.com|
            peertube\.ffs2play\.fr|
            peertube\.leboulaire\.ovh|
            peertube\.tronic-studio\.com|
            peertube\.public\.cat|
            peertube\.metalbanana\.net|
            video\.1000i100\.fr|
            peertube\.alter-nativ-voll\.de|
            tube\.pasa\.tf|
            tube\.worldofhauru\.xyz|
            pt\.kamp\.site|
            peertube\.teleassist\.fr|
            videos\.mleduc\.xyz|
            conf\.tube|
            media\.privacyinternational\.org|
            pt\.forty-two\.nl|
            video\.halle-leaks\.de|
            video\.grosskopfgames\.de|
            peertube\.schaeferit\.de|
            peertube\.jackbot\.fr|
            tube\.extinctionrebellion\.fr|
            peertube\.f-si\.org|
            video\.subak\.ovh|
            videos\.koweb\.fr|
            peertube\.zergy\.net|
            peertube\.roflcopter\.fr|
            peertube\.floss-marketing-school\.com|
            vloggers\.social|
            peertube\.iriseden\.eu|
            videos\.ubuntu-paris\.org|
            peertube\.mastodon\.host|
            armstube\.com|
            peertube\.s2s\.video|
            peertube\.lol|
            tube\.open-plug\.eu|
            open\.tube|
            peertube\.ch|
            peertube\.normandie-libre\.fr|
            peertube\.slat\.org|
            video\.lacaveatonton\.ovh|
            peertube\.uno|
            peertube\.servebeer\.com|
            peertube\.fedi\.quebec|
            tube\.h3z\.jp|
            tube\.plus200\.com|
            peertube\.eric\.ovh|
            tube\.metadocs\.cc|
            tube\.unmondemeilleur\.eu|
            gouttedeau\.space|
            video\.antirep\.net|
            nrop\.cant\.at|
            tube\.ksl-bmx\.de|
            tube\.plaf\.fr|
            tube\.tchncs\.de|
            video\.devinberg\.com|
            hitchtube\.fr|
            peertube\.kosebamse\.com|
            yunopeertube\.myddns\.me|
            peertube\.varney\.fr|
            peertube\.anon-kenkai\.com|
            tube\.maiti\.info|
            tubee\.fr|
            videos\.dinofly\.com|
            toobnix\.org|
            videotape\.me|
            voca\.tube|
            video\.heromuster\.com|
            video\.lemediatv\.fr|
            video\.up\.edu\.ph|
            balafon\.video|
            video\.ivel\.fr|
            thickrips\.cloud|
            pt\.laurentkruger\.fr|
            video\.monarch-pass\.net|
            peertube\.artica\.center|
            video\.alternanet\.fr|
            indymotion\.fr|
            fanvid\.stopthatimp\.net|
            video\.farci\.org|
            v\.lesterpig\.com|
            video\.okaris\.de|
            tube\.pawelko\.net|
            peertube\.mablr\.org|
            tube\.fede\.re|
            pytu\.be|
            evertron\.tv|
            devtube\.dev-wiki\.de|
            raptube\.antipub\.org|
            video\.selea\.se|
            peertube\.mygaia\.org|
            video\.oh14\.de|
            peertube\.livingutopia\.org|
            peertube\.the-penguin\.de|
            tube\.thechangebook\.org|
            tube\.anjara\.eu|
            pt\.pube\.tk|
            video\.samedi\.pm|
            mplayer\.demouliere\.eu|
            widemus\.de|
            peertube\.me|
            peertube\.zapashcanon\.fr|
            video\.latavernedejohnjohn\.fr|
            peertube\.pcservice46\.fr|
            peertube\.mazzonetto\.eu|
            video\.irem\.univ-paris-diderot\.fr|
            video\.livecchi\.cloud|
            alttube\.fr|
            video\.coop\.tools|
            video\.cabane-libre\.org|
            peertube\.openstreetmap\.fr|
            videos\.alolise\.org|
            irrsinn\.video|
            video\.antopie\.org|
            scitech\.video|
            tube2\.nemsia\.org|
            video\.amic37\.fr|
            peertube\.freeforge\.eu|
            video\.arbitrarion\.com|
            video\.datsemultimedia\.com|
            stoptrackingus\.tv|
            peertube\.ricostrongxxx\.com|
            docker\.videos\.lecygnenoir\.info|
            peertube\.togart\.de|
            tube\.postblue\.info|
            videos\.domainepublic\.net|
            peertube\.cyber-tribal\.com|
            video\.gresille\.org|
            peertube\.dsmouse\.net|
            cinema\.yunohost\.support|
            tube\.theocevaer\.fr|
            repro\.video|
            tube\.4aem\.com|
            quaziinc\.com|
            peertube\.metawurst\.space|
            videos\.wakapo\.com|
            video\.ploud\.fr|
            video\.freeradical\.zone|
            tube\.valinor\.fr|
            refuznik\.video|
            pt\.kircheneuenburg\.de|
            peertube\.asrun\.eu|
            peertube\.lagob\.fr|
            videos\.side-ways\.net|
            91video\.online|
            video\.valme\.io|
            video\.taboulisme\.com|
            videos-libr\.es|
            tv\.mooh\.fr|
            nuage\.acostey\.fr|
            video\.monsieur-a\.fr|
            peertube\.librelois\.fr|
            videos\.pair2jeux\.tube|
            videos\.pueseso\.club|
            peer\.mathdacloud\.ovh|
            media\.assassinate-you\.net|
            vidcommons\.org|
            ptube\.rousset\.nom\.fr|
            tube\.cyano\.at|
            videos\.squat\.net|
            video\.iphodase\.fr|
            peertube\.makotoworkshop\.org|
            peertube\.serveur\.slv-valbonne\.fr|
            vault\.mle\.party|
            hostyour\.tv|
            videos\.hack2g2\.fr|
            libre\.tube|
            pire\.artisanlogiciel\.net|
            videos\.numerique-en-commun\.fr|
            video\.netsyms\.com|
            video\.die-partei\.social|
            video\.writeas\.org|
            peertube\.swarm\.solvingmaz\.es|
            tube\.pericoloso\.ovh|
            watching\.cypherpunk\.observer|
            videos\.adhocmusic\.com|
            tube\.rfc1149\.net|
            peertube\.librelabucm\.org|
            videos\.numericoop\.fr|
            peertube\.koehn\.com|
            peertube\.anarchmusicall\.net|
            tube\.kampftoast\.de|
            vid\.y-y\.li|
            peertube\.xtenz\.xyz|
            diode\.zone|
            tube\.egf\.mn|
            peertube\.nomagic\.uk|
            visionon\.tv|
            videos\.koumoul\.com|
            video\.rastapuls\.com|
            video\.mantlepro\.com|
            video\.deadsuperhero\.com|
            peertube\.musicstudio\.pro|
            peertube\.we-keys\.fr|
            artitube\.artifaille\.fr|
            peertube\.ethernia\.net|
            tube\.midov\.pl|
            peertube\.fr|
            watch\.snoot\.tube|
            peertube\.donnadieu\.fr|
            argos\.aquilenet\.fr|
            tube\.nemsia\.org|
            tube\.bruniau\.net|
            videos\.darckoune\.moe|
            tube\.traydent\.info|
            dev\.videos\.lecygnenoir\.info|
            peertube\.nayya\.org|
            peertube\.live|
            peertube\.mofgao\.space|
            video\.lequerrec\.eu|
            peertube\.amicale\.net|
            aperi\.tube|
            tube\.ac-lyon\.fr|
            video\.lw1\.at|
            www\.yiny\.org|
            videos\.pofilo\.fr|
            tube\.lou\.lt|
            choob\.h\.etbus\.ch|
            tube\.hoga\.fr|
            peertube\.heberge\.fr|
            video\.obermui\.de|
            videos\.cloudfrancois\.fr|
            betamax\.video|
            video\.typica\.us|
            tube\.piweb\.be|
            video\.blender\.org|
            peertube\.cat|
            tube\.kdy\.ch|
            pe\.ertu\.be|
            peertube\.social|
            videos\.lescommuns\.org|
            tv\.datamol\.org|
            videonaute\.fr|
            dialup\.express|
            peertube\.nogafa\.org|
            megatube\.lilomoino\.fr|
            peertube\.tamanoir\.foucry\.net|
            peertube\.devosi\.org|
            peertube\.1312\.media|
            tube\.bootlicker\.party|
            skeptikon\.fr|
            video\.blueline\.mg|
            tube\.homecomputing\.fr|
            tube\.ouahpiti\.info|
            video\.tedomum\.net|
            video\.g3l\.org|
            fontube\.fr|
            peertube\.gaialabs\.ch|
            tube\.kher\.nl|
            peertube\.qtg\.fr|
            video\.migennes\.net|
            tube\.p2p\.legal|
            troll\.tv|
            videos\.iut-orsay\.fr|
            peertube\.solidev\.net|
            videos\.cemea\.org|
            video\.passageenseine\.fr|
            videos\.festivalparminous\.org|
            peertube\.touhoppai\.moe|
            sikke\.fi|
            peer\.hostux\.social|
            share\.tube|
            peertube\.walkingmountains\.fr|
            videos\.benpro\.fr|
            peertube\.parleur\.net|
            peertube\.heraut\.eu|
            tube\.aquilenet\.fr|
            peertube\.gegeweb\.eu|
            framatube\.org|
            thinkerview\.video|
            tube\.conferences-gesticulees\.net|
            peertube\.datagueule\.tv|
            video\.lqdn\.fr|
            tube\.mochi\.academy|
            media\.zat\.im|
            video\.colibris-outilslibres\.org|
            tube\.svnet\.fr|
            peertube\.video|
            peertube3\.cpy\.re|
            peertube2\.cpy\.re|
            videos\.tcit\.fr|
            peertube\.cpy\.re
        )'''
    # Video identifiers are UUIDs (8-4-4-4-12 hex digits, either case).
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'
    # Filled with (host, video id, sub-resource path); an empty path
    # addresses the video object itself.
    _API_BASE = 'https://%s/api/v1/videos/%s/%s'
    # Exactly one of the `host` / `host_2` groups is set, depending on which
    # URL form matched.
    _VALID_URL = r'''(?x)
                    (?:
                        peertube:(?P<host>[^:]+):|
                        https?://(?P<host_2>%s)/(?:videos/(?:watch|embed)|api/v\d/videos)/
                    )
                    (?P<id>%s)
                    ''' % (_INSTANCES_RE, _UUID_RE)
    _TESTS = [{
        'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
        'md5': '9bed8c0137913e17b86334e5885aacff',
        'info_dict': {
            'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
            'ext': 'mp4',
            'title': 'What is PeerTube?',
            'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'timestamp': 1538391166,
            'upload_date': '20181001',
            'uploader': 'Framasoft',
            'uploader_id': '3',
            'uploader_url': 'https://framatube.org/accounts/framasoft',
            'channel': 'Les vidéos de Framasoft',
            'channel_id': '2',
            'channel_url': 'https://framatube.org/video-channels/bf54d359-cfad-4935-9d45-9d6be93f63e8',
            'language': 'en',
            'license': 'Attribution - Share Alike',
            'duration': 113,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['framasoft', 'peertube'],
            'categories': ['Science & Technology'],
        }
    }, {
        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
        'only_matching': True,
    }, {
        # nsfw
        'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
        'only_matching': True,
    }, {
        'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
        'only_matching': True,
    }, {
        'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
        'only_matching': True,
    }, {
        'url': 'peertube:video.blender.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_peertube_url(webpage, source_url):
        """Detect a PeerTube watch/embed page on a host not in the known list.

        Returns a ``peertube:<host>:<uuid>`` pseudo-URL when ``source_url``
        has the shape of a watch/embed URL *and* ``webpage`` contains one of
        the marker strings below; otherwise returns None.
        """
        mobj = re.match(
            r'https?://(?P<host>[^/]+)/videos/(?:watch|embed)/(?P<id>%s)'
            % PeerTubeIE._UUID_RE, source_url)
        if mobj and any(p in webpage for p in (
                '<title>PeerTube<',
                'There will be other non JS-based clients to access PeerTube',
                '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
            return 'peertube:%s:%s' % mobj.group('host', 'id')

    @staticmethod
    def _extract_urls(webpage, source_url):
        """Return the PeerTube URLs found in ``webpage``.

        Embed ``<iframe>`` sources pointing at a known instance are collected
        first. Only when there are none is the page itself checked via
        ``_extract_peertube_url``. The result is a (possibly empty) list.
        """
        entries = re.findall(
            r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//%s/videos/embed/%s)'''
            % (PeerTubeIE._INSTANCES_RE, PeerTubeIE._UUID_RE), webpage)
        if not entries:
            peertube_url = PeerTubeIE._extract_peertube_url(webpage, source_url)
            if peertube_url:
                entries = [peertube_url]
        return entries

    def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
        """Download and return JSON from the instance's v1 video API.

        ``path`` is the sub-resource below the video (for example
        ``'captions'`` or ``'description'``); pass ``''`` for the video
        object itself. ``note``, ``errnote`` and ``fatal`` are forwarded
        unchanged to ``_download_json``.
        """
        return self._download_json(
            self._API_BASE % (host, video_id, path), video_id,
            note=note, errnote=errnote, fatal=fatal)

    def _get_subtitles(self, host, video_id):
        """Build the subtitles dict from the video's ``captions`` resource.

        Returns a dict mapping a language id to a list of ``{'url': ...}``
        entries, or None when the captions response does not have the
        expected ``{'data': [...]}`` shape. The download is non-fatal.
        """
        captions = self._call_api(
            host, video_id, 'captions', note='Downloading captions JSON',
            fatal=False)
        if not isinstance(captions, dict):
            return
        data = captions.get('data')
        if not isinstance(data, list):
            return
        subtitles = {}
        for caption in data:
            # Skip malformed items instead of failing on `.get`, mirroring
            # the per-file check in `_real_extract`.
            if not isinstance(caption, dict):
                continue
            language_id = try_get(
                caption, lambda x: x['language']['id'], compat_str)
            caption_url = urljoin(
                'https://%s' % host, caption.get('captionPath'))
            if not caption_url:
                continue
            # Captions without a usable language id are filed under 'en'.
            subtitles.setdefault(language_id or 'en', []).append({
                'url': caption_url,
            })
        return subtitles

    def _real_extract(self, url):
        """Extract formats and metadata for the video addressed by ``url``.

        Fetches the video object, then (non-fatally) the full description
        and the captions, and returns the resulting info dict. Raises
        whatever the fatal video JSON download raises, and KeyError if the
        video object lacks ``name`` or ``files``.
        """
        mobj = re.match(self._VALID_URL, url)
        host = mobj.group('host') or mobj.group('host_2')
        video_id = mobj.group('id')

        video = self._call_api(
            host, video_id, '', note='Downloading video JSON')

        title = video['name']

        formats = []
        for file_ in video['files']:
            if not isinstance(file_, dict):
                continue
            file_url = url_or_none(file_.get('fileUrl'))
            if not file_url:
                continue
            file_size = int_or_none(file_.get('size'))
            # The resolution label doubles as the format id and as the
            # source for the resolution fields.
            format_id = try_get(
                file_, lambda x: x['resolution']['label'], compat_str)
            f = parse_resolution(format_id)
            f.update({
                'url': file_url,
                'format_id': format_id,
                'filesize': file_size,
            })
            formats.append(f)
        self._sort_formats(formats)

        full_description = self._call_api(
            host, video_id, 'description', note='Downloading description JSON',
            fatal=False)

        # Prefer the dedicated description resource; fall back to the
        # description embedded in the video object.
        description = None
        if isinstance(full_description, dict):
            description = str_or_none(full_description.get('description'))
        if not description:
            description = video.get('description')

        # NOTE(review): presumably the base class routes this call to
        # `_get_subtitles` above -- confirm against InfoExtractor.
        subtitles = self.extract_subtitles(host, video_id)

        def data(section, field, type_):
            return try_get(video, lambda x: x[section][field], type_)

        def account_data(field, type_):
            return data('account', field, type_)

        def channel_data(field, type_):
            return data('channel', field, type_)

        category = data('category', 'label', compat_str)
        categories = [category] if category else None

        nsfw = video.get('nsfw')
        # Only a real boolean is trusted; a missing or otherwise-typed value
        # leaves the age limit undetermined. (An identity test against the
        # `bool` type itself can never succeed for a JSON value.)
        if isinstance(nsfw, bool):
            age_limit = 18 if nsfw else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'description': description,
            # Join against the API host rather than the input URL: the input
            # may be a `peertube:<host>:<id>` pseudo-URL, which is not an
            # HTTP base. This is the same base used for caption paths.
            'thumbnail': urljoin(
                'https://%s' % host, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'uploader': account_data('displayName', compat_str),
            'uploader_id': str_or_none(account_data('id', int)),
            'uploader_url': url_or_none(account_data('url', compat_str)),
            'channel': channel_data('displayName', compat_str),
            'channel_id': str_or_none(channel_data('id', int)),
            'channel_url': url_or_none(channel_data('url', compat_str)),
            'language': data('language', 'id', compat_str),
            'license': data('licence', 'label', compat_str),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likes')),
            'dislike_count': int_or_none(video.get('dislikes')),
            'age_limit': age_limit,
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
            'formats': formats,
            'subtitles': subtitles
        }