Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/peertube.py
Update upstream source from tag 'upstream/2018.06.18'
[youtubedl] / youtube_dl / extractor / peertube.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_str
8 from ..utils import (
9 int_or_none,
10 parse_resolution,
11 try_get,
12 unified_timestamp,
13 urljoin,
14 )
15
16
class PeerTubeIE(InfoExtractor):
    """Information extractor for videos on a fixed list of PeerTube instances.

    URLs are recognised only when their host is one of the instances listed
    in ``_INSTANCES_RE`` and their path is a watch page, an embed page or a
    versioned ``/api/v<N>/videos/`` endpoint.
    """

    # Alternation of known instance host names.  It is only ever interpolated
    # into verbose (``(?x)``) patterns, so the whitespace and the ``#`` comment
    # inside the literal are ignored by the regex engine.
    _INSTANCES_RE = r'''(?:
                            # Taken from https://instances.joinpeertube.org/instances
                            tube\.openalgeria\.org|
                            peertube\.pointsecu\.fr|
                            peertube\.nogafa\.org|
                            peertube\.pl|
                            megatube\.lilomoino\.fr|
                            peertube\.tamanoir\.foucry\.net|
                            peertube\.inapurna\.org|
                            peertube\.netzspielplatz\.de|
                            video\.deadsuperhero\.com|
                            peertube\.devosi\.org|
                            peertube\.1312\.media|
                            tube\.worldofhauru\.xyz|
                            tube\.bootlicker\.party|
                            skeptikon\.fr|
                            peertube\.geekshell\.fr|
                            tube\.opportunis\.me|
                            peertube\.peshane\.net|
                            video\.blueline\.mg|
                            tube\.homecomputing\.fr|
                            videos\.cloudfrancois\.fr|
                            peertube\.viviers-fibre\.net|
                            tube\.ouahpiti\.info|
                            video\.tedomum\.net|
                            video\.g3l\.org|
                            fontube\.fr|
                            peertube\.gaialabs\.ch|
                            peertube\.extremely\.online|
                            peertube\.public-infrastructure\.eu|
                            tube\.kher\.nl|
                            peertube\.qtg\.fr|
                            tube\.22decembre\.eu|
                            facegirl\.me|
                            video\.migennes\.net|
                            janny\.moe|
                            tube\.p2p\.legal|
                            video\.atlanti\.se|
                            troll\.tv|
                            peertube\.geekael\.fr|
                            vid\.leotindall\.com|
                            video\.anormallostpod\.ovh|
                            p-tube\.h3z\.jp|
                            tube\.darfweb\.eu|
                            videos\.iut-orsay\.fr|
                            peertube\.solidev\.net|
                            videos\.symphonie-of-code\.fr|
                            testtube\.ortg\.de|
                            videos\.cemea\.org|
                            peertube\.gwendalavir\.eu|
                            video\.passageenseine\.fr|
                            videos\.festivalparminous\.org|
                            peertube\.touhoppai\.moe|
                            peertube\.duckdns\.org|
                            sikke\.fi|
                            peertube\.mastodon\.host|
                            firedragonvideos\.com|
                            vidz\.dou\.bet|
                            peertube\.koehn\.com|
                            peer\.hostux\.social|
                            share\.tube|
                            peertube\.walkingmountains\.fr|
                            medias\.libox\.fr|
                            peertube\.moe|
                            peertube\.xyz|
                            jp\.peertube\.network|
                            videos\.benpro\.fr|
                            tube\.otter\.sh|
                            peertube\.angristan\.xyz|
                            peertube\.parleur\.net|
                            peer\.ecutsa\.fr|
                            peertube\.heraut\.eu|
                            peertube\.tifox\.fr|
                            peertube\.maly\.io|
                            vod\.mochi\.academy|
                            exode\.me|
                            coste\.video|
                            tube\.aquilenet\.fr|
                            peertube\.gegeweb\.eu|
                            framatube\.org|
                            thinkerview\.video|
                            tube\.conferences-gesticulees\.net|
                            peertube\.datagueule\.tv|
                            video\.lqdn\.fr|
                            meilleurtube\.delire\.party|
                            tube\.mochi\.academy|
                            peertube\.dav\.li|
                            media\.zat\.im|
                            pytu\.be|
                            peertube\.valvin\.fr|
                            peertube\.nsa\.ovh|
                            video\.colibris-outilslibres\.org|
                            video\.hispagatos\.org|
                            tube\.svnet\.fr|
                            peertube\.video|
                            videos\.lecygnenoir\.info|
                            peertube3\.cpy\.re|
                            peertube2\.cpy\.re|
                            videos\.tcit\.fr|
                            peertube\.cpy\.re
                        )'''
    # Matches watch, embed and API URLs on a known instance; the last path
    # component is captured as the video id.
    _VALID_URL = r'''(?x)
                    https?://
                        %s
                        /(?:videos/(?:watch|embed)|api/v\d/videos)/
                        (?P<id>[^/?\#&]+)
                    ''' % _INSTANCES_RE
    _TESTS = [{
        'url': 'https://peertube.moe/videos/watch/2790feb0-8120-4e63-9af3-c943c69f5e6c',
        'md5': '80f24ff364cc9d333529506a263e7feb',
        'info_dict': {
            'id': '2790feb0-8120-4e63-9af3-c943c69f5e6c',
            'ext': 'mp4',
            'title': 'wow',
            'description': 'wow such video, so gif',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'timestamp': 1519297480,
            'upload_date': '20180222',
            'uploader': 'Luclu7',
            'uploader_id': '7fc42640-efdb-4505-a45d-a15b1a5496f1',
            'uploader_url': 'https://peertube.nsa.ovh/accounts/luclu7',
            'license': 'Unknown',
            'duration': 3,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': list,
            'categories': list,
        }
    }, {
        'url': 'https://peertube.tamanoir.foucry.net/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
        'only_matching': True,
    }, {
        # nsfw
        'url': 'https://tube.22decembre.eu/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
        'only_matching': True,
    }, {
        'url': 'https://tube.22decembre.eu/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
        'only_matching': True,
    }, {
        'url': 'https://tube.openalgeria.org/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the PeerTube embed URLs found in ``<iframe src=...>`` tags.

        Only iframes whose ``src`` points at ``/videos/embed/<id>`` on one of
        the known instances are reported.  The scheme may be absent
        (protocol-relative ``//host/...`` URLs are matched as well).
        """
        return [
            mobj.group('url')
            for mobj in re.finditer(
                r'''(?x)<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//%s/videos/embed/[^/?\#&]+)\1'''
                % PeerTubeIE._INSTANCES_RE, webpage)]

    def _real_extract(self, url):
        """Build the info dict for the video addressed by ``url``.

        The video metadata is fetched as JSON from ``/api/v1/videos/<id>`` on
        the same host as ``url``.  ``name`` and ``files`` are required keys of
        that JSON (a missing one raises ``KeyError``); every other field is
        optional and ends up as ``None`` when absent or of the wrong type.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            urljoin(url, '/api/v1/videos/%s' % video_id), video_id)

        title = video['name']

        formats = []
        for file_ in video['files']:
            # Skip malformed entries instead of failing the whole extraction.
            if not isinstance(file_, dict):
                continue
            file_url = file_.get('fileUrl')
            if not file_url or not isinstance(file_url, compat_str):
                continue
            file_size = int_or_none(file_.get('size'))
            # The resolution label serves both as the format id and as the
            # input from which the resolution fields are parsed.
            format_id = try_get(
                file_, lambda x: x['resolution']['label'], compat_str)
            f = parse_resolution(format_id)
            f.update({
                'url': file_url,
                'format_id': format_id,
                'filesize': file_size,
            })
            formats.append(f)
        self._sort_formats(formats)

        def account_data(field):
            # String value of video['account'][field], or None if unavailable.
            return try_get(video, lambda x: x['account'][field], compat_str)

        category = try_get(video, lambda x: x['category']['label'], compat_str)
        categories = [category] if category else None

        nsfw = video.get('nsfw')
        # Only trust an actual boolean flag.  ``nsfw is bool`` compared the
        # value with the ``bool`` type itself and therefore never matched,
        # leaving age_limit permanently unset.
        if isinstance(nsfw, bool):
            age_limit = 18 if nsfw else 0
        else:
            age_limit = None

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': urljoin(url, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'uploader': account_data('displayName'),
            'uploader_id': account_data('uuid'),
            'uploader_url': account_data('url'),
            # The API spells the key 'licence'; the info dict field is 'license'.
            'license': try_get(
                video, lambda x: x['licence']['label'], compat_str),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likes')),
            'dislike_count': int_or_none(video.get('dislikes')),
            'age_limit': age_limit,
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
            'formats': formats,
        }