Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dtube.py
5887887e15ef9065515abb1472e06511e55e32c9
[youtubedl] / youtube_dl / extractor / dtube.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import json
5 import re
6 from socket import timeout
7
8 from .common import InfoExtractor
9 from ..utils import (
10 int_or_none,
11 parse_iso8601,
12 )
13
14
class DTubeIE(InfoExtractor):
    """Extractor for single-video pages on d.tube.

    Video metadata is fetched with a JSON-RPC ``get_content`` call to
    https://api.steemit.com/; media and thumbnail hashes found in that
    metadata are turned into URLs under https://ipfs.io/ipfs/.
    """

    _VALID_URL = r'https?://(?:www\.)?d\.tube/(?:#!/)?v/(?P<uploader_id>[0-9a-z.-]+)/(?P<id>[0-9a-z]{8})'
    _TEST = {
        'url': 'https://d.tube/#!/v/benswann/zqd630em',
        'md5': 'a03eaa186618ffa7a3145945543a251e',
        'info_dict': {
            'id': 'zqd630em',
            'ext': 'mp4',
            'title': 'Reality Check: FDA\'s Disinformation Campaign on Kratom',
            'description': 'md5:700d164e066b87f9eac057949e4227c2',
            'uploader_id': 'benswann',
            'upload_date': '20180222',
            'timestamp': 1519328958,
        },
        'params': {
            'format': '480p',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the d.tube video addressed by *url*.

        The uploader id and video id are taken from the URL, the post is
        looked up through the ``get_content`` JSON-RPC method, and every
        advertised video hash is probed once (5 second timeout) so that
        unreachable formats are left out of the result.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``tags``, ``duration``, ``formats``, ``timestamp``
        and ``uploader_id``.

        Raises ``KeyError`` if the API response lacks ``result``,
        ``json_metadata``, ``video`` or ``content``, or if no title is
        available from either the video info or the post itself.
        """
        uploader_id, video_id = re.match(self._VALID_URL, url).groups()
        result = self._download_json(
            'https://api.steemit.com/', video_id, data=json.dumps({
                'jsonrpc': '2.0',
                'method': 'get_content',
                'params': [uploader_id, video_id],
            }).encode())['result']

        # The post carries its video description as a JSON string.
        metadata = json.loads(result['json_metadata'])
        video = metadata['video']
        content = video['content']
        # ``info`` may be absent or an explicit JSON null; treat both as empty.
        info = video.get('info') or {}
        title = info.get('title') or result['title']

        def canonical_url(h):
            """Return the ipfs.io URL for hash *h*, or None if *h* is empty."""
            if not h:
                return None
            return 'https://ipfs.io/ipfs/' + h

        formats = []
        # '' selects the plain ``videohash`` key, reported as 'Source'.
        for q in ('240', '480', '720', '1080', ''):
            video_url = canonical_url(content.get('video%shash' % q))
            if not video_url:
                continue
            format_id = (q + 'p') if q else 'Source'
            try:
                self.to_screen('%s: Checking %s video format URL' % (video_id, format_id))
                self._downloader._opener.open(video_url, timeout=5).close()
            except (timeout, IOError):
                # socket.timeout covers a stalled read.  IOError also covers
                # urllib's URLError/HTTPError (connection failures, error
                # statuses), which would otherwise abort the whole extraction
                # instead of skipping this one format.
                self.to_screen(
                    '%s: %s URL is invalid, skipping' % (video_id, format_id))
                continue
            formats.append({
                'format_id': format_id,
                'url': video_url,
                'height': int_or_none(q),
                'ext': 'mp4',
            })

        return {
            'id': video_id,
            'title': title,
            'description': content.get('description'),
            'thumbnail': canonical_url(info.get('snaphash')),
            'tags': content.get('tags') or metadata.get('tags'),
            'duration': info.get('duration'),
            'formats': formats,
            'timestamp': parse_iso8601(result.get('created')),
            'uploader_id': uploader_id,
        }