]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/testtube.py
26655d690250f495caf98de2cfaad6aff3eda331
1 from __future__
import unicode_literals
3 from .common
import InfoExtractor
10 class TestTubeIE(InfoExtractor
):
11 _VALID_URL
= r
'https?://testtube\.com/[^/?#]+/(?P<id>[^/?#]+)'
13 'url': 'https://testtube.com/dnews/5-weird-ways-plants-can-eat-animals?utm_source=FB&utm_medium=DNews&utm_campaign=DNewsSocial',
16 'display_id': '5-weird-ways-plants-can-eat-animals',
19 'title': '5 Weird Ways Plants Can Eat Animals',
20 'description': 'Why have some plants evolved to eat meat?',
21 'thumbnail': 're:^https?://.*\.jpg$',
23 'uploader_id': 'dnews',
26 'url': 'https://testtube.com/iflscience/insane-jet-ski-flipping',
30 'title': 'Flipping Jet-Ski Skills | Outrageous Acts of Science',
31 'uploader': 'Science Channel',
32 'uploader_id': 'ScienceChannel',
33 'upload_date': '20150203',
34 'description': 'md5:e61374030015bae1d2e22f096d4769d6',
38 def _real_extract(self
, url
):
39 display_id
= self
._match
_id
(url
)
41 webpage
= self
._download
_webpage
(url
, display_id
)
43 youtube_url
= self
._html
_search
_regex
(
44 r
'<iframe[^>]+src="((?:https?:)?//www.youtube.com/embed/[^"]+)"',
45 webpage
, 'youtube iframe', default
=None)
47 return self
.url_result(youtube_url
, 'Youtube', video_id
=display_id
)
49 video_id
= self
._search
_regex
(
50 r
"player\.loadRevision3Item\('video_id',\s*([0-9]+)\);",
53 all_info
= self
._download
_json
(
54 'https://testtube.com/api/getPlaylist.json?api_key=ba9c741bce1b9d8e3defcc22193f3651b8867e62&codecs=h264,vp8,theora&video_id=%s' % video_id
,
56 info
= all_info
['items'][0]
59 for vcodec
, fdatas
in info
['media'].items():
60 for name
, fdata
in fdatas
.items():
62 'format_id': '%s-%s' % (vcodec
, name
),
65 'tbr': fdata
.get('bitrate'),
67 self
._sort
_formats
(formats
)
69 duration
= int_or_none(info
.get('duration'))
70 images
= info
.get('images')
72 preference
= qualities(['mini', 'small', 'medium', 'large'])
77 'preference': preference(thumbnail_id
)
78 } for thumbnail_id
, img_url
in images
.items()]
82 'display_id': display_id
,
83 'title': info
['title'],
84 'description': info
.get('summary'),
85 'thumbnails': thumbnails
,
86 'uploader': info
.get('show', {}).get('name'),
87 'uploader_id': info
.get('show', {}).get('slug'),