Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/streamable.py
debian/control: Verify compliance with Policy 4.1.4 (no changes needed).
[youtubedl] / youtube_dl / extractor / streamable.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 ExtractorError,
9 float_or_none,
10 int_or_none,
11 )
12
13
class StreamableIE(InfoExtractor):
    """Information extractor for videos hosted on streamable.com."""

    # Matches plain (/<id>), embed (/e/<id>) and /s/<id>/... URLs; only the
    # first path component after the optional prefix is captured as the id.
    _VALID_URL = r'https?://streamable\.com/(?:[es]/)?(?P<id>\w+)'
    _TESTS = [
        {
            'url': 'https://streamable.com/dnd1',
            'md5': '3e3bc5ca088b48c2d436529b64397fef',
            'info_dict': {
                'id': 'dnd1',
                'ext': 'mp4',
                'title': 'Mikel Oiarzabal scores to make it 0-3 for La Real against Espanyol',
                'thumbnail': r're:https?://.*\.jpg$',
                'uploader': 'teabaker',
                'timestamp': 1454964157.35115,
                'upload_date': '20160208',
                'duration': 61.516,
                'view_count': int,
            }
        },
        # older video without bitrate, width/height, etc. info
        {
            'url': 'https://streamable.com/moo',
            'md5': '2cf6923639b87fba3279ad0df3a64e73',
            'info_dict': {
                'id': 'moo',
                'ext': 'mp4',
                'title': '"Please don\'t eat me!"',
                'thumbnail': r're:https?://.*\.jpg$',
                'timestamp': 1426115495,
                'upload_date': '20150311',
                'duration': 12,
                'view_count': int,
            }
        },
        {
            'url': 'https://streamable.com/e/dnd1',
            'only_matching': True,
        },
        {
            'url': 'https://streamable.com/s/okkqk/drxjds',
            'only_matching': True,
        }
    ]

    @staticmethod
    def _extract_url(webpage):
        """Return the ``src`` of the first Streamable ``<iframe>`` in *webpage*.

        The URL may be absolute or protocol-relative. Returns None when no
        matching iframe is found.
        """
        # The negative lookahead has to be re-evaluated before every consumed
        # character, i.e. ``(?:(?!\1).)+``.  Checking it once and then running
        # a greedy ``.+`` would let the match run past the closing quote of
        # ``src`` up to the last matching quote on the same line, capturing
        # the following attributes as part of the URL.
        mobj = re.search(
            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<src>(?:https?:)?//streamable\.com/(?:(?!\1).)+)(?P=q1)',
            webpage)
        if mobj:
            return mobj.group('src')
        return None

    def _real_extract(self, url):
        """Build the info dict for the Streamable video referenced by *url*.

        Fetches the video's JSON description from the ajax endpoint and
        converts every entry of its ``files`` mapping that carries a URL into
        a format.

        Raises:
            ExtractorError: if the reported ``status`` is anything other
                than 2 (at least one file ready).
        """
        video_id = self._match_id(url)

        # Note: Using the ajax API, as the public Streamable API doesn't seem
        # to return video info like the title properly sometimes, and doesn't
        # include info like the video duration
        video = self._download_json(
            'https://ajax.streamable.com/videos/%s' % video_id, video_id)

        # Status codes:
        # 0 The video is being uploaded
        # 1 The video is being processed
        # 2 The video has at least one file ready
        # 3 The video is unavailable due to an error
        status = video.get('status')
        if status != 2:
            raise ExtractorError(
                'This video is currently unavailable. It may still be uploading or processing.',
                expected=True)

        title = video.get('reddit_title') or video['title']

        formats = []
        for key, info in video['files'].items():
            # Entries without a URL cannot be downloaded; skip them.
            if not info.get('url'):
                continue
            formats.append({
                'format_id': key,
                'url': self._proto_relative_url(info['url']),
                'width': int_or_none(info.get('width')),
                'height': int_or_none(info.get('height')),
                'filesize': int_or_none(info.get('size')),
                'fps': int_or_none(info.get('framerate')),
                # NOTE(review): the second argument is presumably a scale
                # divisor (bit/s -> kbit/s) -- confirm against
                # utils.float_or_none.
                'vbr': float_or_none(info.get('bitrate'), 1000)
            })
        self._sort_formats(formats)

        # ``owner`` may be absent or an explicit JSON null; a plain
        # ``.get('owner', {})`` only covers the former and would raise
        # AttributeError on None.
        owner = video.get('owner') or {}

        return {
            'id': video_id,
            'title': title,
            'description': video.get('description'),
            'thumbnail': self._proto_relative_url(video.get('thumbnail_url')),
            'uploader': owner.get('user_name'),
            'timestamp': float_or_none(video.get('date_added')),
            'duration': float_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('plays')),
            'formats': formats
        }