Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/kaltura.py
d2873049202813be7587067b629503dc2da0f877
[youtubedl] / youtube_dl / extractor / kaltura.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_urllib_parse
8 from ..utils import (
9 ExtractorError,
10 int_or_none,
11 )
12
13
class KalturaIE(InfoExtractor):
    """Information extractor for media entries served through the Kaltura API.

    Accepts either the internal ``kaltura:<partner_id>:<entry_id>`` form or a
    kaltura.com ``kwidget`` URL that carries ``wid/_<partner_id>`` and
    ``entry_id/<entry_id>`` path components.
    """

    # Verbose regex: whitespace inside the pattern is insignificant.
    _VALID_URL = r'''(?x)
                (?:kaltura:|
                   https?://(?:(?:www|cdnapisec)\.)?kaltura\.com/index\.php/kwidget/(?:[^/]+/)*?wid/_
                )(?P<partner_id>\d+)
                (?::|
                   /(?:[^/]+/)*?entry_id/
                )(?P<id>[0-9a-z_]+)'''
    _API_BASE = 'http://cdnapi.kaltura.com/api_v3/index.php?'
    _TESTS = [
        {
            'url': 'kaltura:269692:1_1jc2y3e4',
            'md5': '3adcbdb3dcc02d647539e53f284ba171',
            'info_dict': {
                'id': '1_1jc2y3e4',
                'ext': 'mp4',
                'title': 'Track 4',
                'upload_date': '20131219',
                'uploader_id': 'mlundberg@wolfgangsvault.com',
                'description': 'The Allman Brothers Band, 12/16/1981',
                'thumbnail': 're:^https?://.*/thumbnail/.*',
                'timestamp': int,
            },
        },
        {
            'url': 'http://www.kaltura.com/index.php/kwidget/cache_st/1300318621/wid/_269692/uiconf_id/3873291/entry_id/1_1jc2y3e4',
            'only_matching': True,
        },
        {
            'url': 'https://cdnapisec.kaltura.com/index.php/kwidget/wid/_557781/uiconf_id/22845202/entry_id/1_plr1syf3',
            'only_matching': True,
        },
    ]

    def _kaltura_api_call(self, video_id, actions, *args, **kwargs):
        """Send a single request or a multirequest to the Kaltura API.

        ``actions`` is a non-empty list of parameter dicts. The first dict
        supplies the top-level query parameters; the parameters of each
        following dict are added under keys prefixed with that dict's
        1-based position (``'<index>:<name>'``). Any extra positional and
        keyword arguments are forwarded to ``_download_json``.

        Returns the decoded JSON response unchanged.

        Raises ExtractorError if the response, or any element of a list
        response, is a ``KalturaAPIException`` object.
        """
        # Work on a copy so the caller's first action dict is not polluted
        # with the prefixed parameters of the remaining actions.
        params = dict(actions[0])
        for i, a in enumerate(actions[1:], start=1):
            for k, v in a.items():
                params['%d:%s' % (i, k)] = v

        query = compat_urllib_parse.urlencode(params)
        url = self._API_BASE + query
        data = self._download_json(url, video_id, *args, **kwargs)

        # Inspect every returned object, not only the first one, and cope
        # with an error that arrives as a bare object instead of a list.
        responses = data if isinstance(data, list) else [data]
        for response in responses:
            if (isinstance(response, dict) and
                    response.get('objectType') == 'KalturaAPIException'):
                raise ExtractorError(
                    '%s said: %s' % (self.IE_NAME, response['message']))

        return data

    def _get_kaltura_signature(self, video_id, partner_id):
        """Start a widget session for ``partner_id`` and return its ``ks`` value."""
        actions = [{
            'apiVersion': '3.1',
            'expiry': 86400,
            'format': 1,
            'service': 'session',
            'action': 'startWidgetSession',
            'widgetId': '_%s' % partner_id,
        }]
        return self._kaltura_api_call(
            video_id, actions, note='Downloading Kaltura signature')['ks']

    def _get_video_info(self, video_id, partner_id):
        """Fetch entry metadata and context data for ``video_id`` in one multirequest.

        A session signature is obtained first and passed along as ``ks``.
        The response is returned as delivered by ``_kaltura_api_call``;
        ``_real_extract`` unpacks it as a pair of (``baseentry.get`` result,
        ``baseentry.getContextData`` result).
        """
        signature = self._get_kaltura_signature(video_id, partner_id)
        actions = [
            {
                'action': 'null',
                'apiVersion': '3.1.5',
                'clientTag': 'kdp:v3.8.5',
                'format': 1,  # JSON, 2 = XML, 3 = PHP
                'service': 'multirequest',
                'ks': signature,
            },
            {
                'action': 'get',
                'entryId': video_id,
                'service': 'baseentry',
                'version': '-1',
            },
            {
                'action': 'getContextData',
                'contextDataParams:objectType': 'KalturaEntryContextDataParams',
                'contextDataParams:referrer': 'http://www.kaltura.com/',
                'contextDataParams:streamerType': 'http',
                'entryId': video_id,
                'service': 'baseentry',
            },
        ]
        return self._kaltura_api_call(
            video_id, actions, note='Downloading video info JSON')

    def _real_extract(self, url):
        """Build the info dict for the Kaltura entry addressed by ``url``."""
        # A single match yields both identifiers; the ``id`` group is the
        # entry id that is also reported as the video id.
        mobj = re.match(self._VALID_URL, url)
        partner_id, entry_id = mobj.group('partner_id', 'id')

        info, source_data = self._get_video_info(entry_id, partner_id)

        # One format per flavor asset; the download URL is the entry's data
        # URL with the flavor id appended.
        formats = [{
            'format_id': '%(fileExt)s-%(bitrate)s' % f,
            'ext': f['fileExt'],
            'tbr': f['bitrate'],
            'fps': f.get('frameRate'),
            'filesize_approx': int_or_none(f.get('size'), invscale=1024),
            'container': f.get('containerFormat'),
            'vcodec': f.get('videoCodecId'),
            'height': f.get('height'),
            'width': f.get('width'),
            'url': '%s/flavorId/%s' % (info['dataUrl'], f['id']),
        } for f in source_data['flavorAssets']]
        self._sort_formats(formats)

        return {
            'id': entry_id,
            'title': info['name'],
            'formats': formats,
            'description': info.get('description'),
            'thumbnail': info.get('thumbnailUrl'),
            'duration': info.get('duration'),
            'timestamp': info.get('createdAt'),
            'uploader_id': info.get('userId'),
            'view_count': info.get('plays'),
        }