Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ooyala.py
Imported Upstream version 2015.05.15
[youtubedl] / youtube_dl / extractor / ooyala.py
1 from __future__ import unicode_literals
2 import re
3 import json
4 import base64
5
6 from .common import InfoExtractor
7 from ..utils import (
8 unescapeHTML,
9 ExtractorError,
10 determine_ext,
11 int_or_none,
12 )
13
14
class OoyalaIE(InfoExtractor):
    """Information extractor for videos identified by an Ooyala embed code.

    Accepts either the internal ``ooyala:<embed code>`` form or any
    ``*.ooyala.com`` URL carrying an ``embedCode=``/``ec=`` parameter.
    """

    _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'

    _TESTS = [
        {
            # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
            'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
            'info_dict': {
                'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
                'ext': 'mp4',
                'title': 'Explaining Data Recovery from Hard Drives and SSDs',
                'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
            },
        }, {
            # Only available for ipad
            'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
            'info_dict': {
                'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
                'ext': 'mp4',
                'title': 'Simulation Overview - Levels of Simulation',
                'description': '',
            },
        },
        {
            # Information available only through SAS api
            # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
            'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
            'md5': 'a84001441b35ea492bc03736e59e7935',
            'info_dict': {
                'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
                'ext': 'mp4',
                'title': 'Ooyala video',
            }
        }
    ]

    @staticmethod
    def _url_for_embed_code(embed_code):
        """Return the player.js URL for the given embed code."""
        return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code

    @classmethod
    def _build_url_result(cls, embed_code):
        """Return a url_result for *embed_code* that is handled by this extractor."""
        return cls.url_result(cls._url_for_embed_code(embed_code),
                              ie=cls.ie_key())

    def _extract_result(self, info, more_info):
        """Build the info dict for a single video.

        *info* is one entry of the player's stream list (supplies
        'embedCode', 'title' and the media URL); *more_info* supplies
        'description' and 'promo' (used as the thumbnail).
        """
        embed_code = info['embedCode']
        # Prefer the iPad URL when one is given, otherwise use the generic one.
        video_url = info.get('ipad_url') or info['url']

        if determine_ext(video_url) == 'm3u8':
            formats = self._extract_m3u8_formats(video_url, embed_code, ext='mp4')
        else:
            formats = [{
                'url': video_url,
                'ext': 'mp4',
            }]

        return {
            'id': embed_code,
            'title': unescapeHTML(info['title']),
            'formats': formats,
            'description': unescapeHTML(more_info['description']),
            'thumbnail': more_info['promo'],
        }

    def _real_extract(self, url):
        """Extract a single video or a playlist for the embed code in *url*.

        The mobile player JS is tried for each advertised device; when none
        of them yields stream info, the SAS authorization API is queried
        instead.

        Raises ExtractorError when the SAS API reports the video as not
        authorized, or when no stream info can be found at all.
        """
        mobj = re.match(self._VALID_URL, url)
        embed_code = mobj.group('id')
        player_url = self._url_for_embed_code(embed_code)
        player = self._download_webpage(player_url, embed_code)
        mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
                                        player, 'mobile player url')
        # Looks like some videos are only available for particular devices
        # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
        # is only available for ipad)
        # Working around with fetching URLs for all the devices found starting with 'unknown'
        # until we succeed or eventually fail for each device.
        devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
        # list.remove() raises ValueError for a missing item, so only move
        # 'unknown' to the front when the player actually lists it.
        if 'unknown' in devices:
            devices.remove('unknown')
        devices.insert(0, 'unknown')
        for device in devices:
            mobile_player = self._download_webpage(
                '%s&device=%s' % (mobile_url, device), embed_code,
                'Downloading mobile player JS for %s device' % device)
            videos_info = self._search_regex(
                r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
                mobile_player, 'info', fatal=False, default=None)
            if videos_info:
                break

        if not videos_info:
            # No device produced stream info: fall back to the SAS API.
            auth_data = self._download_json(
                'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embed_code, embed_code),
                embed_code)

            cur_auth_data = auth_data['authorization_data'][embed_code]

            # An unauthorized response may carry no 'streams' entry; treat
            # that as "no formats" so the authorization error below is
            # reported instead of a KeyError.
            formats = [{
                # Stream URLs are delivered base64-encoded.
                'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
                'ext': stream.get('delivery_type'),
                'format': stream.get('video_codec'),
                'format_id': stream.get('profile'),
                'width': int_or_none(stream.get('width')),
                'height': int_or_none(stream.get('height')),
                'abr': int_or_none(stream.get('audio_bitrate')),
                'vbr': int_or_none(stream.get('video_bitrate')),
            } for stream in cur_auth_data.get('streams') or []]
            if formats:
                return {
                    'id': embed_code,
                    'formats': formats,
                    'title': 'Ooyala video',
                }

            if not cur_auth_data['authorized']:
                raise ExtractorError(cur_auth_data['message'], expected=True)

        if not videos_info:
            raise ExtractorError('Unable to extract info')
        # Both JSON blobs sit inside a JS string literal, so their double
        # quotes arrive backslash-escaped; unescape before parsing.
        videos_info = videos_info.replace('\\"', '"')
        videos_more_info = self._search_regex(
            r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
        videos_info = json.loads(videos_info)
        videos_more_info = json.loads(videos_more_info)

        if videos_more_info.get('lineup'):
            # A lineup means several videos: pair each stream entry with its
            # lineup entry and return them as a playlist.
            videos = [
                self._extract_result(info, more_info)
                for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
            return {
                '_type': 'playlist',
                'id': embed_code,
                'title': unescapeHTML(videos_more_info['title']),
                'entries': videos,
            }
        else:
            return self._extract_result(videos_info[0], videos_more_info)