Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ooyala.py

   1 from __future__ import unicode_literals
   2 import re
   3 import json
   4 import base64
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     unescapeHTML,
   9     ExtractorError,
  10     determine_ext,
  11     int_or_none,
  12 )
  13
  14
  15 class OoyalaIE(InfoExtractor):
  16     _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
  17
  18     _TESTS = [
  19         {
  20             # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
  21             'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
  22             'info_dict': {
  23                 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
  24                 'ext': 'mp4',
  25                 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
  26                 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
  27             },
  28         }, {
  29             # Only available for ipad
  30             'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
  31             'info_dict': {
  32                 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
  33                 'ext': 'mp4',
  34                 'title': 'Simulation Overview - Levels of Simulation',
  35                 'description': '',
  36             },
  37         },
  38         {
  39             # Information available only through SAS api
  40             # From http://community.plm.automation.siemens.com/t5/News-NX-Manufacturing/Tool-Path-Divide/ba-p/4187
  41             'url': 'http://player.ooyala.com/player.js?embedCode=FiOG81ZTrvckcchQxmalf4aQj590qTEx',
  42             'md5': 'a84001441b35ea492bc03736e59e7935',
  43             'info_dict': {
  44                 'id': 'FiOG81ZTrvckcchQxmalf4aQj590qTEx',
  45                 'ext': 'mp4',
  46                 'title': 'Ooyala video',
  47             }
  48         }
  49     ]
  50
  51     @staticmethod
  52     def _url_for_embed_code(embed_code):
  53         return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
  54
  55     @classmethod
  56     def _build_url_result(cls, embed_code):
  57         return cls.url_result(cls._url_for_embed_code(embed_code),
  58                               ie=cls.ie_key())
  59
  60     def _extract_result(self, info, more_info):
  61         embedCode = info['embedCode']
  62         video_url = info.get('ipad_url') or info['url']
  63
  64         if determine_ext(video_url) == 'm3u8':
  65             formats = self._extract_m3u8_formats(video_url, embedCode, ext='mp4')
  66         else:
  67             formats = [{
  68                 'url': video_url,
  69                 'ext': 'mp4',
  70             }]
  71
  72         return {
  73             'id': embedCode,
  74             'title': unescapeHTML(info['title']),
  75             'formats': formats,
  76             'description': unescapeHTML(more_info['description']),
  77             'thumbnail': more_info['promo'],
  78         }
  79
  80     def _real_extract(self, url):
  81         mobj = re.match(self._VALID_URL, url)
  82         embedCode = mobj.group('id')
  83         player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
  84         player = self._download_webpage(player_url, embedCode)
  85         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
  86                                         player, 'mobile player url')
  87         # Looks like some videos are only available for particular devices
  88         # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
  89         # is only available for ipad)
  90         # Working around with fetching URLs for all the devices found starting with 'unknown'
  91         # until we succeed or eventually fail for each device.
  92         devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
  93         devices.remove('unknown')
  94         devices.insert(0, 'unknown')
  95         for device in devices:
  96             mobile_player = self._download_webpage(
  97                 '%s&device=%s' % (mobile_url, device), embedCode,
  98                 'Downloading mobile player JS for %s device' % device)
  99             videos_info = self._search_regex(
 100                 r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
 101                 mobile_player, 'info', fatal=False, default=None)
 102             if videos_info:
 103                 break
 104
 105         if not videos_info:
 106             formats = []
 107             auth_data = self._download_json(
 108                 'http://player.ooyala.com/sas/player_api/v1/authorization/embed_code/%s/%s?domain=www.example.org&supportedFormats=mp4,webm' % (embedCode, embedCode),
 109                 embedCode)
 110
 111             cur_auth_data = auth_data['authorization_data'][embedCode]
 112
 113             for stream in cur_auth_data['streams']:
 114                 formats.append({
 115                     'url': base64.b64decode(stream['url']['data'].encode('ascii')).decode('utf-8'),
 116                     'ext': stream.get('delivery_type'),
 117                     'format': stream.get('video_codec'),
 118                     'format_id': stream.get('profile'),
 119                     'width': int_or_none(stream.get('width')),
 120                     'height': int_or_none(stream.get('height')),
 121                     'abr': int_or_none(stream.get('audio_bitrate')),
 122                     'vbr': int_or_none(stream.get('video_bitrate')),
 123                 })
 124             if formats:
 125                 return {
 126                     'id': embedCode,
 127                     'formats': formats,
 128                     'title': 'Ooyala video',
 129                 }
 130
 131             if not cur_auth_data['authorized']:
 132                 raise ExtractorError(cur_auth_data['message'], expected=True)
 133
 134         if not videos_info:
 135             raise ExtractorError('Unable to extract info')
 136         videos_info = videos_info.replace('\\"', '"')
 137         videos_more_info = self._search_regex(
 138             r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
 139         videos_info = json.loads(videos_info)
 140         videos_more_info = json.loads(videos_more_info)
 141
 142         if videos_more_info.get('lineup'):
 143             videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
 144             return {
 145                 '_type': 'playlist',
 146                 'id': embedCode,
 147                 'title': unescapeHTML(videos_more_info['title']),
 148                 'entries': videos,
 149             }
 150         else:
 151             return self._extract_result(videos_info[0], videos_more_info)