Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ooyala.py

   1 from __future__ import unicode_literals
   2 import re
   3 import json
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     unescapeHTML,
   8     ExtractorError,
   9 )
  10
  11
  12 class OoyalaIE(InfoExtractor):
  13     _VALID_URL = r'(?:ooyala:|https?://.+?\.ooyala\.com/.*?(?:embedCode|ec)=)(?P<id>.+?)(&|$)'
  14
  15     _TESTS = [
  16         {
  17             # From http://it.slashdot.org/story/13/04/25/178216/recovering-data-from-broken-hard-drives-and-ssds-video
  18             'url': 'http://player.ooyala.com/player.js?embedCode=pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
  19             'info_dict': {
  20                 'id': 'pxczE2YjpfHfn1f3M-ykG_AmJRRn0PD8',
  21                 'ext': 'mp4',
  22                 'title': 'Explaining Data Recovery from Hard Drives and SSDs',
  23                 'description': 'How badly damaged does a drive have to be to defeat Russell and his crew? Apparently, smashed to bits.',
  24             },
  25         }, {
  26             # Only available for ipad
  27             'url': 'http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
  28             'info_dict': {
  29                 'id': 'x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0',
  30                 'ext': 'mp4',
  31                 'title': 'Simulation Overview - Levels of Simulation',
  32                 'description': '',
  33             },
  34         },
  35     ]
  36
  37     @staticmethod
  38     def _url_for_embed_code(embed_code):
  39         return 'http://player.ooyala.com/player.js?embedCode=%s' % embed_code
  40
  41     @classmethod
  42     def _build_url_result(cls, embed_code):
  43         return cls.url_result(cls._url_for_embed_code(embed_code),
  44                               ie=cls.ie_key())
  45
  46     def _extract_result(self, info, more_info):
  47         return {
  48             'id': info['embedCode'],
  49             'ext': 'mp4',
  50             'title': unescapeHTML(info['title']),
  51             'url': info.get('ipad_url') or info['url'],
  52             'description': unescapeHTML(more_info['description']),
  53             'thumbnail': more_info['promo'],
  54         }
  55
  56     def _real_extract(self, url):
  57         mobj = re.match(self._VALID_URL, url)
  58         embedCode = mobj.group('id')
  59         player_url = 'http://player.ooyala.com/player.js?embedCode=%s' % embedCode
  60         player = self._download_webpage(player_url, embedCode)
  61         mobile_url = self._search_regex(r'mobile_player_url="(.+?)&device="',
  62                                         player, 'mobile player url')
  63         # Looks like some videos are only available for particular devices
  64         # (e.g. http://player.ooyala.com/player.js?embedCode=x1b3lqZDq9y_7kMyC2Op5qo-p077tXD0
  65         # is only available for ipad)
  66         # Working around with fetching URLs for all the devices found starting with 'unknown'
  67         # until we succeed or eventually fail for each device.
  68         devices = re.findall(r'device\s*=\s*"([^"]+)";', player)
  69         devices.remove('unknown')
  70         devices.insert(0, 'unknown')
  71         for device in devices:
  72             mobile_player = self._download_webpage(
  73                 '%s&device=%s' % (mobile_url, device), embedCode,
  74                 'Downloading mobile player JS for %s device' % device)
  75             videos_info = self._search_regex(
  76                 r'var streams=window.oo_testEnv\?\[\]:eval\("\((\[{.*?}\])\)"\);',
  77                 mobile_player, 'info', fatal=False, default=None)
  78             if videos_info:
  79                 break
  80         if not videos_info:
  81             raise ExtractorError('Unable to extract info')
  82         videos_info = videos_info.replace('\\"', '"')
  83         videos_more_info = self._search_regex(
  84             r'eval\("\(({.*?\\"promo\\".*?})\)"', mobile_player, 'more info').replace('\\"', '"')
  85         videos_info = json.loads(videos_info)
  86         videos_more_info = json.loads(videos_more_info)
  87
  88         if videos_more_info.get('lineup'):
  89             videos = [self._extract_result(info, more_info) for (info, more_info) in zip(videos_info, videos_more_info['lineup'])]
  90             return {
  91                 '_type': 'playlist',
  92                 'id': embedCode,
  93                 'title': unescapeHTML(videos_more_info['title']),
  94                 'entries': videos,
  95             }
  96         else:
  97             return self._extract_result(videos_info[0], videos_more_info)