Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/discoveryvr.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import parse_duration
   6
   7
   8 class DiscoveryVRIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
  10     _TEST = {
  11         'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
  12         'md5': '32b1929798c464a54356378b7912eca4',
  13         'info_dict': {
  14             'id': 'discovery-vr-an-introduction',
  15             'ext': 'mp4',
  16             'title': 'Discovery VR - An Introduction',
  17             'description': 'md5:80d418a10efb8899d9403e61d8790f06',
  18         }
  19     }
  20
  21     def _real_extract(self, url):
  22         display_id = self._match_id(url)
  23         webpage = self._download_webpage(url, display_id)
  24
  25         bootstrap_data = self._search_regex(
  26             r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
  27             webpage, 'bootstrap data')
  28         bootstrap_data = self._parse_json(
  29             bootstrap_data.encode('utf-8').decode('unicode_escape'),
  30             display_id)
  31         videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']
  32         video_data = next(video for video in videos if video.get('slug') == display_id)
  33
  34         series = video_data.get('showTitle')
  35         title = episode = video_data.get('title') or series
  36         if series and series != title:
  37             title = '%s - %s' % (series, title)
  38
  39         formats = []
  40         for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
  41             f_url = video_data.get(f)
  42             if not f_url:
  43                 continue
  44             formats.append({
  45                 'format_id': format_id,
  46                 'url': f_url,
  47             })
  48
  49         return {
  50             'id': display_id,
  51             'display_id': display_id,
  52             'title': title,
  53             'description': video_data.get('description'),
  54             'thumbnail': video_data.get('thumbnail'),
  55             'duration': parse_duration(video_data.get('runTime')),
  56             'formats': formats,
  57             'episode': episode,
  58             'series': series,
  59         }