Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/discoveryvr.py
debian/control: Remove trailing whitespace at EOF.
[youtubedl] / youtube_dl / extractor / discoveryvr.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..utils import parse_duration
6
7
class DiscoveryVRIE(InfoExtractor):
    """Information extractor for ``discoveryvr.com/watch/<slug>`` pages."""

    _VALID_URL = r'https?://(?:www\.)?discoveryvr\.com/watch/(?P<id>[^/?#]+)'
    _TEST = {
        'url': 'http://www.discoveryvr.com/watch/discovery-vr-an-introduction',
        'md5': '32b1929798c464a54356378b7912eca4',
        'info_dict': {
            'id': 'discovery-vr-an-introduction',
            'ext': 'mp4',
            'title': 'Discovery VR - An Introduction',
            'description': 'md5:80d418a10efb8899d9403e61d8790f06',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the video whose slug appears in *url*.

        The watch page assigns a quoted, escaped JSON document to
        ``root.DVR.bootstrapData``.  Its ``videos`` entry is itself a JSON
        string containing an ``allVideos`` list, which is searched for the
        entry whose ``slug`` equals the slug taken from the URL.

        Returns a dict with ``id``, ``display_id``, ``title``,
        ``description``, ``thumbnail``, ``duration``, ``formats``,
        ``episode`` and ``series`` keys.

        Raises ExtractorError when no entry of ``allVideos`` matches the
        slug (previously this surfaced as a bare StopIteration).
        """
        # Imported at function scope so this block does not depend on a
        # change to the module-level import list.
        from ..utils import ExtractorError

        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        bootstrap_data = self._search_regex(
            r'root\.DVR\.bootstrapData\s+=\s+"({.+?})";',
            webpage, 'bootstrap data')
        # The capture is the inside of a quoted string literal, so its
        # backslash escapes are undone before the text is parsed as JSON.
        # NOTE(review): 'unicode_escape' decodes the UTF-8 bytes as Latin-1,
        # so raw non-ASCII characters in the page would come out mangled --
        # confirm whether the site ever emits them unescaped.
        bootstrap_data = self._parse_json(
            bootstrap_data.encode('utf-8').decode('unicode_escape'),
            display_id)
        videos = self._parse_json(bootstrap_data['videos'], display_id)['allVideos']

        # Use a default so a missing slug yields a descriptive extractor
        # error instead of leaking StopIteration to the caller.
        video_data = next(
            (video for video in videos if video.get('slug') == display_id),
            None)
        if video_data is None:
            raise ExtractorError(
                'Unable to find video data for %s' % display_id)

        series = video_data.get('showTitle')
        title = episode = video_data.get('title') or series
        # Prefix the show title only when it adds information.
        if series and series != title:
            title = '%s - %s' % (series, title)

        formats = []
        # (metadata key, format_id) pairs; keys absent or empty in the
        # video metadata are skipped.
        for f, format_id in (('cdnUriM3U8', 'mobi'), ('webVideoUrlSd', 'sd'), ('webVideoUrlHd', 'hd')):
            f_url = video_data.get(f)
            if not f_url:
                continue
            formats.append({
                'format_id': format_id,
                'url': f_url,
            })

        return {
            'id': display_id,
            'display_id': display_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnail'),
            'duration': parse_duration(video_data.get('runTime')),
            'formats': formats,
            'episode': episode,
            'series': series,
        }