]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tbs.py
debian/control: Update list of supported sites/extractors.
[youtubedl] / youtube_dl / extractor / tbs.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .turner import TurnerBaseIE
7 from ..utils import (
8 float_or_none,
9 int_or_none,
10 strip_or_none,
11 )
12
13
class TBSIE(TurnerBaseIE):
    """Extractor for movie, clip and episode pages on tbs.com and tntdrama.com."""

    _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
        'info_dict': {
            'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
            'ext': 'mp4',
            'title': 'Monster',
            'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
            'timestamp': 1508175329,
            'upload_date': '20171016',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
        'only_matching': True,
    }, {
        'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single TBS/TNT video page.

        The page's embedded Drupal settings JSON supplies the video
        metadata (first entry of ``turner_playlist``); the stream
        description is then fetched from medium.ngtv.io using the
        ``mediaID`` found there.

        Returns a dict with ``id``, ``title``, ``description``,
        ``duration``, ``timestamp``, ``season_number``,
        ``episode_number``, ``chapters``, ``thumbnails`` and ``formats``.

        Raises KeyError if the metadata lacks ``mediaID`` or ``title``.
        """
        site, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        video_data = self._parse_json(self._search_regex(
            r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
            webpage, 'drupal setting'), display_id)['turner_playlist'][0]

        media_id = video_data['mediaID']
        title = video_data['title']

        streams_data = self._download_json(
            'http://medium.ngtv.io/media/%s/tv' % media_id,
            media_id)['media']['tv']
        duration = None
        chapters = []
        formats = []
        for supported_type in ('unprotected', 'bulkaes'):
            stream_data = streams_data.get(supported_type, {})
            m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
            if not m3u8_url:
                continue
            if stream_data.get('playlistProtection') == 'spe':
                # 'spe'-protected playlists need a token appended to the
                # URL; site[:3].upper() yields 'TBS' or 'TNT'.
                m3u8_url = self._add_akamai_spe_token(
                    'http://token.vgtf.net/token/token_spe',
                    m3u8_url, media_id, {
                        'url': url,
                        'site_name': site[:3].upper(),
                        'auth_required': video_data.get('authRequired') == '1',
                    })
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))

            duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))

            # Only take chapters from the first stream that provides them.
            if not chapters:
                for chapter in stream_data.get('contentSegments', []):
                    start_time = float_or_none(chapter.get('start'))
                    # Kept in its own name so the video-level ``duration``
                    # computed above is not overwritten by a segment length.
                    chapter_duration = float_or_none(chapter.get('duration'))
                    if start_time is None or chapter_duration is None:
                        continue
                    chapters.append({
                        'start_time': start_time,
                        'end_time': start_time + chapter_duration,
                    })
        self._sort_formats(formats)

        thumbnails = []
        for image_id, image in video_data.get('images', {}).items():
            image_url = image.get('url')
            # Skip entries without a URL or whose type is not 'video'.
            if not image_url or image.get('type') != 'video':
                continue
            i = {
                'id': image_id,
                'url': image_url,
            }
            # Dimensions are taken from a WIDTHxHEIGHT token in the URL, if any.
            mobj = re.search(r'(\d+)x(\d+)', image_url)
            if mobj:
                i.update({
                    'width': int(mobj.group(1)),
                    'height': int(mobj.group(2)),
                })
            thumbnails.append(i)

        return {
            'id': media_id,
            'title': title,
            'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
            'duration': duration,
            'timestamp': int_or_none(video_data.get('created')),
            'season_number': int_or_none(video_data.get('season')),
            'episode_number': int_or_none(video_data.get('episode')),
            'chapters': chapters,
            'thumbnails': thumbnails,
            'formats': formats,
        }