Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tbs.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .turner import TurnerBaseIE
   7 from ..utils import (
   8     float_or_none,
   9     int_or_none,
  10     strip_or_none,
  11 )
  12
  13
  14 class TBSIE(TurnerBaseIE):
  15     _VALID_URL = r'https?://(?:www\.)?(?P<site>tbs|tntdrama)\.com/(?:movies|shows/[^/]+/(?:clips|season-\d+/episode-\d+))/(?P<id>[^/?#]+)'
  16     _TESTS = [{
  17         'url': 'http://www.tntdrama.com/shows/the-alienist/clips/monster',
  18         'info_dict': {
  19             'id': '8d384cde33b89f3a43ce5329de42903ed5099887',
  20             'ext': 'mp4',
  21             'title': 'Monster',
  22             'description': 'Get a first look at the theatrical trailer for TNT’s highly anticipated new psychological thriller The Alienist, which premieres January 22 on TNT.',
  23             'timestamp': 1508175329,
  24             'upload_date': '20171016',
  25         },
  26         'params': {
  27             # m3u8 download
  28             'skip_download': True,
  29         }
  30     }, {
  31         'url': 'http://www.tbs.com/shows/search-party/season-1/episode-1/explicit-the-mysterious-disappearance-of-the-girl-no-one-knew',
  32         'only_matching': True,
  33     }, {
  34         'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
  35         'only_matching': True,
  36     }]
  37
  38     def _real_extract(self, url):
  39         site, display_id = re.match(self._VALID_URL, url).groups()
  40         webpage = self._download_webpage(url, display_id)
  41         video_data = self._parse_json(self._search_regex(
  42             r'<script[^>]+?data-drupal-selector="drupal-settings-json"[^>]*?>({.+?})</script>',
  43             webpage, 'drupal setting'), display_id)['turner_playlist'][0]
  44
  45         media_id = video_data['mediaID']
  46         title = video_data['title']
  47
  48         streams_data = self._download_json(
  49             'http://medium.ngtv.io/media/%s/tv' % media_id,
  50             media_id)['media']['tv']
  51         duration = None
  52         chapters = []
  53         formats = []
  54         for supported_type in ('unprotected', 'bulkaes'):
  55             stream_data = streams_data.get(supported_type, {})
  56             m3u8_url = stream_data.get('secureUrl') or stream_data.get('url')
  57             if not m3u8_url:
  58                 continue
  59             if stream_data.get('playlistProtection') == 'spe':
  60                 m3u8_url = self._add_akamai_spe_token(
  61                     'http://www.%s.com/service/token_spe' % site,
  62                     m3u8_url, media_id, {
  63                         'url': url,
  64                         'site_name': site[:3].upper(),
  65                         'auth_required': video_data.get('authRequired') == '1',
  66                     })
  67             formats.extend(self._extract_m3u8_formats(
  68                 m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))
  69
  70             duration = float_or_none(stream_data.get('totalRuntime') or video_data.get('duration'))
  71
  72             if not chapters:
  73                 for chapter in stream_data.get('contentSegments', []):
  74                     start_time = float_or_none(chapter.get('start'))
  75                     duration = float_or_none(chapter.get('duration'))
  76                     if start_time is None or duration is None:
  77                         continue
  78                     chapters.append({
  79                         'start_time': start_time,
  80                         'end_time': start_time + duration,
  81                     })
  82         self._sort_formats(formats)
  83
  84         thumbnails = []
  85         for image_id, image in video_data.get('images', {}).items():
  86             image_url = image.get('url')
  87             if not image_url or image.get('type') != 'video':
  88                 continue
  89             i = {
  90                 'id': image_id,
  91                 'url': image_url,
  92             }
  93             mobj = re.search(r'(\d+)x(\d+)', image_url)
  94             if mobj:
  95                 i.update({
  96                     'width': int(mobj.group(1)),
  97                     'height': int(mobj.group(2)),
  98                 })
  99             thumbnails.append(i)
 100
 101         return {
 102             'id': media_id,
 103             'title': title,
 104             'description': strip_or_none(video_data.get('descriptionNoTags') or video_data.get('shortDescriptionNoTags')),
 105             'duration': duration,
 106             'timestamp': int_or_none(video_data.get('created')),
 107             'season_number': int_or_none(video_data.get('season')),
 108             'episode_number': int_or_none(video_data.get('episode')),
 109             'cahpters': chapters,
 110             'thumbnails': thumbnails,
 111             'formats': formats,
 112         }