Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sevenplus.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .brightcove import BrightcoveNewIE
   7 from ..compat import compat_str
   8 from ..utils import (
   9     try_get,
  10     update_url_query,
  11 )
  12
  13
  14 class SevenPlusIE(BrightcoveNewIE):
  15     IE_NAME = '7plus'
  16     _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
  17     _TESTS = [{
  18         'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
  19         'info_dict': {
  20             'id': 'MTYS7-003',
  21             'ext': 'mp4',
  22             'title': 'S7 E3 - Wind Surf',
  23             'description': 'md5:29c6a69f21accda7601278f81b46483d',
  24             'uploader_id': '5303576322001',
  25             'upload_date': '20171201',
  26             'timestamp': 1512106377,
  27             'series': 'Mighty Ships',
  28             'season_number': 7,
  29             'episode_number': 3,
  30             'episode': 'Wind Surf',
  31         },
  32         'params': {
  33             'format': 'bestvideo',
  34             'skip_download': True,
  35         }
  36     }, {
  37         'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001',
  38         'only_matching': True,
  39     }]
  40
  41     def _real_extract(self, url):
  42         path, episode_id = re.match(self._VALID_URL, url).groups()
  43
  44         media = self._download_json(
  45             'https://videoservice.swm.digital/playback', episode_id, query={
  46                 'appId': '7plus',
  47                 'deviceType': 'web',
  48                 'platformType': 'web',
  49                 'accountId': 5303576322001,
  50                 'referenceId': 'ref:' + episode_id,
  51                 'deliveryId': 'csai',
  52                 'videoType': 'vod',
  53             })['media']
  54
  55         for source in media.get('sources', {}):
  56             src = source.get('src')
  57             if not src:
  58                 continue
  59             source['src'] = update_url_query(src, {'rule': ''})
  60
  61         info = self._parse_brightcove_metadata(media, episode_id)
  62
  63         content = self._download_json(
  64             'https://component-cdn.swm.digital/content/' + path,
  65             episode_id, headers={
  66                 'market-id': 4,
  67             }, fatal=False) or {}
  68         for item in content.get('items', {}):
  69             if item.get('componentData', {}).get('componentType') == 'infoPanel':
  70                 for src_key, dst_key in [('title', 'title'), ('shortSynopsis', 'description')]:
  71                     value = item.get(src_key)
  72                     if value:
  73                         info[dst_key] = value
  74                 info['series'] = try_get(
  75                     item, lambda x: x['seriesLogo']['name'], compat_str)
  76                 mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
  77                 if mobj:
  78                     info.update({
  79                         'season_number': int(mobj.group(1)),
  80                         'episode_number': int(mobj.group(2)),
  81                         'episode': mobj.group(3),
  82                     })
  83
  84         return info