Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/sevenplus.py
debian/control: Remove trailing whitespace at EOF.
[youtubedl] / youtube_dl / extractor / sevenplus.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .brightcove import BrightcoveNewIE
7 from ..compat import compat_str
8 from ..utils import (
9 try_get,
10 update_url_query,
11 )
12
13
class SevenPlusIE(BrightcoveNewIE):
    """Extractor for 7plus.com.au episode pages.

    The episode is identified by the ``episode-id`` query parameter of the
    page URL. Playback data is requested from the ``videoservice.swm.digital``
    endpoint and handed to ``self._parse_brightcove_metadata``; title,
    description, series and season/episode numbers are then refined, on a
    best-effort basis, from the ``component-cdn.swm.digital`` content API.
    """
    IE_NAME = '7plus'
    _VALID_URL = r'https?://(?:www\.)?7plus\.com\.au/(?P<path>[^?]+\?.*?\bepisode-id=(?P<id>[^&#]+))'
    _TESTS = [{
        'url': 'https://7plus.com.au/MTYS?episode-id=MTYS7-003',
        'info_dict': {
            'id': 'MTYS7-003',
            'ext': 'mp4',
            'title': 'S7 E3 - Wind Surf',
            'description': 'md5:29c6a69f21accda7601278f81b46483d',
            'uploader_id': '5303576322001',
            'upload_date': '20171201',
            'timestamp': 1512106377,
            'series': 'Mighty Ships',
            'season_number': 7,
            'episode_number': 3,
            'episode': 'Wind Surf',
        },
        'params': {
            'format': 'bestvideo',
            'skip_download': True,
        }
    }, {
        'url': 'https://7plus.com.au/UUUU?episode-id=AUMS43-001',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the episode referenced by *url*.

        The playback request is mandatory: a failed download or a response
        without a ``media`` key raises. The content-API request is optional
        (``fatal=False``); when it fails or has an unexpected shape, the
        Brightcove-derived metadata is returned unchanged.
        """
        # Fetch the groups by name so the result does not depend on the
        # order in which they appear inside _VALID_URL.
        path, episode_id = re.match(self._VALID_URL, url).group('path', 'id')

        media = self._download_json(
            'https://videoservice.swm.digital/playback', episode_id, query={
                'appId': '7plus',
                'deviceType': 'web',
                'platformType': 'web',
                'accountId': 5303576322001,
                'referenceId': 'ref:' + episode_id,
                'deliveryId': 'csai',
                'videoType': 'vod',
            })['media']

        # `or []` also covers an explicit JSON null, which the default
        # argument of dict.get() would not.
        for source in media.get('sources') or []:
            src = source.get('src')
            if not src:
                continue
            # Force the `rule` query parameter to an empty value on every
            # source URL before the metadata parser reads them.
            # NOTE(review): presumably works around a server-side delivery
            # rule - confirm.
            source['src'] = update_url_query(src, {'rule': ''})

        info = self._parse_brightcove_metadata(media, episode_id)

        content = self._download_json(
            'https://component-cdn.swm.digital/content/' + path,
            episode_id, headers={
                'market-id': 4,
            }, fatal=False) or {}
        for item in content.get('items') or []:
            # try_get() tolerates a missing or null `componentData` (and
            # non-dict items) instead of raising, keeping this optional
            # enrichment step from aborting the whole extraction.
            component_type = try_get(
                item, lambda x: x['componentData']['componentType'])
            if component_type != 'infoPanel':
                continue
            # Non-empty values from the info panel take precedence over the
            # metadata obtained from the playback response.
            for src_key, dst_key in (
                    ('title', 'title'), ('shortSynopsis', 'description')):
                value = item.get(src_key)
                if value:
                    info[dst_key] = value
            info['series'] = try_get(
                item, lambda x: x['seriesLogo']['name'], compat_str)
            # Titles shaped like "S7 E3 - Wind Surf" carry the season number,
            # the episode number and the episode name.
            mobj = re.search(r'^S(\d+)\s+E(\d+)\s+-\s+(.+)$', info['title'])
            if mobj:
                info.update({
                    'season_number': int(mobj.group(1)),
                    'episode_number': int(mobj.group(2)),
                    'episode': mobj.group(3),
                })

        return info