from .common import InfoExtractor
from ..compat import (
- compat_urlparse,
compat_HTTPError,
+ compat_str,
+ compat_urlparse,
)
from ..utils import (
- USER_AGENTS,
+ determine_ext,
ExtractorError,
+ float_or_none,
int_or_none,
- unified_strdate,
remove_end,
+ try_get,
+ unified_strdate,
+ unified_timestamp,
update_url_query,
+ urljoin,
+ USER_AGENTS,
)
class DPlayIE(InfoExtractor):
- _VALID_URL = r'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
+ _VALID_URL = r'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)'
_TESTS = [{
# non geo restricted, via secure api, unsigned download hls URL
'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
'info_dict': {
'id': '3172',
- 'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
+ 'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet',
'ext': 'mp4',
'title': 'Svensken lär sig njuta av livet',
'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
'info_dict': {
'id': '70816',
- 'display_id': 'season-6-episode-12',
+ 'display_id': 'mig-og-min-mor/season-6-episode-12',
'ext': 'mp4',
'title': 'Episode 12',
'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
# geo restricted, via direct unsigned hls URL
'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
'only_matching': True,
+ }, {
+ # disco-api
+ 'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7',
+ 'info_dict': {
+ 'id': '40206',
+ 'display_id': 'i-kongens-klr/sesong-1-episode-7',
+ 'ext': 'mp4',
+ 'title': 'Episode 7',
+ 'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf',
+ 'duration': 2611.16,
+ 'timestamp': 1516726800,
+ 'upload_date': '20180123',
+ 'series': 'I kongens klær',
+ 'season_number': 1,
+ 'episode_number': 7,
+ },
+ 'params': {
+ 'format': 'bestvideo',
+ 'skip_download': True,
+ },
+ }, {
+
+ 'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3',
+ 'only_matching': True,
+ }, {
+ 'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001',
+ 'only_matching': True,
}]
def _get_disco_api_info(self, url, display_id, disco_host, realm):
    """Build the info dict for a video looked up through the disco API.

    Three JSON requests are made against ``https://<disco_host>``:
    ``/token`` (with ``realm`` as a query parameter), then
    ``/content/videos/<display_id>`` (asking for the show to be included)
    and ``/playback/videoPlaybackInfo/<video id>``. The last two carry
    the token as a Bearer ``Authorization`` header and ``url`` as the
    ``Referer``.

    Returns a dict with the keys id, display_id, title, description,
    duration, timestamp, series, season_number, episode_number,
    age_limit and formats.

    The token, the video id, the video name and the streaming map are
    read with plain subscripting, so a response lacking any of them
    raises instead of producing a partial result. Everything else is
    optional and is left as None when missing.
    """
    disco_base = 'https://' + disco_host
    token = self._download_json(
        '%s/token' % disco_base, display_id, 'Downloading token',
        query={
            'realm': realm,
        })['data']['attributes']['token']
    headers = {
        'Referer': url,
        'Authorization': 'Bearer ' + token,
    }
    video = self._download_json(
        '%s/content/videos/%s' % (disco_base, display_id), display_id,
        headers=headers, query={
            'include': 'show'
        })
    video_id = video['data']['id']
    info = video['data']['attributes']
    title = info['name']

    streaming = self._download_json(
        '%s/playback/videoPlaybackInfo/%s' % (disco_base, video_id),
        display_id, headers=headers)['data']['attributes']['streaming']

    formats = []
    for format_id, format_dict in streaming.items():
        # Entries that are not dicts or carry no URL cannot be turned
        # into a format; skip them instead of failing.
        if not isinstance(format_dict, dict):
            continue
        format_url = format_dict.get('url')
        if not format_url:
            continue
        # The manifest type is recognised either by the key it is listed
        # under or by the extension of its URL.
        ext = determine_ext(format_url)
        if format_id == 'dash' or ext == 'mpd':
            formats.extend(self._extract_mpd_formats(
                format_url, display_id, mpd_id='dash', fatal=False))
        elif format_id == 'hls' or ext == 'm3u8':
            formats.extend(self._extract_m3u8_formats(
                format_url, display_id, 'mp4',
                entry_protocol='m3u8_native', m3u8_id='hls',
                fatal=False))
        else:
            formats.append({
                'url': format_url,
                'format_id': format_id,
            })
    self._sort_formats(formats)

    # The series name is optional metadata: take it from the first
    # included resource of type 'show'. Entries that are not dicts are
    # ignored so a malformed 'included' list cannot abort extraction.
    series = None
    included = video.get('included')
    if isinstance(included, list):
        show = next(
            (e for e in included
             if isinstance(e, dict) and e.get('type') == 'show'),
            None)
        if show is not None:
            series = try_get(
                show, lambda x: x['attributes']['name'], compat_str)

    return {
        'id': video_id,
        'display_id': display_id,
        'title': title,
        'description': info.get('description'),
        # NOTE(review): scale=1000 suggests videoDuration is in
        # milliseconds - confirm against the API.
        'duration': float_or_none(
            info.get('videoDuration'), scale=1000),
        'timestamp': unified_timestamp(info.get('publishStart')),
        'series': series,
        'season_number': int_or_none(info.get('seasonNumber')),
        'episode_number': int_or_none(info.get('episodeNumber')),
        'age_limit': int_or_none(info.get('minimum_age')),
        'formats': formats,
    }
+
def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url)
display_id = mobj.group('id')
domain = mobj.group('domain')
+ self._initialize_geo_bypass({
+ 'countries': [mobj.group('country').upper()],
+ })
+
webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
- r'data-video-id=["\'](\d+)', webpage, 'video id')
+ r'data-video-id=["\'](\d+)', webpage, 'video id', default=None)
+
+ if not video_id:
+ host = mobj.group('host')
+ return self._get_disco_api_info(
+ url, display_id, 'disco-api.' + host, host.replace('.', ''))
info = self._download_json(
'http://%s/api/v2/ajax/videos?video_id=%s' % (domain, video_id),
webpage = self._download_webpage(url, display_id)
- info_url = self._search_regex(
- r'url\s*:\s*["\']((?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)',
- webpage, 'video id')
-
title = remove_end(self._og_search_title(webpage), ' | Dplay')
- try:
- info = self._download_json(
- info_url, display_id, headers={
- 'Authorization': 'Bearer %s' % self._get_cookies(url).get(
- 'dplayit_token').value,
- 'Referer': url,
- })
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
- info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
- error = info['errors'][0]
- if error.get('code') == 'access.denied.geoblocked':
- self.raise_geo_restricted(
- msg=error.get('detail'), countries=self._GEO_COUNTRIES)
- raise ExtractorError(info['errors'][0]['detail'], expected=True)
- raise
+ video_id = None
+
+ info = self._search_regex(
+ r'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
+ webpage, 'playback JSON', default=None)
+ if info:
+ for _ in range(2):
+ info = self._parse_json(info, display_id, fatal=False)
+ if not info:
+ break
+ else:
+ video_id = try_get(info, lambda x: x['data']['id'])
+
+ if not info:
+ info_url = self._search_regex(
+ (r'playback_json_url\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1',
+ r'url\s*[:=]\s*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'),
+ webpage, 'info url', group='url')
+
+ info_url = urljoin(url, info_url)
+ video_id = info_url.rpartition('/')[-1]
+
+ try:
+ info = self._download_json(
+ info_url, display_id, headers={
+ 'Authorization': 'Bearer %s' % self._get_cookies(url).get(
+ 'dplayit_token').value,
+ 'Referer': url,
+ })
+ if isinstance(info, compat_str):
+ info = self._parse_json(info, display_id)
+ except ExtractorError as e:
+ if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
+ info = self._parse_json(e.cause.read().decode('utf-8'), display_id)
+ error = info['errors'][0]
+ if error.get('code') == 'access.denied.geoblocked':
+ self.raise_geo_restricted(
+ msg=error.get('detail'), countries=self._GEO_COUNTRIES)
+ raise ExtractorError(info['errors'][0]['detail'], expected=True)
+ raise
hls_url = info['data']['attributes']['streaming']['hls']['url']
formats = self._extract_m3u8_formats(
hls_url, display_id, ext='mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
+ self._sort_formats(formats)
series = self._html_search_regex(
r'(?s)<h1[^>]+class=["\'].*?\bshow_title\b.*?["\'][^>]*>(.+?)</h1>',
season_number = episode_number = upload_date = None
return {
- 'id': info_url.rpartition('/')[-1],
+ 'id': compat_str(video_id or display_id),
'display_id': display_id,
'title': title,
'description': self._og_search_description(webpage),