]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dplay.py
   2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  29 class DPlayIE(InfoExtractor
): 
  30     _VALID_URL 
= r
'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)' 
  33         # non geo restricted, via secure api, unsigned download hls URL 
  34         'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 
  37             'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet', 
  39             'title': 'Svensken lär sig njuta av livet', 
  40             'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', 
  42             'timestamp': 1365454320, 
  43             'upload_date': '20130408', 
  44             'creator': 'Kanal 5 (Home)', 
  45             'series': 'Nugammalt - 77 händelser som format Sverige', 
  51         # geo restricted, via secure api, unsigned download hls URL 
  52         'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', 
  55             'display_id': 'mig-og-min-mor/season-6-episode-12', 
  57             'title': 'Episode 12', 
  58             'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', 
  60             'timestamp': 1429696800, 
  61             'upload_date': '20150422', 
  62             'creator': 'Kanal 4 (Home)', 
  63             'series': 'Mig og min mor', 
  69         # geo restricted, via direct unsigned hls URL 
  70         'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/', 
  71         'only_matching': True, 
  74         'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', 
  77             'display_id': 'i-kongens-klr/sesong-1-episode-7', 
  80             'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf', 
  82             'timestamp': 1516726800, 
  83             'upload_date': '20180123', 
  84             'series': 'I kongens klær', 
  89             'format': 'bestvideo', 
  90             'skip_download': True, 
  94         'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3', 
  95         'only_matching': True, 
  97         'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001', 
  98         'only_matching': True, 
 101     def _get_disco_api_info(self
, url
, display_id
, disco_host
, realm
): 
 102         disco_base 
= 'https://' + disco_host
 
 103         token 
= self
._download
_json
( 
 104             '%s/token' % disco_base
, display_id
, 'Downloading token', 
 107             })['data']['attributes']['token'] 
 110             'Authorization': 'Bearer ' + token
, 
 112         video 
= self
._download
_json
( 
 113             '%s/content/videos/%s' % (disco_base
, display_id
), display_id
, 
 114             headers
=headers
, query
={ 
 117         video_id 
= video
['data']['id'] 
 118         info 
= video
['data']['attributes'] 
 121         for format_id
, format_dict 
in self
._download
_json
( 
 122                 '%s/playback/videoPlaybackInfo/%s' % (disco_base
, video_id
), 
 123                 display_id
, headers
=headers
)['data']['attributes']['streaming'].items(): 
 124             if not isinstance(format_dict
, dict): 
 126             format_url 
= format_dict
.get('url') 
 129             ext 
= determine_ext(format_url
) 
 130             if format_id 
== 'dash' or ext 
== 'mpd': 
 131                 formats
.extend(self
._extract
_mpd
_formats
( 
 132                     format_url
, display_id
, mpd_id
='dash', fatal
=False)) 
 133             elif format_id 
== 'hls' or ext 
== 'm3u8': 
 134                 formats
.extend(self
._extract
_m
3u8_formats
( 
 135                     format_url
, display_id
, 'mp4', 
 136                     entry_protocol
='m3u8_native', m3u8_id
='hls', 
 141                     'format_id': format_id
, 
 143         self
._sort
_formats
(formats
) 
 147             included 
= video
.get('included') 
 148             if isinstance(included
, list): 
 149                 show 
= next(e 
for e 
in included 
if e
.get('type') == 'show') 
 151                     show
, lambda x
: x
['attributes']['name'], compat_str
) 
 152         except StopIteration: 
 157             'display_id': display_id
, 
 159             'description': info
.get('description'), 
 160             'duration': float_or_none( 
 161                 info
.get('videoDuration'), scale
=1000), 
 162             'timestamp': unified_timestamp(info
.get('publishStart')), 
 164             'season_number': int_or_none(info
.get('seasonNumber')), 
 165             'episode_number': int_or_none(info
.get('episodeNumber')), 
 166             'age_limit': int_or_none(info
.get('minimum_age')), 
 170     def _real_extract(self
, url
): 
 171         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 172         display_id 
= mobj
.group('id') 
 173         domain 
= mobj
.group('domain') 
 175         self
._initialize
_geo
_bypass
({ 
 176             'countries': [mobj
.group('country').upper()], 
 179         webpage 
= self
._download
_webpage
(url
, display_id
) 
 181         video_id 
= self
._search
_regex
( 
 182             r
'data-video-id=["\'](\d
+)', webpage, 'video 
id', default=None) 
 185             host = mobj.group('host
') 
 186             return self._get_disco_api_info( 
 187                 url, display_id, 'disco
-api
.' + host, host.replace('.', '')) 
 189         info = self._download_json( 
 190             'http
://%s/api
/v2
/ajax
/videos?video_id
=%s' % (domain, video_id), 
 193         title = info['title
'] 
 195         PROTOCOLS = ('hls
', 'hds
') 
 198         def extract_formats(protocol, manifest_url): 
 199             if protocol == 'hls
': 
 200                 m3u8_formats = self._extract_m3u8_formats( 
 201                     manifest_url, video_id, ext='mp4
', 
 202                     entry_protocol='m3u8_native
', m3u8_id=protocol, fatal=False) 
 203                 # Sometimes final URLs inside m3u8 are unsigned, let's fix this
 
 204                 # ourselves. Also fragments' URLs are only served signed for 
 206                 query 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(manifest_url
).query
) 
 207                 for m3u8_format 
in m3u8_formats
: 
 209                         'url': update_url_query(m3u8_format
['url'], query
), 
 211                             'User-Agent': USER_AGENTS
['Safari'], 
 214                 formats
.extend(m3u8_formats
) 
 215             elif protocol 
== 'hds': 
 216                 formats
.extend(self
._extract
_f
4m
_formats
( 
 217                     manifest_url 
+ '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0', 
 218                     video_id
, f4m_id
=protocol
, fatal
=False)) 
 220         domain_tld 
= domain
.split('.')[-1] 
 221         if domain_tld 
in ('se', 'dk', 'no'): 
 222             for protocol 
in PROTOCOLS
: 
 223                 # Providing dsc-geo allows to bypass geo restriction in some cases 
 225                     'secure.dplay.%s' % domain_tld
, 'dsc-geo', 
 227                         'countryCode': domain_tld
.upper(), 
 228                         'expiry': (time
.time() + 20 * 60) * 1000, 
 230                 stream 
= self
._download
_json
( 
 231                     'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s' 
 232                     % (domain_tld
, video_id
, protocol
), video_id
, 
 233                     'Downloading %s stream JSON' % protocol
, fatal
=False) 
 234                 if stream 
and stream
.get(protocol
): 
 235                     extract_formats(protocol
, stream
[protocol
]) 
 237         # The last resort is to try direct unsigned hls/hds URLs from info dictionary. 
 238         # Sometimes this does work even when secure API with dsc-geo has failed (e.g. 
 239         # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/). 
 241             for protocol 
in PROTOCOLS
: 
 242                 if info
.get(protocol
): 
 243                     extract_formats(protocol
, info
[protocol
]) 
 245         self
._sort
_formats
(formats
) 
 248         for lang 
in ('se', 'sv', 'da', 'nl', 'no'): 
 249             for format_id 
in ('web_vtt', 'vtt', 'srt'): 
 250                 subtitle_url 
= info
.get('subtitles_%s_%s' % (lang
, format_id
)) 
 252                     subtitles
.setdefault(lang
, []).append({'url': subtitle_url
}) 
 256             'display_id': display_id
, 
 258             'description': info
.get('video_metadata_longDescription'), 
 259             'duration': int_or_none(info
.get('video_metadata_length'), scale
=1000), 
 260             'timestamp': int_or_none(info
.get('video_publish_date')), 
 261             'creator': info
.get('video_metadata_homeChannel'), 
 262             'series': info
.get('video_metadata_show'), 
 263             'season_number': int_or_none(info
.get('season')), 
 264             'episode_number': int_or_none(info
.get('episode')), 
 265             'age_limit': int_or_none(info
.get('minimum_age')), 
 267             'subtitles': subtitles
, 
 271 class DPlayItIE(InfoExtractor
): 
 272     _VALID_URL 
= r
'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)' 
 273     _GEO_COUNTRIES 
= ['IT'] 
 275         'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', 
 276         'md5': '2b808ffb00fc47b884a172ca5d13053c', 
 279             'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij', 
 281             'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij', 
 282             'description': 'md5:3c7a4303aef85868f867a26f5cc14813', 
 283             'thumbnail': r
're:^https?://.*\.jpe?g', 
 284             'upload_date': '20160524', 
 285             'series': 'Biografie imbarazzanti', 
 287             'episode': 'Luigi Di Maio: la psicosi di Stanislawskij', 
 292     def _real_extract(self
, url
): 
 293         display_id 
= self
._match
_id
(url
) 
 295         webpage 
= self
._download
_webpage
(url
, display_id
) 
 297         title 
= remove_end(self
._og
_search
_title
(webpage
), ' | Dplay') 
 301         info 
= self
._search
_regex
( 
 302             r
'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")', 
 303             webpage
, 'playback JSON', default
=None) 
 306                 info 
= self
._parse
_json
(info
, display_id
, fatal
=False) 
 310                 video_id 
= try_get(info
, lambda x
: x
['data']['id']) 
 313             info_url 
= self
._search
_regex
( 
 314                 (r
'playback_json_url\s*:\s*(["\'])(?P
<url
>(?
:(?
!\
1).)+)\
1', 
 315                  r'url\s
*[:=]\s
*["\'](?P<url>(?:https?:)?//[^/]+/playback/videoPlaybackInfo/\d+)'), 
 316                 webpage, 'info url', group='url') 
 318             info_url = urljoin(url, info_url) 
 319             video_id = info_url.rpartition('/')[-1] 
 322                 info = self._download_json( 
 323                     info_url, display_id, headers={ 
 324                         'Authorization': 'Bearer %s' % self._get_cookies(url).get( 
 325                             'dplayit_token').value, 
 328                 if isinstance(info, compat_str): 
 329                     info = self._parse_json(info, display_id) 
 330             except ExtractorError as e: 
 331                 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): 
 332                     info = self._parse_json(e.cause.read().decode('utf-8'), display_id) 
 333                     error = info['errors'][0] 
 334                     if error.get('code') == 'access.denied.geoblocked': 
 335                         self.raise_geo_restricted( 
 336                             msg=error.get('detail'), countries=self._GEO_COUNTRIES) 
 337                     raise ExtractorError(info['errors'][0]['detail'], expected=True) 
 340         hls_url = info['data']['attributes']['streaming']['hls']['url'] 
 342         formats = self._extract_m3u8_formats( 
 343             hls_url, display_id, ext='mp4', entry_protocol='m3u8_native', 
 345         self._sort_formats(formats) 
 347         series = self._html_search_regex( 
 348             r'(?s)<h1[^>]+class=["\'].*?
\bshow
_title
\b.*?
["\'][^>]*>(.+?)</h1>', 
 349             webpage, 'series', fatal=False) 
 350         episode = self._search_regex( 
 351             r'<p[^>]+class=["\'].*?
\bdesc
_ep
\b.*?
["\'][^>]*>\s*<br/>\s*<b>([^<]+)', 
 352             webpage, 'episode', fatal=False) 
 355             r'(?s)<span[^>]+class=["\']dates
["\'][^>]*>.+?\bS\.(?P<season_number>\d+)\s+E\.(?P<episode_number>\d+)\s*-\s*(?P<upload_date>\d{2}/\d{2}/\d{4})', 
 358             season_number = int(mobj.group('season_number')) 
 359             episode_number = int(mobj.group('episode_number')) 
 360             upload_date = unified_strdate(mobj.group('upload_date')) 
 362             season_number = episode_number = upload_date = None 
 365             'id': compat_str(video_id or display_id), 
 366             'display_id': display_id, 
 368             'description': self._og_search_description(webpage), 
 369             'thumbnail': self._og_search_thumbnail(webpage), 
 371             'season_number': season_number, 
 373             'episode_number': episode_number, 
 374             'upload_date': upload_date,