]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dplay.py
   2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
  28 class DPlayIE(InfoExtractor
): 
  29     _VALID_URL 
= r
'https?://(?P<domain>www\.(?P<host>dplay\.(?P<country>dk|se|no)))/(?:video(?:er|s)/)?(?P<id>[^/]+/[^/?#]+)' 
  32         # non geo restricted, via secure api, unsigned download hls URL 
  33         'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/', 
  36             'display_id': 'nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet', 
  38             'title': 'Svensken lär sig njuta av livet', 
  39             'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8', 
  41             'timestamp': 1365454320, 
  42             'upload_date': '20130408', 
  43             'creator': 'Kanal 5 (Home)', 
  44             'series': 'Nugammalt - 77 händelser som format Sverige', 
  50         # geo restricted, via secure api, unsigned download hls URL 
  51         'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/', 
  54             'display_id': 'mig-og-min-mor/season-6-episode-12', 
  56             'title': 'Episode 12', 
  57             'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90', 
  59             'timestamp': 1429696800, 
  60             'upload_date': '20150422', 
  61             'creator': 'Kanal 4 (Home)', 
  62             'series': 'Mig og min mor', 
  68         # geo restricted, via direct unsigned hls URL 
  69         'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/', 
  70         'only_matching': True, 
  73         'url': 'https://www.dplay.no/videoer/i-kongens-klr/sesong-1-episode-7', 
  76             'display_id': 'i-kongens-klr/sesong-1-episode-7', 
  79             'description': 'md5:e3e1411b2b9aebeea36a6ec5d50c60cf', 
  81             'timestamp': 1516726800, 
  82             'upload_date': '20180123', 
  83             'series': 'I kongens klær', 
  88             'format': 'bestvideo', 
  89             'skip_download': True, 
  93         'url': 'https://www.dplay.dk/videoer/singleliv/season-5-episode-3', 
  94         'only_matching': True, 
  96         'url': 'https://www.dplay.se/videos/sofias-anglar/sofias-anglar-1001', 
  97         'only_matching': True, 
 100     def _real_extract(self
, url
): 
 101         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 102         display_id 
= mobj
.group('id') 
 103         domain 
= mobj
.group('domain') 
 105         self
._initialize
_geo
_bypass
([mobj
.group('country').upper()]) 
 107         webpage 
= self
._download
_webpage
(url
, display_id
) 
 109         video_id 
= self
._search
_regex
( 
 110             r
'data-video-id=["\'](\d
+)', webpage, 'video 
id', default=None) 
 113             host = mobj.group('host
') 
 114             disco_base = 'https
://disco
-api
.%s' % host 
 116                 '%s/token
' % disco_base, display_id, 'Downloading token
', 
 118                     'realm
': host.replace('.', ''), 
 120             video = self._download_json( 
 121                 '%s/content
/videos
/%s' % (disco_base, display_id), display_id, 
 124                     'x
-disco
-client
': 'WEB
:UNKNOWN
:dplay
-client
:0.0.1', 
 128             video_id = video['data
']['id'] 
 129             info = video['data
']['attributes
'] 
 132             for format_id, format_dict in self._download_json( 
 133                     '%s/playback
/videoPlaybackInfo
/%s' % (disco_base, video_id), 
 134                     display_id)['data
']['attributes
']['streaming
'].items(): 
 135                 if not isinstance(format_dict, dict): 
 137                 format_url = format_dict.get('url
') 
 140                 ext = determine_ext(format_url) 
 141                 if format_id == 'dash
' or ext == 'mpd
': 
 142                     formats.extend(self._extract_mpd_formats( 
 143                         format_url, display_id, mpd_id='dash
', fatal=False)) 
 144                 elif format_id == 'hls
' or ext == 'm3u8
': 
 145                     formats.extend(self._extract_m3u8_formats( 
 146                         format_url, display_id, 'mp4
', 
 147                         entry_protocol='m3u8_native
', m3u8_id='hls
', 
 152                         'format_id
': format_id, 
 154             self._sort_formats(formats) 
 158                 included = video.get('included
') 
 159                 if isinstance(included, list): 
 160                     show = next(e for e in included if e.get('type') == 'show
') 
 162                         show, lambda x: x['attributes
']['name
'], compat_str) 
 163             except StopIteration: 
 168                 'display_id
': display_id, 
 170                 'description
': info.get('description
'), 
 171                 'duration
': float_or_none( 
 172                     info.get('videoDuration
'), scale=1000), 
 173                 'timestamp
': unified_timestamp(info.get('publishStart
')), 
 175                 'season_number
': int_or_none(info.get('seasonNumber
')), 
 176                 'episode_number
': int_or_none(info.get('episodeNumber
')), 
 177                 'age_limit
': int_or_none(info.get('minimum_age
')), 
 181         info = self._download_json( 
 182             'http
://%s/api
/v2
/ajax
/videos?video_id
=%s' % (domain, video_id), 
 185         title = info['title
'] 
 187         PROTOCOLS = ('hls
', 'hds
') 
 190         def extract_formats(protocol, manifest_url): 
 191             if protocol == 'hls
': 
 192                 m3u8_formats = self._extract_m3u8_formats( 
 193                     manifest_url, video_id, ext='mp4
', 
 194                     entry_protocol='m3u8_native
', m3u8_id=protocol, fatal=False) 
 195                 # Sometimes final URLs inside m3u8 are unsigned, let's fix this
 
 196                 # ourselves. Also fragments' URLs are only served signed for 
 198                 query 
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(manifest_url
).query
) 
 199                 for m3u8_format 
in m3u8_formats
: 
 201                         'url': update_url_query(m3u8_format
['url'], query
), 
 203                             'User-Agent': USER_AGENTS
['Safari'], 
 206                 formats
.extend(m3u8_formats
) 
 207             elif protocol 
== 'hds': 
 208                 formats
.extend(self
._extract
_f
4m
_formats
( 
 209                     manifest_url 
+ '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0', 
 210                     video_id
, f4m_id
=protocol
, fatal
=False)) 
 212         domain_tld 
= domain
.split('.')[-1] 
 213         if domain_tld 
in ('se', 'dk', 'no'): 
 214             for protocol 
in PROTOCOLS
: 
 215                 # Providing dsc-geo allows to bypass geo restriction in some cases 
 217                     'secure.dplay.%s' % domain_tld
, 'dsc-geo', 
 219                         'countryCode': domain_tld
.upper(), 
 220                         'expiry': (time
.time() + 20 * 60) * 1000, 
 222                 stream 
= self
._download
_json
( 
 223                     'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s' 
 224                     % (domain_tld
, video_id
, protocol
), video_id
, 
 225                     'Downloading %s stream JSON' % protocol
, fatal
=False) 
 226                 if stream 
and stream
.get(protocol
): 
 227                     extract_formats(protocol
, stream
[protocol
]) 
 229         # The last resort is to try direct unsigned hls/hds URLs from info dictionary. 
 230         # Sometimes this does work even when secure API with dsc-geo has failed (e.g. 
 231         # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/). 
 233             for protocol 
in PROTOCOLS
: 
 234                 if info
.get(protocol
): 
 235                     extract_formats(protocol
, info
[protocol
]) 
 237         self
._sort
_formats
(formats
) 
 240         for lang 
in ('se', 'sv', 'da', 'nl', 'no'): 
 241             for format_id 
in ('web_vtt', 'vtt', 'srt'): 
 242                 subtitle_url 
= info
.get('subtitles_%s_%s' % (lang
, format_id
)) 
 244                     subtitles
.setdefault(lang
, []).append({'url': subtitle_url
}) 
 248             'display_id': display_id
, 
 250             'description': info
.get('video_metadata_longDescription'), 
 251             'duration': int_or_none(info
.get('video_metadata_length'), scale
=1000), 
 252             'timestamp': int_or_none(info
.get('video_publish_date')), 
 253             'creator': info
.get('video_metadata_homeChannel'), 
 254             'series': info
.get('video_metadata_show'), 
 255             'season_number': int_or_none(info
.get('season')), 
 256             'episode_number': int_or_none(info
.get('episode')), 
 257             'age_limit': int_or_none(info
.get('minimum_age')), 
 259             'subtitles': subtitles
, 
 263 class DPlayItIE(InfoExtractor
): 
 264     _VALID_URL 
= r
'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)' 
 265     _GEO_COUNTRIES 
= ['IT'] 
 267         'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/', 
 268         'md5': '2b808ffb00fc47b884a172ca5d13053c', 
 271             'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij', 
 273             'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij', 
 274             'description': 'md5:3c7a4303aef85868f867a26f5cc14813', 
 275             'thumbnail': r
're:^https?://.*\.jpe?g', 
 276             'upload_date': '20160524', 
 277             'series': 'Biografie imbarazzanti', 
 279             'episode': 'Luigi Di Maio: la psicosi di Stanislawskij', 
 284     def _real_extract(self
, url
): 
 285         display_id 
= self
._match
_id
(url
) 
 287         webpage 
= self
._download
_webpage
(url
, display_id
) 
 289         title 
= remove_end(self
._og
_search
_title
(webpage
), ' | Dplay') 
 293         info 
= self
._search
_regex
( 
 294             r
'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")', 
 295             webpage
, 'playback JSON', default
=None) 
 298                 info 
= self
._parse
_json
(info
, display_id
, fatal
=False) 
 302                 video_id 
= try_get(info
, lambda x
: x
['data']['id']) 
 305             info_url 
= self
._search
_regex
( 
 306                 r
'url\s*[:=]\s*["\']((?
:https?
:)?
//[^
/]+/playback
/videoPlaybackInfo
/\d
+)', 
 309             video_id = info_url.rpartition('/')[-1] 
 312                 info = self._download_json( 
 313                     info_url, display_id, headers={ 
 314                         'Authorization
': 'Bearer 
%s' % self._get_cookies(url).get( 
 315                             'dplayit_token
').value, 
 318             except ExtractorError as e: 
 319                 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403): 
 320                     info = self._parse_json(e.cause.read().decode('utf
-8'), display_id) 
 321                     error = info['errors
'][0] 
 322                     if error.get('code
') == 'access
.denied
.geoblocked
': 
 323                         self.raise_geo_restricted( 
 324                             msg=error.get('detail
'), countries=self._GEO_COUNTRIES) 
 325                     raise ExtractorError(info['errors
'][0]['detail
'], expected=True) 
 328         hls_url = info['data
']['attributes
']['streaming
']['hls
']['url
'] 
 330         formats = self._extract_m3u8_formats( 
 331             hls_url, display_id, ext='mp4
', entry_protocol='m3u8_native
', 
 334         series = self._html_search_regex( 
 335             r'(?s
)<h1
[^
>]+class=["\'].*?\bshow_title\b.*?["\'][^
>]*>(.+?
)</h1
>', 
 336             webpage, 'series
', fatal=False) 
 337         episode = self._search_regex( 
 338             r'<p
[^
>]+class=["\'].*?\bdesc_ep\b.*?["\'][^
>]*>\s
*<br
/>\s
*<b
>([^
<]+)', 
 339             webpage, 'episode
', fatal=False) 
 342             r'(?s
)<span
[^
>]+class=["\']dates["\'][^
>]*>.+?
\bS\
.(?P
<season_number
>\d
+)\s
+E\
.(?P
<episode_number
>\d
+)\s
*-\s
*(?P
<upload_date
>\d{2}
/\d{2}
/\d{4}
)', 
 345             season_number = int(mobj.group('season_number
')) 
 346             episode_number = int(mobj.group('episode_number
')) 
 347             upload_date = unified_strdate(mobj.group('upload_date
')) 
 349             season_number = episode_number = upload_date = None 
 352             'id': compat_str(video_id or display_id), 
 353             'display_id
': display_id, 
 355             'description
': self._og_search_description(webpage), 
 356             'thumbnail
': self._og_search_thumbnail(webpage), 
 358             'season_number
': season_number, 
 360             'episode_number
': episode_number, 
 361             'upload_date
': upload_date,