]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dplay.py
2 from __future__
import unicode_literals
8 from .common
import InfoExtractor
25 class DPlayIE(InfoExtractor
):
26 _VALID_URL
= r
'https?://(?P<domain>www\.dplay\.(?:dk|se|no))/[^/]+/(?P<id>[^/?#]+)'
29 # non geo restricted, via secure api, unsigned download hls URL
30 'url': 'http://www.dplay.se/nugammalt-77-handelser-som-format-sverige/season-1-svensken-lar-sig-njuta-av-livet/',
33 'display_id': 'season-1-svensken-lar-sig-njuta-av-livet',
35 'title': 'Svensken lär sig njuta av livet',
36 'description': 'md5:d3819c9bccffd0fe458ca42451dd50d8',
38 'timestamp': 1365454320,
39 'upload_date': '20130408',
40 'creator': 'Kanal 5 (Home)',
41 'series': 'Nugammalt - 77 händelser som format Sverige',
47 # geo restricted, via secure api, unsigned download hls URL
48 'url': 'http://www.dplay.dk/mig-og-min-mor/season-6-episode-12/',
51 'display_id': 'season-6-episode-12',
53 'title': 'Episode 12',
54 'description': 'md5:9c86e51a93f8a4401fc9641ef9894c90',
56 'timestamp': 1429696800,
57 'upload_date': '20150422',
58 'creator': 'Kanal 4 (Home)',
59 'series': 'Mig og min mor',
65 # geo restricted, via direct unsigned hls URL
66 'url': 'http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/',
67 'only_matching': True,
70 def _real_extract(self
, url
):
71 mobj
= re
.match(self
._VALID
_URL
, url
)
72 display_id
= mobj
.group('id')
73 domain
= mobj
.group('domain')
75 webpage
= self
._download
_webpage
(url
, display_id
)
77 video_id
= self
._search
_regex
(
78 r
'data-video-id=["\'](\d
+)', webpage, 'video
id')
80 info = self._download_json(
81 'http
://%s/api
/v2
/ajax
/videos?video_id
=%s' % (domain, video_id),
86 PROTOCOLS = ('hls
', 'hds
')
89 def extract_formats(protocol, manifest_url):
91 m3u8_formats = self._extract_m3u8_formats(
92 manifest_url, video_id, ext='mp4
',
93 entry_protocol='m3u8_native
', m3u8_id=protocol, fatal=False)
94 # Sometimes final URLs inside m3u8 are unsigned, let's fix this
95 # ourselves. Also fragments' URLs are only served signed for
97 query
= compat_urlparse
.parse_qs(compat_urlparse
.urlparse(manifest_url
).query
)
98 for m3u8_format
in m3u8_formats
:
100 'url': update_url_query(m3u8_format
['url'], query
),
102 'User-Agent': USER_AGENTS
['Safari'],
105 formats
.extend(m3u8_formats
)
106 elif protocol
== 'hds':
107 formats
.extend(self
._extract
_f
4m
_formats
(
108 manifest_url
+ '&hdcore=3.8.0&plugin=flowplayer-3.8.0.0',
109 video_id
, f4m_id
=protocol
, fatal
=False))
111 domain_tld
= domain
.split('.')[-1]
112 if domain_tld
in ('se', 'dk', 'no'):
113 for protocol
in PROTOCOLS
:
114 # Providing dsc-geo allows to bypass geo restriction in some cases
116 'secure.dplay.%s' % domain_tld
, 'dsc-geo',
118 'countryCode': domain_tld
.upper(),
119 'expiry': (time
.time() + 20 * 60) * 1000,
121 stream
= self
._download
_json
(
122 'https://secure.dplay.%s/secure/api/v2/user/authorization/stream/%s?stream_type=%s'
123 % (domain_tld
, video_id
, protocol
), video_id
,
124 'Downloading %s stream JSON' % protocol
, fatal
=False)
125 if stream
and stream
.get(protocol
):
126 extract_formats(protocol
, stream
[protocol
])
128 # The last resort is to try direct unsigned hls/hds URLs from info dictionary.
129 # Sometimes this does work even when secure API with dsc-geo has failed (e.g.
130 # http://www.dplay.no/pga-tour/season-1-hoydepunkter-18-21-februar/).
132 for protocol
in PROTOCOLS
:
133 if info
.get(protocol
):
134 extract_formats(protocol
, info
[protocol
])
136 self
._sort
_formats
(formats
)
139 for lang
in ('se', 'sv', 'da', 'nl', 'no'):
140 for format_id
in ('web_vtt', 'vtt', 'srt'):
141 subtitle_url
= info
.get('subtitles_%s_%s' % (lang
, format_id
))
143 subtitles
.setdefault(lang
, []).append({'url': subtitle_url
})
147 'display_id': display_id
,
149 'description': info
.get('video_metadata_longDescription'),
150 'duration': int_or_none(info
.get('video_metadata_length'), scale
=1000),
151 'timestamp': int_or_none(info
.get('video_publish_date')),
152 'creator': info
.get('video_metadata_homeChannel'),
153 'series': info
.get('video_metadata_show'),
154 'season_number': int_or_none(info
.get('season')),
155 'episode_number': int_or_none(info
.get('episode')),
156 'age_limit': int_or_none(info
.get('minimum_age')),
158 'subtitles': subtitles
,
162 class DPlayItIE(InfoExtractor
):
163 _VALID_URL
= r
'https?://it\.dplay\.com/[^/]+/[^/]+/(?P<id>[^/?#]+)'
164 _GEO_COUNTRIES
= ['IT']
166 'url': 'http://it.dplay.com/nove/biografie-imbarazzanti/luigi-di-maio-la-psicosi-di-stanislawskij/',
167 'md5': '2b808ffb00fc47b884a172ca5d13053c',
170 'display_id': 'luigi-di-maio-la-psicosi-di-stanislawskij',
172 'title': 'Biografie imbarazzanti: Luigi Di Maio: la psicosi di Stanislawskij',
173 'description': 'md5:3c7a4303aef85868f867a26f5cc14813',
174 'thumbnail': r
're:^https?://.*\.jpe?g',
175 'upload_date': '20160524',
176 'series': 'Biografie imbarazzanti',
178 'episode': 'Luigi Di Maio: la psicosi di Stanislawskij',
183 def _real_extract(self
, url
):
184 display_id
= self
._match
_id
(url
)
186 webpage
= self
._download
_webpage
(url
, display_id
)
188 title
= remove_end(self
._og
_search
_title
(webpage
), ' | Dplay')
192 info
= self
._search
_regex
(
193 r
'playback_json\s*:\s*JSON\.parse\s*\(\s*("(?:\\.|[^"\\])+?")',
194 webpage
, 'playback JSON', default
=None)
197 info
= self
._parse
_json
(info
, display_id
, fatal
=False)
201 video_id
= try_get(info
, lambda x
: x
['data']['id'])
204 info_url
= self
._search
_regex
(
205 r
'url\s*[:=]\s*["\']((?
:https?
:)?
//[^
/]+/playback
/videoPlaybackInfo
/\d
+)',
208 video_id = info_url.rpartition('/')[-1]
211 info = self._download_json(
212 info_url, display_id, headers={
213 'Authorization
': 'Bearer
%s' % self._get_cookies(url).get(
214 'dplayit_token
').value,
217 except ExtractorError as e:
218 if isinstance(e.cause, compat_HTTPError) and e.cause.code in (400, 403):
219 info = self._parse_json(e.cause.read().decode('utf
-8'), display_id)
220 error = info['errors
'][0]
221 if error.get('code
') == 'access
.denied
.geoblocked
':
222 self.raise_geo_restricted(
223 msg=error.get('detail
'), countries=self._GEO_COUNTRIES)
224 raise ExtractorError(info['errors
'][0]['detail
'], expected=True)
227 hls_url = info['data
']['attributes
']['streaming
']['hls
']['url
']
229 formats = self._extract_m3u8_formats(
230 hls_url, display_id, ext='mp4
', entry_protocol='m3u8_native
',
233 series = self._html_search_regex(
234 r'(?s
)<h1
[^
>]+class=["\'].*?\bshow_title\b.*?["\'][^
>]*>(.+?
)</h1
>',
235 webpage, 'series
', fatal=False)
236 episode = self._search_regex(
237 r'<p
[^
>]+class=["\'].*?\bdesc_ep\b.*?["\'][^
>]*>\s
*<br
/>\s
*<b
>([^
<]+)',
238 webpage, 'episode
', fatal=False)
241 r'(?s
)<span
[^
>]+class=["\']dates["\'][^
>]*>.+?
\bS\
.(?P
<season_number
>\d
+)\s
+E\
.(?P
<episode_number
>\d
+)\s
*-\s
*(?P
<upload_date
>\d{2}
/\d{2}
/\d{4}
)',
244 season_number = int(mobj.group('season_number
'))
245 episode_number = int(mobj.group('episode_number
'))
246 upload_date = unified_strdate(mobj.group('upload_date
'))
248 season_number = episode_number = upload_date = None
251 'id': compat_str(video_id or display_id),
252 'display_id
': display_id,
254 'description
': self._og_search_description(webpage),
255 'thumbnail
': self._og_search_thumbnail(webpage),
257 'season_number
': season_number,
259 'episode_number
': episode_number,
260 'upload_date
': upload_date,