]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nrk.py
1e4cfa2e7c8c5e3ae05c7d5fbc11242a334a5322
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..compat
import compat_str
16 class NRKIE(InfoExtractor
):
17 _VALID_URL
= r
'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
21 'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
22 'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
26 'title': 'Dompap og andre fugler i Piip-Show',
27 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
31 'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
32 'md5': '3471f2a51718195164e88f46bf427668',
36 'title': 'Slik høres internett ut når du er blind',
37 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
42 def _real_extract(self
, url
):
43 mobj
= re
.match(self
._VALID
_URL
, url
)
44 video_id
= mobj
.group('id')
46 page
= self
._download
_webpage
(url
, video_id
)
48 video_id
= self
._html
_search
_regex
(r
'<div class="nrk-video" data-nrk-id="(\d+)">', page
, 'video id')
50 data
= self
._download
_json
(
51 'http://v7.psapi.nrk.no/mediaelement/%s' % video_id
, video_id
, 'Downloading media JSON')
53 if data
['usageRights']['isGeoBlocked']:
54 raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected
=True)
56 video_url
= data
['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
58 images
= data
.get('images')
60 thumbnails
= images
['webImages']
61 thumbnails
.sort(key
=lambda image
: image
['pixelWidth'])
62 thumbnail
= thumbnails
[-1]['imageUrl']
70 'title': data
['title'],
71 'description': data
['description'],
72 'thumbnail': thumbnail
,
76 class NRKTVIE(InfoExtractor
):
77 _VALID_URL
= r
'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
81 'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
82 'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
86 'title': '20 spørsmål',
87 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
88 'upload_date': '20140523',
93 'url': 'http://tv.nrk.no/program/mdfp15000514',
94 'md5': '383650ece2b25ecec996ad7b5bb2a384',
98 'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
99 'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
100 'upload_date': '20140524',
105 # single playlist video
106 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
107 'md5': 'adbd1dbd813edaf532b0a253780719c2',
109 'id': 'MSPO40010515-part2',
111 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
112 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
113 'upload_date': '20150106',
115 'skip': 'Only works from Norway',
118 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
121 'md5': '9480285eff92d64f06e02a5367970a7a',
123 'id': 'MSPO40010515-part1',
125 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
126 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
127 'upload_date': '20150106',
131 'md5': 'adbd1dbd813edaf532b0a253780719c2',
133 'id': 'MSPO40010515-part2',
135 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
136 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
137 'upload_date': '20150106',
142 'id': 'MSPO40010515',
143 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
144 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
145 'upload_date': '20150106',
146 'duration': 6947.5199999999995,
148 'skip': 'Only works from Norway',
152 def _seconds2str(self
, s
):
153 return '%02d:%02d:%02d.%03d' % (s
/ 3600, (s
% 3600) / 60, s
% 60, (s
% 1) * 1000)
155 def _debug_print(self
, txt
):
156 if self
._downloader
.params
.get('verbose', False):
157 self
.to_screen('[debug] %s' % txt
)
159 def _get_subtitles(self
, subtitlesurl
, video_id
, baseurl
):
160 url
= "%s%s" % (baseurl
, subtitlesurl
)
161 self
._debug
_print
('%s: Subtitle url: %s' % (video_id
, url
))
162 captions
= self
._download
_xml
(
163 url
, video_id
, 'Downloading subtitles',
164 transform_source
=lambda s
: s
.replace(r
'<br />', '\r\n'))
165 lang
= captions
.get('lang', 'no')
166 ps
= captions
.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
168 for pos
, p
in enumerate(ps
):
169 begin
= parse_duration(p
.get('begin'))
170 duration
= parse_duration(p
.get('dur'))
171 starttime
= self
._seconds
2str
(begin
)
172 endtime
= self
._seconds
2str
(begin
+ duration
)
173 srt
+= '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos
), starttime
, endtime
, p
.text
)
175 {'ext': 'ttml', 'url': url
},
176 {'ext': 'srt', 'data': srt
},
179 def _extract_f4m(self
, manifest_url
, video_id
):
180 return self
._extract
_f
4m
_formats
(manifest_url
+ '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id
)
182 def _real_extract(self
, url
):
183 mobj
= re
.match(self
._VALID
_URL
, url
)
184 video_id
= mobj
.group('id')
185 part_id
= mobj
.group('part_id')
186 baseurl
= mobj
.group('baseurl')
188 webpage
= self
._download
_webpage
(url
, video_id
)
190 title
= self
._html
_search
_meta
(
191 'title', webpage
, 'title')
192 description
= self
._html
_search
_meta
(
193 'description', webpage
, 'description')
195 thumbnail
= self
._html
_search
_regex
(
196 r
'data-posterimage="([^"]+)"',
197 webpage
, 'thumbnail', fatal
=False)
198 upload_date
= unified_strdate(self
._html
_search
_meta
(
199 'rightsfrom', webpage
, 'upload date', fatal
=False))
200 duration
= float_or_none(self
._html
_search
_regex
(
201 r
'data-duration="([^"]+)"',
202 webpage
, 'duration', fatal
=False))
206 r
'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage
)
209 for current_part_id
, stream_url
, part_title
in parts
:
210 if part_id
and current_part_id
!= part_id
:
212 video_part_id
= '%s-part%s' % (video_id
, current_part_id
)
213 formats
= self
._extract
_f
4m
(stream_url
, video_part_id
)
217 'description': description
,
218 'thumbnail': thumbnail
,
219 'upload_date': upload_date
,
226 playlist
= self
.playlist_result(entries
, video_id
, title
, description
)
228 'thumbnail': thumbnail
,
229 'upload_date': upload_date
,
230 'duration': duration
,
236 f4m_url
= re
.search(r
'data-media="([^"]+)"', webpage
)
238 formats
.extend(self
._extract
_f
4m
(f4m_url
.group(1), video_id
))
240 m3u8_url
= re
.search(r
'data-hls-media="([^"]+)"', webpage
)
242 formats
.extend(self
._extract
_m
3u8_formats
(m3u8_url
.group(1), video_id
, 'mp4'))
243 self
._sort
_formats
(formats
)
245 subtitles_url
= self
._html
_search
_regex
(
246 r
'data-subtitlesurl[ ]*=[ ]*"([^"]+)"',
247 webpage
, 'subtitle URL', default
=None)
250 subtitles
= self
.extract_subtitles(subtitles_url
, video_id
, baseurl
)
255 'description': description
,
256 'thumbnail': thumbnail
,
257 'upload_date': upload_date
,
258 'duration': duration
,
260 'subtitles': subtitles
,