]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nrk.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
13 from .subtitles
import SubtitlesInfoExtractor
16 class NRKIE(InfoExtractor
):
17 _VALID_URL
= r
'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})'
21 'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/',
22 'md5': 'a6eac35052f3b242bb6bb7f43aed5886',
26 'title': 'Dompap og andre fugler i Piip-Show',
27 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f'
31 'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/',
32 'md5': '3471f2a51718195164e88f46bf427668',
36 'title': 'Slik høres internett ut når du er blind',
37 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568',
42 def _real_extract(self
, url
):
43 mobj
= re
.match(self
._VALID
_URL
, url
)
44 video_id
= mobj
.group('id')
46 page
= self
._download
_webpage
(url
, video_id
)
48 video_id
= self
._html
_search
_regex
(r
'<div class="nrk-video" data-nrk-id="(\d+)">', page
, 'video id')
50 data
= self
._download
_json
(
51 'http://v7.psapi.nrk.no/mediaelement/%s' % video_id
, video_id
, 'Downloading media JSON')
53 if data
['usageRights']['isGeoBlocked']:
54 raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected
=True)
56 video_url
= data
['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124'
58 images
= data
.get('images')
60 thumbnails
= images
['webImages']
61 thumbnails
.sort(key
=lambda image
: image
['pixelWidth'])
62 thumbnail
= thumbnails
[-1]['imageUrl']
70 'title': data
['title'],
71 'description': data
['description'],
72 'thumbnail': thumbnail
,
76 class NRKTVIE(SubtitlesInfoExtractor
):
77 _VALID_URL
= r
'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
81 'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014',
82 'md5': 'adf2c5454fa2bf032f47a9f8fb351342',
86 'title': '20 spørsmål',
87 'description': 'md5:bdea103bc35494c143c6a9acdd84887a',
88 'upload_date': '20140523',
93 'url': 'http://tv.nrk.no/program/mdfp15000514',
94 'md5': '383650ece2b25ecec996ad7b5bb2a384',
98 'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting',
99 'description': 'md5:654c12511f035aed1e42bdf5db3b206a',
100 'upload_date': '20140524',
105 # single playlist video
106 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2',
107 'md5': 'adbd1dbd813edaf532b0a253780719c2',
109 'id': 'MSPO40010515-part2',
111 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
112 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
113 'upload_date': '20150106',
115 'skip': 'Only works from Norway',
118 'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015',
121 'md5': '9480285eff92d64f06e02a5367970a7a',
123 'id': 'MSPO40010515-part1',
125 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)',
126 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
127 'upload_date': '20150106',
131 'md5': 'adbd1dbd813edaf532b0a253780719c2',
133 'id': 'MSPO40010515-part2',
135 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)',
136 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
137 'upload_date': '20150106',
142 'id': 'MSPO40010515',
143 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn',
144 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26',
145 'upload_date': '20150106',
146 'duration': 6947.5199999999995,
148 'skip': 'Only works from Norway',
152 def _seconds2str(self
, s
):
153 return '%02d:%02d:%02d.%03d' % (s
/ 3600, (s
% 3600) / 60, s
% 60, (s
% 1) * 1000)
155 def _debug_print(self
, txt
):
156 if self
._downloader
.params
.get('verbose', False):
157 self
.to_screen('[debug] %s' % txt
)
159 def _extract_captions(self
, subtitlesurl
, video_id
, baseurl
):
160 url
= "%s%s" % (baseurl
, subtitlesurl
)
161 self
._debug
_print
('%s: Subtitle url: %s' % (video_id
, url
))
162 captions
= self
._download
_xml
(url
, video_id
, 'Downloading subtitles')
163 lang
= captions
.get('lang', 'no')
164 ps
= captions
.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
166 for pos
, p
in enumerate(ps
):
167 begin
= parse_duration(p
.get('begin'))
168 duration
= parse_duration(p
.get('dur'))
169 starttime
= self
._seconds
2str
(begin
)
170 endtime
= self
._seconds
2str
(begin
+ duration
)
171 text
= '\n'.join(p
.itertext())
172 srt
+= '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos
), starttime
, endtime
, text
)
175 def _extract_f4m(self
, manifest_url
, video_id
):
176 return self
._extract
_f
4m
_formats
(manifest_url
+ '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id
)
178 def _real_extract(self
, url
):
179 mobj
= re
.match(self
._VALID
_URL
, url
)
180 video_id
= mobj
.group('id')
181 part_id
= mobj
.group('part_id')
182 baseurl
= mobj
.group('baseurl')
184 webpage
= self
._download
_webpage
(url
, video_id
)
186 title
= self
._html
_search
_meta
(
187 'title', webpage
, 'title')
188 description
= self
._html
_search
_meta
(
189 'description', webpage
, 'description')
191 thumbnail
= self
._html
_search
_regex
(
192 r
'data-posterimage="([^"]+)"',
193 webpage
, 'thumbnail', fatal
=False)
194 upload_date
= unified_strdate(self
._html
_search
_meta
(
195 'rightsfrom', webpage
, 'upload date', fatal
=False))
196 duration
= float_or_none(self
._html
_search
_regex
(
197 r
'data-duration="([^"]+)"',
198 webpage
, 'duration', fatal
=False))
202 r
'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage
)
205 for current_part_id
, stream_url
, part_title
in parts
:
206 if part_id
and current_part_id
!= part_id
:
208 video_part_id
= '%s-part%s' % (video_id
, current_part_id
)
209 formats
= self
._extract
_f
4m
(stream_url
, video_part_id
)
213 'description': description
,
214 'thumbnail': thumbnail
,
215 'upload_date': upload_date
,
222 playlist
= self
.playlist_result(entries
, video_id
, title
, description
)
224 'thumbnail': thumbnail
,
225 'upload_date': upload_date
,
226 'duration': duration
,
232 f4m_url
= re
.search(r
'data-media="([^"]+)"', webpage
)
234 formats
.extend(self
._extract
_f
4m
(f4m_url
.group(1), video_id
))
236 m3u8_url
= re
.search(r
'data-hls-media="([^"]+)"', webpage
)
238 formats
.extend(self
._extract
_m
3u8_formats
(m3u8_url
.group(1), video_id
, 'mp4'))
239 self
._sort
_formats
(formats
)
241 subtitles_url
= self
._html
_search
_regex
(
242 r
'data-subtitlesurl[ ]*=[ ]*"([^"]+)"',
243 webpage
, 'subtitle URL', default
=None)
246 subtitles
= self
._extract
_captions
(subtitles_url
, video_id
, baseurl
)
247 if self
._downloader
.params
.get('listsubtitles', False):
248 self
._list
_available
_subtitles
(video_id
, subtitles
)
254 'description': description
,
255 'thumbnail': thumbnail
,
256 'upload_date': upload_date
,
257 'duration': duration
,
259 'subtitles': subtitles
,