]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nrk.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  15 class NRKIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)' 
  20             'url': 'http://www.nrk.no/video/PS*150533', 
  21             'md5': 'bccd850baebefe23b56d708a113229c2', 
  25                 'title': 'Dompap og andre fugler i Piip-Show', 
  26                 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', 
  31             'url': 'http://www.nrk.no/video/PS*154915', 
  32             'md5': '0b1493ba1aae7d9579a5ad5531bc395a', 
  36                 'title': 'Slik høres internett ut når du er blind', 
  37                 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', 
  43     def _real_extract(self
, url
): 
  44         video_id 
= self
._match
_id
(url
) 
  46         data 
= self
._download
_json
( 
  47             'http://v8.psapi.nrk.no/mediaelement/%s' % video_id
, 
  48             video_id
, 'Downloading media JSON') 
  50         if data
['usageRights']['isGeoBlocked']: 
  52                 'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', 
  55         video_url 
= data
['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81' 
  57         duration 
= parse_duration(data
.get('duration')) 
  59         images 
= data
.get('images') 
  61             thumbnails 
= images
['webImages'] 
  62             thumbnails
.sort(key
=lambda image
: image
['pixelWidth']) 
  63             thumbnail 
= thumbnails
[-1]['imageUrl'] 
  71             'title': data
['title'], 
  72             'description': data
['description'], 
  74             'thumbnail': thumbnail
, 
  78 class NRKPlaylistIE(InfoExtractor
): 
  79     _VALID_URL 
= r
'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)' 
  82         'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', 
  84             'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763', 
  85             'title': 'Gjenopplev den historiske solformørkelsen', 
  86             'description': 'md5:c2df8ea3bac5654a26fc2834a542feed', 
  90         'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449', 
  92             'id': 'rivertonprisen-til-karin-fossum-1.12266449', 
  93             'title': 'Rivertonprisen til Karin Fossum', 
  94             'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.', 
  99     def _real_extract(self
, url
): 
 100         playlist_id 
= self
._match
_id
(url
) 
 102         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 105             self
.url_result('nrk:%s' % video_id
, 'NRK') 
 106             for video_id 
in re
.findall( 
 107                 r
'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"', 
 111         playlist_title 
= self
._og
_search
_title
(webpage
) 
 112         playlist_description 
= self
._og
_search
_description
(webpage
) 
 114         return self
.playlist_result( 
 115             entries
, playlist_id
, playlist_title
, playlist_description
) 
 118 class NRKTVIE(InfoExtractor
): 
 119     IE_DESC 
= 'NRK TV and NRK Radio' 
 120     _VALID_URL 
= r
'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' 
 124             'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 
 125             'md5': 'adf2c5454fa2bf032f47a9f8fb351342', 
 127                 'id': 'MUHH48000314', 
 129                 'title': '20 spørsmål', 
 130                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 
 131                 'upload_date': '20140523', 
 136             'url': 'https://tv.nrk.no/program/mdfp15000514', 
 137             'md5': '383650ece2b25ecec996ad7b5bb2a384', 
 139                 'id': 'mdfp15000514', 
 141                 'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting', 
 142                 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', 
 143                 'upload_date': '20140524', 
 148             # single playlist video 
 149             'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', 
 150             'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 152                 'id': 'MSPO40010515-part2', 
 154                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 155                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 156                 'upload_date': '20150106', 
 158             'skip': 'Only works from Norway', 
 161             'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 
 164                     'md5': '9480285eff92d64f06e02a5367970a7a', 
 166                         'id': 'MSPO40010515-part1', 
 168                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', 
 169                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 170                         'upload_date': '20150106', 
 174                     'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 176                         'id': 'MSPO40010515-part2', 
 178                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 179                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 180                         'upload_date': '20150106', 
 185                 'id': 'MSPO40010515', 
 186                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', 
 187                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 188                 'upload_date': '20150106', 
 189                 'duration': 6947.5199999999995, 
 191             'skip': 'Only works from Norway', 
 194             'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', 
 195             'only_matching': True, 
 199     def _debug_print(self
, txt
): 
 200         if self
._downloader
.params
.get('verbose', False): 
 201             self
.to_screen('[debug] %s' % txt
) 
 203     def _get_subtitles(self
, subtitlesurl
, video_id
, baseurl
): 
 204         url 
= "%s%s" % (baseurl
, subtitlesurl
) 
 205         self
._debug
_print
('%s: Subtitle url: %s' % (video_id
, url
)) 
 206         captions 
= self
._download
_xml
( 
 207             url
, video_id
, 'Downloading subtitles') 
 208         lang 
= captions
.get('lang', 'no') 
 210             {'ext': 'ttml', 'url': url
}, 
 213     def _extract_f4m(self
, manifest_url
, video_id
): 
 214         return self
._extract
_f
4m
_formats
( 
 215             manifest_url 
+ '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id
, f4m_id
='hds') 
 217     def _real_extract(self
, url
): 
 218         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 219         video_id 
= mobj
.group('id') 
 220         part_id 
= mobj
.group('part_id') 
 221         baseurl 
= mobj
.group('baseurl') 
 223         webpage 
= self
._download
_webpage
(url
, video_id
) 
 225         title 
= self
._html
_search
_meta
( 
 226             'title', webpage
, 'title') 
 227         description 
= self
._html
_search
_meta
( 
 228             'description', webpage
, 'description') 
 230         thumbnail 
= self
._html
_search
_regex
( 
 231             r
'data-posterimage="([^"]+)"', 
 232             webpage
, 'thumbnail', fatal
=False) 
 233         upload_date 
= unified_strdate(self
._html
_search
_meta
( 
 234             'rightsfrom', webpage
, 'upload date', fatal
=False)) 
 235         duration 
= float_or_none(self
._html
_search
_regex
( 
 236             r
'data-duration="([^"]+)"', 
 237             webpage
, 'duration', fatal
=False)) 
 241             r
'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage
) 
 244             for current_part_id
, stream_url
, part_title 
in parts
: 
 245                 if part_id 
and current_part_id 
!= part_id
: 
 247                 video_part_id 
= '%s-part%s' % (video_id
, current_part_id
) 
 248                 formats 
= self
._extract
_f
4m
(stream_url
, video_part_id
) 
 252                     'description': description
, 
 253                     'thumbnail': thumbnail
, 
 254                     'upload_date': upload_date
, 
 261                 playlist 
= self
.playlist_result(entries
, video_id
, title
, description
) 
 263                     'thumbnail': thumbnail
, 
 264                     'upload_date': upload_date
, 
 265                     'duration': duration
, 
 271         f4m_url 
= re
.search(r
'data-media="([^"]+)"', webpage
) 
 273             formats
.extend(self
._extract
_f
4m
(f4m_url
.group(1), video_id
)) 
 275         m3u8_url 
= re
.search(r
'data-hls-media="([^"]+)"', webpage
) 
 277             formats
.extend(self
._extract
_m
3u8_formats
(m3u8_url
.group(1), video_id
, 'mp4', m3u8_id
='hls')) 
 278         self
._sort
_formats
(formats
) 
 280         subtitles_url 
= self
._html
_search
_regex
( 
 281             r
'data-subtitlesurl[ ]*=[ ]*"([^"]+)"', 
 282             webpage
, 'subtitle URL', default
=None) 
 285             subtitles 
= self
.extract_subtitles(subtitles_url
, video_id
, baseurl
) 
 290             'description': description
, 
 291             'thumbnail': thumbnail
, 
 292             'upload_date': upload_date
, 
 293             'duration': duration
, 
 295             'subtitles': subtitles
,