]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nrk.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_urlparse
 
  16 class NRKIE(InfoExtractor
): 
  17     _VALID_URL 
= r
'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)' 
  21             'url': 'http://www.nrk.no/video/PS*150533', 
  22             'md5': 'bccd850baebefe23b56d708a113229c2', 
  26                 'title': 'Dompap og andre fugler i Piip-Show', 
  27                 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', 
  32             'url': 'http://www.nrk.no/video/PS*154915', 
  33             'md5': '0b1493ba1aae7d9579a5ad5531bc395a', 
  37                 'title': 'Slik høres internett ut når du er blind', 
  38                 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', 
  44     def _real_extract(self
, url
): 
  45         video_id 
= self
._match
_id
(url
) 
  47         data 
= self
._download
_json
( 
  48             'http://v8.psapi.nrk.no/mediaelement/%s' % video_id
, 
  49             video_id
, 'Downloading media JSON') 
  51         if data
['usageRights']['isGeoBlocked']: 
  53                 'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', 
  56         video_url 
= data
['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81' 
  58         duration 
= parse_duration(data
.get('duration')) 
  60         images 
= data
.get('images') 
  62             thumbnails 
= images
['webImages'] 
  63             thumbnails
.sort(key
=lambda image
: image
['pixelWidth']) 
  64             thumbnail 
= thumbnails
[-1]['imageUrl'] 
  72             'title': data
['title'], 
  73             'description': data
['description'], 
  75             'thumbnail': thumbnail
, 
  79 class NRKPlaylistIE(InfoExtractor
): 
  80     _VALID_URL 
= r
'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)' 
  83         'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', 
  85             'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763', 
  86             'title': 'Gjenopplev den historiske solformørkelsen', 
  87             'description': 'md5:c2df8ea3bac5654a26fc2834a542feed', 
  91         'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449', 
  93             'id': 'rivertonprisen-til-karin-fossum-1.12266449', 
  94             'title': 'Rivertonprisen til Karin Fossum', 
  95             'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.', 
 100     def _real_extract(self
, url
): 
 101         playlist_id 
= self
._match
_id
(url
) 
 103         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 106             self
.url_result('nrk:%s' % video_id
, 'NRK') 
 107             for video_id 
in re
.findall( 
 108                 r
'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"', 
 112         playlist_title 
= self
._og
_search
_title
(webpage
) 
 113         playlist_description 
= self
._og
_search
_description
(webpage
) 
 115         return self
.playlist_result( 
 116             entries
, playlist_id
, playlist_title
, playlist_description
) 
 119 class NRKTVIE(InfoExtractor
): 
 120     IE_DESC 
= 'NRK TV and NRK Radio' 
 121     _VALID_URL 
= r
'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' 
 125             'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 
 126             'md5': 'adf2c5454fa2bf032f47a9f8fb351342', 
 128                 'id': 'MUHH48000314', 
 130                 'title': '20 spørsmål', 
 131                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 
 132                 'upload_date': '20140523', 
 137             'url': 'https://tv.nrk.no/program/mdfp15000514', 
 138             'md5': '383650ece2b25ecec996ad7b5bb2a384', 
 140                 'id': 'mdfp15000514', 
 142                 'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting', 
 143                 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', 
 144                 'upload_date': '20140524', 
 149             # single playlist video 
 150             'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', 
 151             'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 153                 'id': 'MSPO40010515-part2', 
 155                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 156                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 157                 'upload_date': '20150106', 
 159             'skip': 'Only works from Norway', 
 162             'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 
 165                     'md5': '9480285eff92d64f06e02a5367970a7a', 
 167                         'id': 'MSPO40010515-part1', 
 169                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', 
 170                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 171                         'upload_date': '20150106', 
 175                     'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 177                         'id': 'MSPO40010515-part2', 
 179                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 180                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 181                         'upload_date': '20150106', 
 186                 'id': 'MSPO40010515', 
 187                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', 
 188                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 189                 'upload_date': '20150106', 
 190                 'duration': 6947.5199999999995, 
 192             'skip': 'Only works from Norway', 
 195             'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', 
 196             'only_matching': True, 
 200     def _extract_f4m(self
, manifest_url
, video_id
): 
 201         return self
._extract
_f
4m
_formats
( 
 202             manifest_url 
+ '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id
, f4m_id
='hds') 
 204     def _real_extract(self
, url
): 
 205         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 206         video_id 
= mobj
.group('id') 
 207         part_id 
= mobj
.group('part_id') 
 208         base_url 
= mobj
.group('baseurl') 
 210         webpage 
= self
._download
_webpage
(url
, video_id
) 
 212         title 
= self
._html
_search
_meta
( 
 213             'title', webpage
, 'title') 
 214         description 
= self
._html
_search
_meta
( 
 215             'description', webpage
, 'description') 
 217         thumbnail 
= self
._html
_search
_regex
( 
 218             r
'data-posterimage="([^"]+)"', 
 219             webpage
, 'thumbnail', fatal
=False) 
 220         upload_date 
= unified_strdate(self
._html
_search
_meta
( 
 221             'rightsfrom', webpage
, 'upload date', fatal
=False)) 
 222         duration 
= float_or_none(self
._html
_search
_regex
( 
 223             r
'data-duration="([^"]+)"', 
 224             webpage
, 'duration', fatal
=False)) 
 228             r
'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage
) 
 231             for current_part_id
, stream_url
, part_title 
in parts
: 
 232                 if part_id 
and current_part_id 
!= part_id
: 
 234                 video_part_id 
= '%s-part%s' % (video_id
, current_part_id
) 
 235                 formats 
= self
._extract
_f
4m
(stream_url
, video_part_id
) 
 239                     'description': description
, 
 240                     'thumbnail': thumbnail
, 
 241                     'upload_date': upload_date
, 
 248                 playlist 
= self
.playlist_result(entries
, video_id
, title
, description
) 
 250                     'thumbnail': thumbnail
, 
 251                     'upload_date': upload_date
, 
 252                     'duration': duration
, 
 258         f4m_url 
= re
.search(r
'data-media="([^"]+)"', webpage
) 
 260             formats
.extend(self
._extract
_f
4m
(f4m_url
.group(1), video_id
)) 
 262         m3u8_url 
= re
.search(r
'data-hls-media="([^"]+)"', webpage
) 
 264             formats
.extend(self
._extract
_m
3u8_formats
(m3u8_url
.group(1), video_id
, 'mp4', m3u8_id
='hls')) 
 265         self
._sort
_formats
(formats
) 
 267         subtitles_url 
= self
._html
_search
_regex
( 
 268             r
'data-subtitlesurl\s*=\s*(["\'])(?P
<url
>.+?
)\
1', 
 269             webpage, 'subtitle URL
', default=None, group='url
') 
 274                 'url
': compat_urlparse.urljoin(base_url, subtitles_url), 
 280             'description
': description, 
 281             'thumbnail
': thumbnail, 
 282             'upload_date
': upload_date, 
 283             'duration
': duration, 
 285             'subtitles
': subtitles,