]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nrk.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_urlparse
 
  17 class NRKIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'(?:nrk:|https?://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)' 
  22             'url': 'http://www.nrk.no/video/PS*150533', 
  23             'md5': 'bccd850baebefe23b56d708a113229c2', 
  27                 'title': 'Dompap og andre fugler i Piip-Show', 
  28                 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', 
  33             'url': 'http://www.nrk.no/video/PS*154915', 
  34             'md5': '0b1493ba1aae7d9579a5ad5531bc395a', 
  38                 'title': 'Slik høres internett ut når du er blind', 
  39                 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', 
  45     def _real_extract(self
, url
): 
  46         video_id 
= self
._match
_id
(url
) 
  48         data 
= self
._download
_json
( 
  49             'http://v8.psapi.nrk.no/mediaelement/%s' % video_id
, 
  50             video_id
, 'Downloading media JSON') 
  52         media_url 
= data
.get('mediaUrl') 
  55             if data
['usageRights']['isGeoBlocked']: 
  57                     'NRK har ikke rettigheter til å vise dette programmet utenfor Norge', 
  60         if determine_ext(media_url
) == 'f4m': 
  61             formats 
= self
._extract
_f
4m
_formats
( 
  62                 media_url 
+ '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81', video_id
, f4m_id
='hds') 
  69         duration 
= parse_duration(data
.get('duration')) 
  71         images 
= data
.get('images') 
  73             thumbnails 
= images
['webImages'] 
  74             thumbnails
.sort(key
=lambda image
: image
['pixelWidth']) 
  75             thumbnail 
= thumbnails
[-1]['imageUrl'] 
  81             'title': data
['title'], 
  82             'description': data
['description'], 
  84             'thumbnail': thumbnail
, 
  89 class NRKPlaylistIE(InfoExtractor
): 
  90     _VALID_URL 
= r
'https?://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)' 
  93         'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', 
  95             'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763', 
  96             'title': 'Gjenopplev den historiske solformørkelsen', 
  97             'description': 'md5:c2df8ea3bac5654a26fc2834a542feed', 
 101         'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449', 
 103             'id': 'rivertonprisen-til-karin-fossum-1.12266449', 
 104             'title': 'Rivertonprisen til Karin Fossum', 
 105             'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.', 
 110     def _real_extract(self
, url
): 
 111         playlist_id 
= self
._match
_id
(url
) 
 113         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 116             self
.url_result('nrk:%s' % video_id
, 'NRK') 
 117             for video_id 
in re
.findall( 
 118                 r
'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"', 
 122         playlist_title 
= self
._og
_search
_title
(webpage
) 
 123         playlist_description 
= self
._og
_search
_description
(webpage
) 
 125         return self
.playlist_result( 
 126             entries
, playlist_id
, playlist_title
, playlist_description
) 
 129 class NRKTVIE(InfoExtractor
): 
 130     IE_DESC 
= 'NRK TV and NRK Radio' 
 131     _VALID_URL 
= r
'(?P<baseurl>https?://(?:tv|radio)\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' 
 135             'url': 'https://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 
 137                 'id': 'MUHH48000314', 
 139                 'title': '20 spørsmål', 
 140                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 
 141                 'upload_date': '20140523', 
 146                 'skip_download': True, 
 150             'url': 'https://tv.nrk.no/program/mdfp15000514', 
 152                 'id': 'mdfp15000514', 
 154                 'title': 'Grunnlovsjubiléet - Stor ståhei for ingenting', 
 155                 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', 
 156                 'upload_date': '20140524', 
 161                 'skip_download': True, 
 165             # single playlist video 
 166             'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', 
 167             'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 169                 'id': 'MSPO40010515-part2', 
 171                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 172                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 173                 'upload_date': '20150106', 
 175             'skip': 'Only works from Norway', 
 178             'url': 'https://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 
 181                     'md5': '9480285eff92d64f06e02a5367970a7a', 
 183                         'id': 'MSPO40010515-part1', 
 185                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', 
 186                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 187                         'upload_date': '20150106', 
 191                     'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 193                         'id': 'MSPO40010515-part2', 
 195                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 196                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 197                         'upload_date': '20150106', 
 202                 'id': 'MSPO40010515', 
 203                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', 
 204                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 205                 'upload_date': '20150106', 
 206                 'duration': 6947.5199999999995, 
 208             'skip': 'Only works from Norway', 
 211             'url': 'https://radio.nrk.no/serie/dagsnytt/NPUB21019315/12-07-2015#', 
 212             'only_matching': True, 
 216     def _extract_f4m(self
, manifest_url
, video_id
): 
 217         return self
._extract
_f
4m
_formats
( 
 218             manifest_url 
+ '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id
, f4m_id
='hds') 
 220     def _real_extract(self
, url
): 
 221         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 222         video_id 
= mobj
.group('id') 
 223         part_id 
= mobj
.group('part_id') 
 224         base_url 
= mobj
.group('baseurl') 
 226         webpage 
= self
._download
_webpage
(url
, video_id
) 
 228         title 
= self
._html
_search
_meta
( 
 229             'title', webpage
, 'title') 
 230         description 
= self
._html
_search
_meta
( 
 231             'description', webpage
, 'description') 
 233         thumbnail 
= self
._html
_search
_regex
( 
 234             r
'data-posterimage="([^"]+)"', 
 235             webpage
, 'thumbnail', fatal
=False) 
 236         upload_date 
= unified_strdate(self
._html
_search
_meta
( 
 237             'rightsfrom', webpage
, 'upload date', fatal
=False)) 
 238         duration 
= float_or_none(self
._html
_search
_regex
( 
 239             r
'data-duration="([^"]+)"', 
 240             webpage
, 'duration', fatal
=False)) 
 244             r
'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage
) 
 247             for current_part_id
, stream_url
, part_title 
in parts
: 
 248                 if part_id 
and current_part_id 
!= part_id
: 
 250                 video_part_id 
= '%s-part%s' % (video_id
, current_part_id
) 
 251                 formats 
= self
._extract
_f
4m
(stream_url
, video_part_id
) 
 255                     'description': description
, 
 256                     'thumbnail': thumbnail
, 
 257                     'upload_date': upload_date
, 
 264                 playlist 
= self
.playlist_result(entries
, video_id
, title
, description
) 
 266                     'thumbnail': thumbnail
, 
 267                     'upload_date': upload_date
, 
 268                     'duration': duration
, 
 274         f4m_url 
= re
.search(r
'data-media="([^"]+)"', webpage
) 
 276             formats
.extend(self
._extract
_f
4m
(f4m_url
.group(1), video_id
)) 
 278         m3u8_url 
= re
.search(r
'data-hls-media="([^"]+)"', webpage
) 
 280             formats
.extend(self
._extract
_m
3u8_formats
(m3u8_url
.group(1), video_id
, 'mp4', m3u8_id
='hls')) 
 281         self
._sort
_formats
(formats
) 
 283         subtitles_url 
= self
._html
_search
_regex
( 
 284             r
'data-subtitlesurl\s*=\s*(["\'])(?P
<url
>.+?
)\
1', 
 285             webpage, 'subtitle URL
', default=None, group='url
') 
 290                 'url
': compat_urlparse.urljoin(base_url, subtitles_url), 
 296             'description
': description, 
 297             'thumbnail
': thumbnail, 
 298             'upload_date
': upload_date, 
 299             'duration
': duration, 
 301             'subtitles
': subtitles,