]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nrk.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  15 class NRKIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'(?:nrk:|http://(?:www\.)?nrk\.no/video/PS\*)(?P<id>\d+)' 
  20             'url': 'http://www.nrk.no/video/PS*150533', 
  21             'md5': 'bccd850baebefe23b56d708a113229c2', 
  25                 'title': 'Dompap og andre fugler i Piip-Show', 
  26                 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f', 
  31             'url': 'http://www.nrk.no/video/PS*154915', 
  32             'md5': '0b1493ba1aae7d9579a5ad5531bc395a', 
  36                 'title': 'Slik høres internett ut når du er blind', 
  37                 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', 
  43     def _real_extract(self
, url
): 
  44         video_id 
= self
._match
_id
(url
) 
  46         data 
= self
._download
_json
( 
  47             'http://v8.psapi.nrk.no/mediaelement/%s' % video_id
, 
  48             video_id
, 'Downloading media JSON') 
  50         if data
['usageRights']['isGeoBlocked']: 
  52                 'NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', 
  55         video_url 
= data
['mediaUrl'] + '?hdcore=3.5.0&plugin=aasp-3.5.0.151.81' 
  57         duration 
= parse_duration(data
.get('duration')) 
  59         images 
= data
.get('images') 
  61             thumbnails 
= images
['webImages'] 
  62             thumbnails
.sort(key
=lambda image
: image
['pixelWidth']) 
  63             thumbnail 
= thumbnails
[-1]['imageUrl'] 
  71             'title': data
['title'], 
  72             'description': data
['description'], 
  74             'thumbnail': thumbnail
, 
  78 class NRKPlaylistIE(InfoExtractor
): 
  79     _VALID_URL 
= r
'http://(?:www\.)?nrk\.no/(?!video)(?:[^/]+/)+(?P<id>[^/]+)' 
  82         'url': 'http://www.nrk.no/troms/gjenopplev-den-historiske-solformorkelsen-1.12270763', 
  84             'id': 'gjenopplev-den-historiske-solformorkelsen-1.12270763', 
  85             'title': 'Gjenopplev den historiske solformørkelsen', 
  86             'description': 'md5:c2df8ea3bac5654a26fc2834a542feed', 
  90         'url': 'http://www.nrk.no/kultur/bok/rivertonprisen-til-karin-fossum-1.12266449', 
  92             'id': 'rivertonprisen-til-karin-fossum-1.12266449', 
  93             'title': 'Rivertonprisen til Karin Fossum', 
  94             'description': 'Første kvinne på 15 år til å vinne krimlitteraturprisen.', 
  99     def _real_extract(self
, url
): 
 100         playlist_id 
= self
._match
_id
(url
) 
 102         webpage 
= self
._download
_webpage
(url
, playlist_id
) 
 105             self
.url_result('nrk:%s' % video_id
, 'NRK') 
 106             for video_id 
in re
.findall( 
 107                 r
'class="[^"]*\brich\b[^"]*"[^>]+data-video-id="([^"]+)"', 
 111         playlist_title 
= self
._og
_search
_title
(webpage
) 
 112         playlist_description 
= self
._og
_search
_description
(webpage
) 
 114         return self
.playlist_result( 
 115             entries
, playlist_id
, playlist_title
, playlist_description
) 
 118 class NRKTVIE(InfoExtractor
): 
 119     _VALID_URL 
= r
'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' 
 123             'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 
 124             'md5': 'adf2c5454fa2bf032f47a9f8fb351342', 
 126                 'id': 'MUHH48000314', 
 128                 'title': '20 spørsmål', 
 129                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 
 130                 'upload_date': '20140523', 
 135             'url': 'http://tv.nrk.no/program/mdfp15000514', 
 136             'md5': '383650ece2b25ecec996ad7b5bb2a384', 
 138                 'id': 'mdfp15000514', 
 140                 'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting', 
 141                 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', 
 142                 'upload_date': '20140524', 
 147             # single playlist video 
 148             'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', 
 149             'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 151                 'id': 'MSPO40010515-part2', 
 153                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 154                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 155                 'upload_date': '20150106', 
 157             'skip': 'Only works from Norway', 
 160             'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 
 163                     'md5': '9480285eff92d64f06e02a5367970a7a', 
 165                         'id': 'MSPO40010515-part1', 
 167                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', 
 168                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 169                         'upload_date': '20150106', 
 173                     'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 175                         'id': 'MSPO40010515-part2', 
 177                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 178                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 179                         'upload_date': '20150106', 
 184                 'id': 'MSPO40010515', 
 185                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', 
 186                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 187                 'upload_date': '20150106', 
 188                 'duration': 6947.5199999999995, 
 190             'skip': 'Only works from Norway', 
 194     def _debug_print(self
, txt
): 
 195         if self
._downloader
.params
.get('verbose', False): 
 196             self
.to_screen('[debug] %s' % txt
) 
 198     def _get_subtitles(self
, subtitlesurl
, video_id
, baseurl
): 
 199         url 
= "%s%s" % (baseurl
, subtitlesurl
) 
 200         self
._debug
_print
('%s: Subtitle url: %s' % (video_id
, url
)) 
 201         captions 
= self
._download
_xml
( 
 202             url
, video_id
, 'Downloading subtitles') 
 203         lang 
= captions
.get('lang', 'no') 
 205             {'ext': 'ttml', 'url': url
}, 
 208     def _extract_f4m(self
, manifest_url
, video_id
): 
 209         return self
._extract
_f
4m
_formats
(manifest_url 
+ '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id
) 
 211     def _real_extract(self
, url
): 
 212         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 213         video_id 
= mobj
.group('id') 
 214         part_id 
= mobj
.group('part_id') 
 215         baseurl 
= mobj
.group('baseurl') 
 217         webpage 
= self
._download
_webpage
(url
, video_id
) 
 219         title 
= self
._html
_search
_meta
( 
 220             'title', webpage
, 'title') 
 221         description 
= self
._html
_search
_meta
( 
 222             'description', webpage
, 'description') 
 224         thumbnail 
= self
._html
_search
_regex
( 
 225             r
'data-posterimage="([^"]+)"', 
 226             webpage
, 'thumbnail', fatal
=False) 
 227         upload_date 
= unified_strdate(self
._html
_search
_meta
( 
 228             'rightsfrom', webpage
, 'upload date', fatal
=False)) 
 229         duration 
= float_or_none(self
._html
_search
_regex
( 
 230             r
'data-duration="([^"]+)"', 
 231             webpage
, 'duration', fatal
=False)) 
 235             r
'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage
) 
 238             for current_part_id
, stream_url
, part_title 
in parts
: 
 239                 if part_id 
and current_part_id 
!= part_id
: 
 241                 video_part_id 
= '%s-part%s' % (video_id
, current_part_id
) 
 242                 formats 
= self
._extract
_f
4m
(stream_url
, video_part_id
) 
 246                     'description': description
, 
 247                     'thumbnail': thumbnail
, 
 248                     'upload_date': upload_date
, 
 255                 playlist 
= self
.playlist_result(entries
, video_id
, title
, description
) 
 257                     'thumbnail': thumbnail
, 
 258                     'upload_date': upload_date
, 
 259                     'duration': duration
, 
 265         f4m_url 
= re
.search(r
'data-media="([^"]+)"', webpage
) 
 267             formats
.extend(self
._extract
_f
4m
(f4m_url
.group(1), video_id
)) 
 269         m3u8_url 
= re
.search(r
'data-hls-media="([^"]+)"', webpage
) 
 271             formats
.extend(self
._extract
_m
3u8_formats
(m3u8_url
.group(1), video_id
, 'mp4')) 
 272         self
._sort
_formats
(formats
) 
 274         subtitles_url 
= self
._html
_search
_regex
( 
 275             r
'data-subtitlesurl[ ]*=[ ]*"([^"]+)"', 
 276             webpage
, 'subtitle URL', default
=None) 
 279             subtitles 
= self
.extract_subtitles(subtitles_url
, video_id
, baseurl
) 
 284             'description': description
, 
 285             'thumbnail': thumbnail
, 
 286             'upload_date': upload_date
, 
 287             'duration': duration
, 
 289             'subtitles': subtitles
,