]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nrk.py
f6de260222c678e2233b668d4b557e22e51d224c
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  13 from .subtitles 
import SubtitlesInfoExtractor
 
  16 class NRKIE(InfoExtractor
): 
    # Extractor for nrk.no pages under /video/ or /lyd/; _VALID_URL captures the
    # 16-character [0-9A-F] identifier in the URL as the `id` group.
    # NOTE(review): the embedded line numbers in this listing skip values
    # (e.g. 17 -> 21), so some lines of this class are not shown here.
  17     _VALID_URL 
= r
'http://(?:www\.)?nrk\.no/(?:video|lyd)/[^/]+/(?P<id>[\dA-F]{16})' 
    # Sample page URLs with md5, title and description values; the structure
    # that encloses them is not visible in this listing.
  21             'url': 'http://www.nrk.no/video/dompap_og_andre_fugler_i_piip_show/D0FA54B5C8B6CE59/emne/piipshow/', 
  22             'md5': 'a6eac35052f3b242bb6bb7f43aed5886', 
  26                 'title': 'Dompap og andre fugler i Piip-Show', 
  27                 'description': 'md5:d9261ba34c43b61c812cb6b0269a5c8f' 
  31             'url': 'http://www.nrk.no/lyd/lyd_av_oppleser_for_blinde/AEFDDD5473BA0198/', 
  32             'md5': '3471f2a51718195164e88f46bf427668', 
  36                 'title': 'Slik høres internett ut når du er blind', 
  37                 'description': 'md5:a621f5cc1bd75c8d5104cb048c6b8568', 
  42     def _real_extract(self
, url
): 
        # Take the `id` group of _VALID_URL from the page URL.
  43         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  44         video_id 
= mobj
.group('id') 
        # Download the page; the numeric data-nrk-id found in its nrk-video div
        # replaces the id taken from the URL.
  46         page 
= self
._download
_webpage
(url
, video_id
) 
  48         video_id 
= self
._html
_search
_regex
(r
'<div class="nrk-video" data-nrk-id="(\d+)">', page
, 'video id') 
        # Fetch the media element JSON for that id.
  50         data 
= self
._download
_json
( 
  51             'http://v7.psapi.nrk.no/mediaelement/%s' % video_id
, video_id
, 'Downloading media JSON') 
        # Geo-blocked media is reported as an ExtractorError with expected=True.
  53         if data
['usageRights']['isGeoBlocked']: 
  54             raise ExtractorError('NRK har ikke rettig-heter til å vise dette programmet utenfor Norge', expected
=True) 
        # The media URL gets a fixed hdcore/plugin query string appended.
  56         video_url 
= data
['mediaUrl'] + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124' 
        # Web images are sorted ascending by pixelWidth, so the last one (the
        # widest) supplies the thumbnail URL.
        # NOTE(review): the statement guarding this indented section is not
        # visible in this listing.
  58         images 
= data
.get('images') 
  60             thumbnails 
= images
['webImages'] 
  61             thumbnails
.sort(key
=lambda image
: image
['pixelWidth']) 
  62             thumbnail 
= thumbnails
[-1]['imageUrl'] 
        # Title and description come from the JSON response; the statement
        # enclosing these entries is not visible in this listing.
  70             'title': data
['title'], 
  71             'description': data
['description'], 
  72             'thumbnail': thumbnail
, 
  76 class NRKTVIE(SubtitlesInfoExtractor
): 
    # Extractor for tv.nrk.no / tv.nrksuper.no pages under serie/<name>/ or
    # program/. _VALID_URL captures three groups: `baseurl` (scheme and host),
    # `id` (four letters followed by eight digits) and an optional `part_id`
    # taken from a "#del=N" fragment.
    # NOTE(review): the embedded line numbers in this listing skip values
    # (e.g. 77 -> 81), so some lines of this class are not shown here.
  77     _VALID_URL 
= r
'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?' 
    # Sample page URLs with md5, id, title, description, upload_date and
    # duration values; the structure that encloses them is not visible in this
    # listing.
  81             'url': 'http://tv.nrk.no/serie/20-spoersmaal-tv/MUHH48000314/23-05-2014', 
  82             'md5': 'adf2c5454fa2bf032f47a9f8fb351342', 
  86                 'title': '20 spørsmål', 
  87                 'description': 'md5:bdea103bc35494c143c6a9acdd84887a', 
  88                 'upload_date': '20140523', 
  93             'url': 'http://tv.nrk.no/program/mdfp15000514', 
  94             'md5': '383650ece2b25ecec996ad7b5bb2a384', 
  98                 'title': 'Kunnskapskanalen: Grunnlovsjubiléet - Stor ståhei for ingenting', 
  99                 'description': 'md5:654c12511f035aed1e42bdf5db3b206a', 
 100                 'upload_date': '20140524', 
 105             # single playlist video 
 106             'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015#del=2', 
 107             'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 109                 'id': 'MSPO40010515-part2', 
 111                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 112                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 113                 'upload_date': '20150106', 
 115             'skip': 'Only works from Norway', 
    # The same programme URL without a "#del=N" fragment, followed by values
    # for part 1, part 2 and the programme as a whole.
 118             'url': 'http://tv.nrk.no/serie/tour-de-ski/MSPO40010515/06-01-2015', 
 121                     'md5': '9480285eff92d64f06e02a5367970a7a', 
 123                         'id': 'MSPO40010515-part1', 
 125                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 1:2)', 
 126                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 127                         'upload_date': '20150106', 
 131                     'md5': 'adbd1dbd813edaf532b0a253780719c2', 
 133                         'id': 'MSPO40010515-part2', 
 135                         'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn 06.01.2015 (del 2:2)', 
 136                         'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 137                         'upload_date': '20150106', 
 142                 'id': 'MSPO40010515', 
 143                 'title': 'Tour de Ski: Sprint fri teknikk, kvinner og menn', 
 144                 'description': 'md5:238b67b97a4ac7d7b4bf0edf8cc57d26', 
 145                 'upload_date': '20150106', 
 146                 'duration': 6947.5199999999995, 
 148             'skip': 'Only works from Norway', 
 152     def _seconds2str(self
, s
): 
 153         return '%02d:%02d:%02d.%03d' % (s 
/ 3600, (s 
% 3600) / 60, s 
% 60, (s 
% 1) * 1000) 
 155     def _debug_print(self
, txt
): 
 156         if self
._downloader
.params
.get('verbose', False): 
 157             self
.to_screen('[debug] %s' % txt
) 
 159     def _extract_captions(self
, subtitlesurl
, video_id
, baseurl
): 
 160         url 
= "%s%s" % (baseurl
, subtitlesurl
) 
 161         self
._debug
_print
('%s: Subtitle url: %s' % (video_id
, url
)) 
 162         captions 
= self
._download
_xml
(url
, video_id
, 'Downloading subtitles') 
 163         lang 
= captions
.get('lang', 'no') 
 164         ps 
= captions
.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}')) 
 166         for pos
, p 
in enumerate(ps
): 
 167             begin 
= parse_duration(p
.get('begin')) 
 168             duration 
= parse_duration(p
.get('dur')) 
 169             starttime 
= self
._seconds
2str
(begin
) 
 170             endtime 
= self
._seconds
2str
(begin 
+ duration
) 
 171             text 
= '\n'.join(p
.itertext()) 
 172             srt 
+= '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos
), starttime
, endtime
, text
) 
 175     def _extract_f4m(self
, manifest_url
, video_id
): 
 176         return self
._extract
_f
4m
_formats
(manifest_url 
+ '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id
) 
 178     def _real_extract(self
, url
): 
        # NOTE(review): the embedded line numbers skip values inside this method
        # (e.g. 198 -> 202, 206 -> 208, 226 -> 232), so several statements are
        # not shown in this listing.
        # Split the URL into programme id, optional part id and site base URL.
 179         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 180         video_id 
= mobj
.group('id') 
 181         part_id 
= mobj
.group('part_id') 
 182         baseurl 
= mobj
.group('baseurl') 
        # Download the page, then look up title and description with
        # _html_search_meta.
 184         webpage 
= self
._download
_webpage
(url
, video_id
) 
 186         title 
= self
._html
_search
_meta
( 
 187             'title', webpage
, 'title') 
 188         description 
= self
._html
_search
_meta
( 
 189             'description', webpage
, 'description') 
        # Thumbnail, upload date and duration are looked up with fatal=False.
 191         thumbnail 
= self
._html
_search
_regex
( 
 192             r
'data-posterimage="([^"]+)"', 
 193             webpage
, 'thumbnail', fatal
=False) 
 194         upload_date 
= unified_strdate(self
._html
_search
_meta
( 
 195             'rightsfrom', webpage
, 'upload date', fatal
=False)) 
 196         duration 
= float_or_none(self
._html
_search
_regex
( 
 197             r
'data-duration="([^"]+)"', 
 198             webpage
, 'duration', fatal
=False)) 
        # Pattern for part links: captures the "#del=N" number, the
        # data-argument value and the link text.
 202             r
'<a href="#del=(\d+)"[^>]+data-argument="([^"]+)">([^<]+)</a>', webpage
) 
        # Walk the (part id, stream URL, part title) triples in `parts`. The body
        # of the part_id comparison below is not visible in this listing.
 205             for current_part_id
, stream_url
, part_title 
in parts
: 
 206                 if part_id 
and current_part_id 
!= part_id
: 
        # Each part gets an id of the form "<video_id>-part<N>" and its formats
        # from _extract_f4m.
 208                 video_part_id 
= '%s-part%s' % (video_id
, current_part_id
) 
 209                 formats 
= self
._extract
_f
4m
(stream_url
, video_part_id
) 
 213                     'description': description
, 
 214                     'thumbnail': thumbnail
, 
 215                     'upload_date': upload_date
, 
        # Build a playlist result from `entries`; the thumbnail, upload_date and
        # duration entries that follow belong to a statement not shown here.
 222                 playlist 
= self
.playlist_result(entries
, video_id
, title
, description
) 
 224                     'thumbnail': thumbnail
, 
 225                     'upload_date': upload_date
, 
 226                     'duration': duration
, 
        # Search the page for an f4m manifest URL (data-media) and an HLS URL
        # (data-hls-media); formats from each are appended to `formats`.
 232         f4m_url 
= re
.search(r
'data-media="([^"]+)"', webpage
) 
 234             formats
.extend(self
._extract
_f
4m
(f4m_url
.group(1), video_id
)) 
 236         m3u8_url 
= re
.search(r
'data-hls-media="([^"]+)"', webpage
) 
 238             formats
.extend(self
._extract
_m
3u8_formats
(m3u8_url
.group(1), video_id
, 'mp4')) 
 239         self
._sort
_formats
(formats
) 
        # The subtitle URL lookup uses default=None, so a page without one does
        # not raise.
 241         subtitles_url 
= self
._html
_search
_regex
( 
 242             r
'data-subtitlesurl[ ]*=[ ]*"([^"]+)"', 
 243             webpage
, 'subtitle URL', default
=None) 
 246             subtitles 
= self
._extract
_captions
(subtitles_url
, video_id
, baseurl
) 
        # With the 'listsubtitles' parameter set, hand the subtitles to
        # _list_available_subtitles.
 247         if self
._downloader
.params
.get('listsubtitles', False): 
 248             self
._list
_available
_subtitles
(video_id
, subtitles
) 
        # Metadata entries; the statement enclosing them is not visible in this
        # listing.
 254             'description': description
, 
 255             'thumbnail': thumbnail
, 
 256             'upload_date': upload_date
, 
 257             'duration': duration
, 
 259             'subtitles': subtitles
,