import re
from .common import InfoExtractor
+from ..compat import compat_str
from ..utils import (
ExtractorError,
float_or_none,
parse_duration,
unified_strdate,
)
-from .subtitles import SubtitlesInfoExtractor
class NRKIE(InfoExtractor):
}
-class NRKTVIE(SubtitlesInfoExtractor):
+class NRKTVIE(InfoExtractor):
_VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
_TESTS = [
if self._downloader.params.get('verbose', False):
self.to_screen('[debug] %s' % txt)
- def _extract_captions(self, subtitlesurl, video_id, baseurl):
+ def _get_subtitles(self, subtitlesurl, video_id, baseurl):
url = "%s%s" % (baseurl, subtitlesurl)
self._debug_print('%s: Subtitle url: %s' % (video_id, url))
- captions = self._download_xml(url, video_id, 'Downloading subtitles')
+ captions = self._download_xml(
+ url, video_id, 'Downloading subtitles',
+ transform_source=lambda s: s.replace(r'<br />', '\r\n'))
lang = captions.get('lang', 'no')
ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
srt = ''
duration = parse_duration(p.get('dur'))
starttime = self._seconds2str(begin)
endtime = self._seconds2str(begin + duration)
- text = '\n'.join(p.itertext())
- srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
- return {lang: srt}
+ srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
+ return {lang: [
+ {'ext': 'ttml', 'url': url},
+ {'ext': 'srt', 'data': srt},
+ ]}
def _extract_f4m(self, manifest_url, video_id):
return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
webpage, 'subtitle URL', default=None)
subtitles = None
if subtitles_url:
- subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
- if self._downloader.params.get('listsubtitles', False):
- self._list_available_subtitles(video_id, subtitles)
- return
+ subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
return {
'id': video_id,