Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/nrk.py
Imported Upstream version 2015.02.28
[youtubedl] / youtube_dl / extractor / nrk.py
index f6de260222c678e2233b668d4b557e22e51d224c..1e4cfa2e7c8c5e3ae05c7d5fbc11242a334a5322 100644 (file)
@@ -4,13 +4,13 @@ from __future__ import unicode_literals
 import re
 
 from .common import InfoExtractor
+from ..compat import compat_str
 from ..utils import (
     ExtractorError,
     float_or_none,
     parse_duration,
     unified_strdate,
 )
-from .subtitles import SubtitlesInfoExtractor
 
 
 class NRKIE(InfoExtractor):
@@ -73,7 +73,7 @@ class NRKIE(InfoExtractor):
         }
 
 
-class NRKTVIE(SubtitlesInfoExtractor):
+class NRKTVIE(InfoExtractor):
     _VALID_URL = r'(?P<baseurl>http://tv\.nrk(?:super)?\.no/)(?:serie/[^/]+|program)/(?P<id>[a-zA-Z]{4}\d{8})(?:/\d{2}-\d{2}-\d{4})?(?:#del=(?P<part_id>\d+))?'
 
     _TESTS = [
@@ -156,10 +156,12 @@ class NRKTVIE(SubtitlesInfoExtractor):
         if self._downloader.params.get('verbose', False):
             self.to_screen('[debug] %s' % txt)
 
-    def _extract_captions(self, subtitlesurl, video_id, baseurl):
+    def _get_subtitles(self, subtitlesurl, video_id, baseurl):
         url = "%s%s" % (baseurl, subtitlesurl)
         self._debug_print('%s: Subtitle url: %s' % (video_id, url))
-        captions = self._download_xml(url, video_id, 'Downloading subtitles')
+        captions = self._download_xml(
+            url, video_id, 'Downloading subtitles',
+            transform_source=lambda s: s.replace(r'<br />', '\r\n'))
         lang = captions.get('lang', 'no')
         ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/ns/ttml}'))
         srt = ''
@@ -168,9 +170,11 @@ class NRKTVIE(SubtitlesInfoExtractor):
             duration = parse_duration(p.get('dur'))
             starttime = self._seconds2str(begin)
             endtime = self._seconds2str(begin + duration)
-            text = '\n'.join(p.itertext())
-            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), starttime, endtime, text)
-        return {lang: srt}
+            srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (compat_str(pos), starttime, endtime, p.text)
+        return {lang: [
+            {'ext': 'ttml', 'url': url},
+            {'ext': 'srt', 'data': srt},
+        ]}
 
     def _extract_f4m(self, manifest_url, video_id):
         return self._extract_f4m_formats(manifest_url + '?hdcore=3.1.1&plugin=aasp-3.1.1.69.124', video_id)
@@ -243,10 +247,7 @@ class NRKTVIE(SubtitlesInfoExtractor):
             webpage, 'subtitle URL', default=None)
         subtitles = None
         if subtitles_url:
-            subtitles = self._extract_captions(subtitles_url, video_id, baseurl)
-        if self._downloader.params.get('listsubtitles', False):
-            self._list_available_subtitles(video_id, subtitles)
-            return
+            subtitles = self.extract_subtitles(subtitles_url, video_id, baseurl)
 
         return {
             'id': video_id,