import re
from .common import InfoExtractor
-from ..compat import (
- compat_urlparse,
- compat_HTTPError,
-)
-from ..utils import (
- HEADRequest,
- ExtractorError,
-)
+from ..compat import compat_urlparse
from .spiegeltv import SpiegeltvIE
class SpiegelIE(InfoExtractor):
- _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed)?(?:\.html)?(?:#.*)?$'
+ _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<id>[0-9]+)(?:-embed|-iframe)?(?:\.html)?(?:#.*)?$'
_TESTS = [{
'url': 'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
'md5': '2c2754212136f35fb4b19767d242f66e',
'description': 'SPIEGEL ONLINE-Nutzer durften den deutschen Astronauten Alexander Gerst über sein Leben auf der ISS-Station befragen. Hier kommen seine Antworten auf die besten sechs Fragen.',
'title': 'Fragen an Astronaut Alexander Gerst: "Bekommen Sie die Tageszeiten mit?"',
}
+ }, {
+ 'url': 'http://www.spiegel.de/video/astronaut-alexander-gerst-von-der-iss-station-beantwortet-fragen-video-1519126-iframe.html',
+ 'only_matching': True,
}]
def _real_extract(self, url):
description = self._html_search_meta('description', webpage, 'description')
base_url = self._search_regex(
- r'var\s+server\s*=\s*"([^"]+)\"', webpage, 'server URL')
+ [r'server\s*:\s*(["\'])(?P<url>.+?)\1', r'var\s+server\s*=\s*"(?P<url>[^"]+)\"'],
+ webpage, 'server URL', group='url')
xml_url = base_url + video_id + '.xml'
idoc = self._download_xml(xml_url, video_id)
if n.tag.startswith('type') and n.tag != 'type6':
format_id = n.tag.rpartition('type')[2]
video_url = base_url + n.find('./filename').text
- # Test video URLs beforehand as some of them are invalid
- try:
- self._request_webpage(
- HEADRequest(video_url), video_id,
- 'Checking %s video URL' % format_id)
- except ExtractorError as e:
- if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
- self.report_warning(
- '%s video URL is invalid, skipping' % format_id, video_id)
- continue
formats.append({
'format_id': format_id,
'url': video_url,
})
duration = float(idoc[0].findall('./duration')[0].text)
+ self._check_formats(formats, video_id)
self._sort_formats(formats)
return {