Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nzz.py
debian/copyright: Use HTTPS for Format: field.
[youtubedl] / youtube_dl / extractor / nzz.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 extract_attributes,
9 )
10
11
class NZZIE(InfoExtractor):
    """Extractor for nzz.ch article pages that embed Kaltura players.

    The numeric article id is taken from the ``-ld.<id>`` suffix of the URL.
    Every supported player element found in the page becomes one playlist
    entry that is delegated to the ``Kaltura`` extractor.
    """

    _VALID_URL = r'https?://(?:www\.)?nzz\.ch/(?:[^/]+/)*[^/?#]+-ld\.(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.nzz.ch/zuerich/gymizyte/gymizyte-schreiben-schueler-heute-noch-diktate-ld.9153',
        'info_dict': {
            'id': '9153',
        },
        'playlist_mincount': 6,
    }

    def _real_extract(self, url):
        """Return a playlist of the Kaltura videos embedded in the article.

        Player elements whose ``data-type`` attribute is not supported are
        reported with a warning and left out of the playlist.
        """
        article_id = self._match_id(url)
        html = self._download_webpage(url, article_id)

        # Only the opening tag of each player element is captured; its
        # attributes carry everything needed to build the entry.
        player_tag_re = r'(<[^>]+class="kalturaPlayer"[^>]*>)'
        supported_types = ('kaltura_singleArticle',)

        playlist = []
        for tag in re.findall(player_tag_re, html):
            attrs = extract_attributes(tag)
            if attrs.get('data-type') in supported_types:
                kaltura_id = attrs['data-id']
                # NOTE(review): 1750922 is presumably the site's Kaltura
                # partner id -- confirm against the Kaltura extractor.
                playlist.append(self.url_result(
                    'kaltura:1750922:' + kaltura_id, 'Kaltura', kaltura_id))
            else:
                self.report_warning('Unsupported player type')

        return self.playlist_result(playlist, article_id)