]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/fktv.py
2958452f470bca7f7322fa9dcdccca66f525cee0
[youtubedl] / youtube_dl / extractor / fktv.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..utils import (
5 clean_html,
6 determine_ext,
7 js_to_json,
8 )
9
10
class FKTVIE(InfoExtractor):
    """Extractor for fernsehkritik.tv episode pages (``/folge-<number>``)."""

    IE_NAME = 'fernsehkritik.tv'
    # The numeric episode number is captured as the video id; anything after
    # it in the path (e.g. a trailing "/play") is accepted and ignored.
    _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'

    _TEST = {
        'url': 'http://fernsehkritik.tv/folge-1',
        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Folge 1 vom 10. April 2007',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode addressed by *url*.

        The episode's ``/play`` page is downloaded and scanned for:

        * the title, taken from the first ``<h3>`` element;
        * the thumbnail, taken from a ``POSTER = "..."`` assignment
          (looked up with ``fatal=False``, so a miss is tolerated);
        * the media list, taken from a ``MEDIA = [...];`` assignment and
          converted with ``js_to_json`` before being parsed.

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnail``.
        """
        episode = self._match_id(url)

        webpage = self._download_webpage(
            'http://fernsehkritik.tv/folge-%s/play' % episode, episode)

        # _html_search_regex already passes the match through clean_html;
        # cleaning it a second time would decode HTML entities twice
        # (e.g. "&amp;lt;" -> "&lt;" -> "<") and could then drop text that
        # merely looks like a tag.
        title = self._html_search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title')
        thumbnail = self._search_regex(
            r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
        sources = self._parse_json(
            self._search_regex(
                r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'),
            episode, js_to_json)

        # Entries without a 'src' value are skipped; the file extension of
        # each remaining URL doubles as its format id.
        media_urls = [source.get('src') for source in sources]
        formats = [{
            'url': media_url,
            'format_id': determine_ext(media_url),
        } for media_url in media_urls if media_url]
        self._sort_formats(formats)

        return {
            'id': episode,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }