Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/fktv.py
Imported Upstream version 2015.11.27.1
[youtubedl] / youtube_dl / extractor / fktv.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7 clean_html,
8 determine_ext,
9 ExtractorError,
10 )
11
12
class FKTVIE(InfoExtractor):
    """Extractor for fernsehkritik.tv episode pages (``/folge-<number>``)."""

    IE_NAME = 'fernsehkritik.tv'
    _VALID_URL = r'http://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'

    _TEST = {
        'url': 'http://fernsehkritik.tv/folge-1',
        'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
        'info_dict': {
            'id': '1',
            'ext': 'mp4',
            'title': 'Folge 1 vom 10. April 2007',
            # Raw string on purpose: '\.' is an invalid escape sequence in a
            # regular literal and triggers a warning on modern Python 3.
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the episode referenced by *url*.

        The episode number is taken from the URL, the episode's ``/play``
        page is downloaded, and the title, poster and media sources are
        scraped from its markup.

        Returns a dict with the keys ``id``, ``title``, ``formats`` and
        ``thumbnail`` (``thumbnail`` is None when no poster attribute is
        present).

        Raises ExtractorError when no ``<video>`` element can be matched.
        """
        episode = self._match_id(url)

        webpage = self._download_webpage(
            'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
        title = clean_html(self._html_search_regex(
            r'<h3>([^<]+)</h3>', webpage, 'title'))

        # Group 1: value of the optional poster="..." attribute (None when
        # the tag has none). Group 2: everything between the opening tag and
        # </video>; (?s) lets '.' cross line breaks and the greedy '.*' runs
        # up to the last </video> on the page.
        matches = re.search(
            r'(?s)<video(?:(?!poster)[^>])+(?:poster="([^"]+)")?[^>]*>(.*)</video>',
            webpage)
        if matches is None:
            raise ExtractorError('Unable to extract the video')

        poster, sources = matches.groups()
        if poster is None:
            # A missing poster is not fatal: warn and return thumbnail=None.
            self.report_warning('unable to extract thumbnail')

        # One format entry per <source src="..."> inside the <video> element;
        # format_id is whatever determine_ext() derives from the URL.
        urls = re.findall(r'<source[^>]+src="([^"]+)"', sources)
        formats = [{
            'url': furl,
            'format_id': determine_ext(furl),
        } for furl in urls]
        return {
            'id': episode,
            'title': title,
            'formats': formats,
            'thumbnail': poster,
        }