Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/fktv.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     clean_html,
   6     determine_ext,
   7     js_to_json,
   8 )
   9
  10
  11 class FKTVIE(InfoExtractor):
  12     IE_NAME = 'fernsehkritik.tv'
  13     _VALID_URL = r'https?://(?:www\.)?fernsehkritik\.tv/folge-(?P<id>[0-9]+)(?:/.*)?'
  14
  15     _TEST = {
  16         'url': 'http://fernsehkritik.tv/folge-1',
  17         'md5': '21f0b0c99bce7d5b524eb1b17b1c6d79',
  18         'info_dict': {
  19             'id': '1',
  20             'ext': 'mp4',
  21             'title': 'Folge 1 vom 10. April 2007',
  22             'thumbnail': r're:^https?://.*\.jpg$',
  23         },
  24     }
  25
  26     def _real_extract(self, url):
  27         episode = self._match_id(url)
  28
  29         webpage = self._download_webpage(
  30             'http://fernsehkritik.tv/folge-%s/play' % episode, episode)
  31         title = clean_html(self._html_search_regex(
  32             '<h3>([^<]+)</h3>', webpage, 'title'))
  33         thumbnail = self._search_regex(r'POSTER\s*=\s*"([^"]+)', webpage, 'thumbnail', fatal=False)
  34         sources = self._parse_json(self._search_regex(r'(?s)MEDIA\s*=\s*(\[.+?\]);', webpage, 'media'), episode, js_to_json)
  35
  36         formats = []
  37         for source in sources:
  38             furl = source.get('src')
  39             if furl:
  40                 formats.append({
  41                     'url': furl,
  42                     'format_id': determine_ext(furl),
  43                 })
  44         self._sort_formats(formats)
  45
  46         return {
  47             'id': episode,
  48             'title': title,
  49             'formats': formats,
  50             'thumbnail': thumbnail,
  51         }