Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/puls4.py

   1 # -*- coding: utf-8 -*-
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     ExtractorError,
   7     unified_strdate,
   8     int_or_none,
   9 )
  10
  11
  12 class Puls4IE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?puls4\.com/video/[^/]+/play/(?P<id>[0-9]+)'
  14     _TESTS = [{
  15         'url': 'http://www.puls4.com/video/pro-und-contra/play/2716816',
  16         'md5': '49f6a6629747eeec43cef6a46b5df81d',
  17         'info_dict': {
  18             'id': '2716816',
  19             'ext': 'mp4',
  20             'title': 'Pro und Contra vom 23.02.2015',
  21             'description': 'md5:293e44634d9477a67122489994675db6',
  22             'duration': 2989,
  23             'upload_date': '20150224',
  24             'uploader': 'PULS_4',
  25         },
  26         'skip': 'Only works from Germany',
  27     }, {
  28         'url': 'http://www.puls4.com/video/kult-spielfilme/play/1298106',
  29         'md5': '6a48316c8903ece8dab9b9a7bf7a59ec',
  30         'info_dict': {
  31             'id': '1298106',
  32             'ext': 'mp4',
  33             'title': 'Lucky Fritz',
  34         },
  35         'skip': 'Only works from Germany',
  36     }]
  37
  38     def _real_extract(self, url):
  39         video_id = self._match_id(url)
  40         webpage = self._download_webpage(url, video_id)
  41
  42         error_message = self._html_search_regex(
  43             r'<div[^>]+class="message-error"[^>]*>(.+?)</div>',
  44             webpage, 'error message', default=None)
  45         if error_message:
  46             raise ExtractorError(
  47                 '%s returned error: %s' % (self.IE_NAME, error_message), expected=True)
  48
  49         real_url = self._html_search_regex(
  50             r'\"fsk-button\".+?href=\"([^"]+)',
  51             webpage, 'fsk_button', default=None)
  52         if real_url:
  53             webpage = self._download_webpage(real_url, video_id)
  54
  55         player = self._search_regex(
  56             r'p4_video_player(?:_iframe)?\("video_\d+_container"\s*,(.+?)\);\s*\}',
  57             webpage, 'player')
  58
  59         player_json = self._parse_json(
  60             '[%s]' % player, video_id,
  61             transform_source=lambda s: s.replace('undefined,', ''))
  62
  63         formats = None
  64         result = None
  65
  66         for v in player_json:
  67             if isinstance(v, list) and not formats:
  68                 formats = [{
  69                     'url': f['url'],
  70                     'format': 'hd' if f.get('hd') else 'sd',
  71                     'width': int_or_none(f.get('size_x')),
  72                     'height': int_or_none(f.get('size_y')),
  73                     'tbr': int_or_none(f.get('bitrate')),
  74                 } for f in v]
  75                 self._sort_formats(formats)
  76             elif isinstance(v, dict) and not result:
  77                 result = {
  78                     'id': video_id,
  79                     'title': v['videopartname'].strip(),
  80                     'description': v.get('videotitle'),
  81                     'duration': int_or_none(v.get('videoduration') or v.get('episodeduration')),
  82                     'upload_date': unified_strdate(v.get('clipreleasetime')),
  83                     'uploader': v.get('channel'),
  84                 }
  85
  86         result['formats'] = formats
  87
  88         return result