Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/dctp.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..compat import compat_str
   6 from ..utils import (
   7     float_or_none,
   8     unified_strdate,
   9 )
  10
  11
  12 class DctpTvIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?dctp\.tv/(?:#/)?filme/(?P<id>[^/?#&]+)'
  14     _TEST = {
  15         'url': 'http://www.dctp.tv/filme/videoinstallation-fuer-eine-kaufhausfassade/',
  16         'info_dict': {
  17             'id': '95eaa4f33dad413aa17b4ee613cccc6c',
  18             'display_id': 'videoinstallation-fuer-eine-kaufhausfassade',
  19             'ext': 'flv',
  20             'title': 'Videoinstallation für eine Kaufhausfassade',
  21             'description': 'Kurzfilm',
  22             'upload_date': '20110407',
  23             'thumbnail': r're:^https?://.*\.jpg$',
  24             'duration': 71.24,
  25         },
  26         'params': {
  27             # rtmp download
  28             'skip_download': True,
  29         },
  30     }
  31
  32     def _real_extract(self, url):
  33         display_id = self._match_id(url)
  34
  35         webpage = self._download_webpage(url, display_id)
  36
  37         video_id = self._html_search_meta(
  38             'DC.identifier', webpage, 'video id',
  39             default=None) or self._search_regex(
  40             r'id=["\']uuid[^>]+>([^<]+)<', webpage, 'video id')
  41
  42         title = self._og_search_title(webpage)
  43
  44         servers = self._download_json(
  45             'http://www.dctp.tv/streaming_servers/', display_id,
  46             note='Downloading server list', fatal=False)
  47
  48         if servers:
  49             endpoint = next(
  50                 server['endpoint']
  51                 for server in servers
  52                 if isinstance(server.get('endpoint'), compat_str) and
  53                 'cloudfront' in server['endpoint'])
  54         else:
  55             endpoint = 'rtmpe://s2pqqn4u96e4j8.cloudfront.net/cfx/st/'
  56
  57         app = self._search_regex(
  58             r'^rtmpe?://[^/]+/(?P<app>.*)$', endpoint, 'app')
  59
  60         formats = [{
  61             'url': endpoint,
  62             'app': app,
  63             'play_path': 'mp4:%s_dctp_0500_4x3.m4v' % video_id,
  64             'page_url': url,
  65             'player_url': 'http://svm-prod-dctptv-static.s3.amazonaws.com/dctptv-relaunch2012-109.swf',
  66             'ext': 'flv',
  67         }]
  68
  69         description = self._html_search_meta('DC.description', webpage)
  70         upload_date = unified_strdate(
  71             self._html_search_meta('DC.date.created', webpage))
  72         thumbnail = self._og_search_thumbnail(webpage)
  73         duration = float_or_none(self._search_regex(
  74             r'id=["\']duration_in_ms[^+]>(\d+)', webpage, 'duration',
  75             default=None), scale=1000)
  76
  77         return {
  78             'id': video_id,
  79             'title': title,
  80             'formats': formats,
  81             'display_id': display_id,
  82             'description': description,
  83             'upload_date': upload_date,
  84             'thumbnail': thumbnail,
  85             'duration': duration,
  86         }