Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/clipfish.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4 import time
   5 import xml.etree.ElementTree
   6
   7 from .common import InfoExtractor
   8 from ..utils import (
   9     ExtractorError,
  10     parse_duration,
  11 )
  12
  13
  14 class ClipfishIE(InfoExtractor):
  15     IE_NAME = 'clipfish'
  16
  17     _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
  18     _TEST = {
  19         'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
  20         'md5': '2521cd644e862936cf2e698206e47385',
  21         'info_dict': {
  22             'id': '3966754',
  23             'ext': 'mp4',
  24             'title': 'FIFA 14 - E3 2013 Trailer',
  25             'duration': 82,
  26         },
  27         'skip': 'Blocked in the US'
  28     }
  29
  30     def _real_extract(self, url):
  31         mobj = re.match(self._VALID_URL, url)
  32         video_id = mobj.group(1)
  33
  34         info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
  35                     (video_id, int(time.time())))
  36         doc = self._download_xml(
  37             info_url, video_id, note='Downloading info page')
  38         title = doc.find('title').text
  39         video_url = doc.find('filename').text
  40         if video_url is None:
  41             xml_bytes = xml.etree.ElementTree.tostring(doc)
  42             raise ExtractorError('Cannot find video URL in document %r' %
  43                                  xml_bytes)
  44         thumbnail = doc.find('imageurl').text
  45         duration = parse_duration(doc.find('duration').text)
  46
  47         return {
  48             'id': video_id,
  49             'title': title,
  50             'url': video_url,
  51             'thumbnail': thumbnail,
  52             'duration': duration,
  53         }