Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/clipfish.py

   1 import re
   2 import time
   3 import xml.etree.ElementTree
   4
   5 from .common import InfoExtractor
   6 from ..utils import ExtractorError
   7
   8
   9 class ClipfishIE(InfoExtractor):
  10     IE_NAME = u'clipfish'
  11
  12     _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
  13     _TEST = {
  14         u'url': u'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
  15         u'file': u'3966754.mp4',
  16         u'md5': u'2521cd644e862936cf2e698206e47385',
  17         u'info_dict': {
  18             u'title': u'FIFA 14 - E3 2013 Trailer',
  19             u'duration': 82,
  20         },
  21         u'skip': 'Blocked in the US'
  22     }
  23
  24     def _real_extract(self, url):
  25         mobj = re.match(self._VALID_URL, url)
  26         video_id = mobj.group(1)
  27
  28         info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
  29                     (video_id, int(time.time())))
  30         doc = self._download_xml(
  31             info_url, video_id, note=u'Downloading info page')
  32         title = doc.find('title').text
  33         video_url = doc.find('filename').text
  34         if video_url is None:
  35             xml_bytes = xml.etree.ElementTree.tostring(doc)
  36             raise ExtractorError(u'Cannot find video URL in document %r' %
  37                                  xml_bytes)
  38         thumbnail = doc.find('imageurl').text
  39         duration_str = doc.find('duration').text
  40         m = re.match(
  41             r'^(?P<hours>[0-9]+):(?P<minutes>[0-9]{2}):(?P<seconds>[0-9]{2}):(?P<ms>[0-9]*)$',
  42             duration_str)
  43         if m:
  44             duration = (
  45                 (int(m.group('hours')) * 60 * 60) +
  46                 (int(m.group('minutes')) * 60) +
  47                 (int(m.group('seconds')))
  48             )
  49         else:
  50             duration = None
  51
  52         return {
  53             'id': video_id,
  54             'title': title,
  55             'url': video_url,
  56             'thumbnail': thumbnail,
  57             'duration': duration,
  58         }