Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/clipfish.py
669919a2cc9039ffb91ae052b96d5531665341e0
[youtubedl] / youtube_dl / extractor / clipfish.py
1 from __future__ import unicode_literals
2
3 import re
4 import time
5 import xml.etree.ElementTree
6
7 from .common import InfoExtractor
8 from ..utils import (
9 ExtractorError,
10 parse_duration,
11 )
12
13
class ClipfishIE(InfoExtractor):
    """Information extractor for clipfish.de video pages.

    The numeric video id is read from the page URL and used to query the
    site's ``devxml/videoinfo`` XML endpoint, from which the title, media
    URL, thumbnail URL and duration are taken.
    """

    IE_NAME = 'clipfish'

    _VALID_URL = r'^https?://(?:www\.)?clipfish\.de/.*?/video/(?P<id>[0-9]+)/'
    _TEST = {
        'url': 'http://www.clipfish.de/special/game-trailer/video/3966754/fifa-14-e3-2013-trailer/',
        'md5': '2521cd644e862936cf2e698206e47385',
        'info_dict': {
            'id': '3966754',
            'ext': 'mp4',
            'title': 'FIFA 14 - E3 2013 Trailer',
            'duration': 82,
        },
        'skip': 'Blocked in the US'
    }

    def _real_extract(self, url):
        """Build the info dict for the clipfish video at *url*.

        Returns a dict with the keys ``id``, ``title``, ``url``,
        ``thumbnail`` and ``duration``.  ``title``, ``thumbnail`` and
        ``duration`` are None when the corresponding XML element is
        missing or empty.

        Raises ExtractorError when the info document carries no media URL
        (``filename`` element missing or empty).
        """
        mobj = re.match(self._VALID_URL, url)
        # Use the named group declared in _VALID_URL rather than its position.
        video_id = mobj.group('id')

        # The current timestamp is appended as the ``ts`` query parameter.
        info_url = ('http://www.clipfish.de/devxml/videoinfo/%s?ts=%d' %
                    (video_id, int(time.time())))
        doc = self._download_xml(
            info_url, video_id, note='Downloading info page')

        # findtext() yields None for a missing element and '' for an empty
        # one, so neither case can trigger an AttributeError the way
        # ``doc.find(tag).text`` does when the element is absent.
        video_url = doc.findtext('filename')
        if not video_url:
            xml_bytes = xml.etree.ElementTree.tostring(doc)
            raise ExtractorError('Cannot find video URL in document %r' %
                                 xml_bytes)

        # Remaining fields: normalise "missing" and "empty" to None.
        title = doc.findtext('title') or None
        thumbnail = doc.findtext('imageurl') or None
        duration_text = doc.findtext('duration')
        duration = parse_duration(duration_text) if duration_text else None

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'duration': duration,
        }