Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/streamcloud.py
debian/NEWS: Write news about new behavior of youtube-dl.
[youtubedl] / youtube_dl / extractor / streamcloud.py
1 # coding: utf-8
2 import re
3 import time
4
5 from .common import InfoExtractor
6 from ..utils import (
7 compat_urllib_parse,
8 compat_urllib_request,
9 )
10
11
class StreamcloudIE(InfoExtractor):
    """Information extractor for videos hosted on streamcloud.eu.

    The video page is downloaded once to collect its hidden/submit form
    fields. After a pause those fields are POSTed back to the same URL, and
    the title, media URL, duration and thumbnail are read from the page
    returned by that POST.
    """

    IE_NAME = u'streamcloud.eu'
    _VALID_URL = r'https?://streamcloud\.eu/(?P<id>[a-zA-Z0-9_-]+)/(?P<fname>[^#?]*)\.html'

    _TEST = {
        u'url': u'http://streamcloud.eu/skp9j99s4bpz/youtube-dl_test_video_____________-BaW_jenozKc.mp4.html',
        u'file': u'skp9j99s4bpz.mp4',
        u'md5': u'6bea4c7fa5daaacc2a946b7146286686',
        u'info_dict': {
            u'title': u'youtube-dl test video \'/\\ ä ↭',
            u'duration': 9,
        },
        u'skip': u'Only available from the EU'
    }

    def _real_extract(self, url):
        """Return the info dict for the streamcloud.eu video at *url*.

        *url* is expected to match ``_VALID_URL``; the ``id`` group of that
        pattern becomes the video id.

        The returned dict has the keys ``id``, ``title``, ``url``,
        ``duration`` (an int, or None when no duration was found) and
        ``thumbnail``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        orig_webpage = self._download_webpage(url, video_id)

        # Collect the (name, value) pairs of every hidden/submit <input> so
        # the page's form can be re-submitted unchanged.
        fields = re.findall(r'''(?x)<input\s+
            type="(?:hidden|submit)"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', orig_webpage)
        # The request body must be bytes on Python 3 (a text str raises
        # TypeError when the request is opened). urlencode() output is
        # percent-encoded, hence pure ASCII, so this is safe on Python 2 too.
        post = compat_urllib_parse.urlencode(fields).encode('ascii')

        # Pause before re-submitting the form.
        # NOTE(review): presumably the site enforces a waiting period of
        # about this length -- confirm before changing the value.
        self.to_screen('%s: Waiting for timeout' % video_id)
        time.sleep(12)
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
        }
        req = compat_urllib_request.Request(url, post, headers)

        webpage = self._download_webpage(
            req, video_id, note=u'Downloading video page ...')
        title = self._html_search_regex(
            r'<h1[^>]*>([^<]+)<', webpage, u'title')
        video_url = self._search_regex(
            r'file:\s*"([^"]+)"', webpage, u'video URL')
        # Duration and thumbnail are looked up with fatal=False; a missing
        # duration is passed through as None instead of being converted.
        duration_str = self._search_regex(
            r'duration:\s*"?([0-9]+)"?', webpage, u'duration', fatal=False)
        duration = None if duration_str is None else int(duration_str)
        thumbnail = self._search_regex(
            r'image:\s*"([^"]+)"', webpage, u'thumbnail URL', fatal=False)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'duration': duration,
            'thumbnail': thumbnail,
        }