]> Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vodlocker.py
be0a2780f5299571ee134ac47e17eda6476673ac
[youtubedl] / youtube_dl / extractor / vodlocker.py
1 # -*- coding: utf-8 -*-
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5 from ..compat import compat_urllib_parse
6 from ..utils import sanitized_Request
7
8
class VodlockerIE(InfoExtractor):
    """Information extractor for videos hosted on vodlocker.com."""

    # The named group ``id`` is what ``_match_id`` pulls out of the URL.
    _VALID_URL = r'https?://(?:www\.)?vodlocker\.com/(?P<id>[0-9a-zA-Z]+)(?:\..*?)?'

    _TESTS = [{
        'url': 'http://vodlocker.com/e8wvyzz4sl42',
        'md5': 'ce0c2d18fa0735f1bd91b69b0e54aacf',
        'info_dict': {
            'id': 'e8wvyzz4sl42',
            'ext': 'mp4',
            'title': 'Germany vs Brazil',
            # Raw literal: '\.' is not a valid string escape sequence.
            'thumbnail': r're:http://.*\.jpg',
        },
    }]

    def _real_extract(self, url):
        """Extract the title, thumbnail and media URL for a vodlocker page.

        The landing page may carry a hidden form whose ``op`` field equals
        ``download1``; in that case the form is re-submitted as a POST to
        the same URL (after a short pause) and the response is used as the
        page to scrape.

        :param url: page URL matching ``_VALID_URL``.
        :returns: dict with ``id``, ``title``, ``thumbnail`` (``None`` when
            it cannot be found) and a single-entry ``formats`` list.

        Failures to locate the title or the file URL are reported by
        ``_search_regex``; download errors propagate from
        ``_download_webpage``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        fields = self._hidden_inputs(webpage)

        # .get(): a page without an 'op' input must not die with a bare
        # KeyError; the regex lookups below report a meaningful error.
        if fields.get('op') == 'download1':
            self._sleep(3, video_id)  # they do detect when requests happen too fast!
            # urlencode() output is pure ASCII; a request body has to be
            # bytes on Python 3, and encoding is harmless on Python 2.
            post = compat_urllib_parse.urlencode(fields).encode('ascii')
            req = sanitized_Request(url, post)
            req.add_header('Content-type', 'application/x-www-form-urlencoded')
            webpage = self._download_webpage(
                req, video_id, 'Downloading video page')

        title = self._search_regex(
            r'id="file_title".*?>\s*(.*?)\s*<(?:br|span)', webpage, 'title')
        # The thumbnail is optional metadata: do not abort the extraction
        # when the player setup carries no image.
        thumbnail = self._search_regex(
            r'image:\s*"(http[^\"]+)",', webpage, 'thumbnail', fatal=False)
        # Separate name so the ``url`` parameter is not shadowed.
        video_url = self._search_regex(
            r'file:\s*"(http[^\"]+)",', webpage, 'file url')

        formats = [{
            'format_id': 'sd',
            'url': video_url,
        }]

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }