Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/promptfile.py
Merge tag 'upstream/2014.11.21'
[youtubedl] / youtube_dl / extractor / promptfile.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 ExtractorError,
9 determine_ext,
10 compat_urllib_parse,
11 compat_urllib_request,
12 )
13
14
class PromptFileIE(InfoExtractor):
    """Information extractor for videos hosted on promptfile.com."""

    _VALID_URL = r'https?://(?:www\.)?promptfile\.com/l/(?P<id>[0-9A-Z\-]+)'
    _TEST = {
        'url': 'http://www.promptfile.com/l/D21B4746E9-F01462F0FF',
        'md5': 'd1451b6302da7215485837aaea882c4c',
        'info_dict': {
            'id': 'D21B4746E9-F01462F0FF',
            'ext': 'mp4',
            'title': 'Birds.mp4',
            # Raw string: '\.' is not a valid escape in a plain literal.
            'thumbnail': r're:^https?://.*\.jpg$',
        }
    }

    def _real_extract(self, url):
        """Extract the media URL, title and thumbnail for a promptfile link.

        The landing page is fetched first; the hidden form fields found on
        it are then POSTed back to the same URL to obtain the page that
        carries the actual video information.

        Returns a dict with the keys 'id', 'title', 'thumbnail' and
        'formats'.

        Raises ExtractorError (expected=True) when the landing page shows
        a "not found" message for the video.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # A populated "not_found_msg" div signals a missing video; the
        # negative lookahead skips divs whose text starts with "We are".
        if re.search(r'<div.+id="not_found_msg".+>(?!We are).+</div>[^-]', webpage) is not None:
            raise ExtractorError('Video %s does not exist' % video_id,
                                 expected=True)

        # Collect every hidden <input> as a name -> value mapping; these are
        # replayed as the body of the POST request below.
        fields = dict(re.findall(r'''(?x)type="hidden"\s+
            name="(.+?)"\s+
            value="(.*?)"
            ''', webpage))
        # urllib expects the POST payload as bytes on Python 3; urlencoded
        # output is pure ASCII, so this encoding is lossless (and a no-op
        # on Python 2 byte strings).
        post = compat_urllib_parse.urlencode(fields).encode('ascii')
        req = compat_urllib_request.Request(url, post)
        req.add_header('Content-type', 'application/x-www-form-urlencoded')
        webpage = self._download_webpage(
            req, video_id, 'Downloading video page')

        # From here on `url` refers to the media URL found in the second page.
        url = self._html_search_regex(r'url:\s*\'([^\']+)\'', webpage, 'URL')
        title = self._html_search_regex(
            r'<span.+title="([^"]+)">', webpage, 'title')
        # The thumbnail is optional: fatal=False keeps a missing image from
        # aborting the extraction.
        thumbnail = self._html_search_regex(
            r'<div id="player_overlay">.*button>.*?<img src="([^"]+)"',
            webpage, 'thumbnail', fatal=False, flags=re.DOTALL)

        formats = [{
            'format_id': 'sd',
            'url': url,
            # The extension is derived from the title (e.g. 'Birds.mp4').
            'ext': determine_ext(title),
        }]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'formats': formats,
        }