Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/primesharetv.py
Imported Upstream version 2015.05.15
[youtubedl] / youtube_dl / extractor / primesharetv.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..compat import (
7 compat_urllib_parse,
8 compat_urllib_request,
9 )
10 from ..utils import ExtractorError
11
12
class PrimeShareTVIE(InfoExtractor):
    """Extractor for videos served from primeshare.tv ``/download/<id>`` pages."""

    # The video id is the alphanumeric path component after /download/.
    _VALID_URL = r'https?://(?:www\.)?primeshare\.tv/download/(?P<id>[\da-zA-Z]+)'

    _TEST = {
        'url': 'http://primeshare.tv/download/238790B611',
        'md5': 'b92d9bf5461137c36228009f31533fbc',
        'info_dict': {
            'id': '238790B611',
            'ext': 'mp4',
            'title': 'Public Domain - 1960s Commercial - Crest Toothpaste-YKsuFona',
        },
    }

    def _real_extract(self, url):
        """Resolve a primeshare.tv download page to a direct media URL.

        The landing page is fetched, its hidden form fields are collected and
        posted back to the same URL after a delay; the resulting page is then
        scanned for the media URL and the title.

        Returns a dict with the keys ``id``, ``url``, ``title`` and ``ext``.

        Raises ExtractorError (``expected=True``) when the landing page
        contains the '>File not exist<' marker.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        if '>File not exist<' in webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        # Collect every hidden <input> as a name -> value mapping; these are
        # sent back verbatim as the POST form below.
        fields = dict(re.findall(r'''(?x)<input\s+
            type="hidden"\s+
            name="([^"]+)"\s+
            (?:id="[^"]+"\s+)?
            value="([^"]*)"
            ''', webpage))

        headers = {
            'Referer': url,
            'Content-Type': 'application/x-www-form-urlencoded',
        }

        # Wait one second longer than the page's cWaitTime value (7 is used
        # when the variable is not found).
        # NOTE(review): presumably the site rejects the form if it is posted
        # before its countdown finishes - confirm.
        wait_time = int(self._search_regex(
            r'var\s+cWaitTime\s*=\s*(\d+)',
            webpage, 'wait time', default=7)) + 1
        self._sleep(wait_time, video_id)

        # On Python 3 urlencode() returns text, but the request body must be
        # bytes (urllib raises TypeError for str POST data). The urlencoded
        # form is pure ASCII, so encoding it is lossless; on Python 2 this is
        # a no-op round trip for the ASCII byte string.
        post_data = compat_urllib_parse.urlencode(fields).encode('ascii')
        req = compat_urllib_request.Request(url, post_data, headers)
        video_page = self._download_webpage(
            req, video_id, 'Downloading video page')

        video_url = self._search_regex(
            r"url\s*:\s*'([^']+\.primeshare\.tv(?::443)?/file/[^']+)'",
            video_page, 'video url')

        # The heading looks like "<h1>Watch (TITLE [...])<strong>"; the
        # optional "[...]" suffix is excluded from the captured title.
        title = self._html_search_regex(
            r'<h1>Watch\s*(?:&nbsp;)?\s*\((.+?)(?:\s*\[\.\.\.\])?\)\s*(?:&nbsp;)?\s*<strong>',
            video_page, 'title')

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'ext': 'mp4',
        }