]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/xfileshare.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
17 class XFileShareIE(InfoExtractor
):
19 ('daclips.in', 'DaClips'),
20 ('filehoot.com', 'FileHoot'),
21 ('gorillavid.in', 'GorillaVid'),
22 ('movpod.in', 'MovPod'),
23 ('powerwatch.pw', 'PowerWatch'),
24 ('rapidvideo.ws', 'Rapidvideo.ws'),
25 ('thevideobee.to', 'TheVideoBee'),
26 ('vidto.me', 'Vidto'),
27 ('streamin.to', 'Streamin.To'),
28 ('xvidstage.com', 'XVIDSTAGE'),
31 IE_DESC
= 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES
))[1])
32 _VALID_URL
= (r
'https?://(?P<host>(?:www\.)?(?:%s))/(?:embed-)?(?P<id>[0-9a-zA-Z]+)'
33 % '|'.join(re
.escape(site
) for site
in list(zip(*_SITES
))[0]))
35 _FILE_NOT_FOUND_REGEXES
= (
36 r
'>(?:404 - )?File Not Found<',
37 r
'>The file was removed by administrator<',
41 'url': 'http://gorillavid.in/06y9juieqpmi',
42 'md5': '5ae4a3580620380619678ee4875893ba',
46 'title': 'Rebecca Black My Moment Official Music Video Reaction-6GK87Rc8bzQ',
47 'thumbnail': r
're:http://.*\.jpg',
50 'url': 'http://gorillavid.in/embed-z08zf8le23c6-960x480.html',
51 'only_matching': True,
53 'url': 'http://daclips.in/3rso4kdn6f9m',
54 'md5': '1ad8fd39bb976eeb66004d3a4895f106',
58 'title': 'Micro Pig piglets ready on 16th July 2009-bG0PdrCdxUc',
59 'thumbnail': r
're:http://.*\.jpg',
62 'url': 'http://movpod.in/0wguyyxi1yca',
63 'only_matching': True,
65 'url': 'http://filehoot.com/3ivfabn7573c.html',
69 'title': 'youtube-dl test video \'äBaW_jenozKc.mp4.mp4',
70 'thumbnail': r
're:http://.*\.jpg',
72 'skip': 'Video removed',
74 'url': 'http://vidto.me/ku5glz52nqe1.html',
81 'url': 'http://powerwatch.pw/duecjibvicbu',
85 'title': 'Big Buck Bunny trailer',
88 'url': 'http://xvidstage.com/e0qcnl03co6z',
92 'title': 'Chucky Prank 2015.mp4',
95 # removed by administrator
96 'url': 'http://xvidstage.com/amfy7atlkx25',
97 'only_matching': True,
100 def _real_extract(self
, url
):
101 mobj
= re
.match(self
._VALID
_URL
, url
)
102 video_id
= mobj
.group('id')
104 url
= 'http://%s/%s' % (mobj
.group('host'), video_id
)
105 webpage
= self
._download
_webpage
(url
, video_id
)
107 if any(re
.search(p
, webpage
) for p
in self
._FILE
_NOT
_FOUND
_REGEXES
):
108 raise ExtractorError('Video %s does not exist' % video_id
, expected
=True)
110 fields
= self
._hidden
_inputs
(webpage
)
112 if fields
['op'] == 'download1':
113 countdown
= int_or_none(self
._search
_regex
(
114 r
'<span id="countdown_str">(?:[Ww]ait)?\s*<span id="cxc">(\d+)</span>\s*(?:seconds?)?</span>',
115 webpage
, 'countdown', default
=None))
117 self
._sleep
(countdown
, video_id
)
119 post
= urlencode_postdata(fields
)
121 req
= sanitized_Request(url
, post
)
122 req
.add_header('Content-type', 'application/x-www-form-urlencoded')
124 webpage
= self
._download
_webpage
(req
, video_id
, 'Downloading video page')
126 title
= (self
._search
_regex
(
127 (r
'style="z-index: [0-9]+;">([^<]+)</span>',
128 r
'<td nowrap>([^<]+)</td>',
129 r
'h4-fine[^>]*>([^<]+)<',
131 r
'<h2 class="video-page-head">([^<]+)</h2>',
132 r
'<h2 style="[^"]*color:#403f3d[^"]*"[^>]*>([^<]+)<'), # streamin.to
133 webpage
, 'title', default
=None) or self
._og
_search
_title
(
134 webpage
, default
=None) or video_id
).strip()
136 def extract_video_url(default
=NO_DEFAULT
):
137 return self
._search
_regex
(
138 (r
'file\s*:\s*(["\'])(?P
<url
>http
.+?
)\
1,',
139 r'file_link\s
*=\s
*(["\'])(?P<url>http.+?)\1',
140 r'addVariable\((\\?["\'])file\
1\s
*,\s
*(\\?
["\'])(?P<url>http.+?)\2\)',
141 r'<embed[^>]+src=(["\'])(?P
<url
>http
.+?
)\
1'),
142 webpage, 'file url
', default=default, group='url
')
144 video_url = extract_video_url(default=None)
147 webpage = decode_packed_codes(self._search_regex(
148 r"(}\('(.+)',(\d+),(\d+),'[^
']*\b(?:file|embed)\b[^']*'\.split\('\|
'\))",
149 webpage, 'packed code
'))
150 video_url = extract_video_url()
152 thumbnail = self._search_regex(
153 r'image\s
*:\s
*["\'](http[^"\']+)["\'],', webpage, 'thumbnail', default=None)
164 'thumbnail': thumbnail,