# coding: utf-8
from __future__ import unicode_literals

import os.path
import re

from .common import InfoExtractor
from ..compat import (
    compat_urllib_parse,
    compat_urllib_request,
)
from ..utils import ExtractorError


class MonikerIE(InfoExtractor):
    IE_DESC = 'allmyvideos.net and vidspot.net'
    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?P<id>[a-zA-Z0-9_-]+)'

    _TESTS = [{
        'url': 'http://allmyvideos.net/jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://vidspot.net/l2ngsmhs8ci5',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'l2ngsmhs8ci5',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        orig_webpage = self._download_webpage(url, video_id)

        if '>File Not Found<' in orig_webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        error = self._search_regex(
            r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

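        # Collect the hidden form fields from the landing page; they are
        # re-submitted below to obtain the page that embeds the stream links.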
        fields = re.findall(r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage)
        data = dict(fields)

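        # Re-submit the form fields via POST to the same URL; the response is
        # the page that actually contains the video file links.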
        # Encode the body so the POST data is bytes under Python 3 as well.
        post = compat_urllib_parse.urlencode(data).encode('ascii')
        headers = {
            b'Content-Type': b'application/x-www-form-urlencoded',
        }
        req = compat_urllib_request.Request(url, post, headers)
        webpage = self._download_webpage(
            req, video_id, note='Downloading video page ...')

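        # The 'fname' form field holds the file name; drop the extension to
        # use it as the title.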
        title = os.path.splitext(data['fname'])[0]

        # Could be several links with different qualities
        links = re.findall(r'"file" : "?(.+?)",', webpage)
        # Assume the links are ordered by quality
        formats = [{
            'url': l,
            'quality': i,
        } for i, l in enumerate(links)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
        }