Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/moniker.py
Merge tag 'upstream/2016.06.25'
[youtubedl] / youtube_dl / extractor / moniker.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import os.path
5 import re
6
7 from .common import InfoExtractor
8 from ..utils import (
9 ExtractorError,
10 remove_start,
11 sanitized_Request,
12 urlencode_postdata,
13 )
14
15
class MonikerIE(InfoExtractor):
    """Information extractor for allmyvideos.net and vidspot.net video pages."""

    IE_DESC = 'allmyvideos.net and vidspot.net'
    # The id group also accepts '-', so an "embed-<id>" path is captured whole
    # and the "embed-" prefix is stripped later in _real_extract.
    _VALID_URL = r'https?://(?:www\.)?(?:allmyvideos|vidspot)\.net/(?:(?:2|v)/v-)?(?P<id>[a-zA-Z0-9_-]+)'

    _TESTS = [{
        'url': 'http://allmyvideos.net/jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://allmyvideos.net/embed-jih3nce3x6wn',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'jih3nce3x6wn',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'http://vidspot.net/l2ngsmhs8ci5',
        'md5': '710883dee1bfc370ecf9fa6a89307c88',
        'info_dict': {
            'id': 'l2ngsmhs8ci5',
            'ext': 'mp4',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.vidspot.net/l2ngsmhs8ci5',
        'only_matching': True,
    }, {
        'url': 'http://vidspot.net/2/v-ywDf99',
        'md5': '5f8254ce12df30479428b0152fb8e7ba',
        'info_dict': {
            'id': 'ywDf99',
            'ext': 'mp4',
            'title': 'IL FAIT LE MALIN EN PORSHE CAYENNE ( mais pas pour longtemps)',
            'description': 'IL FAIT LE MALIN EN PORSHE CAYENNE.',
        },
    }, {
        'url': 'http://allmyvideos.net/v/v-HXZm5t',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for a single video page.

        url -- page URL matching _VALID_URL; an "embed-" prefix on the id is
               stripped before the page is requested.

        Returns a dict with the 'id', 'title', 'description' and 'formats'
        keys. 'description' is None when the page is handled through the
        hidden-form flow or carries no description.

        Raises ExtractorError when the URL is no longer valid after the
        prefix has been stripped, when the page reports a missing file or a
        site error, or when the hidden form carries no 'fname' field to
        derive the title from.
        """
        orig_video_id = self._match_id(url)
        video_id = remove_start(orig_video_id, 'embed-')
        url = url.replace(orig_video_id, video_id)
        # Stripping the prefix can leave an empty id (e.g. ".../embed-").
        # Raise explicitly instead of asserting: asserts are removed under
        # "python -O" and would otherwise surface as a bare AssertionError.
        if re.match(self._VALID_URL, url) is None:
            raise ExtractorError('Invalid URL: %s' % url, expected=True)
        orig_webpage = self._download_webpage(url, video_id)

        if '>File Not Found<' in orig_webpage:
            raise ExtractorError('Video %s does not exist' % video_id, expected=True)

        error = self._search_regex(
            r'class="err">([^<]+)<', orig_webpage, 'error', default=None)
        if error:
            raise ExtractorError(
                '%s returned error: %s' % (self.IE_NAME, error), expected=True)

        builtin_url = self._search_regex(
            r'<iframe[^>]+src=(["\'])(?P<url>.+?/builtin-.+?)\1',
            orig_webpage, 'builtin URL', default=None, group='url')

        if builtin_url:
            # The player lives in an iframe; request it with the original
            # page as Referer and take the metadata from the original page.
            req = sanitized_Request(builtin_url)
            req.add_header('Referer', url)
            webpage = self._download_webpage(req, video_id, 'Downloading builtin page')
            title = self._og_search_title(orig_webpage).strip()
            # The description is optional metadata; guard against the helper
            # returning None so a missing tag does not abort the extraction.
            description = self._og_search_description(orig_webpage)
            if description is not None:
                description = description.strip()
        else:
            # No iframe: re-submit the page's hidden form fields via POST to
            # obtain the page that contains the media links.
            data = dict(re.findall(
                r'type="hidden" name="(.+?)"\s* value="?(.+?)">', orig_webpage))

            post = urlencode_postdata(data)
            headers = {
                b'Content-Type': b'application/x-www-form-urlencoded',
            }
            req = sanitized_Request(url, post, headers)
            webpage = self._download_webpage(
                req, video_id, note='Downloading video page ...')

            # The title is the file name from the form, minus its extension.
            fname = data.get('fname')
            if fname is None:
                raise ExtractorError('Unable to extract title')
            title = os.path.splitext(fname)[0]
            description = None

        # Could be several links with different quality
        links = re.findall(r'"file" : "?(.+?)",', webpage)
        # Assume the links are ordered in quality
        formats = [{
            'url': link,
            'quality': quality,
        } for quality, link in enumerate(links)]
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'formats': formats,
        }