Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vshare.py
debian/copyright: use spaces rather than tabs to start continuation lines.
[youtubedl] / youtube_dl / extractor / vshare.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import compat_chr
8 from ..utils import (
9 decode_packed_codes,
10 ExtractorError,
11 )
12
13
class VShareIE(InfoExtractor):
    """Information extractor for vshare.io video pages.

    Accepts both the ``/d/<id>`` and ``/v/<id>`` URL forms (see
    ``_VALID_URL``).  The media markup is not present in the page as plain
    HTML; it is recovered from a packed script whose character codes are
    shifted by a numeric key (see ``_extract_packed``).
    """

    _VALID_URL = r'https?://(?:www\.)?vshare\.io/[dv]/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://vshare.io/d/0f64ce6',
        'md5': '17b39f55b5497ae8b59f5fbce8e35886',
        'info_dict': {
            'id': '0f64ce6',
            'title': 'vl14062007715967',
            'ext': 'mp4',
        }
    }, {
        'url': 'https://vshare.io/v/0f64ce6/width-650/height-430/1',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_urls(webpage):
        """Return the vshare.io ``/v/`` iframe URLs found in *webpage*.

        Each result is the ``src`` value of an ``<iframe>`` up to (but not
        including) the first ``/``, ``?``, ``#`` or ``&`` after the video
        id.  The scheme may be absent (protocol-relative URLs are matched).
        """
        return re.findall(
            r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?vshare\.io/v/[^/?#&]+)',
            webpage)

    def _extract_packed(self, webpage):
        """Decode the obfuscated markup carried by *webpage*.

        The page is searched for an ``eval(function...`` script, which is
        unpacked with ``decode_packed_codes``.  The unpacked script is
        expected to contain a bracketed list of integers and a
        ``fromCharCode(... <key>)}`` expression; every integer minus the
        key is interpreted as a character code.

        Returns the decoded text as a single string.  The regex searches
        are performed without a ``default``, so a missing pattern is
        reported by ``_search_regex`` itself.
        """
        packed = self._search_regex(
            r'(eval\(function.+)', webpage, 'packed code')
        unpacked = decode_packed_codes(packed)

        digits = self._search_regex(
            r'\[((?:\d+,?)+)\]', unpacked, 'digits')
        # The pattern above also accepts a trailing comma ("1,2,3,"), which
        # would yield an empty item with str.split(',') and make int() fail.
        # Collecting the digit runs directly avoids that.
        codes = [int(digit) for digit in re.findall(r'\d+', digits)]

        # Convert the key once instead of once per character.
        key = int(self._search_regex(
            r'fromCharCode\(.+?(\d+)\)}', unpacked, 'key digit'))

        return ''.join(compat_chr(code - key) for code in codes)

    def _real_extract(self, url):
        """Extract the info dict for the video referenced by *url*.

        Downloads the fixed-size ``/v/<id>/width-650/height-430/1`` page
        (sending the original URL as ``Referer``), raises an expected
        ``ExtractorError`` carrying the site's own message when an
        ``xxx-error`` block is present, and otherwise builds the formats
        from the decoded ``<video>`` markup.

        Returns the first parsed media entry, updated with ``id`` and
        ``title``.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://vshare.io/v/%s/width-650/height-430/1' % video_id,
            video_id, headers={'Referer': url})

        # Look for the site's error banner before anything that may fail
        # fatally (the title search below has no default), so that the
        # site's own message is what gets reported.
        error = self._html_search_regex(
            r'(?s)<div[^>]+\bclass=["\']xxx-error[^>]+>(.+?)</div', webpage,
            'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)

        title = self._html_search_regex(
            r'<title>([^<]+)</title>', webpage, 'title')
        # Keep only the part of the page title before the first ' - '.
        title = title.split(' - ')[0]

        entries = self._parse_html5_media_entries(
            url, '<video>%s</video>' % self._extract_packed(webpage),
            video_id)
        if not entries:
            # Indexing an empty result would surface as a bare IndexError.
            raise ExtractorError('Unable to extract video formats')
        info = entries[0]

        self._sort_formats(info['formats'])

        info.update({
            'id': video_id,
            'title': title,
        })

        return info