]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yapfiles.py
debian/control: Update list of extractors in long description.
[youtubedl] / youtube_dl / extractor / yapfiles.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7 from ..utils import (
8 ExtractorError,
9 int_or_none,
10 qualities,
11 unescapeHTML,
12 url_or_none,
13 )
14
15
class YapFilesIE(InfoExtractor):
    # Extractor for yapfiles.ru "get_player" URLs (bare, www. and api. hosts).

    # Scheme-less player URL pattern; the ``v`` query parameter is captured as
    # the video id. Having no scheme lets the same pattern back both
    # _VALID_URL and the iframe scan in _extract_urls.
    _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
    _VALID_URL = r'https?:%s' % _YAPFILES_URL
    _TESTS = [{
        # video with an hd variant
        'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
        'md5': '2db19e2bfa2450568868548a1aa1956c',
        'info_dict': {
            'id': 'vMDE1NjcyNDUt0413',
            'ext': 'mp4',
            'title': 'Самый худший пароль WIFI',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 72,
        },
    }, {
        # video without an hd variant
        'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
        'only_matching': True,
    }]

    # Return the HTML-unescaped ``src`` value of every <iframe> in ``webpage``
    # that points at the player; the scheme in front of ``//`` is optional.
    @staticmethod
    def _extract_urls(webpage):
        iframe_re = (
            r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
            % YapFilesIE._YAPFILES_URL)
        found = []
        for match in re.finditer(iframe_re, webpage):
            found.append(unescapeHTML(match.group('url')))
        return found

    # Build the info dict for one video:
    #   1. find the player metadata URL (from the page, else the API),
    #   2. read title / poster / playlist URL from the metadata,
    #   3. read the sd / hd file URLs from the playlist.
    # Raises an expected ExtractorError when the metadata marks the video as
    # removed.
    def _real_extract(self, url):
        video_id = self._match_id(url)

        # The page download is non-fatal: with no page, or no
        # player.init(...) call in it, the API endpoint below is used.
        webpage = self._download_webpage(url, video_id, fatal=False)

        player_url = None
        if webpage:
            player_url = self._search_regex(
                r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
                'player url', default=None, group='url')

        if player_url:
            # URL taken from the page is requested as-is.
            query = {}
        else:
            player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
            # NOTE(review): the md5 value is a fixed constant; what the
            # service derives it from is not visible here.
            query = {
                'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
                'type': 'json',
                'ref': url,
            }

        player_data = self._download_json(player_url, video_id, query=query)
        player = player_data['player']

        playlist_url = player['playlist']
        title = player['title']
        thumbnail = player.get('poster')

        # A removed video is recognised by its placeholder title or poster.
        is_removed = (
            title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''))
        if is_removed:
            raise ExtractorError(
                'Video %s has been removed' % video_id, expected=True)

        playlist_data = self._download_json(playlist_url, video_id)
        playlist = playlist_data['player']['main']

        hd_height = int_or_none(player.get('hd'))

        # Listed worst to best; ``rank`` turns that order into a preference.
        quality_ids = ('sd', 'hd')
        rank = qualities(quality_ids)

        formats = []
        for format_id in quality_ids:
            # Only the hd entry uses the suffixed key and has a known height.
            if format_id == 'hd':
                key_suffix, height = '_hd', hd_height
            else:
                key_suffix, height = '', None
            format_url = url_or_none(playlist.get('file%s' % key_suffix))
            if format_url:
                formats.append({
                    'url': format_url,
                    'format_id': format_id,
                    'quality': rank(format_id),
                    'height': height,
                })
        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': int_or_none(player.get('length')),
            'formats': formats,
        }