Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/funk.py
Update upstream source from tag 'upstream/2018.11.07'
[youtubedl] / youtube_dl / extractor / funk.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
import itertools
import re

from .common import InfoExtractor
from .nexx import NexxIE
from ..compat import compat_str
from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
)
14
15
class FunkBaseIE(InfoExtractor):
    """Common base for the funk.net extractors.

    Provides the default request headers (including the static
    authorization token) and a helper that converts a funk.net video
    record into a result handed over to the Nexx extractor.
    """

    # Static authorization token; the same value is sent as the
    # 'authorization' request header below.
    _AUTH = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoid2ViYXBwLXYzMSIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxuZXh4LWNvbnRlbnQtYXBpLXYzMSx3ZWJhcHAtYXBpIn0.mbuG9wS9Yf5q6PqgR4fiaRFIagiHk9JhwoKES7ksVX4'
    # Default headers; _make_headers() copies these and adds a Referer.
    _HEADERS = {
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.9,ru;q=0.8',
        'authorization': _AUTH,
    }

    @staticmethod
    def _make_headers(referer):
        """Return a fresh copy of the default headers with Referer set.

        The class-level ``_HEADERS`` dict is never modified.
        """
        request_headers = dict(FunkBaseIE._HEADERS)
        request_headers.update({'Referer': referer})
        return request_headers

    def _make_url_result(self, video):
        """Build a ``url_transparent`` result for a funk.net video record.

        ``video`` must contain ``sourceId`` (a missing key raises
        ``KeyError``); it is used both as the result id and inside the
        ``nexx:741:<sourceId>`` URL handled by ``NexxIE``. The remaining
        metadata fields are optional and default to ``None``.
        """
        source_id = video['sourceId']
        result = {
            '_type': 'url_transparent',
            'url': 'nexx:741:%s' % source_id,
            'ie_key': NexxIE.ie_key(),
            'id': source_id,
            'title': video.get('title'),
            'description': video.get('description'),
        }
        # Numeric metadata: result field name -> key in the video record.
        numeric_fields = (
            ('duration', 'duration'),
            ('season_number', 'seasonNr'),
            ('episode_number', 'episodeNr'),
        )
        for field, source_key in numeric_fields:
            result[field] = int_or_none(video.get(source_key))
        return result
42
43
class FunkMixIE(FunkBaseIE):
    """Extractor for funk.net mix URLs of the form ``/mix/<id>/<alias>``."""

    _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
        'md5': '8edf617c2f2b7c9847dfda313f199009',
        'info_dict': {
            'id': '123748',
            'ext': 'mp4',
            'title': '"Die realste Kifferdoku aller Zeiten"',
            'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
            'timestamp': 1490274721,
            'upload_date': '20170323',
        },
    }]

    def _real_extract(self, url):
        """Resolve a mix URL to its video record and delegate to Nexx.

        Downloads the curated lists, selects the list whose ``entityId``
        or ``alias`` equals the mix id from the URL, then the entry whose
        ``videoDataDelegate`` alias equals the alias from the URL.

        Raises:
            ExtractorError: if no matching mix or video is found.
        """
        mobj = re.match(self._VALID_URL, url)
        mix_id = mobj.group('id')
        alias = mobj.group('alias')

        # NOTE(review): a single request with size=100 is made; presumably
        # mixes outside that window cannot be found - confirm against the API.
        response = self._download_json(
            'https://www.funk.net/api/v3.1/curation/curatedLists/',
            mix_id, headers=self._make_headers(url), query={
                'size': 100,
            })
        curated_lists = try_get(
            response, lambda x: x['_embedded']['curatedListList'],
            list) or []

        # next() is given a default so that a missing mix or video is
        # reported as an ExtractorError rather than a bare StopIteration.
        curated_list = next(
            (item for item in curated_lists
             if mix_id in (item.get('entityId'), item.get('alias'))),
            None)
        if curated_list is None:
            raise ExtractorError('Unable to find mix %s' % mix_id)

        video = next(
            (meta['videoDataDelegate']
             for meta in curated_list.get('videoMetas') or []
             if try_get(
                 meta, lambda x: x['videoDataDelegate']['alias'],
                 compat_str) == alias),
            None)
        if video is None:
            raise ExtractorError(
                'Unable to find video %s in mix %s' % (alias, mix_id))

        return self._make_url_result(video)
81
82
class FunkChannelIE(FunkBaseIE):
    """Extractor for funk.net channel URLs (``/channel/<id>/<alias>``)."""

    _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
        'info_dict': {
            'id': '1155821',
            'ext': 'mp4',
            'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
            'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
            'timestamp': 1514507395,
            'upload_date': '20171229',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # only available via byIdList API
        'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
        'info_dict': {
            'id': '205067',
            'ext': 'mp4',
            'title': 'Martin Sonneborn erklärt die EU',
            'description': 'md5:050f74626e4ed87edf4626d2024210c0',
            'timestamp': 1494424042,
            'upload_date': '20170510',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Locate the video named by the URL alias and delegate to Nexx.

        Three lookups are tried in order, stopping at the first hit:

        1. the paginated ``byChannelAlias`` listing (non-fatal),
        2. the ``byIdList`` endpoint queried with the alias (non-fatal),
        3. the ``filter`` endpoint queried with the channel id.

        Raises:
            ExtractorError: if none of the lookups yields the video.
        """
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        alias = mobj.group('alias')

        headers = self._make_headers(url)

        video = None

        # Id-based channels are currently broken on their side: webplayer
        # tries to process them via byChannelAlias endpoint and fails
        # predictably.
        for page_num in itertools.count():
            # channel_id is passed as the video id so that the progress
            # note is not mistaken for it (same argument order as the
            # byIdList and filter requests below).
            by_channel_alias = self._download_json(
                'https://www.funk.net/api/v3.1/webapp/videos/byChannelAlias/%s'
                % channel_id,
                channel_id,
                'Downloading byChannelAlias JSON page %d' % (page_num + 1),
                headers=headers, query={
                    'filterFsk': 'false',
                    'sort': 'creationDate,desc',
                    'size': 100,
                    'page': page_num,
                }, fatal=False)
            if not by_channel_alias:
                break
            video_list = try_get(
                by_channel_alias, lambda x: x['_embedded']['videoList'], list)
            if not video_list:
                break
            video = next(
                (r for r in video_list if r.get('alias') == alias), None)
            if video:
                break
            # Stop paging once the response no longer links to a next page.
            if not try_get(
                    by_channel_alias, lambda x: x['_links']['next']):
                break

        if not video:
            by_id_list = self._download_json(
                'https://www.funk.net/api/v3.0/content/videos/byIdList',
                channel_id, 'Downloading byIdList JSON', headers=headers,
                query={
                    'ids': alias,
                }, fatal=False)
            if by_id_list:
                video = try_get(by_id_list, lambda x: x['result'][0], dict)

        if not video:
            filtered = self._download_json(
                'https://www.funk.net/api/v3.0/content/videos/filter',
                channel_id, 'Downloading filter JSON', headers=headers, query={
                    'channelId': channel_id,
                    'size': 100,
                })
            results = try_get(filtered, lambda x: x['result'], list) or []
            video = next((r for r in results if r.get('alias') == alias), None)

        # Report a failed lookup as an ExtractorError instead of letting a
        # StopIteration or KeyError escape from the extractor.
        if not video:
            raise ExtractorError(
                'Unable to find video %s in channel %s' % (alias, channel_id))

        return self._make_url_result(video)