Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/funk.py
debian/changelog: Annotate the log with bugs to close.
[youtubedl] / youtube_dl / extractor / funk.py
index ce5c67fbbb7100a35174836aee4e51e20b96be11..0ff058619bc05fa6b3d6c0680be56bff957ca802 100644 (file)
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from .nexx import NexxIE
-from ..utils import extract_attributes
+from ..utils import (
+    int_or_none,
+    try_get,
+)
+
+
+class FunkBaseIE(InfoExtractor):
+    """Common base class for the funk.net extractors added by this patch.
+
+    Provides the helper that turns a funk.net API video record into a
+    result that is handed over to the Nexx extractor.
+    """
+    def _make_url_result(self, video):
+        """Build a ``url_transparent`` result that delegates to ``NexxIE``.
+
+        ``video`` is accessed like a mapping: ``video['sourceId']`` is
+        mandatory (a missing key raises ``KeyError``), every other field is
+        read with ``.get()`` and may be absent.
+
+        Returns a dict whose ``url`` is ``nexx:741:<sourceId>`` and whose
+        ``id`` is the same ``sourceId``; title, description, duration and
+        season/episode numbers are copied from ``video``.
+        """
+        return {
+            '_type': 'url_transparent',
+            # NOTE(review): the 741 segment is hard-coded; the implementation
+            # removed by this patch used '741' as the fallback Nexx domain id.
+            'url': 'nexx:741:%s' % video['sourceId'],
+            'ie_key': NexxIE.ie_key(),
+            'id': video['sourceId'],
+            'title': video.get('title'),
+            'description': video.get('description'),
+            # Numeric fields are passed through int_or_none (imported from
+            # ..utils) -- presumably yields None for missing/invalid values;
+            # confirm against the helper.
+            'duration': int_or_none(video.get('duration')),
+            'season_number': int_or_none(video.get('seasonNr')),
+            'episode_number': int_or_none(video.get('episodeNr')),
+        }
+
+
+class FunkMixIE(FunkBaseIE):
+    """Extractor for funk.net ``/mix/<id>/<alias>`` URLs."""
+    _VALID_URL = r'https?://(?:www\.)?funk\.net/mix/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
+    _TESTS = [{
+        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/die-realste-kifferdoku-aller-zeiten',
+        'md5': '8edf617c2f2b7c9847dfda313f199009',
+        'info_dict': {
+            'id': '123748',
+            'ext': 'mp4',
+            'title': '"Die realste Kifferdoku aller Zeiten"',
+            'description': 'md5:c97160f5bafa8d47ec8e2e461012aa9d',
+            'timestamp': 1490274721,
+            'upload_date': '20170323',
+        },
+    }]
+
+    def _real_extract(self, url):
+        """Find the mix video in the curated-lists API and delegate to Nexx.
+
+        Splits ``url`` into the mix id and the video alias, downloads the
+        curated lists (query ``size=100``), selects the list whose
+        ``entityId`` or ``alias`` equals the mix id, then the entry of its
+        ``videoMetas`` whose ``alias`` equals the URL alias, and returns
+        ``_make_url_result()`` for that entry's ``videoDataDelegate``.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        mix_id = mobj.group('id')
+        alias = mobj.group('alias')
 
+        lists = self._download_json(
+            'https://www.funk.net/api/v3.1/curation/curatedLists/',
+            mix_id, headers={
+                # Hard-coded token sent with every request (looks like a
+                # JWT) -- presumably taken from the site's own web client;
+                # confirm it is still valid when touching this code.
+                'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbC12Mi4wIiwic2NvcGUiOiJzdGF0aWMtY29udGVudC1hcGksY3VyYXRpb24tc2VydmljZSxzZWFyY2gtYXBpIn0.SGCC1IXHLtZYoo8PvRKlU2gXH1su8YSu47sB3S4iXBI',
+                'Referer': url,
+            }, query={
+                'size': 100,
+            })['result']['lists']
 
-class FunkIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?funk\.net/(?:mix|channel)/(?:[^/]+/)*(?P<id>[^?/#]+)'
+        # NOTE(review): neither next() call below has a default, so a mix id
+        # or alias that is absent from the response raises StopIteration.
+        metas = next(
+            l for l in lists
+            if mix_id in (l.get('entityId'), l.get('alias')))['videoMetas']
+        video = next(
+            meta['videoDataDelegate']
+            for meta in metas if meta.get('alias') == alias)
+
+        return self._make_url_result(video)
+
+
+class FunkChannelIE(FunkBaseIE):
+    """Extractor for funk.net ``/channel/<id>/<alias>`` URLs."""
+    _VALID_URL = r'https?://(?:www\.)?funk\.net/channel/(?P<id>[^/]+)/(?P<alias>[^/?#&]+)'
     _TESTS = [{
-        'url': 'https://www.funk.net/mix/59d65d935f8b160001828b5b/0/59d517e741dca10001252574/',
-        'md5': '4d40974481fa3475f8bccfd20c5361f8',
+        'url': 'https://www.funk.net/channel/ba/die-lustigsten-instrumente-aus-dem-internet-teil-2',
+        'info_dict': {
+            'id': '1155821',
+            'ext': 'mp4',
+            'title': 'Die LUSTIGSTEN INSTRUMENTE aus dem Internet - Teil 2',
+            'description': 'md5:a691d0413ef4835588c5b03ded670c1f',
+            'timestamp': 1514507395,
+            'upload_date': '20171229',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        # only available via byIdList API
+        'url': 'https://www.funk.net/channel/informr/martin-sonneborn-erklaert-die-eu',
         'info_dict': {
-            'id': '716599',
+            'id': '205067',
             'ext': 'mp4',
-            'title': 'Neue Rechte Welle',
-            'description': 'md5:a30a53f740ffb6bfd535314c2cc5fb69',
-            'timestamp': 1501337639,
-            'upload_date': '20170729',
+            'title': 'Martin Sonneborn erklärt die EU',
+            'description': 'md5:050f74626e4ed87edf4626d2024210c0',
+            'timestamp': 1494424042,
+            'upload_date': '20170510',
         },
         'params': {
-            'format': 'bestvideo',
             'skip_download': True,
         },
     }, {
-        'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/0/59d52049999264000182e79d/',
+        'url': 'https://www.funk.net/channel/59d5149841dca100012511e3/mein-erster-job-lovemilla-folge-1/lovemilla/',
         'only_matching': True,
     }]
 
     def _real_extract(self, url):
+        """Resolve a channel video through the funk.net API, delegate to Nexx.
+
+        Splits ``url`` into the channel id and the video alias.  The
+        ``byIdList`` endpoint is queried first with the alias; when that
+        gives no dict at ``result[0]``, the ``filter`` endpoint is queried
+        for the channel (query ``size=100``) and the entry whose ``alias``
+        equals the URL alias is used.  The chosen record is passed to
+        ``_make_url_result()``.
+        """
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        channel_id = mobj.group('id')
+        alias = mobj.group('alias')
+
+        # Same headers are sent to both API endpoints below.  The token is
+        # hard-coded (looks like a JWT) -- presumably taken from the site's
+        # own web client; confirm it is still valid when touching this code.
+        headers = {
+            'authorization': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJjbGllbnROYW1lIjoiY3VyYXRpb24tdG9vbCIsInNjb3BlIjoic3RhdGljLWNvbnRlbnQtYXBpLGN1cmF0aW9uLWFwaSxzZWFyY2gtYXBpIn0.q4Y2xZG8PFHai24-4Pjx2gym9RmJejtmK6lMXP5wAgc',
+            'Referer': url,
+        }
+
+        video = None
 
-        webpage = self._download_webpage(url, video_id)
+        # First attempt: byIdList.  Called with fatal=False, so the result is
+        # checked for truthiness before use -- presumably a failed request
+        # yields a falsy value instead of raising; confirm in _download_json.
+        by_id_list = self._download_json(
+            'https://www.funk.net/api/v3.0/content/videos/byIdList', channel_id,
+            headers=headers, query={
+                'ids': alias,
+            }, fatal=False)
+        if by_id_list:
+            video = try_get(by_id_list, lambda x: x['result'][0], dict)
 
-        domain_id = NexxIE._extract_domain_id(webpage) or '741'
-        nexx_id = extract_attributes(self._search_regex(
-            r'(<div[^>]id=["\']mediaplayer-funk[^>]+>)',
-            webpage, 'media player'))['data-id']
+        # Fallback: list the channel's videos and match on alias.
+        if not video:
+            results = self._download_json(
+                'https://www.funk.net/api/v3.0/content/videos/filter', channel_id,
+                headers=headers, query={
+                    'channelId': channel_id,
+                    'size': 100,
+                })['result']
+            # NOTE(review): next() has no default, so an alias that is not
+            # among the returned results raises StopIteration.
+            video = next(r for r in results if r.get('alias') == alias)
 
-        return self.url_result(
-            'nexx:%s:%s' % (domain_id, nexx_id), ie=NexxIE.ie_key(),
-            video_id=nexx_id)
+        return self._make_url_result(video)