X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/532a08904ffbacc5e5ccf99edb660c5f37ddb213..4af078afdfeb5a7f230d6e445b1d1e7eefd04704:/youtube_dl/extractor/dailymotion.py
diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py
index f8db76c..1816c55 100644
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -1,22 +1,32 @@
# coding: utf-8
from __future__ import unicode_literals
-import re
-import json
+import base64
+import functools
+import hashlib
import itertools
+import json
+import random
+import re
+import string
from .common import InfoExtractor
-
+from ..compat import compat_struct_pack
from ..utils import (
determine_ext,
error_to_compat_str,
ExtractorError,
int_or_none,
+ mimetype2ext,
+ OnDemandPagedList,
parse_iso8601,
sanitized_Request,
str_to_int,
+ try_get,
unescapeHTML,
- mimetype2ext,
+ update_url_query,
+ url_or_none,
+ urlencode_postdata,
)
@@ -64,7 +74,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
'uploader': 'Deadline',
'uploader_id': 'x1xm8ri',
'age_limit': 0,
- 'view_count': int,
},
}, {
'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames',
@@ -141,13 +150,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
age_limit = self._rta_search(webpage)
- description = self._og_search_description(webpage) or self._html_search_meta(
+ description = self._og_search_description(
+ webpage, default=None) or self._html_search_meta(
'description', webpage, 'description')
view_count_str = self._search_regex(
(r']+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"',
r'video_views_count[^>]+>\s+([\s\d\,.]+)'),
- webpage, 'view count', fatal=False)
+ webpage, 'view count', default=None)
if view_count_str:
view_count_str = re.sub(r'\s', '', view_count_str)
view_count = str_to_int(view_count_str)
@@ -159,11 +169,39 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
[r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826
r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);',
r'buildPlayer\(({.+?})\);',
- r'var\s+config\s*=\s*({.+?});'],
+ r'var\s+config\s*=\s*({.+?});',
+ # New layout regex (see https://github.com/rg3/youtube-dl/issues/13580)
+ r'__PLAYER_CONFIG__\s*=\s*({.+?});'],
webpage, 'player v5', default=None)
if player_v5:
- player = self._parse_json(player_v5, video_id)
- metadata = player['metadata']
+ player = self._parse_json(player_v5, video_id, fatal=False) or {}
+ metadata = try_get(player, lambda x: x['metadata'], dict)
+ if not metadata:
+ metadata_url = url_or_none(try_get(
+ player, lambda x: x['context']['metadata_template_url1']))
+ if metadata_url:
+ metadata_url = metadata_url.replace(':videoId', video_id)
+ else:
+ metadata_url = update_url_query(
+ 'https://www.dailymotion.com/player/metadata/video/%s'
+ % video_id, {
+ 'embedder': url,
+ 'integration': 'inline',
+ 'GK_PV5_NEON': '1',
+ })
+ metadata = self._download_json(
+ metadata_url, video_id, 'Downloading metadata JSON')
+
+ if try_get(metadata, lambda x: x['error']['type']) == 'password_protected':
+ password = self._downloader.params.get('videopassword')
+ if password:
+ r = int(metadata['id'][1:], 36)
+ us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=')
+ t = ''.join(random.choice(string.ascii_letters) for i in range(10))
+ n = us64e(compat_struct_pack('I', r))
+ i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest())
+ metadata = self._download_json(
+ 'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id)
self._check_error(metadata)
@@ -178,9 +216,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
continue
ext = mimetype2ext(type_) or determine_ext(media_url)
if ext == 'm3u8':
- formats.extend(self._extract_m3u8_formats(
+ m3u8_formats = self._extract_m3u8_formats(
media_url, video_id, 'mp4', preference=-1,
- m3u8_id='hls', fatal=False))
+ m3u8_id='hls', fatal=False)
+ for f in m3u8_formats:
+ f['url'] = f['url'].split('#')[0]
+ formats.append(f)
elif ext == 'f4m':
formats.extend(self._extract_f4m_formats(
media_url, video_id, preference=-1, f4m_id='hds', fatal=False))
@@ -233,7 +274,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
# vevo embed
vevo_id = self._search_regex(
- r'[\w]*)',
+ r'[\w]*)',
webpage, 'vevo embed', default=None)
if vevo_id:
return self.url_result('vevo:%s' % vevo_id, 'Vevo')
@@ -297,8 +338,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
def _check_error(self, info):
error = info.get('error')
- if info.get('error') is not None:
- title = error['title']
+ if error:
+ title = error.get('title') or error['message']
# See https://developer.dailymotion.com/api#access-error
if error.get('code') == 'DM007':
self.raise_geo_restricted(msg=title)
@@ -323,58 +364,73 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:playlist'
- _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/'
- _MORE_PAGES_INDICATOR = r'(?s).*?
x[0-9a-z]+)'
_TESTS = [{
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
'info_dict': {
'title': 'SPORT',
- 'id': 'xv4bw_nqtv_sport',
+ 'id': 'xv4bw',
},
'playlist_mincount': 20,
}]
-
- def _extract_entries(self, id):
- video_ids = set()
- processed_urls = set()
- for pagenum in itertools.count(1):
- page_url = self._PAGE_TEMPLATE % (id, pagenum)
- webpage, urlh = self._download_webpage_handle_no_ff(
- page_url, id, 'Downloading page %s' % pagenum)
- if urlh.geturl() in processed_urls:
- self.report_warning('Stopped at duplicated page %s, which is the same as %s' % (
- page_url, urlh.geturl()), id)
- break
-
- processed_urls.add(urlh.geturl())
-
- for video_id in re.findall(r'data-xid="(.+?)"', webpage):
- if video_id not in video_ids:
- yield self.url_result(
- 'http://www.dailymotion.com/video/%s' % video_id,
- DailymotionIE.ie_key(), video_id)
- video_ids.add(video_id)
-
- if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
- break
+ _PAGE_SIZE = 100
+
+ def _fetch_page(self, playlist_id, authorizaion, page):
+ page += 1
+ videos = self._download_json(
+ 'https://graphql.api.dailymotion.com',
+ playlist_id, 'Downloading page %d' % page,
+ data=json.dumps({
+ 'query': '''{
+ collection(xid: "%s") {
+ videos(first: %d, page: %d) {
+ pageInfo {
+ hasNextPage
+ nextPage
+ }
+ edges {
+ node {
+ xid
+ url
+ }
+ }
+ }
+ }
+}''' % (playlist_id, self._PAGE_SIZE, page)
+ }).encode(), headers={
+ 'Authorization': authorizaion,
+ 'Origin': 'https://www.dailymotion.com',
+ })['data']['collection']['videos']
+ for edge in videos['edges']:
+ node = edge['node']
+ yield self.url_result(
+ node['url'], DailymotionIE.ie_key(), node['xid'])
def _real_extract(self, url):
- mobj = re.match(self._VALID_URL, url)
- playlist_id = mobj.group('id')
+ playlist_id = self._match_id(url)
webpage = self._download_webpage(url, playlist_id)
-
- return {
- '_type': 'playlist',
- 'id': playlist_id,
- 'title': self._og_search_title(webpage),
- 'entries': self._extract_entries(playlist_id),
- }
-
-
-class DailymotionUserIE(DailymotionPlaylistIE):
+ api = self._parse_json(self._search_regex(
+ r'__PLAYER_CONFIG__\s*=\s*({.+?});',
+ webpage, 'player config'), playlist_id)['context']['api']
+ auth = self._download_json(
+ api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
+ playlist_id, data=urlencode_postdata({
+ 'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
+ 'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
+ 'grant_type': 'client_credentials',
+ }))
+ authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
+ entries = OnDemandPagedList(functools.partial(
+ self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
+ return self.playlist_result(
+ entries, playlist_id,
+ self._og_search_title(webpage))
+
+
+class DailymotionUserIE(DailymotionBaseInfoExtractor):
IE_NAME = 'dailymotion:user'
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P[^/]+)'
+ _MORE_PAGES_INDICATOR = r'(?s)