X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/d9d7cd0e85dc712461d9185db9df9d6c900a573b..b6db0e9853c4704fd25a056749ab237cc74bcd00:/youtube_dl/extractor/dailymotion.py diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index f8db76c..9a74906 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -1,12 +1,16 @@ # coding: utf-8 from __future__ import unicode_literals -import re -import json +import base64 +import hashlib import itertools +import json +import random +import re +import string from .common import InfoExtractor - +from ..compat import compat_struct_pack from ..utils import ( determine_ext, error_to_compat_str, @@ -64,7 +68,6 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader': 'Deadline', 'uploader_id': 'x1xm8ri', 'age_limit': 0, - 'view_count': int, }, }, { 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', @@ -147,7 +150,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): view_count_str = self._search_regex( (r']+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"', r'video_views_count[^>]+>\s+([\s\d\,.]+)'), - webpage, 'view count', fatal=False) + webpage, 'view count', default=None) if view_count_str: view_count_str = re.sub(r'\s', '', view_count_str) view_count = str_to_int(view_count_str) @@ -159,12 +162,25 @@ class DailymotionIE(DailymotionBaseInfoExtractor): [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', r'buildPlayer\(({.+?})\);', - r'var\s+config\s*=\s*({.+?});'], + r'var\s+config\s*=\s*({.+?});', + # New layout regex (see https://github.com/rg3/youtube-dl/issues/13580) + r'__PLAYER_CONFIG__\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) metadata = player['metadata'] + if metadata.get('error', {}).get('type') == 'password_protected': + password = self._downloader.params.get('videopassword') + if password: + r = int(metadata['id'][1:], 36) + us64e = lambda x: base64.urlsafe_b64encode(x).decode().strip('=') + t = ''.join(random.choice(string.ascii_letters) for i in range(10)) + n = us64e(compat_struct_pack('I', r)) + i = us64e(hashlib.md5(('%s%d%s' % (password, r, t)).encode()).digest()) + metadata = self._download_json( + 'http://www.dailymotion.com/player/metadata/video/p' + i + t + n, video_id) + self._check_error(metadata) formats = [] @@ -178,9 +194,12 @@ class DailymotionIE(DailymotionBaseInfoExtractor): continue ext = mimetype2ext(type_) or determine_ext(media_url) if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( + m3u8_formats = self._extract_m3u8_formats( media_url, video_id, 'mp4', preference=-1, - m3u8_id='hls', fatal=False)) + m3u8_id='hls', fatal=False) + for f in m3u8_formats: + f['url'] = f['url'].split('#')[0] + formats.append(f) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( media_url, video_id, preference=-1, f4m_id='hds', fatal=False)) @@ -233,7 +252,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): # vevo embed vevo_id = self._search_regex( - r'[\w]*)', + r'[\w]*)', webpage, 'vevo embed', default=None) if vevo_id: return self.url_result('vevo:%s' % vevo_id, 'Vevo') @@ -297,8 +316,8 @@ class DailymotionIE(DailymotionBaseInfoExtractor): def _check_error(self, info): error = info.get('error') - if info.get('error') is not None: - title = error['title'] + if error: + title = error.get('title') or error['message'] # See https://developer.dailymotion.com/api#access-error if error.get('code') == 'DM007': self.raise_geo_restricted(msg=title) @@ -323,7 +342,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): IE_NAME = 'dailymotion:playlist' - _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P.+?)/' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P[^/?#&]+)' _MORE_PAGES_INDICATOR = r'(?s)
.*?[^/?]+)' % _VALID_URL_PREFIX - _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX - - _TESTS = [{ - # From http://www.francetvinfo.fr/economie/entreprises/les-entreprises-familiales-le-secret-de-la-reussite_933271.html - # Tested at FranceTvInfo_2 - 'url': 'http://api.dmcloud.net/embed/4e7343f894a6f677b10006b4/556e03339473995ee145930c?auth=1464865870-0-jyhsm84b-ead4c701fb750cf9367bf4447167a3db&autoplay=1', - 'only_matching': True, - }, { - # http://www.francetvinfo.fr/societe/larguez-les-amarres-le-cobaturage-se-developpe_980101.html - 'url': 'http://api.dmcloud.net/player/embed/4e7343f894a6f677b10006b4/559545469473996d31429f06?auth=1467430263-0-90tglw2l-a3a4b64ed41efe48d7fccad85b8b8fda&autoplay=1', - 'only_matching': True, - }] - - @classmethod - def _extract_dmcloud_url(cls, webpage): - mobj = re.search(r']+src=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, webpage) - if mobj: - return mobj.group(1) - - mobj = re.search( - r']+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=[\'"](%s)[\'"]' % cls._VALID_EMBED_URL, - webpage) - if mobj: - return mobj.group(1) - - def _real_extract(self, url): - video_id = self._match_id(url) - - webpage = self._download_webpage_no_ff(url, video_id) - - title = self._html_search_regex(r'([^>]+)', webpage, 'title') - - video_info = self._parse_json(self._search_regex( - r'var\s+info\s*=\s*([^;]+);', webpage, 'video info'), video_id) - - # TODO: parse ios_url, which is in fact a manifest - video_url = video_info['mp4_url'] - - return { - 'id': video_id, - 'url': video_url, - 'title': title, - 'thumbnail': video_info.get('thumbnail_url'), - }