X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/9f2b33881274af98a9145c533a1d295fad71521a..92816565411fb77b1c3d9f49e414f380bc131a00:/youtube_dl/extractor/dailymotion.py diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 2e6226e..21a2d02 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -16,6 +16,7 @@ from ..utils import ( sanitized_Request, str_to_int, unescapeHTML, + mimetype2ext, ) @@ -37,7 +38,7 @@ class DailymotionBaseInfoExtractor(InfoExtractor): class DailymotionIE(DailymotionBaseInfoExtractor): - _VALID_URL = r'(?i)(?:https?://)?(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:embed|swf|#)/)?video/(?P[^/?_]+)' + _VALID_URL = r'(?i)https?://(?:(www|touch)\.)?dailymotion\.[a-z]{2,3}/(?:(?:(?:embed|swf|#)/)?video|swf)/(?P[^/?_]+)' IE_NAME = 'dailymotion' _FORMATS = [ @@ -48,68 +49,89 @@ class DailymotionIE(DailymotionBaseInfoExtractor): ('stream_h264_hd1080_url', 'hd180'), ] - _TESTS = [ - { - 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', - 'md5': '2137c41a8e78554bb09225b8eb322406', - 'info_dict': { - 'id': 'x2iuewm', - 'ext': 'mp4', - 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', - 'description': 'Several come bundled with the Steam Controller.', - 'thumbnail': 're:^https?:.*\.(?:jpg|png)$', - 'duration': 74, - 'timestamp': 1425657362, - 'upload_date': '20150306', - 'uploader': 'IGN', - 'uploader_id': 'xijv66', - 'age_limit': 0, - 'view_count': int, - 'comment_count': int, - } + _TESTS = [{ + 'url': 'http://www.dailymotion.com/video/x5kesuj_office-christmas-party-review-jason-bateman-olivia-munn-t-j-miller_news', + 'md5': '074b95bdee76b9e3654137aee9c79dfe', + 'info_dict': { + 'id': 'x5kesuj', + 'ext': 'mp4', + 'title': 'Office Christmas Party Review – Jason Bateman, Olivia Munn, T.J. 
Miller', + 'description': 'Office Christmas Party Review - Jason Bateman, Olivia Munn, T.J. Miller', + 'thumbnail': r're:^https?:.*\.(?:jpg|png)$', + 'duration': 187, + 'timestamp': 1493651285, + 'upload_date': '20170501', + 'uploader': 'Deadline', + 'uploader_id': 'x1xm8ri', + 'age_limit': 0, + 'view_count': int, }, + }, { + 'url': 'https://www.dailymotion.com/video/x2iuewm_steam-machine-models-pricing-listed-on-steam-store-ign-news_videogames', + 'md5': '2137c41a8e78554bb09225b8eb322406', + 'info_dict': { + 'id': 'x2iuewm', + 'ext': 'mp4', + 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', + 'description': 'Several come bundled with the Steam Controller.', + 'thumbnail': r're:^https?:.*\.(?:jpg|png)$', + 'duration': 74, + 'timestamp': 1425657362, + 'upload_date': '20150306', + 'uploader': 'IGN', + 'uploader_id': 'xijv66', + 'age_limit': 0, + 'view_count': int, + }, + 'skip': 'video gone', + }, { # Vevo video - { - 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi', - 'info_dict': { - 'title': 'Roar (Official)', - 'id': 'USUV71301934', - 'ext': 'mp4', - 'uploader': 'Katy Perry', - 'upload_date': '20130905', - }, - 'params': { - 'skip_download': True, - }, - 'skip': 'VEVO is only available in some countries', + 'url': 'http://www.dailymotion.com/video/x149uew_katy-perry-roar-official_musi', + 'info_dict': { + 'title': 'Roar (Official)', + 'id': 'USUV71301934', + 'ext': 'mp4', + 'uploader': 'Katy Perry', + 'upload_date': '20130905', + }, + 'params': { + 'skip_download': True, }, + 'skip': 'VEVO is only available in some countries', + }, { # age-restricted video - { - 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', - 'md5': '0d667a7b9cebecc3c89ee93099c4159d', - 'info_dict': { - 'id': 'xyh2zz', - 'ext': 'mp4', - 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', - 'uploader': 'HotWaves1012', - 'age_limit': 18, - } 
+ 'url': 'http://www.dailymotion.com/video/xyh2zz_leanna-decker-cyber-girl-of-the-year-desires-nude-playboy-plus_redband', + 'md5': '0d667a7b9cebecc3c89ee93099c4159d', + 'info_dict': { + 'id': 'xyh2zz', + 'ext': 'mp4', + 'title': 'Leanna Decker - Cyber Girl Of The Year Desires Nude [Playboy Plus]', + 'uploader': 'HotWaves1012', + 'age_limit': 18, }, + 'skip': 'video gone', + }, { # geo-restricted, player v5 - { - 'url': 'http://www.dailymotion.com/video/xhza0o', - 'only_matching': True, - }, + 'url': 'http://www.dailymotion.com/video/xhza0o', + 'only_matching': True, + }, { # with subtitles - { - 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news', - 'only_matching': True, - }, - { - 'url': 'http://www.dailymotion.com/swf/video/x3n92nf', - 'only_matching': True, - } - ] + 'url': 'http://www.dailymotion.com/video/x20su5f_the-power-of-nightmares-1-the-rise-of-the-politics-of-fear-bbc-2004_news', + 'only_matching': True, + }, { + 'url': 'http://www.dailymotion.com/swf/video/x3n92nf', + 'only_matching': True, + }, { + 'url': 'http://www.dailymotion.com/swf/x3ss1m_funny-magic-trick-barry-and-stuart_fun', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + # Look for embedded Dailymotion player + matches = re.findall( + r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage) + return list(map(lambda m: unescapeHTML(m[1]), matches)) def _real_extract(self, url): video_id = self._match_id(url) @@ -125,18 +147,21 @@ class DailymotionIE(DailymotionBaseInfoExtractor): view_count_str = self._search_regex( (r']+itemprop="interactionCount"[^>]+content="UserPlays:([\s\d,.]+)"', r'video_views_count[^>]+>\s+([\s\d\,.]+)'), - webpage, 'view count', fatal=False) + webpage, 'view count', default=None) if view_count_str: view_count_str = re.sub(r'\s', '', 
view_count_str) view_count = str_to_int(view_count_str) comment_count = int_or_none(self._search_regex( r']+itemprop="interactionCount"[^>]+content="UserComments:(\d+)"', - webpage, 'comment count', fatal=False)) + webpage, 'comment count', default=None)) player_v5 = self._search_regex( [r'buildPlayer\(({.+?})\);\n', # See https://github.com/rg3/youtube-dl/issues/7826 r'playerV5\s*=\s*dmp\.create\([^,]+?,\s*({.+?})\);', - r'buildPlayer\(({.+?})\);'], + r'buildPlayer\(({.+?})\);', + r'var\s+config\s*=\s*({.+?});', + # New layout regex (see https://github.com/rg3/youtube-dl/issues/13580) + r'__PLAYER_CONFIG__\s*=\s*({.+?});'], webpage, 'player v5', default=None) if player_v5: player = self._parse_json(player_v5, video_id) @@ -153,18 +178,19 @@ class DailymotionIE(DailymotionBaseInfoExtractor): type_ = media.get('type') if type_ == 'application/vnd.lumberjack.manifest': continue - ext = determine_ext(media_url) - if type_ == 'application/x-mpegURL' or ext == 'm3u8': + ext = mimetype2ext(type_) or determine_ext(media_url) + if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( media_url, video_id, 'mp4', preference=-1, m3u8_id='hls', fatal=False)) - elif type_ == 'application/f4m' or ext == 'f4m': + elif ext == 'f4m': formats.extend(self._extract_f4m_formats( media_url, video_id, preference=-1, f4m_id='hds', fatal=False)) else: f = { 'url': media_url, 'format_id': 'http-%s' % quality, + 'ext': ext, } m = re.search(r'H264-(?P\d+)x(?P\d+)', media_url) if m: @@ -209,7 +235,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): # vevo embed vevo_id = self._search_regex( - r'[\w]*)', + r'[\w]*)', webpage, 'vevo embed', default=None) if vevo_id: return self.url_result('vevo:%s' % vevo_id, 'Vevo') @@ -272,9 +298,14 @@ class DailymotionIE(DailymotionBaseInfoExtractor): } def _check_error(self, info): + error = info.get('error') if info.get('error') is not None: + title = error['title'] + # See https://developer.dailymotion.com/api#access-error + if error.get('code') == 
'DM007': + self.raise_geo_restricted(msg=title) raise ExtractorError( - '%s said: %s' % (self.IE_NAME, info['error']['title']), expected=True) + '%s said: %s' % (self.IE_NAME, title), expected=True) def _get_subtitles(self, video_id, webpage): try: @@ -294,7 +325,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): class DailymotionPlaylistIE(DailymotionBaseInfoExtractor): IE_NAME = 'dailymotion:playlist' - _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>.+?)/' + _VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)' _MORE_PAGES_INDICATOR = r'(?s)
.*?[^/?]+)' % _VALID_URL_PREFIX _VALID_EMBED_URL = r'%s[^/]+/[^\'"]+' % _VALID_URL_PREFIX