X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/87a0165ca7e39af4dacb7ec637063b2cd35ae40b..89bb9e23b34f43c6563524917f7dffb9841a972e:/youtube_dl/extractor/foxnews.py?ds=sidebyside diff --git a/youtube_dl/extractor/foxnews.py b/youtube_dl/extractor/foxnews.py index 229bcb1..63613cb 100644 --- a/youtube_dl/extractor/foxnews.py +++ b/youtube_dl/extractor/foxnews.py @@ -22,7 +22,7 @@ class FoxNewsIE(AMPIE): 'duration': 265, 'timestamp': 1304411491, 'upload_date': '20110503', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, }, { @@ -36,7 +36,7 @@ class FoxNewsIE(AMPIE): 'duration': 292, 'timestamp': 1417662047, 'upload_date': '20141204', - 'thumbnail': 're:^https?://.*\.jpg$', + 'thumbnail': r're:^https?://.*\.jpg$', }, 'params': { # m3u8 download @@ -58,6 +58,14 @@ class FoxNewsIE(AMPIE): }, ] + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r'<(?:amp-)?iframe[^>]+\bsrc=(["\'])(?P(?:https?:)?//video\.foxnews\.com/v/video-embed\.html?.*?\bvideo_id=\d+.*?)\1', + webpage)] + def _real_extract(self, url): host, video_id = re.match(self._VALID_URL, url).groups() @@ -68,73 +76,52 @@ class FoxNewsIE(AMPIE): class FoxNewsArticleIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' + _VALID_URL = r'https?://(?:www\.)?(?:insider\.)?foxnews\.com/(?!v)([^/]+/)+(?P[a-z-]+)' IE_NAME = 'foxnews:article' - _TEST = { + _TESTS = [{ + # data-video-id 'url': 'http://www.foxnews.com/politics/2016/09/08/buzz-about-bud-clinton-camp-denies-claims-wore-earpiece-at-forum.html', - 'md5': '62aa5a781b308fdee212ebb6f33ae7ef', + 'md5': '83d44e1aff1433e7a29a7b537d1700b5', 'info_dict': { 'id': '5116295019001', 'ext': 'mp4', 'title': 'Trump and Clinton asked to defend positions on Iraq War', 'description': 'Veterans react on \'The Kelly File\'', - 'timestamp': 1473299755, + 'timestamp': 1473301045, 'upload_date': '20160908', }, - } - - def _real_extract(self, url): - display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - - video_id = self._html_search_regex( - r'data-video-id=([\'"])(?P[^\'"]+)\1', - webpage, 'video ID', group='id') - return self.url_result( - 'http://video.foxnews.com/v/' + video_id, - FoxNewsIE.ie_key()) - - -class FoxNewsInsiderIE(InfoExtractor): - _VALID_URL = r'https?://insider\.foxnews\.com/([^/]+/)+(?P[a-z-]+)' - IE_NAME = 'foxnews:insider' - - _TEST = { - 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words', - 'md5': 'a10c755e582d28120c62749b4feb4c0c', + }, { + # iframe embed + 'url': 'http://www.foxnews.com/us/2018/03/09/parkland-survivor-kyle-kashuv-on-meeting-trump-his-app-to-prevent-another-school-shooting.amp.html?__twitter_impression=true', 'info_dict': { - 'id': '5099377331001', - 'display_id': 'univ-wisconsin-student-group-pushing-silence-certain-words', - 'ext': 'mp4', - 'title': 'Student Group: Saying \'Politically Correct,\' \'Trash\' and \'Lame\' Is Offensive', - 'description': 'Is campus censorship getting out of control?', - 'timestamp': 1472168725, - 'upload_date': '20160825', - 'thumbnail': 're:^https?://.*\.jpg$', + 'id': '5748266721001', + 'ext': 'flv', + 'title': 'Kyle Kashuv has a positive message for the Trump White House', + 'description': 'Marjory Stoneman Douglas student disagrees with classmates.', + 'thumbnail': r're:^https?://.*\.jpg$', + 'duration': 229, + 'timestamp': 1520594670, + 'upload_date': '20180309', }, 'params': { - # m3u8 download 'skip_download': True, }, - 'add_ie': [FoxNewsIE.ie_key()], - } + }, { + 'url': 'http://insider.foxnews.com/2016/08/25/univ-wisconsin-student-group-pushing-silence-certain-words', + 'only_matching': True, + }] def _real_extract(self, url): display_id = self._match_id(url) - webpage = self._download_webpage(url, display_id) - embed_url = self._html_search_meta('embedUrl', webpage, 'embed URL') - - title = self._og_search_title(webpage) - description = self._og_search_description(webpage) + video_id = self._html_search_regex( + r'data-video-id=([\'"])(?P[^\'"]+)\1', + webpage, 'video ID', group='id', default=None) + if video_id: + return self.url_result( + 'http://video.foxnews.com/v/' + video_id, FoxNewsIE.ie_key()) - return { - '_type': 'url_transparent', - 'ie_key': FoxNewsIE.ie_key(), - 'url': embed_url, - 'display_id': display_id, - 'title': title, - 'description': description, - } + return self.url_result( + FoxNewsIE._extract_urls(webpage)[0], FoxNewsIE.ie_key())