X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/9dc487f48b50767cf540fa36c3de2c386fd74c04..0390e4903cb42a95d11dfc2457d921eb811c606d:/youtube_dl/extractor/cbslocal.py?ds=sidebyside diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py index 74adb38..90852a9 100644 --- a/youtube_dl/extractor/cbslocal.py +++ b/youtube_dl/extractor/cbslocal.py @@ -1,16 +1,17 @@ # coding: utf-8 from __future__ import unicode_literals -import calendar -import datetime - from .anvato import AnvatoIE from .sendtonews import SendtoNewsIE from ..compat import compat_urlparse +from ..utils import ( + parse_iso8601, + unified_timestamp, +) class CBSLocalIE(AnvatoIE): - _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)' + _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)' _TESTS = [{ # Anvato backend @@ -24,6 +25,7 @@ class CBSLocalIE(AnvatoIE): 'thumbnail': 're:^https?://.*', 'timestamp': 1463440500, 'upload_date': '20160516', + 'uploader': 'CBS', 'subtitles': { 'en': 'mincount:5', }, @@ -37,23 +39,44 @@ class CBSLocalIE(AnvatoIE): 'Syndication\\Curb.tv', 'Content\\News' ], + 'tags': ['CBS 2 News Evening'], }, }, { # SendtoNews embed 'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/', 'info_dict': { 'id': 'GxfCe0Zo7D-175909-5588', - 'ext': 'mp4', - 'title': 'Recap: CLE 15, CIN 6', - 'description': '5/16/16: Indians\' bats explode for 15 runs in a win', - 'upload_date': '20160516', - 'timestamp': 1463433840, - 'duration': 49, }, + 'playlist_count': 9, 'params': { # m3u8 download 'skip_download': True, }, + }, { + 'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/', + 'info_dict': { + 'id': '3580809', + 'ext': 'mp4', + 'title': 'A Very Blue Anniversary', + 'description': 'CBS2’s Cindy Hsu has more.', + 'thumbnail': 're:^https?://.*', + 'timestamp': int, + 'upload_date': r're:^\d{8}$', + 'uploader': 'CBS', + 
'subtitles': { + 'en': 'mincount:5', + }, + 'categories': [ + 'Stations\\Spoken Word\\WCBSTV', + 'Syndication\\AOL', + 'Syndication\\MSN', + 'Syndication\\NDN', + 'Syndication\\Yahoo', + 'Content\\News', + 'Content\\News\\Local News', + ], + 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'], + }, }] def _real_extract(self, url): @@ -62,19 +85,16 @@ class CBSLocalIE(AnvatoIE): sendtonews_url = SendtoNewsIE._extract_url(webpage) if sendtonews_url: - info_dict = { - '_type': 'url_transparent', - 'url': compat_urlparse.urljoin(url, sendtonews_url), - } - else: - info_dict = self._extract_anvato_videos(webpage, display_id) + return self.url_result( + compat_urlparse.urljoin(url, sendtonews_url), + ie=SendtoNewsIE.ie_key()) + + info_dict = self._extract_anvato_videos(webpage, display_id) - time_str = self._html_search_regex( - r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False) - timestamp = None - if time_str: - timestamp = calendar.timegm(datetime.datetime.strptime( - time_str, '%b %d, %Y %I:%M %p').timetuple()) + timestamp = unified_timestamp(self._html_search_regex( + r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage, + 'released date', default=None)) or parse_iso8601( + self._html_search_meta('uploadDate', webpage)) info_dict.update({ 'display_id': display_id,