# coding: utf-8
from __future__ import unicode_literals
-import calendar
-import datetime
-
from .anvato import AnvatoIE
from .sendtonews import SendtoNewsIE
from ..compat import compat_urlparse
+from ..utils import (
+ parse_iso8601,
+ unified_timestamp,
+)
class CBSLocalIE(AnvatoIE):
- _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
+ _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
_TESTS = [{
# Anvato backend
'thumbnail': 're:^https?://.*',
'timestamp': 1463440500,
'upload_date': '20160516',
+ 'uploader': 'CBS',
'subtitles': {
'en': 'mincount:5',
},
'Syndication\\Curb.tv',
'Content\\News'
],
+ 'tags': ['CBS 2 News Evening'],
},
}, {
# SendtoNews embed
'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
'info_dict': {
'id': 'GxfCe0Zo7D-175909-5588',
- 'ext': 'mp4',
- 'title': 'Recap: CLE 15, CIN 6',
- 'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
- 'upload_date': '20160516',
- 'timestamp': 1463433840,
- 'duration': 49,
},
+ 'playlist_count': 9,
'params': {
# m3u8 download
'skip_download': True,
},
+ }, {
+ 'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+ 'info_dict': {
+ 'id': '3580809',
+ 'ext': 'mp4',
+ 'title': 'A Very Blue Anniversary',
+ 'description': 'CBS2’s Cindy Hsu has more.',
+ 'thumbnail': 're:^https?://.*',
+ 'timestamp': int,
+ 'upload_date': r're:^\d{8}$',
+ 'uploader': 'CBS',
+ 'subtitles': {
+ 'en': 'mincount:5',
+ },
+ 'categories': [
+ 'Stations\\Spoken Word\\WCBSTV',
+ 'Syndication\\AOL',
+ 'Syndication\\MSN',
+ 'Syndication\\NDN',
+ 'Syndication\\Yahoo',
+ 'Content\\News',
+ 'Content\\News\\Local News',
+ ],
+ 'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+ },
}]
def _real_extract(self, url):
sendtonews_url = SendtoNewsIE._extract_url(webpage)
if sendtonews_url:
- info_dict = {
- '_type': 'url_transparent',
- 'url': compat_urlparse.urljoin(url, sendtonews_url),
- }
- else:
- info_dict = self._extract_anvato_videos(webpage, display_id)
+ return self.url_result(
+ compat_urlparse.urljoin(url, sendtonews_url),
+ ie=SendtoNewsIE.ie_key())
+
+ info_dict = self._extract_anvato_videos(webpage, display_id)
- time_str = self._html_search_regex(
- r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
- timestamp = None
- if time_str:
- timestamp = calendar.timegm(datetime.datetime.strptime(
- time_str, '%b %d, %Y %I:%M %p').timetuple())
+ timestamp = unified_timestamp(self._html_search_regex(
+ r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
+ 'released date', default=None)) or parse_iso8601(
+ self._html_search_meta('uploadDate', webpage))
info_dict.update({
'display_id': display_id,