Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/cbslocal.py
Update upstream source from tag 'upstream/2019.01.17'
[youtubedl] / youtube_dl / extractor / cbslocal.py
index 4bcd104af7463b1cc4b9c6c88673cafc9ad655e7..90852a9ef9b7a6707ebcd1ade6cccb9ff9bbde5a 100644 (file)
@@ -4,11 +4,14 @@ from __future__ import unicode_literals
 from .anvato import AnvatoIE
 from .sendtonews import SendtoNewsIE
 from ..compat import compat_urlparse
 from .anvato import AnvatoIE
 from .sendtonews import SendtoNewsIE
 from ..compat import compat_urlparse
-from ..utils import unified_timestamp
+from ..utils import (
+    parse_iso8601,
+    unified_timestamp,
+)
 
 
 class CBSLocalIE(AnvatoIE):
 
 
 class CBSLocalIE(AnvatoIE):
-    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
 
     _TESTS = [{
         # Anvato backend
 
     _TESTS = [{
         # Anvato backend
@@ -22,6 +25,7 @@ class CBSLocalIE(AnvatoIE):
             'thumbnail': 're:^https?://.*',
             'timestamp': 1463440500,
             'upload_date': '20160516',
             'thumbnail': 're:^https?://.*',
             'timestamp': 1463440500,
             'upload_date': '20160516',
+            'uploader': 'CBS',
             'subtitles': {
                 'en': 'mincount:5',
             },
             'subtitles': {
                 'en': 'mincount:5',
             },
@@ -35,6 +39,7 @@ class CBSLocalIE(AnvatoIE):
                 'Syndication\\Curb.tv',
                 'Content\\News'
             ],
                 'Syndication\\Curb.tv',
                 'Content\\News'
             ],
+            'tags': ['CBS 2 News Evening'],
         },
     }, {
         # SendtoNews embed
         },
     }, {
         # SendtoNews embed
@@ -47,6 +52,31 @@ class CBSLocalIE(AnvatoIE):
             # m3u8 download
             'skip_download': True,
         },
             # m3u8 download
             'skip_download': True,
         },
+    }, {
+        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+        'info_dict': {
+            'id': '3580809',
+            'ext': 'mp4',
+            'title': 'A Very Blue Anniversary',
+            'description': 'CBS2’s Cindy Hsu has more.',
+            'thumbnail': 're:^https?://.*',
+            'timestamp': int,
+            'upload_date': r're:^\d{8}$',
+            'uploader': 'CBS',
+            'subtitles': {
+                'en': 'mincount:5',
+            },
+            'categories': [
+                'Stations\\Spoken Word\\WCBSTV',
+                'Syndication\\AOL',
+                'Syndication\\MSN',
+                'Syndication\\NDN',
+                'Syndication\\Yahoo',
+                'Content\\News',
+                'Content\\News\\Local News',
+            ],
+            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+        },
     }]
 
     def _real_extract(self, url):
     }]
 
     def _real_extract(self, url):
@@ -61,9 +91,10 @@ class CBSLocalIE(AnvatoIE):
 
         info_dict = self._extract_anvato_videos(webpage, display_id)
 
 
         info_dict = self._extract_anvato_videos(webpage, display_id)
 
-        time_str = self._html_search_regex(
-            r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
-        timestamp = unified_timestamp(time_str)
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
+            'released date', default=None)) or parse_iso8601(
+            self._html_search_meta('uploadDate', webpage))
 
         info_dict.update({
             'display_id': display_id,
 
         info_dict.update({
             'display_id': display_id,