debian/control: Mark package compliant with Policy 4.3.0 (no changes needed).

[youtubedl] / youtube_dl / extractor / cbslocal.py
diff --git a/youtube_dl/extractor/cbslocal.py b/youtube_dl/extractor/cbslocal.py

index 74adb38a6cbac2cdda2a87dff49876389c9cf75e..90852a9ef9b7a6707ebcd1ade6cccb9ff9bbde5a 100644 (file)
--- a/youtube_dl/extractor/cbslocal.py
+++ b/youtube_dl/extractor/cbslocal.py
@@ -1,16 +1,17 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
-import calendar
-import datetime
-
  from .anvato import AnvatoIE
  from .sendtonews import SendtoNewsIE
  from ..compat import compat_urlparse
+from ..utils import (
+    parse_iso8601,
+    unified_timestamp,
+)
  
  
  class CBSLocalIE(AnvatoIE):
-    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/\d+/\d+/\d+/(?P<id>[0-9a-z-]+)'
+    _VALID_URL = r'https?://[a-z]+\.cbslocal\.com/(?:\d+/\d+/\d+|video)/(?P<id>[0-9a-z-]+)'
  
      _TESTS = [{
          # Anvato backend
@@ -24,6 +25,7 @@ class CBSLocalIE(AnvatoIE):
              'thumbnail': 're:^https?://.*',
              'timestamp': 1463440500,
              'upload_date': '20160516',
+            'uploader': 'CBS',
              'subtitles': {
                  'en': 'mincount:5',
              },
@@ -37,23 +39,44 @@ class CBSLocalIE(AnvatoIE):
                  'Syndication\\Curb.tv',
                  'Content\\News'
              ],
+            'tags': ['CBS 2 News Evening'],
          },
      }, {
          # SendtoNews embed
          'url': 'http://cleveland.cbslocal.com/2016/05/16/indians-score-season-high-15-runs-in-blowout-win-over-reds-rapid-reaction/',
          'info_dict': {
              'id': 'GxfCe0Zo7D-175909-5588',
-            'ext': 'mp4',
-            'title': 'Recap: CLE 15, CIN 6',
-            'description': '5/16/16: Indians\' bats explode for 15 runs in a win',
-            'upload_date': '20160516',
-            'timestamp': 1463433840,
-            'duration': 49,
          },
+        'playlist_count': 9,
          'params': {
              # m3u8 download
              'skip_download': True,
          },
+    }, {
+        'url': 'http://newyork.cbslocal.com/video/3580809-a-very-blue-anniversary/',
+        'info_dict': {
+            'id': '3580809',
+            'ext': 'mp4',
+            'title': 'A Very Blue Anniversary',
+            'description': 'CBS2’s Cindy Hsu has more.',
+            'thumbnail': 're:^https?://.*',
+            'timestamp': int,
+            'upload_date': r're:^\d{8}$',
+            'uploader': 'CBS',
+            'subtitles': {
+                'en': 'mincount:5',
+            },
+            'categories': [
+                'Stations\\Spoken Word\\WCBSTV',
+                'Syndication\\AOL',
+                'Syndication\\MSN',
+                'Syndication\\NDN',
+                'Syndication\\Yahoo',
+                'Content\\News',
+                'Content\\News\\Local News',
+            ],
+            'tags': ['CBS 2 News Weekends', 'Cindy Hsu', 'Blue Man Group'],
+        },
      }]
  
      def _real_extract(self, url):
@@ -62,19 +85,16 @@ class CBSLocalIE(AnvatoIE):
  
          sendtonews_url = SendtoNewsIE._extract_url(webpage)
          if sendtonews_url:
-            info_dict = {
-                '_type': 'url_transparent',
-                'url': compat_urlparse.urljoin(url, sendtonews_url),
-            }
-        else:
-            info_dict = self._extract_anvato_videos(webpage, display_id)
+            return self.url_result(
+                compat_urlparse.urljoin(url, sendtonews_url),
+                ie=SendtoNewsIE.ie_key())
+
+        info_dict = self._extract_anvato_videos(webpage, display_id)
  
-        time_str = self._html_search_regex(
-            r'class="entry-date">([^<]+)<', webpage, 'released date', fatal=False)
-        timestamp = None
-        if time_str:
-            timestamp = calendar.timegm(datetime.datetime.strptime(
-                time_str, '%b %d, %Y %I:%M %p').timetuple())
+        timestamp = unified_timestamp(self._html_search_regex(
+            r'class="(?:entry|post)-date"[^>]*>([^<]+)', webpage,
+            'released date', default=None)) or parse_iso8601(
+            self._html_search_meta('uploadDate', webpage))
  
          info_dict.update({
              'display_id': display_id,