New upstream version 2019.06.08

[youtubedl] / youtube_dl / extractor / niconico.py
diff --git a/youtube_dl/extractor/niconico.py b/youtube_dl/extractor/niconico.py
index 026329d3ea4210e2d17af374b67c4a87af252ee1..eb07ca7765e6ccfe08f856f44647f1ea7d7f706f 100644
--- a/youtube_dl/extractor/niconico.py
+++ b/youtube_dl/extractor/niconico.py
@@ -40,7 +40,7 @@ class NiconicoIE(InfoExtractor):
              'uploader': 'takuya0301',
              'uploader_id': '2698420',
              'upload_date': '20131123',
-            'timestamp': 1385182762,
+            'timestamp': int,  # timestamp is unstable
              'description': '(c) copyright 2008, Blender Foundation / www.bigbuckbunny.org',
              'duration': 33,
              'view_count': int,
@@ -115,8 +115,8 @@ class NiconicoIE(InfoExtractor):
          'skip': 'Requires an account',
      }, {
          # "New" HTML5 video
+        # md5 is unstable
          'url': 'http://www.nicovideo.jp/watch/sm31464864',
-        'md5': '351647b4917660986dc0fa8864085135',
          'info_dict': {
              'id': 'sm31464864',
              'ext': 'mp4',
@@ -124,7 +124,7 @@ class NiconicoIE(InfoExtractor):
              'description': 'md5:e52974af9a96e739196b2c1ca72b5feb',
              'timestamp': 1498514060,
              'upload_date': '20170626',
-            'uploader': 'ゲス',
+            'uploader': 'ゲスト',
              'uploader_id': '40826363',
              'thumbnail': r're:https?://.*',
              'duration': 198,
@@ -132,6 +132,25 @@ class NiconicoIE(InfoExtractor):
              'comment_count': int,
          },
          'skip': 'Requires an account',
+    }, {
+        # Video without owner
+        'url': 'http://www.nicovideo.jp/watch/sm18238488',
+        'md5': 'd265680a1f92bdcbbd2a507fc9e78a9e',
+        'info_dict': {
+            'id': 'sm18238488',
+            'ext': 'mp4',
+            'title': '【実写版】ミュータントタートルズ',
+            'description': 'md5:15df8988e47a86f9e978af2064bf6d8e',
+            'timestamp': 1341160408,
+            'upload_date': '20120701',
+            'uploader': None,
+            'uploader_id': None,
+            'thumbnail': r're:https?://.*',
+            'duration': 5271,
+            'view_count': int,
+            'comment_count': int,
+        },
+        'skip': 'Requires an account',
      }, {
          'url': 'http://sp.nicovideo.jp/watch/sm28964488?ss_pos=1&cp_in=wt_tg',
          'only_matching': True,
@@ -144,7 +163,7 @@ class NiconicoIE(InfoExtractor):
          self._login()
  
      def _login(self):
-        (username, password) = self._get_login_info()
+        username, password = self._get_login_info()
          # No authentication to be performed
          if not username:
              return True
@@ -233,7 +252,7 @@ class NiconicoIE(InfoExtractor):
                      },
                      'timing_constraint': 'unlimited'
                  }
-            }))
+            }).encode())
  
          resolution = video_quality.get('resolution', {})
  
@@ -350,14 +369,14 @@ class NiconicoIE(InfoExtractor):
          video_detail = watch_api_data.get('videoDetail', {})
  
          thumbnail = (
-            get_video_info(['thumbnail_url', 'thumbnailURL']) or
-            self._html_search_meta('image', webpage, 'thumbnail', default=None) or
-            video_detail.get('thumbnail'))
+            get_video_info(['thumbnail_url', 'thumbnailURL'])
+            or self._html_search_meta('image', webpage, 'thumbnail', default=None)
+            or video_detail.get('thumbnail'))
  
          description = get_video_info('description')
  
-        timestamp = (parse_iso8601(get_video_info('first_retrieve')) or
-                     unified_timestamp(get_video_info('postedDateTime')))
+        timestamp = (parse_iso8601(get_video_info('first_retrieve'))
+                     or unified_timestamp(get_video_info('postedDateTime')))
          if not timestamp:
              match = self._html_search_meta('datePublished', webpage, 'date published', default=None)
              if match:
@@ -376,9 +395,9 @@ class NiconicoIE(InfoExtractor):
                  view_count = int_or_none(match.replace(',', ''))
          view_count = view_count or video_detail.get('viewCount')
  
-        comment_count = (int_or_none(get_video_info('comment_num')) or
-                         video_detail.get('commentCount') or
-                         try_get(api_data, lambda x: x['thread']['commentCount']))
+        comment_count = (int_or_none(get_video_info('comment_num'))
+                         or video_detail.get('commentCount')
+                         or try_get(api_data, lambda x: x['thread']['commentCount']))
          if not comment_count:
              match = self._html_search_regex(
                  r'>Comments: <strong[^>]*>([^<]+)</strong>',
@@ -387,15 +406,17 @@ class NiconicoIE(InfoExtractor):
                  comment_count = int_or_none(match.replace(',', ''))
  
          duration = (parse_duration(
-            get_video_info('length') or
-            self._html_search_meta(
-                'video:duration', webpage, 'video duration', default=None)) or
-            video_detail.get('length') or
-            get_video_info('duration'))
+            get_video_info('length')
+            or self._html_search_meta(
+                'video:duration', webpage, 'video duration', default=None))
+            or video_detail.get('length')
+            or get_video_info('duration'))
  
          webpage_url = get_video_info('watch_url') or url
  
-        owner = api_data.get('owner', {})
+        # Note: cannot use api_data.get('owner', {}) because owner may be set to "null"
+        # in the JSON, which will cause None to be returned instead of {}.
+        owner = try_get(api_data, lambda x: x.get('owner'), dict) or {}
          uploader_id = get_video_info(['ch_id', 'user_id']) or owner.get('id')
          uploader = get_video_info(['ch_name', 'user_nickname']) or owner.get('nickname')