Use canonical URL in Vcs-Git.

[youtubedl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index eacaa5ecdb70d2a16748b4c2e58edc14d7d69484..908defecd3f24ef4070c46f6e8d195aea625ab4b 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -29,7 +29,6 @@ from ..compat import (
  from ..utils import (
      bool_or_none,
      clean_html,
-    dict_get,
      error_to_compat_str,
      extract_attributes,
      ExtractorError,
@@ -570,7 +569,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'upload_date': '20120506',
                  'title': 'Icona Pop - I Love It (feat. Charli XCX) [OFFICIAL VIDEO]',
                  'alt_title': 'I Love It (feat. Charli XCX)',
-                'description': 'md5:f3ceb5ef83a08d95b9d146f973157cc8',
+                'description': 'md5:19a2f98d9032b9311e686ed039564f63',
                  'tags': ['Icona Pop i love it', 'sweden', 'pop music', 'big beat records', 'big beat', 'charli',
                           'xcx', 'charli xcx', 'girls', 'hbo', 'i love it', "i don't care", 'icona', 'pop',
                           'iconic ep', 'iconic', 'love', 'it'],
@@ -685,12 +684,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'id': 'nfWlot6h_JM',
                  'ext': 'm4a',
                  'title': 'Taylor Swift - Shake It Off',
-                'description': 'md5:bec2185232c05479482cb5a9b82719bf',
+                'description': 'md5:307195cd21ff7fa352270fe884570ef0',
                  'duration': 242,
                  'uploader': 'TaylorSwiftVEVO',
                  'uploader_id': 'TaylorSwiftVEVO',
                  'upload_date': '20140818',
-                'creator': 'Taylor Swift',
              },
              'params': {
                  'youtube_include_dash_manifest': True,
@@ -755,11 +753,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'upload_date': '20100430',
                  'uploader_id': 'deadmau5',
                  'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/deadmau5',
-                'creator': 'deadmau5',
+                'creator': 'Dada Life, deadmau5',
                  'description': 'md5:12c56784b8032162bb936a5f76d55360',
                  'uploader': 'deadmau5',
                  'title': 'Deadmau5 - Some Chords (HD)',
-                'alt_title': 'Some Chords',
+                'alt_title': 'This Machine Kills Some Chords',
              },
              'expected_warnings': [
                  'DASH manifest missing',
@@ -1135,6 +1133,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'skip_download': True,
                  'youtube_include_dash_manifest': False,
              },
+            'skip': 'not actual anymore',
          },
          {
              # Youtube Music Auto-generated description
@@ -1145,8 +1144,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'title': 'Voyeur Girl',
                  'description': 'md5:7ae382a65843d6df2685993e90a8628f',
                  'upload_date': '20190312',
-                'uploader': 'Various Artists - Topic',
-                'uploader_id': 'UCVWKBi1ELZn0QX2CBLSkiyw',
+                'uploader': 'Stephen - Topic',
+                'uploader_id': 'UC-pWHpBjdGG69N9mM2auIAA',
                  'artist': 'Stephen',
                  'track': 'Voyeur Girl',
                  'album': 'it\'s too much love to know my dear',
@@ -1210,7 +1209,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'id': '-hcAI0g-f5M',
                  'ext': 'mp4',
                  'title': 'Put It On Me',
-                'description': 'md5:93c55acc682ae7b0c668f2e34e1c069e',
+                'description': 'md5:f6422397c07c4c907c6638e1fee380a5',
                  'upload_date': '20180426',
                  'uploader': 'Matt Maeson - Topic',
                  'uploader_id': 'UCnEkIGqtGcQMLk73Kp-Q5LQ',
@@ -1256,7 +1255,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
  
      def _extract_signature_function(self, video_id, player_url, example_sig):
          id_m = re.match(
-            r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
+            r'.*?[-.](?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|(?:/[a-z]{2,3}_[A-Z]{2})?/base)?\.(?P<ext>[a-z]+)$',
              player_url)
          if not id_m:
              raise ExtractorError('Cannot identify player %r' % player_url)
@@ -1708,9 +1707,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          def extract_view_count(v_info):
              return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
  
-        def extract_token(v_info):
-            return dict_get(v_info, ('account_playback_token', 'accountPlaybackToken', 'token'))
-
          def extract_player_response(player_response, video_id):
              pl_response = str_or_none(player_response)
              if not pl_response:
@@ -1723,6 +1719,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          player_response = {}
  
          # Get video info
+        video_info = {}
          embed_webpage = None
          if re.search(r'player-age-gate-content">', video_webpage) is not None:
              age_gate = True
@@ -1737,19 +1734,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                      r'"sts"\s*:\s*(\d+)', embed_webpage, 'sts', default=''),
              })
              video_info_url = proto + '://www.youtube.com/get_video_info?' + data
-            video_info_webpage = self._download_webpage(
-                video_info_url, video_id,
-                note='Refetching age-gated info webpage',
-                errnote='unable to download video info webpage')
-            video_info = compat_parse_qs(video_info_webpage)
-            pl_response = video_info.get('player_response', [None])[0]
-            player_response = extract_player_response(pl_response, video_id)
-            add_dash_mpd(video_info)
-            view_count = extract_view_count(video_info)
+            try:
+                video_info_webpage = self._download_webpage(
+                    video_info_url, video_id,
+                    note='Refetching age-gated info webpage',
+                    errnote='unable to download video info webpage')
+            except ExtractorError:
+                video_info_webpage = None
+            if video_info_webpage:
+                video_info = compat_parse_qs(video_info_webpage)
+                pl_response = video_info.get('player_response', [None])[0]
+                player_response = extract_player_response(pl_response, video_id)
+                add_dash_mpd(video_info)
+                view_count = extract_view_count(video_info)
          else:
              age_gate = False
-            video_info = None
-            sts = None
              # Try looking directly into the video webpage
              ytplayer_config = self._get_ytplayer_config(video_id, video_webpage)
              if ytplayer_config:
@@ -1766,61 +1765,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          args['ypc_vid'], YoutubeIE.ie_key(), video_id=args['ypc_vid'])
                  if args.get('livestream') == '1' or args.get('live_playback') == 1:
                      is_live = True
-                sts = ytplayer_config.get('sts')
                  if not player_response:
                      player_response = extract_player_response(args.get('player_response'), video_id)
              if not video_info or self._downloader.params.get('youtube_include_dash_manifest', True):
                  add_dash_mpd_pr(player_response)
-                # We also try looking in get_video_info since it may contain different dashmpd
-                # URL that points to a DASH manifest with possibly different itag set (some itags
-                # are missing from DASH manifest pointed by webpage's dashmpd, some - from DASH
-                # manifest pointed by get_video_info's dashmpd).
-                # The general idea is to take a union of itags of both DASH manifests (for example
-                # video with such 'manifest behavior' see https://github.com/ytdl-org/youtube-dl/issues/6093)
-                self.report_video_info_webpage_download(video_id)
-                for el in ('embedded', 'detailpage', 'vevo', ''):
-                    query = {
-                        'video_id': video_id,
-                        'ps': 'default',
-                        'eurl': '',
-                        'gl': 'US',
-                        'hl': 'en',
-                    }
-                    if el:
-                        query['el'] = el
-                    if sts:
-                        query['sts'] = sts
-                    video_info_webpage = self._download_webpage(
-                        '%s://www.youtube.com/get_video_info' % proto,
-                        video_id, note=False,
-                        errnote='unable to download video info webpage',
-                        fatal=False, query=query)
-                    if not video_info_webpage:
-                        continue
-                    get_video_info = compat_parse_qs(video_info_webpage)
-                    if not player_response:
-                        pl_response = get_video_info.get('player_response', [None])[0]
-                        player_response = extract_player_response(pl_response, video_id)
-                    add_dash_mpd(get_video_info)
-                    if view_count is None:
-                        view_count = extract_view_count(get_video_info)
-                    if not video_info:
-                        video_info = get_video_info
-                    get_token = extract_token(get_video_info)
-                    if get_token:
-                        # Different get_video_info requests may report different results, e.g.
-                        # some may report video unavailability, but some may serve it without
-                        # any complaint (see https://github.com/ytdl-org/youtube-dl/issues/7362,
-                        # the original webpage as well as el=info and el=embedded get_video_info
-                        # requests report video unavailability due to geo restriction while
-                        # el=detailpage succeeds and returns valid data). This is probably
-                        # due to YouTube measures against IP ranges of hosting providers.
-                        # Working around by preferring the first succeeded video_info containing
-                        # the token if no such video_info yet was found.
-                        token = extract_token(video_info)
-                        if not token:
-                            video_info = get_video_info
-                        break
  
          def extract_unavailable_message():
              messages = []
@@ -1833,13 +1781,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              if messages:
                  return '\n'.join(messages)
  
-        if not video_info:
+        if not video_info and not player_response:
              unavailable_message = extract_unavailable_message()
              if not unavailable_message:
                  unavailable_message = 'Unable to extract video data'
              raise ExtractorError(
                  'YouTube said: %s' % unavailable_message, expected=True, video_id=video_id)
  
+        if not isinstance(video_info, dict):
+            video_info = {}
+
          video_details = try_get(
              player_response, lambda x: x['videoDetails'], dict) or {}
  
@@ -2035,7 +1986,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                  else:
                                      player_version = self._search_regex(
                                          [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
-                                         r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
+                                         r'(?:www|player(?:_ias)?)[-.]([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
                                          player_url,
                                          'html5 player', fatal=False)
                                      player_desc = 'html5 player %s' % player_version
@@ -2392,30 +2343,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                          f['stretched_ratio'] = ratio
  
          if not formats:
-            token = extract_token(video_info)
-            if not token:
-                if 'reason' in video_info:
-                    if 'The uploader has not made this video available in your country.' in video_info['reason']:
-                        regions_allowed = self._html_search_meta(
-                            'regionsAllowed', video_webpage, default=None)
-                        countries = regions_allowed.split(',') if regions_allowed else None
-                        self.raise_geo_restricted(
-                            msg=video_info['reason'][0], countries=countries)
-                    reason = video_info['reason'][0]
-                    if 'Invalid parameters' in reason:
-                        unavailable_message = extract_unavailable_message()
-                        if unavailable_message:
-                            reason = unavailable_message
-                    raise ExtractorError(
-                        'YouTube said: %s' % reason,
-                        expected=True, video_id=video_id)
-                else:
-                    raise ExtractorError(
-                        '"token" parameter not in video info for unknown reason',
-                        video_id=video_id)
-
-        if not formats and (video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos'])):
-            raise ExtractorError('This video is DRM protected.', expected=True)
+            if 'reason' in video_info:
+                if 'The uploader has not made this video available in your country.' in video_info['reason']:
+                    regions_allowed = self._html_search_meta(
+                        'regionsAllowed', video_webpage, default=None)
+                    countries = regions_allowed.split(',') if regions_allowed else None
+                    self.raise_geo_restricted(
+                        msg=video_info['reason'][0], countries=countries)
+                reason = video_info['reason'][0]
+                if 'Invalid parameters' in reason:
+                    unavailable_message = extract_unavailable_message()
+                    if unavailable_message:
+                        reason = unavailable_message
+                raise ExtractorError(
+                    'YouTube said: %s' % reason,
+                    expected=True, video_id=video_id)
+            if video_info.get('license_info') or try_get(player_response, lambda x: x['streamingData']['licenseInfos']):
+                raise ExtractorError('This video is DRM protected.', expected=True)
  
          self._sort_formats(formats)
  
@@ -2495,20 +2439,23 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
      _VIDEO_RE = _VIDEO_RE_TPL % r'(?P<id>[0-9A-Za-z_-]{11})'
      IE_NAME = 'youtube:playlist'
      _TESTS = [{
-        'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
          'info_dict': {
-            'title': 'ytdl test PL',
-            'id': 'PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re',
+            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+            'uploader': 'Sergey M.',
+            'id': 'PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc',
+            'title': 'youtube-dl public playlist',
          },
-        'playlist_count': 3,
+        'playlist_count': 1,
      }, {
-        'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
+        'url': 'https://www.youtube.com/playlist?list=PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
          'info_dict': {
-            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',
-            'title': 'YDL_Empty_List',
+            'uploader_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
+            'uploader': 'Sergey M.',
+            'id': 'PL4lCao7KL_QFodcLWhDpGCYnngnHtQ-Xf',
+            'title': 'youtube-dl empty playlist',
          },
          'playlist_count': 0,
-        'skip': 'This playlist is private',
      }, {
          'note': 'Playlist with deleted videos (#651). As a bonus, the video #51 is also twice in this list.',
          'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',
@@ -2518,7 +2465,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
              'uploader': 'Christiaan008',
              'uploader_id': 'ChRiStIaAn008',
          },
-        'playlist_count': 95,
+        'playlist_count': 96,
      }, {
          'note': 'issue #673',
          'url': 'PLBB231211A4F62143',