Merge tag 'upstream/2017.03.26'

[youtubedl] / youtube_dl / extractor / youtube.py
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py

index 76710931ae5e6a292af767f3f57685ad0be98cac..ca40de522bc5e341f2ac269db997a0e73914c127 100644 (file)
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -34,6 +34,7 @@ from ..utils import (
      int_or_none,
      mimetype2ext,
      orderedSet,
+    parse_codecs,
      parse_duration,
      remove_quotes,
      remove_start,
@@ -46,7 +47,6 @@ from ..utils import (
      unsmuggle_url,
      uppercase_escape,
      urlencode_postdata,
-    ISO3166Utils,
  )
  
  
@@ -59,6 +59,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
      # If True it will raise an error if no login info is provided
      _LOGIN_REQUIRED = False
  
+    _PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,}'
+
      def _set_language(self):
          self._set_cookie(
              '.youtube.com', 'PREF', 'f1=50000000&hl=en',
@@ -265,9 +267,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                           )
                       )?                                                       # all until now is optional -> you can pass the naked ID
                       ([0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID
-                     (?!.*?\blist=)                                            # combined list/video URLs are handled by the playlist IE
+                     (?!.*?\blist=
+                        (?:
+                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE
+                            WL                                                # WL are handled by the watch later IE
+                        )
+                     )
                       (?(1).+)?                                                # if we found the ID, everything can follow
-                     $"""
+                     $""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
      _NEXT_URL_RE = r'[\?&]next_url=([^&]+)'
      _formats = {
          '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'},
@@ -370,6 +377,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
      }
      _SUBTITLE_FORMATS = ('ttml', 'vtt')
  
+    _GEO_BYPASS = False
+
      IE_NAME = 'youtube'
      _TESTS = [
          {
@@ -916,7 +925,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
              # itag 212
              'url': '1t24XAntNCY',
              'only_matching': True,
-        }
+        },
+        {
+            # geo restricted to JP
+            'url': 'sJL6WA-aGkQ',
+            'only_matching': True,
+        },
+        {
+            'url': 'https://www.youtube.com/watch?v=MuAGGZNfUkU&list=RDMM',
+            'only_matching': True,
+        },
      ]
  
      def __init__(self, *args, **kwargs):
@@ -1375,11 +1393,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
          if 'token' not in video_info:
              if 'reason' in video_info:
                  if 'The uploader has not made this video available in your country.' in video_info['reason']:
-                    regions_allowed = self._html_search_meta('regionsAllowed', video_webpage, default=None)
-                    if regions_allowed:
-                        raise ExtractorError('YouTube said: This video is available in %s only' % (
-                            ', '.join(map(ISO3166Utils.short2full, regions_allowed.split(',')))),
-                            expected=True)
+                    regions_allowed = self._html_search_meta(
+                        'regionsAllowed', video_webpage, default=None)
+                    countries = regions_allowed.split(',') if regions_allowed else None
+                    self.raise_geo_restricted(
+                        msg=video_info['reason'][0], countries=countries)
                  raise ExtractorError(
                      'YouTube said: %s' % video_info['reason'][0],
                      expected=True, video_id=video_id)
@@ -1447,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
  
          # Check for "rental" videos
          if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
-            raise ExtractorError('"rental" videos not supported')
+            raise ExtractorError('"rental" videos not supported. See https://github.com/rg3/youtube-dl/issues/359 for more information.', expected=True)
  
          # Start extracting information
          self.report_information_extraction(video_id)
@@ -1696,15 +1714,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                                      codecs = mobj.group('val')
                                      break
                              if codecs:
-                                codecs = codecs.split(',')
-                                if len(codecs) == 2:
-                                    acodec, vcodec = codecs[1], codecs[0]
-                                else:
-                                    acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
-                                dct.update({
-                                    'acodec': acodec,
-                                    'vcodec': vcodec,
-                                })
+                                dct.update(parse_codecs(codecs))
                  formats.append(dct)
          elif video_info.get('hlsvp'):
              manifest_url = video_info['hlsvp'][0]
@@ -1852,7 +1862,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                          (?:
                              youtube\.com/
                              (?:
-                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries)
+                               (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/(?:videoseries|[0-9A-Za-z_-]{11}))
                                 \? (?:.*?[&;])*? (?:p|a|list)=
                              |  p/
                              )|
@@ -1865,8 +1875,8 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
                          )
                          .*
                       |
-                        ((?:PL|LL|EC|UU|FL|RD|UL|TL)[0-9A-Za-z-_]{10,})
-                     )"""
+                        (%(playlist_id)s)
+                     )""" % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE}
      _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&disable_polymer=true'
      _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&amp;[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?'
      IE_NAME = 'youtube:playlist'
@@ -1925,6 +1935,13 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
              'title': 'JODA15',
              'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',
          }
+    }, {
+        'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+        'playlist_mincount': 485,
+        'info_dict': {
+            'title': '2017 華語最新單曲 (2/24更新)',
+            'id': 'PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl',
+        }
      }, {
          'note': 'Embedded SWF player',
          'url': 'https://www.youtube.com/p/YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ?hl=en_US&fs=1&rel=0',
@@ -2073,7 +2090,7 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
          # Check if it's a video-specific URL
          query_dict = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
          video_id = query_dict.get('v', [None])[0] or self._search_regex(
-            r'(?:^|//)youtu\.be/([0-9A-Za-z_-]{11})', url,
+            r'(?:(?:^|//)youtu\.be/|youtube\.com/embed/(?!videoseries))([0-9A-Za-z_-]{11})', url,
              'video id', default=None)
          if video_id:
              if self._downloader.params.get('noplaylist'):
@@ -2233,7 +2250,7 @@ class YoutubeUserIE(YoutubeChannelIE):
          'url': 'https://www.youtube.com/gametrailers',
          'only_matching': True,
      }, {
-        # This channel is not available.
+        # This channel is not available, geo restricted to JP
          'url': 'https://www.youtube.com/user/kananishinoSMEJ/videos',
          'only_matching': True,
      }]