debian/copyright: Use HTTPS for upstream's source.
[youtubedl] / youtube_dl / extractor / vevo.py
index c4e37f694426c175b1f33d7795ca01baf6f7547b..890a149ead3234428a3f33d78d8af43365a278e2 100644 (file)
@@ -1,6 +1,7 @@
 from __future__ import unicode_literals
 
 import re
+import json
 
 from .common import InfoExtractor
 from ..compat import (
@@ -11,18 +12,17 @@ from ..compat import (
 from ..utils import (
     ExtractorError,
     int_or_none,
-    sanitized_Request,
     parse_iso8601,
 )
 
 
 class VevoBaseIE(InfoExtractor):
-    def _extract_json(self, webpage, video_id, item):
+    def _extract_json(self, webpage, video_id):
         return self._parse_json(
             self._search_regex(
                 r'window\.__INITIAL_STORE__\s*=\s*({.+?});\s*</script>',
                 webpage, 'initial store'),
-            video_id)['default'][item]
+            video_id)
 
 
 class VevoIE(VevoBaseIE):
@@ -139,6 +139,11 @@ class VevoIE(VevoBaseIE):
         # no genres available
         'url': 'http://www.vevo.com/watch/INS171400764',
         'only_matching': True,
+    }, {
+        # Another case available only via the webpage; using streams/streamsV3 formats
+        # Geo-restricted to Netherlands/Germany
+        'url': 'http://www.vevo.com/watch/boostee/pop-corn-clip-officiel/FR1A91600909',
+        'only_matching': True,
     }]
     _VERSIONS = {
         0: 'youtube',  # only in AuthenticateVideo videoVersions
@@ -149,19 +154,24 @@ class VevoIE(VevoBaseIE):
     }
 
     def _initialize_api(self, video_id):
-        req = sanitized_Request(
-            'http://www.vevo.com/auth', data=b'')
         webpage = self._download_webpage(
-            req, None,
+            'https://accounts.vevo.com/token', None,
             note='Retrieving oauth token',
-            errnote='Unable to retrieve oauth token')
+            errnote='Unable to retrieve oauth token',
+            data=json.dumps({
+                'client_id': 'SPupX1tvqFEopQ1YS6SS',
+                'grant_type': 'urn:vevo:params:oauth:grant-type:anonymous',
+            }).encode('utf-8'),
+            headers={
+                'Content-Type': 'application/json',
+            })
 
         if re.search(r'(?i)THIS PAGE IS CURRENTLY UNAVAILABLE IN YOUR REGION', webpage):
             self.raise_geo_restricted(
                 '%s said: This page is currently unavailable in your region' % self.IE_NAME)
 
         auth_info = self._parse_json(webpage, video_id)
-        self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['access_token']
+        self._api_url_template = self.http_scheme() + '//apiv2.vevo.com/%s?token=' + auth_info['legacy_token']
 
     def _call_api(self, path, *args, **kwargs):
         try:
@@ -193,7 +203,14 @@ class VevoIE(VevoBaseIE):
         # https://github.com/rg3/youtube-dl/issues/9366)
         if not video_versions:
             webpage = self._download_webpage(url, video_id)
-            video_versions = self._extract_json(webpage, video_id, 'streams')[video_id][0]
+            json_data = self._extract_json(webpage, video_id)
+            if 'streams' in json_data.get('default', {}):
+                video_versions = json_data['default']['streams'][video_id][0]
+            else:
+                video_versions = [
+                    value
+                    for key, value in json_data['apollo']['data'].items()
+                    if key.startswith('%s.streams' % video_id)]
 
         uploader = None
         artist = None
@@ -207,7 +224,7 @@ class VevoIE(VevoBaseIE):
 
         formats = []
         for video_version in video_versions:
-            version = self._VERSIONS.get(video_version['version'])
+            version = self._VERSIONS.get(video_version.get('version'), 'generic')
             version_url = video_version.get('url')
             if not version_url:
                 continue
@@ -339,7 +356,7 @@ class VevoPlaylistIE(VevoBaseIE):
             if video_id:
                 return self.url_result('vevo:%s' % video_id, VevoIE.ie_key())
 
-        playlists = self._extract_json(webpage, playlist_id, '%ss' % playlist_kind)
+        playlists = self._extract_json(webpage, playlist_id)['default']['%ss' % playlist_kind]
 
         playlist = (list(playlists.values())[0]
                     if playlist_kind == 'playlist' else playlists[playlist_id])