Use canonical URL in Vcs-Git.

[youtubedl] / youtube_dl / extractor / scrippsnetworks.py
diff --git a/youtube_dl/extractor/scrippsnetworks.py b/youtube_dl/extractor/scrippsnetworks.py

index 597d6f543a362791a0af0d111156bdac79ac4270..b40b4c4afded1b6f9541d60b3b2d3fb5fe0c5973 100644 (file)
--- a/youtube_dl/extractor/scrippsnetworks.py
+++ b/youtube_dl/extractor/scrippsnetworks.py
@@ -1,60 +1,152 @@
  # coding: utf-8
  from __future__ import unicode_literals
  
-from .adobepass import AdobePassIE
+import json
+import hashlib
+import re
+
+from .aws import AWSIE
+from .anvato import AnvatoIE
+from .common import InfoExtractor
  from ..utils import (
-    int_or_none,
      smuggle_url,
-    update_url_query,
+    urlencode_postdata,
+    xpath_text,
  )
  
  
-class ScrippsNetworksWatchIE(AdobePassIE):
+class ScrippsNetworksWatchIE(AWSIE):
      IE_NAME = 'scrippsnetworks:watch'
-    _VALID_URL = r'https?://watch\.(?:hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv)\.com/player\.[A-Z0-9]+\.html#(?P<id>\d+)'
-    _TEST = {
-        'url': 'http://watch.hgtv.com/player.HNT.html#0256538',
-        'md5': '26545fd676d939954c6808274bdb905a',
+    _VALID_URL = r'''(?x)
+                    https?://
+                        watch\.
+                        (?P<site>geniuskitchen)\.com/
+                        (?:
+                            player\.[A-Z0-9]+\.html\#|
+                            show/(?:[^/]+/){2}|
+                            player/
+                        )
+                        (?P<id>\d+)
+                    '''
+    _TESTS = [{
+        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
+        'info_dict': {
+            'id': '4194875',
+            'ext': 'mp4',
+            'title': 'Ample Hills Ice Cream Bike',
+            'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
+            'uploader': 'ANV',
+            'upload_date': '20171011',
+            'timestamp': 1507698000,
+        },
+        'params': {
+            'skip_download': True,
+        },
+        'add_ie': [AnvatoIE.ie_key()],
+    }]
+
+    _SNI_TABLE = {
+        'geniuskitchen': 'genius',
+    }
+
+    _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
+    _AWS_PROXY_HOST = 'web.api.video.snidigital.com'
+
+    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        site_id, video_id = mobj.group('site', 'id')
+
+        aws_identity_id_json = json.dumps({
+            'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % self._AWS_REGION
+        }).encode('utf-8')
+        token = self._download_json(
+            'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
+            data=aws_identity_id_json,
+            headers={
+                'Accept': '*/*',
+                'Content-Type': 'application/x-amz-json-1.1',
+                'Referer': url,
+                'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
+                'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
+                'X-Amz-User-Agent': self._AWS_USER_AGENT,
+            })['Token']
+
+        sts = self._download_xml(
+            'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
+                'Action': 'AssumeRoleWithWebIdentity',
+                'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
+                'RoleSessionName': 'web-identity',
+                'Version': '2011-06-15',
+                'WebIdentityToken': token,
+            }), headers={
+                'Referer': url,
+                'X-Amz-User-Agent': self._AWS_USER_AGENT,
+                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
+            })
+
+        def get(key):
+            return xpath_text(
+                sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
+                fatal=True)
+
+        mcp_id = self._aws_execute_api({
+            'uri': '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id),
+            'access_key': get('AccessKeyId'),
+            'secret_key': get('SecretAccessKey'),
+            'session_token': get('SessionToken'),
+        }, video_id)['results'][0]['mcpId']
+
+        return self.url_result(
+            smuggle_url(
+                'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
+                {'geo_countries': ['US']}),
+            AnvatoIE.ie_key(), video_id=mcp_id)
+
+
+class ScrippsNetworksIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
+    _TESTS = [{
+        'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
          'info_dict': {
-            'id': '0256538',
+            'id': '0260338',
              'ext': 'mp4',
-            'title': 'Seeking a Wow House',
-            'description': 'Buyers retiring in Palm Springs, California, want a modern house with major wow factor. They\'re also looking for a pool and a large, open floorplan with tall windows looking out at the views.',
-            'uploader': 'SCNI',
-            'upload_date': '20170207',
-            'timestamp': 1486450493,
+            'title': 'The Best of the Best',
+            'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
+            'timestamp': 1475678834,
+            'upload_date': '20161005',
+            'uploader': 'SCNI-SCND',
          },
-        'skip': 'requires TV provider authentication',
+        'add_ie': ['ThePlatform'],
+    }, {
+        'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
+        'only_matching': True,
+    }]
+    _ACCOUNT_MAP = {
+        'cookingchanneltv': 2433005105,
+        'discovery': 2706091867,
+        'diynetwork': 2433004575,
+        'foodnetwork': 2433005105,
+        'hgtv': 2433004575,
+        'travelchannel': 2433005739,
      }
+    _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'
  
      def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        channel = self._parse_json(self._search_regex(
-            r'"channels"\s*:\s*(\[.+\])',
-            webpage, 'channels'), video_id)[0]
-        video_data = next(v for v in channel['videos'] if v.get('nlvid') == video_id)
-        title = video_data['title']
-        release_url = video_data['releaseUrl']
-        if video_data.get('restricted'):
-            requestor_id = self._search_regex(
-                r'requestorId\s*=\s*"([^"]+)";', webpage, 'requestor id')
-            resource = self._get_mvpd_resource(
-                requestor_id, title, video_id,
-                video_data.get('ratings', [{}])[0].get('rating'))
-            auth = self._extract_mvpd_auth(
-                url, video_id, requestor_id, resource)
-            release_url = update_url_query(release_url, {'auth': auth})
-
-        return {
-            '_type': 'url_transparent',
-            'id': video_id,
-            'title': title,
-            'url': smuggle_url(release_url, {'force_smil_url': True}),
-            'description': video_data.get('description'),
-            'thumbnail': video_data.get('thumbnailUrl'),
-            'series': video_data.get('showTitle'),
-            'season_number': int_or_none(video_data.get('season')),
-            'episode_number': int_or_none(video_data.get('episodeNumber')),
-            'ie_key': 'ThePlatform',
-        }
+        site, guid = re.match(self._VALID_URL, url).groups()
+        return self.url_result(smuggle_url(
+            self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
+            {'force_smil_url': True}), 'ThePlatform', guid)