]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/scrippsnetworks.py
Annotate changelog with bug being closed.
[youtubedl] / youtube_dl / extractor / scrippsnetworks.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import datetime
5 import json
6 import hashlib
7 import hmac
8 import re
9
10 from .common import InfoExtractor
11 from .anvato import AnvatoIE
12 from ..utils import (
13 smuggle_url,
14 urlencode_postdata,
15 xpath_text,
16 )
17
18
class ScrippsNetworksWatchIE(InfoExtractor):
    """Extractor for ``watch.<site>.com`` pages of the sites in ``_SNI_TABLE``.

    Extraction performs four HTTP round trips:

    1. POST to Cognito Identity (``GetOpenIdToken``) to get an OpenID token
       for the fixed identity in ``_AWS_IDENTITY_ID_JSON``.
    2. POST to STS (``AssumeRoleWithWebIdentity``) to exchange that token
       for temporary credentials.
    3. GET the episode record from ``_SNI_HOST``; the request is signed with
       AWS Signature Version 4 using those credentials.
    4. The ``mcpId`` of the first result is handed to ``AnvatoIE``.
    """

    IE_NAME = 'scrippsnetworks:watch'
    _VALID_URL = r'''(?x)
                    https?://
                        watch\.
                        (?P<site>hgtv|foodnetwork|travelchannel|diynetwork|cookingchanneltv|geniuskitchen)\.com/
                        (?:
                            player\.[A-Z0-9]+\.html\#|
                            show/(?:[^/]+/){2}|
                            player/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://watch.hgtv.com/show/HGTVE/Best-Ever-Treehouses/2241515/Best-Ever-Treehouses/',
        'md5': '26545fd676d939954c6808274bdb905a',
        'info_dict': {
            'id': '4173834',
            'ext': 'mp4',
            'title': 'Best Ever Treehouses',
            'description': "We're searching for the most over the top treehouses.",
            'uploader': 'ANV',
            'upload_date': '20170922',
            'timestamp': 1506056400,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [AnvatoIE.ie_key()],
    }, {
        'url': 'http://watch.diynetwork.com/show/DSAL/Salvage-Dawgs/2656646/Covington-Church/',
        'only_matching': True,
    }, {
        'url': 'http://watch.diynetwork.com/player.HNT.html#2656646',
        'only_matching': True,
    }, {
        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
        'only_matching': True,
    }]

    # Maps the <site> group of _VALID_URL to the brand segment of the API path.
    _SNI_TABLE = {
        'hgtv': 'hgtv',
        'diynetwork': 'diy',
        'foodnetwork': 'food',
        'cookingchanneltv': 'cook',
        'travelchannel': 'trav',
        'geniuskitchen': 'genius',
    }
    _SNI_HOST = 'web.api.video.snidigital.com'

    _AWS_REGION = 'us-east-1'
    _AWS_IDENTITY_ID_JSON = json.dumps({
        'IdentityId': '%s:7655847c-0ae7-4d9b-80d6-56c062927eb3' % _AWS_REGION
    })
    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'
    _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
    _AWS_SERVICE = 'execute-api'
    _AWS_REQUEST = 'aws4_request'
    # Must list exactly the headers that appear in the canonical request
    # template below, in the same (alphabetical) order.
    _AWS_SIGNED_HEADERS = ';'.join([
        'host', 'x-amz-date', 'x-amz-security-token', 'x-api-key'])
    # Line layout: method, URI, (empty) query string, one line per signed
    # header, blank separator, signed header list, payload hash.
    _AWS_CANONICAL_REQUEST_TEMPLATE = '''GET
%(uri)s

host:%(host)s
x-amz-date:%(date)s
x-amz-security-token:%(token)s
x-api-key:%(key)s

%(signed_headers)s
%(payload_hash)s'''

    @staticmethod
    def _aws_hash(s):
        """Return the hex SHA-256 digest of the text *s* encoded as UTF-8."""
        return hashlib.sha256(s.encode('utf-8')).hexdigest()

    def _aws_credentials(self, url, video_id):
        """Fetch temporary AWS credentials for the unauthenticated web role.

        *url* is sent as the Referer of both requests and *video_id* is only
        used for download progress/error reporting.

        Returns an ``(access_key_id, secret_access_key, session_token)``
        tuple; a missing element in the STS response is fatal.
        """
        token = self._download_json(
            # Built from _AWS_REGION so the endpoint cannot drift from the
            # region used for the identity id and the request signature.
            'https://cognito-identity.%s.amazonaws.com/' % self._AWS_REGION, video_id,
            data=self._AWS_IDENTITY_ID_JSON.encode('utf-8'),
            headers={
                'Accept': '*/*',
                'Content-Type': 'application/x-amz-json-1.1',
                'Referer': url,
                'X-Amz-Content-Sha256': self._aws_hash(self._AWS_IDENTITY_ID_JSON),
                'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
                'X-Amz-User-Agent': self._AWS_USER_AGENT,
            })['Token']

        sts = self._download_xml(
            'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
                'Action': 'AssumeRoleWithWebIdentity',
                'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
                'RoleSessionName': 'web-identity',
                'Version': '2011-06-15',
                'WebIdentityToken': token,
            }), headers={
                'Referer': url,
                'X-Amz-User-Agent': self._AWS_USER_AGENT,
                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
            })

        def get(key):
            return xpath_text(
                sts, './/{https://sts.amazonaws.com/doc/2011-06-15/}%s' % key,
                fatal=True)

        return get('AccessKeyId'), get('SecretAccessKey'), get('SessionToken')

    def _aws_authorization(self, uri, datetime_now, access_key_id,
                           secret_access_key, session_token):
        """Build the SigV4 ``Authorization`` header value for ``GET uri``.

        *datetime_now* is the ``%Y%m%dT%H%M%SZ`` timestamp that is also sent
        as ``X-Amz-Date``; its first eight characters are the credential
        scope date.  The request is assumed to have an empty query string
        and an empty body, as encoded in the canonical request template.
        """
        date = datetime_now[:8]

        # Task 1: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-canonical-request.html
        canonical_string = self._AWS_CANONICAL_REQUEST_TEMPLATE % {
            'uri': uri,
            'host': self._SNI_HOST,
            'date': datetime_now,
            'token': session_token,
            'key': self._AWS_API_KEY,
            'signed_headers': self._AWS_SIGNED_HEADERS,
            'payload_hash': self._aws_hash(''),
        }

        # Task 2: http://docs.aws.amazon.com/general/latest/gr/sigv4-create-string-to-sign.html
        credential_scope = '/'.join([
            date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST])
        string_to_sign = '\n'.join([
            'AWS4-HMAC-SHA256', datetime_now, credential_scope,
            self._aws_hash(canonical_string)])

        # Task 3: http://docs.aws.amazon.com/general/latest/gr/sigv4-calculate-signature.html
        def aws_hmac(key, msg):
            return hmac.new(key, msg.encode('utf-8'), hashlib.sha256)

        # Derive the signing key by chaining HMACs over each scope component;
        # every step uses the raw digest of the previous one as its key.
        k_signing = ('AWS4' + secret_access_key).encode('utf-8')
        for scope_part in (date, self._AWS_REGION, self._AWS_SERVICE, self._AWS_REQUEST):
            k_signing = aws_hmac(k_signing, scope_part).digest()

        signature = aws_hmac(k_signing, string_to_sign).hexdigest()

        return ', '.join([
            'AWS4-HMAC-SHA256 Credential=%s' % '/'.join(
                [access_key_id, credential_scope]),
            'SignedHeaders=%s' % self._AWS_SIGNED_HEADERS,
            'Signature=%s' % signature,
        ])

    def _real_extract(self, url):
        """Resolve *url* to an ``anvato:`` URL result handled by AnvatoIE."""
        mobj = re.match(self._VALID_URL, url)
        site_id, video_id = mobj.group('site', 'id')

        access_key_id, secret_access_key, session_token = self._aws_credentials(
            url, video_id)

        uri = '/1/web/brands/%s/episodes/scrid/%s' % (self._SNI_TABLE[site_id], video_id)
        # The same timestamp must be signed and sent as X-Amz-Date.
        datetime_now = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')
        auth_header = self._aws_authorization(
            uri, datetime_now, access_key_id, secret_access_key, session_token)

        mcp_id = self._download_json(
            'https://%s%s' % (self._SNI_HOST, uri), video_id, headers={
                'Accept': '*/*',
                'Referer': url,
                'Authorization': auth_header,
                'X-Amz-Date': datetime_now,
                'X-Amz-Security-Token': session_token,
                'X-Api-Key': self._AWS_API_KEY,
            })['results'][0]['mcpId']

        return self.url_result(
            smuggle_url(
                'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:%s' % mcp_id,
                {'geo_countries': ['US']}),
            AnvatoIE.ie_key(), video_id=mcp_id)