]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/crackle.py
2 from __future__
import unicode_literals
, division
9 from .common
import InfoExtractor
10 from ..compat
import compat_HTTPError
22 class CrackleIE(InfoExtractor
):
23 _VALID_URL
= r
'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
25 # geo restricted to CA
26 'url': 'https://www.crackle.com/andromeda/2502343',
30 'title': 'Under The Night',
31 'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
36 'genre': 'Action, Sci-Fi',
37 'creator': 'Allan Kroeker',
38 'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
40 'series': 'Andromeda',
41 'episode': 'Under The Night',
47 'skip_download': True,
50 'url': 'https://www.sonycrackle.com/andromeda/2502343',
51 'only_matching': True,
69 def _real_extract(self
, url
):
70 video_id
= self
._match
_id
(url
)
72 country_code
= self
._downloader
.params
.get('geo_bypass_country', None)
73 countries
= [country_code
] if country_code
else (
74 'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')
78 for country
in countries
:
80 # Authorization generation algorithm is reverse engineered from:
81 # https://www.sonycrackle.com/static/js/main.ea93451f.chunk.js
82 media_detail_url
= 'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s?disableProtocols=true' % (video_id
, country
)
83 timestamp
= time
.strftime('%Y%m%d%H%M', time
.gmtime())
84 h
= hmac
.new(b
'IGSLUQCBDFHEOIFM', '|'.join([media_detail_url
, timestamp
]).encode(), hashlib
.sha1
).hexdigest().upper()
85 media
= self
._download
_json
(
86 media_detail_url
, video_id
, 'Downloading media JSON as %s' % country
,
87 'Unable to download media JSON', headers
={
88 'Accept': 'application/json',
89 'Authorization': '|'.join([h
, timestamp
, '117', '1']),
91 except ExtractorError
as e
:
92 # 401 means geo restriction, trying next country
93 if isinstance(e
.cause
, compat_HTTPError
) and e
.cause
.code
== 401:
98 media_urls
= media
.get('MediaURLs')
99 if not media_urls
or not isinstance(media_urls
, list):
102 title
= media
['Title']
105 for e
in media
['MediaURLs']:
106 if e
.get('UseDRM') is True:
108 format_url
= url_or_none(e
.get('Path'))
111 ext
= determine_ext(format_url
)
113 formats
.extend(self
._extract
_m
3u8_formats
(
114 format_url
, video_id
, 'mp4', entry_protocol
='m3u8_native',
115 m3u8_id
='hls', fatal
=False))
117 formats
.extend(self
._extract
_mpd
_formats
(
118 format_url
, video_id
, mpd_id
='dash', fatal
=False))
119 elif format_url
.endswith('.ism/Manifest'):
120 formats
.extend(self
._extract
_ism
_formats
(
121 format_url
, video_id
, ism_id
='mss', fatal
=False))
123 mfs_path
= e
.get('Type')
124 mfs_info
= self
._MEDIA
_FILE
_SLOTS
.get(mfs_path
)
129 'format_id': 'http-' + mfs_path
.split('.')[0],
130 'width': mfs_info
['width'],
131 'height': mfs_info
['height'],
133 self
._sort
_formats
(formats
)
135 description
= media
.get('Description')
136 duration
= int_or_none(media
.get(
137 'DurationInSeconds')) or parse_duration(media
.get('Duration'))
138 view_count
= int_or_none(media
.get('CountViews'))
139 average_rating
= float_or_none(media
.get('UserRating'))
140 age_limit
= parse_age_limit(media
.get('Rating'))
141 genre
= media
.get('Genre')
142 release_year
= int_or_none(media
.get('ReleaseYear'))
143 creator
= media
.get('Directors')
144 artist
= media
.get('Cast')
146 if media
.get('MediaTypeDisplayValue') == 'Full Episode':
147 series
= media
.get('ShowName')
149 season_number
= int_or_none(media
.get('Season'))
150 episode_number
= int_or_none(media
.get('Episode'))
152 series
= episode
= season_number
= episode_number
= None
155 cc_files
= media
.get('ClosedCaptionFiles')
156 if isinstance(cc_files
, list):
157 for cc_file
in cc_files
:
158 if not isinstance(cc_file
, dict):
160 cc_url
= url_or_none(cc_file
.get('Path'))
163 lang
= cc_file
.get('Locale') or 'en'
164 subtitles
.setdefault(lang
, []).append({'url': cc_url
})
167 images
= media
.get('Images')
168 if isinstance(images
, list):
169 for image_key
, image_url
in images
.items():
170 mobj
= re
.search(r
'Img_(\d+)[xX](\d+)', image_key
)
175 'width': int(mobj
.group(1)),
176 'height': int(mobj
.group(2)),
182 'description': description
,
183 'duration': duration
,
184 'view_count': view_count
,
185 'average_rating': average_rating
,
186 'age_limit': age_limit
,
190 'release_year': release_year
,
193 'season_number': season_number
,
194 'episode_number': episode_number
,
195 'thumbnails': thumbnails
,
196 'subtitles': subtitles
,