Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/crackle.py
debian/changelog: Annotate the log with bugs to close.
[youtubedl] / youtube_dl / extractor / crackle.py
1 # coding: utf-8
2 from __future__ import unicode_literals, division
3
4 import re
5
6 from .common import InfoExtractor
7 from ..compat import (
8 compat_str,
9 compat_HTTPError,
10 )
11 from ..utils import (
12 determine_ext,
13 float_or_none,
14 int_or_none,
15 parse_age_limit,
16 parse_duration,
17 ExtractorError
18 )
19
20
class CrackleIE(InfoExtractor):
    """Extractor for single media items hosted on crackle.com / sonycrackle.com.

    Accepts either a ``crackle:<id>`` pseudo URL or a regular page URL whose
    last path component is the numeric media id.
    """
    _VALID_URL = r'(?:crackle:|https?://(?:(?:www|m)\.)?(?:sony)?crackle\.com/(?:playlist/\d+/|(?:[^/]+/)+))(?P<id>\d+)'
    _TESTS = [{
        # geo restricted to CA
        'url': 'https://www.crackle.com/andromeda/2502343',
        'info_dict': {
            'id': '2502343',
            'ext': 'mp4',
            'title': 'Under The Night',
            'description': 'md5:d2b8ca816579ae8a7bf28bfff8cefc8a',
            'duration': 2583,
            'view_count': int,
            'average_rating': 0,
            'age_limit': 14,
            'genre': 'Action, Sci-Fi',
            'creator': 'Allan Kroeker',
            'artist': 'Keith Hamilton Cobb, Kevin Sorbo, Lisa Ryder, Lexa Doig, Robert Hewitt Wolfe',
            'release_year': 2000,
            'series': 'Andromeda',
            'episode': 'Under The Night',
            'season_number': 1,
            'episode_number': 1,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'https://www.sonycrackle.com/andromeda/2502343',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for the media item referenced by ``url``.

        The media details endpoint is queried once per candidate country
        (only the ``geo_bypass_country`` parameter when it is set, otherwise
        a fixed list of country codes).  The first country whose response
        carries a non-empty ``MediaURLs`` list is used to build the result.

        Raises:
            ExtractorError: re-raised as is when the download fails with
                anything other than HTTP 401; the last HTTP 401 error when
                at least one country was refused and none yielded media
                URLs; a fresh error when no country was refused but none
                yielded media URLs either.
        """
        video_id = self._match_id(url)

        country_code = self._downloader.params.get('geo_bypass_country', None)
        countries = [country_code] if country_code else (
            'US', 'AU', 'CA', 'AS', 'FM', 'GU', 'MP', 'PR', 'PW', 'MH', 'VI')

        # Most recent geo restriction (HTTP 401) error, reported if every
        # country fails
        last_e = None

        for country in countries:
            try:
                media = self._download_json(
                    'https://web-api-us.crackle.com/Service.svc/details/media/%s/%s'
                    % (video_id, country), video_id,
                    'Downloading media JSON as %s' % country,
                    'Unable to download media JSON', query={
                        'disableProtocols': 'true',
                        'format': 'json'
                    })
            except ExtractorError as e:
                # 401 means geo restriction, trying next country
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
                    last_e = e
                    continue
                raise

            # A response without usable media URLs is treated like a miss:
            # move on to the next country
            media_urls = media.get('MediaURLs')
            if not media_urls or not isinstance(media_urls, list):
                continue

            title = media['Title']

            formats = []
            for media_url in media_urls:
                if not isinstance(media_url, dict):
                    continue
                # DRM protected renditions are skipped
                if media_url.get('UseDRM') is True:
                    continue
                format_url = media_url.get('Path')
                if not format_url or not isinstance(format_url, compat_str):
                    continue
                ext = determine_ext(format_url)
                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id='hls', fatal=False))
                elif ext == 'mpd':
                    formats.extend(self._extract_mpd_formats(
                        format_url, video_id, mpd_id='dash', fatal=False))
            self._sort_formats(formats)

            description = media.get('Description')
            # Prefer the numeric duration, fall back to parsing the textual one
            duration = int_or_none(media.get(
                'DurationInSeconds')) or parse_duration(media.get('Duration'))
            view_count = int_or_none(media.get('CountViews'))
            average_rating = float_or_none(media.get('UserRating'))
            age_limit = parse_age_limit(media.get('Rating'))
            genre = media.get('Genre')
            release_year = int_or_none(media.get('ReleaseYear'))
            creator = media.get('Directors')
            artist = media.get('Cast')

            # Series metadata is only filled in for full episodes
            if media.get('MediaTypeDisplayValue') == 'Full Episode':
                series = media.get('ShowName')
                episode = title
                season_number = int_or_none(media.get('Season'))
                episode_number = int_or_none(media.get('Episode'))
            else:
                series = episode = season_number = episode_number = None

            subtitles = {}
            cc_files = media.get('ClosedCaptionFiles')
            if isinstance(cc_files, list):
                for cc_file in cc_files:
                    if not isinstance(cc_file, dict):
                        continue
                    cc_url = cc_file.get('Path')
                    if not cc_url or not isinstance(cc_url, compat_str):
                        continue
                    lang = cc_file.get('Locale') or 'en'
                    subtitles.setdefault(lang, []).append({'url': cc_url})

            thumbnails = []
            images = media.get('Images')
            # .items() below requires a mapping of image key -> image URL
            if isinstance(images, dict):
                for image_key, image_url in images.items():
                    if not image_url or not isinstance(image_url, compat_str):
                        continue
                    # Dimensions are encoded in the key, e.g. Img_<width>x<height>
                    mobj = re.search(r'Img_(\d+)[xX](\d+)', image_key)
                    if not mobj:
                        continue
                    thumbnails.append({
                        'url': image_url,
                        'width': int(mobj.group(1)),
                        'height': int(mobj.group(2)),
                    })

            return {
                'id': video_id,
                'title': title,
                'description': description,
                'duration': duration,
                'view_count': view_count,
                'average_rating': average_rating,
                'age_limit': age_limit,
                'genre': genre,
                'creator': creator,
                'artist': artist,
                'release_year': release_year,
                'series': series,
                'episode': episode,
                'season_number': season_number,
                'episode_number': episode_number,
                'thumbnails': thumbnails,
                'subtitles': subtitles,
                'formats': formats,
            }

        # No country produced media URLs. last_e is still None when every
        # request succeeded but carried no usable MediaURLs; raising None
        # would be a TypeError, so report a proper extractor error instead.
        if last_e is None:
            raise ExtractorError(
                'Unable to find any media URLs for %s' % video_id)
        raise last_e