]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_urlparse
 
  15 class AppleTrailersIE(InfoExtractor
): 
  16     IE_NAME 
= 'appletrailers' 
  17     _VALID_URL 
= r
'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' 
  19         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', 
  22             'title': 'Man of Steel', 
  26                 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8', 
  28                     'id': 'manofsteel-trailer4', 
  32                     'upload_date': '20130523', 
  37                 'md5': 'b8017b7131b721fb4e8d6f49e1df908c', 
  39                     'id': 'manofsteel-trailer3', 
  43                     'upload_date': '20130417', 
  48                 'md5': 'd0f1e1150989b9924679b441f3404d48', 
  50                     'id': 'manofsteel-trailer', 
  54                     'upload_date': '20121212', 
  59                 'md5': '5fe08795b943eb2e757fa95cb6def1cb', 
  61                     'id': 'manofsteel-teaser', 
  65                     'upload_date': '20120721', 
  71         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', 
  75         'playlist_mincount': 2, 
  76         'expected_warnings': ['Unable to download JSON metadata'], 
  78         # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json 
  79         'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/', 
  82             'title': 'Kung Fu Panda 3', 
  84         'playlist_mincount': 4, 
  86         'url': 'http://trailers.apple.com/ca/metropole/autrui/', 
  87         'only_matching': True, 
  89         'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/', 
  90         'only_matching': True, 
  93     _JSON_RE 
= r
'iTunes.playURL\((.*?)\);' 
  95     def _real_extract(self
, url
): 
  96         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  97         movie 
= mobj
.group('movie') 
  98         uploader_id 
= mobj
.group('company') 
 100         webpage 
= self
._download
_webpage
(url
, movie
) 
 101         film_id 
= self
._search
_regex
(r
"FilmId\s*=\s*'(\d+)'", webpage
, 'film id') 
 102         film_data 
= self
._download
_json
( 
 103             'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id
, 
 104             film_id
, fatal
=False) 
 108             for clip 
in film_data
.get('clips', []): 
 109                 clip_title 
= clip
['title'] 
 112                 for version
, version_data 
in clip
.get('versions', {}).items(): 
 113                     for size
, size_data 
in version_data
.get('sizes', {}).items(): 
 114                         src 
= size_data
.get('src') 
 118                             'format_id': '%s-%s' % (version
, size
), 
 119                             'url': re
.sub(r
'_(\d+p.mov)', r
'_h\1', src
), 
 120                             'width': int_or_none(size_data
.get('width')), 
 121                             'height': int_or_none(size_data
.get('height')), 
 122                             'language': version
[:2], 
 124                 self
._sort
_formats
(formats
) 
 127                     'id': movie 
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', clip_title
).lower(), 
 130                     'thumbnail': clip
.get('screen') or clip
.get('thumb'), 
 131                     'duration': parse_duration(clip
.get('runtime') or clip
.get('faded')), 
 132                     'upload_date': unified_strdate(clip
.get('posted')), 
 133                     'uploader_id': uploader_id
, 
 136             page_data 
= film_data
.get('page', {}) 
 137             return self
.playlist_result(entries
, film_id
, page_data
.get('movie_title')) 
 139         playlist_url 
= compat_urlparse
.urljoin(url
, 'includes/playlists/itunes.inc') 
 142             s 
= re
.sub(r
'(?s)<script[^<]*?>.*?</script>', '', s
) 
 143             s 
= re
.sub(r
'<img ([^<]*?)/?>', r
'<img \1/>', s
) 
 144             # The ' in the onClick attributes are not escaped, it couldn't be parsed 
 145             # like: http://trailers.apple.com/trailers/wb/gravity/ 
 148                 return 'iTunes.playURL(%s);' % m
.group(1).replace('\'', ''') 
 149             s 
= re
.sub(self
._JSON
_RE
, _clean_json
, s
) 
 150             s 
= '<html>%s</html>' % s
 
 152         doc 
= self
._download
_xml
(playlist_url
, movie
, transform_source
=fix_html
) 
 155         for li 
in doc
.findall('./div/ul/li'): 
 156             on_click 
= li
.find('.//a').attrib
['onClick'] 
 157             trailer_info_json 
= self
._search
_regex
(self
._JSON
_RE
, 
 158                                                    on_click
, 'trailer info') 
 159             trailer_info 
= json
.loads(trailer_info_json
) 
 160             first_url 
= trailer_info
.get('url') 
 163             title 
= trailer_info
['title'] 
 164             video_id 
= movie 
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower() 
 165             thumbnail 
= li
.find('.//img').attrib
['src'] 
 166             upload_date 
= trailer_info
['posted'].replace('-', '') 
 168             runtime 
= trailer_info
['runtime'] 
 169             m 
= re
.search(r
'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime
) 
 172                 duration 
= 60 * int(m
.group('minutes')) + int(m
.group('seconds')) 
 174             trailer_id 
= first_url
.split('/')[-1].rpartition('_')[0].lower() 
 175             settings_json_url 
= compat_urlparse
.urljoin(url
, 'includes/settings/%s.json' % trailer_id
) 
 176             settings 
= self
._download
_json
(settings_json_url
, trailer_id
, 'Downloading settings json') 
 179             for format 
in settings
['metadata']['sizes']: 
 180                 # The src is a file pointing to the real video file 
 181                 format_url 
= re
.sub(r
'_(\d*p.mov)', r
'_h\1', format
['src']) 
 184                     'format': format
['type'], 
 185                     'width': int_or_none(format
['width']), 
 186                     'height': int_or_none(format
['height']), 
 189             self
._sort
_formats
(formats
) 
 196                 'duration': duration
, 
 197                 'thumbnail': thumbnail
, 
 198                 'upload_date': upload_date
, 
 199                 'uploader_id': uploader_id
, 
 201                     'User-Agent': 'QuickTime compatible (youtube-dl)', 
 212 class AppleTrailersSectionIE(InfoExtractor
): 
 213     IE_NAME 
= 'appletrailers:section' 
 216             'feed_path': 'just_added', 
 217             'title': 'Just Added', 
 220             'feed_path': 'exclusive', 
 221             'title': 'Exclusive', 
 224             'feed_path': 'just_hd', 
 228             'feed_path': 'most_pop', 
 229             'title': 'Most Popular', 
 232             'feed_path': 'studios', 
 233             'title': 'Movie Studios', 
 236     _VALID_URL 
= r
'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS
) 
 238         'url': 'http://trailers.apple.com/#section=justadded', 
 240             'title': 'Just Added', 
 243         'playlist_mincount': 80, 
 245         'url': 'http://trailers.apple.com/#section=exclusive', 
 247             'title': 'Exclusive', 
 250         'playlist_mincount': 80, 
 252         'url': 'http://trailers.apple.com/#section=justhd', 
 257         'playlist_mincount': 80, 
 259         'url': 'http://trailers.apple.com/#section=mostpopular', 
 261             'title': 'Most Popular', 
 264         'playlist_mincount': 80, 
 266         'url': 'http://trailers.apple.com/#section=moviestudios', 
 268             'title': 'Movie Studios', 
 269             'id': 'moviestudios', 
 271         'playlist_mincount': 80, 
 274     def _real_extract(self
, url
): 
 275         section 
= self
._match
_id
(url
) 
 276         section_data 
= self
._download
_json
( 
 277             'http://trailers.apple.com/trailers/home/feeds/%s.json' % self
._SECTIONS
[section
]['feed_path'], 
 280             self
.url_result('http://trailers.apple.com' + e
['location']) 
 281             for e 
in section_data
] 
 282         return self
.playlist_result(entries
, section
, self
._SECTIONS
[section
]['title'])