]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_urlparse
 
  15 class AppleTrailersIE(InfoExtractor
): 
  16     IE_NAME 
= 'appletrailers' 
  17     _VALID_URL 
= r
'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' 
  19         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', 
  22             'title': 'Man of Steel', 
  26                 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8', 
  28                     'id': 'manofsteel-trailer4', 
  32                     'upload_date': '20130523', 
  37                 'md5': 'b8017b7131b721fb4e8d6f49e1df908c', 
  39                     'id': 'manofsteel-trailer3', 
  43                     'upload_date': '20130417', 
  48                 'md5': 'd0f1e1150989b9924679b441f3404d48', 
  50                     'id': 'manofsteel-trailer', 
  54                     'upload_date': '20121212', 
  59                 'md5': '5fe08795b943eb2e757fa95cb6def1cb', 
  61                     'id': 'manofsteel-teaser', 
  65                     'upload_date': '20120721', 
  71         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', 
  74             'title': 'Blackthorn', 
  76         'playlist_mincount': 2, 
  77         'expected_warnings': ['Unable to download JSON metadata'], 
  79         # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json 
  80         'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/', 
  83             'title': 'Kung Fu Panda 3', 
  85         'playlist_mincount': 4, 
  87         'url': 'http://trailers.apple.com/ca/metropole/autrui/', 
  88         'only_matching': True, 
  90         'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/', 
  91         'only_matching': True, 
  94     _JSON_RE 
= r
'iTunes.playURL\((.*?)\);' 
  96     def _real_extract(self
, url
): 
  97         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  98         movie 
= mobj
.group('movie') 
  99         uploader_id 
= mobj
.group('company') 
 101         webpage 
= self
._download
_webpage
(url
, movie
) 
 102         film_id 
= self
._search
_regex
(r
"FilmId\s*=\s*'(\d+)'", webpage
, 'film id') 
 103         film_data 
= self
._download
_json
( 
 104             'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id
, 
 105             film_id
, fatal
=False) 
 109             for clip 
in film_data
.get('clips', []): 
 110                 clip_title 
= clip
['title'] 
 113                 for version
, version_data 
in clip
.get('versions', {}).items(): 
 114                     for size
, size_data 
in version_data
.get('sizes', {}).items(): 
 115                         src 
= size_data
.get('src') 
 119                             'format_id': '%s-%s' % (version
, size
), 
 120                             'url': re
.sub(r
'_(\d+p\.mov)', r
'_h\1', src
), 
 121                             'width': int_or_none(size_data
.get('width')), 
 122                             'height': int_or_none(size_data
.get('height')), 
 123                             'language': version
[:2], 
 125                 self
._sort
_formats
(formats
) 
 128                     'id': movie 
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', clip_title
).lower(), 
 131                     'thumbnail': clip
.get('screen') or clip
.get('thumb'), 
 132                     'duration': parse_duration(clip
.get('runtime') or clip
.get('faded')), 
 133                     'upload_date': unified_strdate(clip
.get('posted')), 
 134                     'uploader_id': uploader_id
, 
 137             page_data 
= film_data
.get('page', {}) 
 138             return self
.playlist_result(entries
, film_id
, page_data
.get('movie_title')) 
 140         playlist_url 
= compat_urlparse
.urljoin(url
, 'includes/playlists/itunes.inc') 
 143             s 
= re
.sub(r
'(?s)<script[^<]*?>.*?</script>', '', s
) 
 144             s 
= re
.sub(r
'<img ([^<]*?)/?>', r
'<img \1/>', s
) 
 145             # The ' in the onClick attributes are not escaped, it couldn't be parsed 
 146             # like: http://trailers.apple.com/trailers/wb/gravity/ 
 149                 return 'iTunes.playURL(%s);' % m
.group(1).replace('\'', '&#39;') 
 150             s 
= re
.sub(self
._JSON
_RE
, _clean_json
, s
) 
 151             s 
= '<html>%s</html>' % s
 
 153         doc 
= self
._download
_xml
(playlist_url
, movie
, transform_source
=fix_html
) 
 156         for li 
in doc
.findall('./div/ul/li'): 
 157             on_click 
= li
.find('.//a').attrib
['onClick'] 
 158             trailer_info_json 
= self
._search
_regex
(self
._JSON
_RE
, 
 159                                                    on_click
, 'trailer info') 
 160             trailer_info 
= json
.loads(trailer_info_json
) 
 161             first_url 
= trailer_info
.get('url') 
 164             title 
= trailer_info
['title'] 
 165             video_id 
= movie 
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower() 
 166             thumbnail 
= li
.find('.//img').attrib
['src'] 
 167             upload_date 
= trailer_info
['posted'].replace('-', '') 
 169             runtime 
= trailer_info
['runtime'] 
 170             m 
= re
.search(r
'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime
) 
 173                 duration 
= 60 * int(m
.group('minutes')) + int(m
.group('seconds')) 
 175             trailer_id 
= first_url
.split('/')[-1].rpartition('_')[0].lower() 
 176             settings_json_url 
= compat_urlparse
.urljoin(url
, 'includes/settings/%s.json' % trailer_id
) 
 177             settings 
= self
._download
_json
(settings_json_url
, trailer_id
, 'Downloading settings json') 
 180             for format 
in settings
['metadata']['sizes']: 
 181                 # The src is a file pointing to the real video file 
 182                 format_url 
= re
.sub(r
'_(\d*p\.mov)', r
'_h\1', format
['src']) 
 185                     'format': format
['type'], 
 186                     'width': int_or_none(format
['width']), 
 187                     'height': int_or_none(format
['height']), 
 190             self
._sort
_formats
(formats
) 
 197                 'duration': duration
, 
 198                 'thumbnail': thumbnail
, 
 199                 'upload_date': upload_date
, 
 200                 'uploader_id': uploader_id
, 
 202                     'User-Agent': 'QuickTime compatible (youtube-dl)', 
 213 class AppleTrailersSectionIE(InfoExtractor
): 
 214     IE_NAME 
= 'appletrailers:section' 
 217             'feed_path': 'just_added', 
 218             'title': 'Just Added', 
 221             'feed_path': 'exclusive', 
 222             'title': 'Exclusive', 
 225             'feed_path': 'just_hd', 
 229             'feed_path': 'most_pop', 
 230             'title': 'Most Popular', 
 233             'feed_path': 'studios', 
 234             'title': 'Movie Studios', 
 237     _VALID_URL 
= r
'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS
) 
 239         'url': 'http://trailers.apple.com/#section=justadded', 
 241             'title': 'Just Added', 
 244         'playlist_mincount': 80, 
 246         'url': 'http://trailers.apple.com/#section=exclusive', 
 248             'title': 'Exclusive', 
 251         'playlist_mincount': 80, 
 253         'url': 'http://trailers.apple.com/#section=justhd', 
 258         'playlist_mincount': 80, 
 260         'url': 'http://trailers.apple.com/#section=mostpopular', 
 262             'title': 'Most Popular', 
 265         'playlist_mincount': 30, 
 267         'url': 'http://trailers.apple.com/#section=moviestudios', 
 269             'title': 'Movie Studios', 
 270             'id': 'moviestudios', 
 272         'playlist_mincount': 80, 
 275     def _real_extract(self
, url
): 
 276         section 
= self
._match
_id
(url
) 
 277         section_data 
= self
._download
_json
( 
 278             'http://trailers.apple.com/trailers/home/feeds/%s.json' % self
._SECTIONS
[section
]['feed_path'], 
 281             self
.url_result('http://trailers.apple.com' + e
['location']) 
 282             for e 
in section_data
] 
 283         return self
.playlist_result(entries
, section
, self
._SECTIONS
[section
]['title'])