]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_urlparse
 
  13 class AppleTrailersIE(InfoExtractor
): 
  14     IE_NAME 
= 'appletrailers' 
  15     _VALID_URL 
= r
'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)' 
  17         'url': 'http://trailers.apple.com/trailers/wb/manofsteel/', 
  23                 'md5': 'd97a8e575432dbcb81b7c3acb741f8a8', 
  25                     'id': 'manofsteel-trailer4', 
  29                     'upload_date': '20130523', 
  34                 'md5': 'b8017b7131b721fb4e8d6f49e1df908c', 
  36                     'id': 'manofsteel-trailer3', 
  40                     'upload_date': '20130417', 
  45                 'md5': 'd0f1e1150989b9924679b441f3404d48', 
  47                     'id': 'manofsteel-trailer', 
  51                     'upload_date': '20121212', 
  56                 'md5': '5fe08795b943eb2e757fa95cb6def1cb', 
  58                     'id': 'manofsteel-teaser', 
  62                     'upload_date': '20120721', 
  68         'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/', 
  72         'playlist_mincount': 2, 
  74         'url': 'http://trailers.apple.com/ca/metropole/autrui/', 
  75         'only_matching': True, 
  77         'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/', 
  78         'only_matching': True, 
  81     _JSON_RE 
= r
'iTunes.playURL\((.*?)\);' 
  83     def _real_extract(self
, url
): 
  84         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  85         movie 
= mobj
.group('movie') 
  86         uploader_id 
= mobj
.group('company') 
  88         playlist_url 
= compat_urlparse
.urljoin(url
, 'includes/playlists/itunes.inc') 
  91             s 
= re
.sub(r
'(?s)<script[^<]*?>.*?</script>', '', s
) 
  92             s 
= re
.sub(r
'<img ([^<]*?)/?>', r
'<img \1/>', s
) 
  93             # The ' in the onClick attributes are not escaped, it couldn't be parsed 
  94             # like: http://trailers.apple.com/trailers/wb/gravity/ 
  97                 return 'iTunes.playURL(%s);' % m
.group(1).replace('\'', ''') 
  98             s 
= re
.sub(self
._JSON
_RE
, _clean_json
, s
) 
  99             s 
= '<html>%s</html>' % s
 
 101         doc 
= self
._download
_xml
(playlist_url
, movie
, transform_source
=fix_html
) 
 104         for li 
in doc
.findall('./div/ul/li'): 
 105             on_click 
= li
.find('.//a').attrib
['onClick'] 
 106             trailer_info_json 
= self
._search
_regex
(self
._JSON
_RE
, 
 107                                                    on_click
, 'trailer info') 
 108             trailer_info 
= json
.loads(trailer_info_json
) 
 109             first_url 
= trailer_info
.get('url') 
 112             title 
= trailer_info
['title'] 
 113             video_id 
= movie 
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower() 
 114             thumbnail 
= li
.find('.//img').attrib
['src'] 
 115             upload_date 
= trailer_info
['posted'].replace('-', '') 
 117             runtime 
= trailer_info
['runtime'] 
 118             m 
= re
.search(r
'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime
) 
 121                 duration 
= 60 * int(m
.group('minutes')) + int(m
.group('seconds')) 
 123             trailer_id 
= first_url
.split('/')[-1].rpartition('_')[0].lower() 
 124             settings_json_url 
= compat_urlparse
.urljoin(url
, 'includes/settings/%s.json' % trailer_id
) 
 125             settings 
= self
._download
_json
(settings_json_url
, trailer_id
, 'Downloading settings json') 
 128             for format 
in settings
['metadata']['sizes']: 
 129                 # The src is a file pointing to the real video file 
 130                 format_url 
= re
.sub(r
'_(\d*p.mov)', r
'_h\1', format
['src']) 
 133                     'format': format
['type'], 
 134                     'width': int_or_none(format
['width']), 
 135                     'height': int_or_none(format
['height']), 
 138             self
._sort
_formats
(formats
) 
 145                 'duration': duration
, 
 146                 'thumbnail': thumbnail
, 
 147                 'upload_date': upload_date
, 
 148                 'uploader_id': uploader_id
, 
 150                     'User-Agent': 'QuickTime compatible (youtube-dl)', 
 161 class AppleTrailersSectionIE(InfoExtractor
): 
 162     IE_NAME 
= 'appletrailers:section' 
 165             'feed_path': 'just_added', 
 166             'title': 'Just Added', 
 169             'feed_path': 'exclusive', 
 170             'title': 'Exclusive', 
 173             'feed_path': 'just_hd', 
 177             'feed_path': 'most_pop', 
 178             'title': 'Most Popular', 
 181             'feed_path': 'studios', 
 182             'title': 'Movie Studios', 
 185     _VALID_URL 
= r
'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS
) 
 187         'url': 'http://trailers.apple.com/#section=justadded', 
 189             'title': 'Just Added', 
 192         'playlist_mincount': 80, 
 194         'url': 'http://trailers.apple.com/#section=exclusive', 
 196             'title': 'Exclusive', 
 199         'playlist_mincount': 80, 
 201         'url': 'http://trailers.apple.com/#section=justhd', 
 206         'playlist_mincount': 80, 
 208         'url': 'http://trailers.apple.com/#section=mostpopular', 
 210             'title': 'Most Popular', 
 213         'playlist_mincount': 80, 
 215         'url': 'http://trailers.apple.com/#section=moviestudios', 
 217             'title': 'Movie Studios', 
 218             'id': 'moviestudios', 
 220         'playlist_mincount': 80, 
 223     def _real_extract(self
, url
): 
 224         section 
= self
._match
_id
(url
) 
 225         section_data 
= self
._download
_json
( 
 226             'http://trailers.apple.com/trailers/home/feeds/%s.json' % self
._SECTIONS
[section
]['feed_path'], 
 229             self
.url_result('http://trailers.apple.com' + e
['location']) 
 230             for e 
in section_data
] 
 231         return self
.playlist_result(entries
, section
, self
._SECTIONS
[section
]['title'])