]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  13 class AppleTrailersIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' 
  16         "url": "http://trailers.apple.com/trailers/wb/manofsteel/", 
  19                 "file": "manofsteel-trailer4.mov", 
  20                 "md5": "d97a8e575432dbcb81b7c3acb741f8a8", 
  24                     "upload_date": "20130523", 
  29                 "file": "manofsteel-trailer3.mov", 
  30                 "md5": "b8017b7131b721fb4e8d6f49e1df908c", 
  34                     "upload_date": "20130417", 
  39                 "file": "manofsteel-trailer.mov", 
  40                 "md5": "d0f1e1150989b9924679b441f3404d48", 
  44                     "upload_date": "20121212", 
  49                 "file": "manofsteel-teaser.mov", 
  50                 "md5": "5fe08795b943eb2e757fa95cb6def1cb", 
  54                     "upload_date": "20120721", 
  61     _JSON_RE 
= r
'iTunes.playURL\((.*?)\);' 
  63     def _real_extract(self
, url
): 
  64         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  65         movie 
= mobj
.group('movie') 
  66         uploader_id 
= mobj
.group('company') 
  68         playlist_url 
= compat_urlparse
.urljoin(url
, u
'includes/playlists/itunes.inc') 
  70             s 
= re
.sub(r
'(?s)<script[^<]*?>.*?</script>', u
'', s
) 
  71             s 
= re
.sub(r
'<img ([^<]*?)>', r
'<img \1/>', s
) 
  72             # The ' in the onClick attributes are not escaped, it couldn't be parsed 
  73             # like: http://trailers.apple.com/trailers/wb/gravity/ 
  75                 return u
'iTunes.playURL(%s);' % m
.group(1).replace('\'', ''') 
  76             s 
= re
.sub(self
._JSON
_RE
, _clean_json
, s
) 
  77             s 
= u
'<html>' + s 
+ u
'</html>' 
  79         doc 
= self
._download
_xml
(playlist_url
, movie
, transform_source
=fix_html
) 
  82         for li 
in doc
.findall('./div/ul/li'): 
  83             on_click 
= li
.find('.//a').attrib
['onClick'] 
  84             trailer_info_json 
= self
._search
_regex
(self
._JSON
_RE
, 
  85                 on_click
, u
'trailer info') 
  86             trailer_info 
= json
.loads(trailer_info_json
) 
  87             title 
= trailer_info
['title'] 
  88             video_id 
= movie 
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower() 
  89             thumbnail 
= li
.find('.//img').attrib
['src'] 
  90             upload_date 
= trailer_info
['posted'].replace('-', '') 
  92             runtime 
= trailer_info
['runtime'] 
  93             m 
= re
.search(r
'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime
) 
  96                 duration 
= 60 * int(m
.group('minutes')) + int(m
.group('seconds')) 
  98             first_url 
= trailer_info
['url'] 
  99             trailer_id 
= first_url
.split('/')[-1].rpartition('_')[0].lower() 
 100             settings_json_url 
= compat_urlparse
.urljoin(url
, 'includes/settings/%s.json' % trailer_id
) 
 101             settings_json 
= self
._download
_webpage
(settings_json_url
, trailer_id
, u
'Downloading settings json') 
 102             settings 
= json
.loads(settings_json
) 
 105             for format 
in settings
['metadata']['sizes']: 
 106                 # The src is a file pointing to the real video file 
 107                 format_url 
= re
.sub(r
'_(\d*p.mov)', r
'_h\1', format
['src']) 
 110                     'ext': determine_ext(format_url
), 
 111                     'format': format
['type'], 
 112                     'width': format
['width'], 
 113                     'height': int(format
['height']), 
 116             self
._sort
_formats
(formats
) 
 124                 'duration': duration
, 
 125                 'thumbnail': thumbnail
, 
 126                 'upload_date': upload_date
, 
 127                 'uploader_id': uploader_id
, 
 128                 'user_agent': 'QuickTime compatible (youtube-dl)',