]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
0c01fa1a13ffa6fbfbfe7b7fb2283d5ed4f8b70f
   1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  13 class AppleTrailersIE(InfoExtractor
): 
  14     _VALID_URL 
= r
'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' 
  16         "url": "http://trailers.apple.com/trailers/wb/manofsteel/", 
  19                 "md5": "d97a8e575432dbcb81b7c3acb741f8a8", 
  21                     "id": "manofsteel-trailer4", 
  25                     "upload_date": "20130523", 
  30                 "md5": "b8017b7131b721fb4e8d6f49e1df908c", 
  32                     "id": "manofsteel-trailer3", 
  36                     "upload_date": "20130417", 
  41                 "md5": "d0f1e1150989b9924679b441f3404d48", 
  43                     "id": "manofsteel-trailer", 
  47                     "upload_date": "20121212", 
  52                 "md5": "5fe08795b943eb2e757fa95cb6def1cb", 
  54                     "id": "manofsteel-teaser", 
  58                     "upload_date": "20120721", 
  65     _JSON_RE 
= r
'iTunes.playURL\((.*?)\);' 
  67     def _real_extract(self
, url
): 
  68         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  69         movie 
= mobj
.group('movie') 
  70         uploader_id 
= mobj
.group('company') 
  72         playlist_url 
= compat_urlparse
.urljoin(url
, 'includes/playlists/itunes.inc') 
  75             s 
= re
.sub(r
'(?s)<script[^<]*?>.*?</script>', '', s
) 
  76             s 
= re
.sub(r
'<img ([^<]*?)>', r
'<img \1/>', s
) 
  77             # The ' characters in the onClick attributes are not escaped, so the page cannot be parsed, 
  78             # e.g. http://trailers.apple.com/trailers/wb/gravity/ 
  81                 return 'iTunes.playURL(%s);' % m
.group(1).replace('\'', '&#39;') 
  82             s 
= re
.sub(self
._JSON
_RE
, _clean_json
, s
) 
  83             s 
= '<html>%s</html>' % s
 
  85         doc 
= self
._download
_xml
(playlist_url
, movie
, transform_source
=fix_html
) 
  88         for li 
in doc
.findall('./div/ul/li'): 
  89             on_click 
= li
.find('.//a').attrib
['onClick'] 
  90             trailer_info_json 
= self
._search
_regex
(self
._JSON
_RE
, 
  91                                                    on_click
, 'trailer info') 
  92             trailer_info 
= json
.loads(trailer_info_json
) 
  93             title 
= trailer_info
['title'] 
  94             video_id 
= movie 
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower() 
  95             thumbnail 
= li
.find('.//img').attrib
['src'] 
  96             upload_date 
= trailer_info
['posted'].replace('-', '') 
  98             runtime 
= trailer_info
['runtime'] 
  99             m 
= re
.search(r
'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime
) 
 102                 duration 
= 60 * int(m
.group('minutes')) + int(m
.group('seconds')) 
 104             first_url 
= trailer_info
['url'] 
 105             trailer_id 
= first_url
.split('/')[-1].rpartition('_')[0].lower() 
 106             settings_json_url 
= compat_urlparse
.urljoin(url
, 'includes/settings/%s.json' % trailer_id
) 
 107             settings 
= self
._download
_json
(settings_json_url
, trailer_id
, 'Downloading settings json') 
 110             for format 
in settings
['metadata']['sizes']: 
 111                 # The src is a file pointing to the real video file 
 112                 format_url 
= re
.sub(r
'_(\d*p.mov)', r
'_h\1', format
['src']) 
 115                     'format': format
['type'], 
 116                     'width': int_or_none(format
['width']), 
 117                     'height': int_or_none(format
['height']), 
 120             self
._sort
_formats
(formats
) 
 128                 'duration': duration
, 
 129                 'thumbnail': thumbnail
, 
 130                 'upload_date': upload_date
, 
 131                 'uploader_id': uploader_id
, 
 132                 'user_agent': 'QuickTime compatible (youtube-dl)',