]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
6d6237f8af79c02048da0e1b1624f33086a120b6
   2 import xml
.etree
.ElementTree
 
   5 from .common 
import InfoExtractor
 
  12 class AppleTrailersIE(InfoExtractor
): 
  13     _VALID_URL 
= r
'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)' 
  15         u
"url": u
"http://trailers.apple.com/trailers/wb/manofsteel/", 
  18                 u
"file": u
"manofsteel-trailer4.mov", 
  19                 u
"md5": u
"d97a8e575432dbcb81b7c3acb741f8a8", 
  22                     u
"title": u
"Trailer 4", 
  23                     u
"upload_date": u
"20130523", 
  24                     u
"uploader_id": u
"wb", 
  28                 u
"file": u
"manofsteel-trailer3.mov", 
  29                 u
"md5": u
"b8017b7131b721fb4e8d6f49e1df908c", 
  32                     u
"title": u
"Trailer 3", 
  33                     u
"upload_date": u
"20130417", 
  34                     u
"uploader_id": u
"wb", 
  38                 u
"file": u
"manofsteel-trailer.mov", 
  39                 u
"md5": u
"d0f1e1150989b9924679b441f3404d48", 
  43                     u
"upload_date": u
"20121212", 
  44                     u
"uploader_id": u
"wb", 
  48                 u
"file": u
"manofsteel-teaser.mov", 
  49                 u
"md5": u
"5fe08795b943eb2e757fa95cb6def1cb", 
  53                     u
"upload_date": u
"20120721", 
  54                     u
"uploader_id": u
"wb", 
  60     _JSON_RE 
= r
'iTunes.playURL\((.*?)\);' 
  62     def _real_extract(self
, url
): 
  63         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  64         movie 
= mobj
.group('movie') 
  65         uploader_id 
= mobj
.group('company') 
  67         playlist_url 
= compat_urlparse
.urljoin(url
, u
'includes/playlists/itunes.inc') 
  68         playlist_snippet 
= self
._download
_webpage
(playlist_url
, movie
) 
  69         playlist_cleaned 
= re
.sub(r
'(?s)<script[^<]*?>.*?</script>', u
'', playlist_snippet
) 
  70         playlist_cleaned 
= re
.sub(r
'<img ([^<]*?)>', r
'<img \1/>', playlist_cleaned
) 
  71         # The ' in the onClick attributes are not escaped, it couldn't be parsed 
  72         # with xml.etree.ElementTree.fromstring 
  73         # like: http://trailers.apple.com/trailers/wb/gravity/ 
  75             return u
'iTunes.playURL(%s);' % m
.group(1).replace('\'', ''') 
  76         playlist_cleaned 
= re
.sub(self
._JSON
_RE
, _clean_json
, playlist_cleaned
) 
  77         playlist_html 
= u
'<html>' + playlist_cleaned 
+ u
'</html>' 
  79         doc 
= xml
.etree
.ElementTree
.fromstring(playlist_html
) 
  81         for li 
in doc
.findall('./div/ul/li'): 
  82             on_click 
= li
.find('.//a').attrib
['onClick'] 
  83             trailer_info_json 
= self
._search
_regex
(self
._JSON
_RE
, 
  84                 on_click
, u
'trailer info') 
  85             trailer_info 
= json
.loads(trailer_info_json
) 
  86             title 
= trailer_info
['title'] 
  87             video_id 
= movie 
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower() 
  88             thumbnail 
= li
.find('.//img').attrib
['src'] 
  89             upload_date 
= trailer_info
['posted'].replace('-', '') 
  91             runtime 
= trailer_info
['runtime'] 
  92             m 
= re
.search(r
'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime
) 
  95                 duration 
= 60 * int(m
.group('minutes')) + int(m
.group('seconds')) 
  97             first_url 
= trailer_info
['url'] 
  98             trailer_id 
= first_url
.split('/')[-1].rpartition('_')[0].lower() 
  99             settings_json_url 
= compat_urlparse
.urljoin(url
, 'includes/settings/%s.json' % trailer_id
) 
 100             settings_json 
= self
._download
_webpage
(settings_json_url
, trailer_id
, u
'Downloading settings json') 
 101             settings 
= json
.loads(settings_json
) 
 104             for format 
in settings
['metadata']['sizes']: 
 105                 # The src is a file pointing to the real video file 
 106                 format_url 
= re
.sub(r
'_(\d*p.mov)', r
'_h\1', format
['src']) 
 109                     'ext': determine_ext(format_url
), 
 110                     'format': format
['type'], 
 111                     'width': format
['width'], 
 112                     'height': int(format
['height']), 
 114             formats 
= sorted(formats
, key
=lambda f
: (f
['height'], f
['width'])) 
 122                 'duration': duration
, 
 123                 'thumbnail': thumbnail
, 
 124                 'upload_date': upload_date
, 
 125                 'uploader_id': uploader_id
, 
 126                 'user_agent': 'QuickTime compatible (youtube-dl)', 
 128             # TODO: Remove when #980 has been merged 
 129             info
['url'] = formats
[-1]['url'] 
 130             info
['ext'] = formats
[-1]['ext'] 
 132             playlist
.append(info
)