]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
6d6237f8af79c02048da0e1b1624f33086a120b6
2 import xml
.etree
.ElementTree
5 from .common
import InfoExtractor
12 class AppleTrailersIE(InfoExtractor
):
13 _VALID_URL
= r
'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
15 u
"url": u
"http://trailers.apple.com/trailers/wb/manofsteel/",
18 u
"file": u
"manofsteel-trailer4.mov",
19 u
"md5": u
"d97a8e575432dbcb81b7c3acb741f8a8",
22 u
"title": u
"Trailer 4",
23 u
"upload_date": u
"20130523",
24 u
"uploader_id": u
"wb",
28 u
"file": u
"manofsteel-trailer3.mov",
29 u
"md5": u
"b8017b7131b721fb4e8d6f49e1df908c",
32 u
"title": u
"Trailer 3",
33 u
"upload_date": u
"20130417",
34 u
"uploader_id": u
"wb",
38 u
"file": u
"manofsteel-trailer.mov",
39 u
"md5": u
"d0f1e1150989b9924679b441f3404d48",
43 u
"upload_date": u
"20121212",
44 u
"uploader_id": u
"wb",
48 u
"file": u
"manofsteel-teaser.mov",
49 u
"md5": u
"5fe08795b943eb2e757fa95cb6def1cb",
53 u
"upload_date": u
"20120721",
54 u
"uploader_id": u
"wb",
60 _JSON_RE
= r
'iTunes.playURL\((.*?)\);'
62 def _real_extract(self
, url
):
63 mobj
= re
.match(self
._VALID
_URL
, url
)
64 movie
= mobj
.group('movie')
65 uploader_id
= mobj
.group('company')
67 playlist_url
= compat_urlparse
.urljoin(url
, u
'includes/playlists/itunes.inc')
68 playlist_snippet
= self
._download
_webpage
(playlist_url
, movie
)
69 playlist_cleaned
= re
.sub(r
'(?s)<script[^<]*?>.*?</script>', u
'', playlist_snippet
)
70 playlist_cleaned
= re
.sub(r
'<img ([^<]*?)>', r
'<img \1/>', playlist_cleaned
)
71 # The ' characters in the onClick attributes are not escaped, so the snippet
72 # cannot be parsed with xml.etree.ElementTree.fromstring as-is.
73 # Example: http://trailers.apple.com/trailers/wb/gravity/
75 return u
'iTunes.playURL(%s);' % m
.group(1).replace('\'', ''')
76 playlist_cleaned
= re
.sub(self
._JSON
_RE
, _clean_json
, playlist_cleaned
)
77 playlist_html
= u
'<html>' + playlist_cleaned
+ u
'</html>'
79 doc
= xml
.etree
.ElementTree
.fromstring(playlist_html
)
81 for li
in doc
.findall('./div/ul/li'):
82 on_click
= li
.find('.//a').attrib
['onClick']
83 trailer_info_json
= self
._search
_regex
(self
._JSON
_RE
,
84 on_click
, u
'trailer info')
85 trailer_info
= json
.loads(trailer_info_json
)
86 title
= trailer_info
['title']
87 video_id
= movie
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower()
88 thumbnail
= li
.find('.//img').attrib
['src']
89 upload_date
= trailer_info
['posted'].replace('-', '')
91 runtime
= trailer_info
['runtime']
92 m
= re
.search(r
'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime
)
95 duration
= 60 * int(m
.group('minutes')) + int(m
.group('seconds'))
97 first_url
= trailer_info
['url']
98 trailer_id
= first_url
.split('/')[-1].rpartition('_')[0].lower()
99 settings_json_url
= compat_urlparse
.urljoin(url
, 'includes/settings/%s.json' % trailer_id
)
100 settings_json
= self
._download
_webpage
(settings_json_url
, trailer_id
, u
'Downloading settings json')
101 settings
= json
.loads(settings_json
)
104 for format
in settings
['metadata']['sizes']:
105 # The src is a file pointing to the real video file
106 format_url
= re
.sub(r
'_(\d*p.mov)', r
'_h\1', format
['src'])
109 'ext': determine_ext(format_url
),
110 'format': format
['type'],
111 'width': format
['width'],
112 'height': int(format
['height']),
114 formats
= sorted(formats
, key
=lambda f
: (f
['height'], f
['width']))
122 'duration': duration
,
123 'thumbnail': thumbnail
,
124 'upload_date': upload_date
,
125 'uploader_id': uploader_id
,
126 'user_agent': 'QuickTime compatible (youtube-dl)',
128 # TODO: Remove when #980 has been merged
129 info
['url'] = formats
[-1]['url']
130 info
['ext'] = formats
[-1]['ext']
132 playlist
.append(info
)