]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/appletrailers.py
1 from __future__
import unicode_literals
6 from .common
import InfoExtractor
13 class AppleTrailersIE(InfoExtractor
):
14 _VALID_URL
= r
'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
16 "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
19 "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
21 "id": "manofsteel-trailer4",
25 "upload_date": "20130523",
30 "md5": "b8017b7131b721fb4e8d6f49e1df908c",
32 "id": "manofsteel-trailer3",
36 "upload_date": "20130417",
41 "md5": "d0f1e1150989b9924679b441f3404d48",
43 "id": "manofsteel-trailer",
47 "upload_date": "20121212",
52 "md5": "5fe08795b943eb2e757fa95cb6def1cb",
54 "id": "manofsteel-teaser",
58 "upload_date": "20120721",
65 _JSON_RE
= r
'iTunes.playURL\((.*?)\);'
67 def _real_extract(self
, url
):
68 mobj
= re
.match(self
._VALID
_URL
, url
)
69 movie
= mobj
.group('movie')
70 uploader_id
= mobj
.group('company')
72 playlist_url
= compat_urlparse
.urljoin(url
, 'includes/playlists/itunes.inc')
74 s
= re
.sub(r
'(?s)<script[^<]*?>.*?</script>', '', s
)
75 s
= re
.sub(r
'<img ([^<]*?)>', r
'<img \1/>', s
)
76 # The ' in the onClick attributes are not escaped, it couldn't be parsed
77 # like: http://trailers.apple.com/trailers/wb/gravity/
79 return 'iTunes.playURL(%s);' % m
.group(1).replace('\'', ''')
80 s
= re
.sub(self
._JSON
_RE
, _clean_json
, s
)
81 s
= '<html>' + s
+ u
'</html>'
83 doc
= self
._download
_xml
(playlist_url
, movie
, transform_source
=fix_html
)
86 for li
in doc
.findall('./div/ul/li'):
87 on_click
= li
.find('.//a').attrib
['onClick']
88 trailer_info_json
= self
._search
_regex
(self
._JSON
_RE
,
89 on_click
, 'trailer info')
90 trailer_info
= json
.loads(trailer_info_json
)
91 title
= trailer_info
['title']
92 video_id
= movie
+ '-' + re
.sub(r
'[^a-zA-Z0-9]', '', title
).lower()
93 thumbnail
= li
.find('.//img').attrib
['src']
94 upload_date
= trailer_info
['posted'].replace('-', '')
96 runtime
= trailer_info
['runtime']
97 m
= re
.search(r
'(?P<minutes>[0-9]+):(?P<seconds>[0-9]{1,2})', runtime
)
100 duration
= 60 * int(m
.group('minutes')) + int(m
.group('seconds'))
102 first_url
= trailer_info
['url']
103 trailer_id
= first_url
.split('/')[-1].rpartition('_')[0].lower()
104 settings_json_url
= compat_urlparse
.urljoin(url
, 'includes/settings/%s.json' % trailer_id
)
105 settings
= self
._download
_json
(settings_json_url
, trailer_id
, 'Downloading settings json')
108 for format
in settings
['metadata']['sizes']:
109 # The src is a file pointing to the real video file
110 format_url
= re
.sub(r
'_(\d*p.mov)', r
'_h\1', format
['src'])
113 'format': format
['type'],
114 'width': int_or_none(format
['width']),
115 'height': int_or_none(format
['height']),
118 self
._sort
_formats
(formats
)
126 'duration': duration
,
127 'thumbnail': thumbnail
,
128 'upload_date': upload_date
,
129 'uploader_id': uploader_id
,
130 'user_agent': 'QuickTime compatible (youtube-dl)',