X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/7ceb2ec430c3363e0140a0519402428f36dc472e..a497d0e55172891fd4925626374a7afdd811e00f:/youtube_dl/extractor/appletrailers.py diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py index 6d6237f..922cede 100644 --- a/youtube_dl/extractor/appletrailers.py +++ b/youtube_dl/extractor/appletrailers.py @@ -1,5 +1,6 @@ +from __future__ import unicode_literals + import re -import xml.etree.ElementTree import json from .common import InfoExtractor @@ -10,48 +11,48 @@ from ..utils import ( class AppleTrailersIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P[^/]+)/(?P[^/]+)' + _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P[^/]+)/(?P[^/]+)' _TEST = { - u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/", - u"playlist": [ + "url": "http://trailers.apple.com/trailers/wb/manofsteel/", + "playlist": [ { - u"file": u"manofsteel-trailer4.mov", - u"md5": u"d97a8e575432dbcb81b7c3acb741f8a8", - u"info_dict": { - u"duration": 111, - u"title": u"Trailer 4", - u"upload_date": u"20130523", - u"uploader_id": u"wb", + "file": "manofsteel-trailer4.mov", + "md5": "d97a8e575432dbcb81b7c3acb741f8a8", + "info_dict": { + "duration": 111, + "title": "Trailer 4", + "upload_date": "20130523", + "uploader_id": "wb", }, }, { - u"file": u"manofsteel-trailer3.mov", - u"md5": u"b8017b7131b721fb4e8d6f49e1df908c", - u"info_dict": { - u"duration": 182, - u"title": u"Trailer 3", - u"upload_date": u"20130417", - u"uploader_id": u"wb", + "file": "manofsteel-trailer3.mov", + "md5": "b8017b7131b721fb4e8d6f49e1df908c", + "info_dict": { + "duration": 182, + "title": "Trailer 3", + "upload_date": "20130417", + "uploader_id": "wb", }, }, { - u"file": u"manofsteel-trailer.mov", - u"md5": u"d0f1e1150989b9924679b441f3404d48", - u"info_dict": { - u"duration": 148, - u"title": u"Trailer", - u"upload_date": u"20121212", - u"uploader_id": u"wb", + "file": "manofsteel-trailer.mov", + "md5": "d0f1e1150989b9924679b441f3404d48", + "info_dict": { + "duration": 148, + "title": "Trailer", + "upload_date": "20121212", + "uploader_id": "wb", }, }, { - u"file": u"manofsteel-teaser.mov", - u"md5": u"5fe08795b943eb2e757fa95cb6def1cb", - u"info_dict": { - u"duration": 93, - u"title": u"Teaser", - u"upload_date": u"20120721", - u"uploader_id": u"wb", + "file": "manofsteel-teaser.mov", + "md5": "5fe08795b943eb2e757fa95cb6def1cb", + "info_dict": { + "duration": 93, + "title": "Teaser", + "upload_date": "20120721", + "uploader_id": "wb", }, } ] @@ -65,18 +66,18 @@ class AppleTrailersIE(InfoExtractor): uploader_id = mobj.group('company') playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc') - playlist_snippet = self._download_webpage(playlist_url, movie) - playlist_cleaned = re.sub(r'(?s).*?', u'', playlist_snippet) - playlist_cleaned = re.sub(r'', r'', playlist_cleaned) - # The ' in the onClick attributes are not escaped, it couldn't be parsed - # with xml.etree.ElementTree.fromstring - # like: http://trailers.apple.com/trailers/wb/gravity/ - def _clean_json(m): - return u'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') - playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned) - playlist_html = u'' + playlist_cleaned + u'' + def fix_html(s): + s = re.sub(r'(?s).*?', u'', s) + s = re.sub(r'', r'', s) + # The ' in the onClick attributes are not escaped, it couldn't be parsed + # like: http://trailers.apple.com/trailers/wb/gravity/ + def _clean_json(m): + return u'iTunes.playURL(%s);' % m.group(1).replace('\'', ''') + s = re.sub(self._JSON_RE, _clean_json, s) + s = u'' + s + u'' + return s + doc = self._download_xml(playlist_url, movie, transform_source=fix_html) - doc = xml.etree.ElementTree.fromstring(playlist_html) playlist = [] for li in doc.findall('./div/ul/li'): on_click = li.find('.//a').attrib['onClick'] @@ -111,9 +112,10 @@ class AppleTrailersIE(InfoExtractor): 'width': format['width'], 'height': int(format['height']), }) - formats = sorted(formats, key=lambda f: (f['height'], f['width'])) - info = { + self._sort_formats(formats) + + playlist.append({ '_type': 'video', 'id': video_id, 'title': title, @@ -124,12 +126,7 @@ class AppleTrailersIE(InfoExtractor): 'upload_date': upload_date, 'uploader_id': uploader_id, 'user_agent': 'QuickTime compatible (youtube-dl)', - } - # TODO: Remove when #980 has been merged - info['url'] = formats[-1]['url'] - info['ext'] = formats[-1]['ext'] - - playlist.append(info) + }) return { '_type': 'playlist',