- playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
- playlist_snippet = self._download_webpage(playlist_url, movie)
- # Drop <script> elements and self-close <img> tags before parsing.
- playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
- playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)
- # The ' characters in the onClick attributes are not escaped, so the
- # snippet cannot be parsed with xml.etree.ElementTree.fromstring
- # (example: http://trailers.apple.com/trailers/wb/gravity/).
- def _clean_json(m):
-     # Replace every raw ' in the captured argument with a character
-     # reference and re-emit the call around it.
-     return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
- playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
- # Wrap the cleaned snippet in a single <html> root element.
- playlist_html = u'<html>' + playlist_cleaned + u'</html>'
-
- doc = xml.etree.ElementTree.fromstring(playlist_html)
+ playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+
+ def fix_html(s):
+     """Clean up the playlist snippet text `s` and return it wrapped in <html>.
+
+     Removes <script> elements, self-closes <img> tags, escapes the raw
+     ' characters inside the text matched by self._JSON_RE, and wraps
+     the result in a single <html> root element.
+     """
+     s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
+     s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
+     # The ' characters in the onClick attributes are not escaped, so the
+     # snippet could not be parsed as-is
+     # (example: http://trailers.apple.com/trailers/wb/gravity/).
+
+     def _clean_json(m):
+         # Replace every raw ' in the captured argument with a character
+         # reference and re-emit the call around it.
+         # NOTE(review): presumably self._JSON_RE matches the
+         # iTunes.playURL(...) calls; confirm against its definition.
+         return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+     s = re.sub(self._JSON_RE, _clean_json, s)
+     s = '<html>%s</html>' % s
+     return s
+ # fix_html is handed over as transform_source; presumably it is applied to
+ # the downloaded text before it is parsed -- confirm in _download_xml.
+ doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
+