]> Raphaƫl G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/appletrailers.py
Merge tag 'upstream/2013.12.23'
[youtubedl] / youtube_dl / extractor / appletrailers.py
index 6d6237f8af79c02048da0e1b1624f33086a120b6..ef5644aa54fe28002dc4d8c76308941c264252e3 100644 (file)
@@ -1,5 +1,4 @@
 import re
 import re
-import xml.etree.ElementTree
 import json
 
 from .common import InfoExtractor
 import json
 
 from .common import InfoExtractor
@@ -10,7 +9,7 @@ from ..utils import (
 
 
 class AppleTrailersIE(InfoExtractor):
 
 
 class AppleTrailersIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trailers.apple.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
     _TEST = {
         u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
         u"playlist": [
     _TEST = {
         u"url": u"http://trailers.apple.com/trailers/wb/manofsteel/",
         u"playlist": [
@@ -65,18 +64,18 @@ class AppleTrailersIE(InfoExtractor):
         uploader_id = mobj.group('company')
 
         playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
         uploader_id = mobj.group('company')
 
         playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
-        playlist_snippet = self._download_webpage(playlist_url, movie)
-        playlist_cleaned = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', playlist_snippet)
-        playlist_cleaned = re.sub(r'<img ([^<]*?)>', r'<img \1/>', playlist_cleaned)
-        # The ' in the onClick attributes are not escaped, it couldn't be parsed
-        # with xml.etree.ElementTree.fromstring
-        # like: http://trailers.apple.com/trailers/wb/gravity/
-        def _clean_json(m):
-            return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
-        playlist_cleaned = re.sub(self._JSON_RE, _clean_json, playlist_cleaned)
-        playlist_html = u'<html>' + playlist_cleaned + u'</html>'
+        def fix_html(s):
+            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
+            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
+            # The ' in the onClick attributes are not escaped, it couldn't be parsed
+            # like: http://trailers.apple.com/trailers/wb/gravity/
+            def _clean_json(m):
+                return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+            s = re.sub(self._JSON_RE, _clean_json, s)
+            s = u'<html>' + s + u'</html>'
+            return s
+        doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
 
 
-        doc = xml.etree.ElementTree.fromstring(playlist_html)
         playlist = []
         for li in doc.findall('./div/ul/li'):
             on_click = li.find('.//a').attrib['onClick']
         playlist = []
         for li in doc.findall('./div/ul/li'):
             on_click = li.find('.//a').attrib['onClick']
@@ -113,7 +112,7 @@ class AppleTrailersIE(InfoExtractor):
                 })
             formats = sorted(formats, key=lambda f: (f['height'], f['width']))
 
                 })
             formats = sorted(formats, key=lambda f: (f['height'], f['width']))
 
-            info = {
+            playlist.append({
                 '_type': 'video',
                 'id': video_id,
                 'title': title,
                 '_type': 'video',
                 'id': video_id,
                 'title': title,
@@ -124,12 +123,7 @@ class AppleTrailersIE(InfoExtractor):
                 'upload_date': upload_date,
                 'uploader_id': uploader_id,
                 'user_agent': 'QuickTime compatible (youtube-dl)',
                 'upload_date': upload_date,
                 'uploader_id': uploader_id,
                 'user_agent': 'QuickTime compatible (youtube-dl)',
-            }
-            # TODO: Remove when #980 has been merged
-            info['url'] = formats[-1]['url']
-            info['ext'] = formats[-1]['ext']
-
-            playlist.append(info)
+            })
 
         return {
             '_type': 'playlist',
 
         return {
             '_type': 'playlist',