Raphaël G. Git Repositories - youtubedl/blobdiff - youtube_dl/extractor/appletrailers.py
debian/copyright: Update my copyright years.
[youtubedl] / youtube_dl / extractor / appletrailers.py
index 922cede056690bac963cdb2f896eb7b9254680af..576f03b5b71115771555e1d8d46f4a108eb9de93 100644 (file)
@@ -4,21 +4,25 @@ import re
 import json
 
 from .common import InfoExtractor
+from ..compat import compat_urlparse
 from ..utils import (
-    compat_urlparse,
-    determine_ext,
+    int_or_none,
 )
 
 
 class AppleTrailersIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _TESTS = [{
         "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
+        'info_dict': {
+            'id': 'manofsteel',
+        },
         "playlist": [
             {
-                "file": "manofsteel-trailer4.mov",
                 "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
                 "info_dict": {
+                    "id": "manofsteel-trailer4",
+                    "ext": "mov",
                     "duration": 111,
                     "title": "Trailer 4",
                     "upload_date": "20130523",
@@ -26,9 +30,10 @@ class AppleTrailersIE(InfoExtractor):
                 },
             },
             {
-                "file": "manofsteel-trailer3.mov",
                 "md5": "b8017b7131b721fb4e8d6f49e1df908c",
                 "info_dict": {
+                    "id": "manofsteel-trailer3",
+                    "ext": "mov",
                     "duration": 182,
                     "title": "Trailer 3",
                     "upload_date": "20130417",
@@ -36,9 +41,10 @@ class AppleTrailersIE(InfoExtractor):
                 },
             },
             {
-                "file": "manofsteel-trailer.mov",
                 "md5": "d0f1e1150989b9924679b441f3404d48",
                 "info_dict": {
+                    "id": "manofsteel-trailer",
+                    "ext": "mov",
                     "duration": 148,
                     "title": "Trailer",
                     "upload_date": "20121212",
@@ -46,17 +52,21 @@ class AppleTrailersIE(InfoExtractor):
                 },
             },
             {
-                "file": "manofsteel-teaser.mov",
                 "md5": "5fe08795b943eb2e757fa95cb6def1cb",
                 "info_dict": {
+                    "id": "manofsteel-teaser",
+                    "ext": "mov",
                     "duration": 93,
                     "title": "Teaser",
                     "upload_date": "20120721",
                     "uploader_id": "wb",
                 },
-            }
+            },
         ]
-    }
+    }, {
+        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+        'only_matching': True,
+    }]
 
     _JSON_RE = r'iTunes.playURL\((.*?)\);'
 
@@ -65,16 +75,18 @@ class AppleTrailersIE(InfoExtractor):
         movie = mobj.group('movie')
         uploader_id = mobj.group('company')
 
-        playlist_url = compat_urlparse.urljoin(url, u'includes/playlists/itunes.inc')
+        playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+
         def fix_html(s):
-            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', u'', s)
+            s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
             s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
             # The ' in the onClick attributes are not escaped, it couldn't be parsed
             # like: http://trailers.apple.com/trailers/wb/gravity/
+
             def _clean_json(m):
-                return u'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
+                return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
             s = re.sub(self._JSON_RE, _clean_json, s)
-            s = u'<html>' + s + u'</html>'
+            s = '<html>%s</html>' % s
             return s
         doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
 
@@ -82,7 +94,7 @@ class AppleTrailersIE(InfoExtractor):
         for li in doc.findall('./div/ul/li'):
             on_click = li.find('.//a').attrib['onClick']
             trailer_info_json = self._search_regex(self._JSON_RE,
-                on_click, u'trailer info')
+                                                   on_click, 'trailer info')
             trailer_info = json.loads(trailer_info_json)
             title = trailer_info['title']
             video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
@@ -98,8 +110,7 @@ class AppleTrailersIE(InfoExtractor):
             first_url = trailer_info['url']
             trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
             settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
-            settings_json = self._download_webpage(settings_json_url, trailer_id, u'Downloading settings json')
-            settings = json.loads(settings_json)
+            settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
 
             formats = []
             for format in settings['metadata']['sizes']:
@@ -107,10 +118,9 @@ class AppleTrailersIE(InfoExtractor):
                 format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
                 formats.append({
                     'url': format_url,
-                    'ext': determine_ext(format_url),
                     'format': format['type'],
-                    'width': format['width'],
-                    'height': int(format['height']),
+                    'width': int_or_none(format['width']),
+                    'height': int_or_none(format['height']),
                 })
 
             self._sort_formats(formats)
@@ -118,14 +128,15 @@ class AppleTrailersIE(InfoExtractor):
             playlist.append({
                 '_type': 'video',
                 'id': video_id,
-                'title': title,
                 'formats': formats,
                 'title': title,
                 'duration': duration,
                 'thumbnail': thumbnail,
                 'upload_date': upload_date,
                 'uploader_id': uploader_id,
-                'user_agent': 'QuickTime compatible (youtube-dl)',
+                'http_headers': {
+                    'User-Agent': 'QuickTime compatible (youtube-dl)',
+                },
             })
 
         return {