Imported Upstream version 2015.06.04.1

[youtubedl] / youtube_dl / extractor / appletrailers.py
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py

index 4359b88d1b7057944beb126eb8a1c82dbb818758..576f03b5b71115771555e1d8d46f4a108eb9de93 100644 (file)
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -4,16 +4,19 @@ import re
  import json
  
  from .common import InfoExtractor
+from ..compat import compat_urlparse
  from ..utils import (
-    compat_urlparse,
      int_or_none,
  )
  
  
  class AppleTrailersIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/trailers/(?P<company>[^/]+)/(?P<movie>[^/]+)'
-    _TEST = {
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    _TESTS = [{
          "url": "http://trailers.apple.com/trailers/wb/manofsteel/",
+        'info_dict': {
+            'id': 'manofsteel',
+        },
          "playlist": [
              {
                  "md5": "d97a8e575432dbcb81b7c3acb741f8a8",
@@ -60,7 +63,10 @@ class AppleTrailersIE(InfoExtractor):
                  },
              },
          ]
-    }
+    }, {
+        'url': 'http://trailers.apple.com/ca/metropole/autrui/',
+        'only_matching': True,
+    }]
  
      _JSON_RE = r'iTunes.playURL\((.*?)\);'
  
@@ -70,15 +76,17 @@ class AppleTrailersIE(InfoExtractor):
          uploader_id = mobj.group('company')
  
          playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
+
          def fix_html(s):
              s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
              s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
              # The ' in the onClick attributes are not escaped, it couldn't be parsed
              # like: http://trailers.apple.com/trailers/wb/gravity/
+
              def _clean_json(m):
                  return 'iTunes.playURL(%s);' % m.group(1).replace('\'', '&#39;')
              s = re.sub(self._JSON_RE, _clean_json, s)
-            s = '<html>' + s + u'</html>'
+            s = '<html>%s</html>' % s
              return s
          doc = self._download_xml(playlist_url, movie, transform_source=fix_html)
  
@@ -86,7 +94,7 @@ class AppleTrailersIE(InfoExtractor):
          for li in doc.findall('./div/ul/li'):
              on_click = li.find('.//a').attrib['onClick']
              trailer_info_json = self._search_regex(self._JSON_RE,
-                on_click, 'trailer info')
+                                                   on_click, 'trailer info')
              trailer_info = json.loads(trailer_info_json)
              title = trailer_info['title']
              video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
@@ -120,14 +128,15 @@ class AppleTrailersIE(InfoExtractor):
              playlist.append({
                  '_type': 'video',
                  'id': video_id,
-                'title': title,
                  'formats': formats,
                  'title': title,
                  'duration': duration,
                  'thumbnail': thumbnail,
                  'upload_date': upload_date,
                  'uploader_id': uploader_id,
-                'user_agent': 'QuickTime compatible (youtube-dl)',
+                'http_headers': {
+                    'User-Agent': 'QuickTime compatible (youtube-dl)',
+                },
              })
  
          return {