Update upstream source from tag 'upstream/2019.09.28'

[youtubedl] / youtube_dl / extractor / appletrailers.py
diff --git a/youtube_dl/extractor/appletrailers.py b/youtube_dl/extractor/appletrailers.py

index f68dc323675b9179e97e4727939dc99029b5ff3c..a9ef733e011237338d904f956c4324cc6dd7a72b 100644 (file)
--- a/youtube_dl/extractor/appletrailers.py
+++ b/youtube_dl/extractor/appletrailers.py
@@ -7,15 +7,19 @@ from .common import InfoExtractor
  from ..compat import compat_urlparse
  from ..utils import (
      int_or_none,
+    parse_duration,
+    unified_strdate,
  )
  
  
  class AppleTrailersIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
+    IE_NAME = 'appletrailers'
+    _VALID_URL = r'https?://(?:www\.|movie)?trailers\.apple\.com/(?:trailers|ca)/(?P<company>[^/]+)/(?P<movie>[^/]+)'
      _TESTS = [{
          'url': 'http://trailers.apple.com/trailers/wb/manofsteel/',
          'info_dict': {
-            'id': 'manofsteel',
+            'id': '5111',
+            'title': 'Man of Steel',
          },
          'playlist': [
              {
@@ -63,9 +67,28 @@ class AppleTrailersIE(InfoExtractor):
                  },
              },
          ]
+    }, {
+        'url': 'http://trailers.apple.com/trailers/magnolia/blackthorn/',
+        'info_dict': {
+            'id': '4489',
+            'title': 'Blackthorn',
+        },
+        'playlist_mincount': 2,
+        'expected_warnings': ['Unable to download JSON metadata'],
+    }, {
+        # json data only available from http://trailers.apple.com/trailers/feeds/data/15881.json
+        'url': 'http://trailers.apple.com/trailers/fox/kungfupanda3/',
+        'info_dict': {
+            'id': '15881',
+            'title': 'Kung Fu Panda 3',
+        },
+        'playlist_mincount': 4,
      }, {
          'url': 'http://trailers.apple.com/ca/metropole/autrui/',
          'only_matching': True,
+    }, {
+        'url': 'http://movietrailers.apple.com/trailers/focus_features/kuboandthetwostrings/',
+        'only_matching': True,
      }]
  
      _JSON_RE = r'iTunes.playURL\((.*?)\);'
@@ -75,11 +98,50 @@ class AppleTrailersIE(InfoExtractor):
          movie = mobj.group('movie')
          uploader_id = mobj.group('company')
  
+        webpage = self._download_webpage(url, movie)
+        film_id = self._search_regex(r"FilmId\s*=\s*'(\d+)'", webpage, 'film id')
+        film_data = self._download_json(
+            'http://trailers.apple.com/trailers/feeds/data/%s.json' % film_id,
+            film_id, fatal=False)
+
+        if film_data:
+            entries = []
+            for clip in film_data.get('clips', []):
+                clip_title = clip['title']
+
+                formats = []
+                for version, version_data in clip.get('versions', {}).items():
+                    for size, size_data in version_data.get('sizes', {}).items():
+                        src = size_data.get('src')
+                        if not src:
+                            continue
+                        formats.append({
+                            'format_id': '%s-%s' % (version, size),
+                            'url': re.sub(r'_(\d+p\.mov)', r'_h\1', src),
+                            'width': int_or_none(size_data.get('width')),
+                            'height': int_or_none(size_data.get('height')),
+                            'language': version[:2],
+                        })
+                self._sort_formats(formats)
+
+                entries.append({
+                    'id': movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', clip_title).lower(),
+                    'formats': formats,
+                    'title': clip_title,
+                    'thumbnail': clip.get('screen') or clip.get('thumb'),
+                    'duration': parse_duration(clip.get('runtime') or clip.get('faded')),
+                    'upload_date': unified_strdate(clip.get('posted')),
+                    'uploader_id': uploader_id,
+                })
+
+            page_data = film_data.get('page', {})
+            return self.playlist_result(entries, film_id, page_data.get('movie_title'))
+
          playlist_url = compat_urlparse.urljoin(url, 'includes/playlists/itunes.inc')
  
          def fix_html(s):
              s = re.sub(r'(?s)<script[^<]*?>.*?</script>', '', s)
-            s = re.sub(r'<img ([^<]*?)>', r'<img \1/>', s)
+            s = re.sub(r'<img ([^<]*?)/?>', r'<img \1/>', s)
              # The ' in the onClick attributes are not escaped, it couldn't be parsed
              # like: http://trailers.apple.com/trailers/wb/gravity/
  
@@ -96,6 +158,9 @@ class AppleTrailersIE(InfoExtractor):
              trailer_info_json = self._search_regex(self._JSON_RE,
                                                     on_click, 'trailer info')
              trailer_info = json.loads(trailer_info_json)
+            first_url = trailer_info.get('url')
+            if not first_url:
+                continue
              title = trailer_info['title']
              video_id = movie + '-' + re.sub(r'[^a-zA-Z0-9]', '', title).lower()
              thumbnail = li.find('.//img').attrib['src']
@@ -107,7 +172,6 @@ class AppleTrailersIE(InfoExtractor):
              if m:
                  duration = 60 * int(m.group('minutes')) + int(m.group('seconds'))
  
-            first_url = trailer_info['url']
              trailer_id = first_url.split('/')[-1].rpartition('_')[0].lower()
              settings_json_url = compat_urlparse.urljoin(url, 'includes/settings/%s.json' % trailer_id)
              settings = self._download_json(settings_json_url, trailer_id, 'Downloading settings json')
@@ -115,7 +179,7 @@ class AppleTrailersIE(InfoExtractor):
              formats = []
              for format in settings['metadata']['sizes']:
                  # The src is a file pointing to the real video file
-                format_url = re.sub(r'_(\d*p.mov)', r'_h\1', format['src'])
+                format_url = re.sub(r'_(\d*p\.mov)', r'_h\1', format['src'])
                  formats.append({
                      'url': format_url,
                      'format': format['type'],
@@ -144,3 +208,76 @@ class AppleTrailersIE(InfoExtractor):
              'id': movie,
              'entries': playlist,
          }
+
+
+class AppleTrailersSectionIE(InfoExtractor):  # extractor for trailers.apple.com "#section=<id>" URLs; builds a playlist of URL results
+    IE_NAME = 'appletrailers:section'
+    _SECTIONS = {  # URL section id -> JSON feed file stem ('feed_path') and playlist title ('title')
+        'justadded': {
+            'feed_path': 'just_added',
+            'title': 'Just Added',
+        },
+        'exclusive': {
+            'feed_path': 'exclusive',
+            'title': 'Exclusive',
+        },
+        'justhd': {
+            'feed_path': 'just_hd',
+            'title': 'Just HD',
+        },
+        'mostpopular': {
+            'feed_path': 'most_pop',
+            'title': 'Most Popular',
+        },
+        'moviestudios': {
+            'feed_path': 'studios',
+            'title': 'Movie Studios',
+        },
+    }
+    _VALID_URL = r'https?://(?:www\.)?trailers\.apple\.com/#section=(?P<id>%s)' % '|'.join(_SECTIONS)  # alternation is built from the _SECTIONS keys, so only known section ids match
+    _TESTS = [{
+        'url': 'http://trailers.apple.com/#section=justadded',
+        'info_dict': {
+            'title': 'Just Added',
+            'id': 'justadded',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=exclusive',
+        'info_dict': {
+            'title': 'Exclusive',
+            'id': 'exclusive',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=justhd',
+        'info_dict': {
+            'title': 'Just HD',
+            'id': 'justhd',
+        },
+        'playlist_mincount': 80,
+    }, {
+        'url': 'http://trailers.apple.com/#section=mostpopular',
+        'info_dict': {
+            'title': 'Most Popular',
+            'id': 'mostpopular',
+        },
+        'playlist_mincount': 30,
+    }, {
+        'url': 'http://trailers.apple.com/#section=moviestudios',
+        'info_dict': {
+            'title': 'Movie Studios',
+            'id': 'moviestudios',
+        },
+        'playlist_mincount': 80,
+    }]
+
+    def _real_extract(self, url):  # returns a playlist result whose id is the section id and whose title comes from _SECTIONS
+        section = self._match_id(url)  # used below as a key into _SECTIONS; presumably the 'id' group of _VALID_URL - confirm against InfoExtractor
+        section_data = self._download_json(  # fetch the JSON feed named by this section's 'feed_path'
+            'http://trailers.apple.com/trailers/home/feeds/%s.json' % self._SECTIONS[section]['feed_path'],
+            section)
+        entries = [  # one URL result per feed item: the site root plus the item's 'location' value
+            self.url_result('http://trailers.apple.com' + e['location'])
+            for e in section_data]
+        return self.playlist_result(entries, section, self._SECTIONS[section]['title'])