Merge tag 'upstream/2015.11.27.1'

[youtubedl] / youtube_dl / extractor / adultswim.py
diff --git a/youtube_dl/extractor/adultswim.py b/youtube_dl/extractor/adultswim.py

index 34b8b01157bb930937f6f69c4950d8d01c39ed6e..3ae618e71b807b403f298b6b0c0d7919bb98a364 100644 (file)
--- a/youtube_dl/extractor/adultswim.py
+++ b/youtube_dl/extractor/adultswim.py
@@ -2,13 +2,13 @@
  from __future__ import unicode_literals
  
  import re
  from __future__ import unicode_literals
  
  import re
-import json
  
  from .common import InfoExtractor
  from ..utils import (
  
  from .common import InfoExtractor
  from ..utils import (
+    determine_ext,
      ExtractorError,
      ExtractorError,
-    xpath_text,
      float_or_none,
      float_or_none,
+    xpath_text,
  )
  
  
  )
  
  
@@ -41,7 +41,8 @@ class AdultSwimIE(InfoExtractor):
              'id': 'rQxZvXQ4ROaSOqq-or2Mow',
              'title': 'Rick and Morty - Pilot',
              'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
              'id': 'rQxZvXQ4ROaSOqq-or2Mow',
              'title': 'Rick and Morty - Pilot',
              'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. "
-        }
+        },
+        'skip': 'This video is only available for registered users',
      }, {
          'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
          'playlist': [
      }, {
          'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/',
          'playlist': [
@@ -60,6 +61,24 @@ class AdultSwimIE(InfoExtractor):
              'title': 'American Dad - Putting Francine Out of Business',
              'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
          },
              'title': 'American Dad - Putting Francine Out of Business',
              'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].'
          },
+    }, {
+        'url': 'http://www.adultswim.com/videos/tim-and-eric-awesome-show-great-job/dr-steve-brule-for-your-wine/',
+        'playlist': [
+            {
+                'md5': '3e346a2ab0087d687a05e1e7f3b3e529',
+                'info_dict': {
+                    'id': 'sY3cMUR_TbuE4YmdjzbIcQ-0',
+                    'ext': 'flv',
+                    'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+                    'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+                },
+            }
+        ],
+        'info_dict': {
+            'id': 'sY3cMUR_TbuE4YmdjzbIcQ',
+            'title': 'Tim and Eric Awesome Show Great Job! - Dr. Steve Brule, For Your Wine',
+            'description': 'Dr. Brule reports live from Wine Country with a special report on wines.  \r\nWatch Tim and Eric Awesome Show Great Job! episode #20, "Embarrassed" on Adult Swim.\r\n\r\n',
+        },
      }]
  
      @staticmethod
      }]
  
      @staticmethod
@@ -80,6 +99,7 @@ class AdultSwimIE(InfoExtractor):
              for video in collection.get('videos'):
                  if video.get('slug') == slug:
                      return collection, video
              for video in collection.get('videos'):
                  if video.get('slug') == slug:
                      return collection, video
+        return None, None
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
  
      def _real_extract(self, url):
          mobj = re.match(self._VALID_URL, url)
@@ -90,30 +110,39 @@ class AdultSwimIE(InfoExtractor):
          webpage = self._download_webpage(url, episode_path)
  
          # Extract the value of `bootstrappedData` from the Javascript in the page.
          webpage = self._download_webpage(url, episode_path)
  
          # Extract the value of `bootstrappedData` from the Javascript in the page.
-        bootstrappedDataJS = self._search_regex(r'var bootstrappedData = ({.*});', webpage, episode_path)
-
-        try:
-            bootstrappedData = json.loads(bootstrappedDataJS)
-        except ValueError as ve:
-            errmsg = '%s: Failed to parse JSON ' % episode_path
-            raise ExtractorError(errmsg, cause=ve)
+        bootstrapped_data = self._parse_json(self._search_regex(
+            r'var bootstrappedData = ({.*});', webpage, 'bootstraped data'), episode_path)
  
          # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
          # NOTE: We are only downloading one video (the current one) not the playlist
          if is_playlist:
  
          # Downloading videos from a /videos/playlist/ URL needs to be handled differently.
          # NOTE: We are only downloading one video (the current one) not the playlist
          if is_playlist:
-            collections = bootstrappedData['playlists']['collections']
+            collections = bootstrapped_data['playlists']['collections']
              collection = self.find_collection_by_linkURL(collections, show_path)
              video_info = self.find_video_info(collection, episode_path)
  
              show_title = video_info['showTitle']
              segment_ids = [video_info['videoPlaybackID']]
          else:
              collection = self.find_collection_by_linkURL(collections, show_path)
              video_info = self.find_video_info(collection, episode_path)
  
              show_title = video_info['showTitle']
              segment_ids = [video_info['videoPlaybackID']]
          else:
-            collections = bootstrappedData['show']['collections']
+            collections = bootstrapped_data['show']['collections']
              collection, video_info = self.find_collection_containing_video(collections, episode_path)
              collection, video_info = self.find_collection_containing_video(collections, episode_path)
-
-            show = bootstrappedData['show']
+            # Video wasn't found in the collections, let's try `slugged_video`.
+            if video_info is None:
+                if bootstrapped_data.get('slugged_video', {}).get('slug') == episode_path:
+                    video_info = bootstrapped_data['slugged_video']
+                else:
+                    raise ExtractorError('Unable to find video info')
+
+            show = bootstrapped_data['show']
              show_title = show['title']
              show_title = show['title']
-            segment_ids = [clip['videoPlaybackID'] for clip in video_info['clips']]
+            stream = video_info.get('stream')
+            clips = [stream] if stream else video_info.get('clips')
+            if not clips:
+                raise ExtractorError(
+                    'This video is only available via cable service provider subscription that'
+                    ' is not currently supported. You may want to use --cookies.'
+                    if video_info.get('auth') is True else 'Unable to find stream or clips',
+                    expected=True)
+            segment_ids = [clip['videoPlaybackID'] for clip in clips]
  
          episode_id = video_info['id']
          episode_title = video_info['title']
  
          episode_id = video_info['id']
          episode_title = video_info['title']
@@ -122,7 +151,7 @@ class AdultSwimIE(InfoExtractor):
  
          entries = []
          for part_num, segment_id in enumerate(segment_ids):
  
          entries = []
          for part_num, segment_id in enumerate(segment_ids):
-            segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
+            segment_url = 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=desktop' % segment_id
  
              segment_title = '%s - %s' % (show_title, episode_title)
              if len(segment_ids) > 1:
  
              segment_title = '%s - %s' % (show_title, episode_title)
              if len(segment_ids) > 1:
@@ -136,19 +165,32 @@ class AdultSwimIE(InfoExtractor):
                  xpath_text(idoc, './/trt', 'segment duration').strip())
  
              formats = []
                  xpath_text(idoc, './/trt', 'segment duration').strip())
  
              formats = []
-            file_els = idoc.findall('.//files/file')
+            file_els = idoc.findall('.//files/file') or idoc.findall('./files/file')
  
  
+            unique_urls = []
+            unique_file_els = []
              for file_el in file_els:
              for file_el in file_els:
+                media_url = file_el.text
+                if not media_url or determine_ext(media_url) == 'f4m':
+                    continue
+                if file_el.text not in unique_urls:
+                    unique_urls.append(file_el.text)
+                    unique_file_els.append(file_el)
+
+            for file_el in unique_file_els:
                  bitrate = file_el.attrib.get('bitrate')
                  ftype = file_el.attrib.get('type')
                  bitrate = file_el.attrib.get('bitrate')
                  ftype = file_el.attrib.get('type')
-
-                formats.append({
-                    'format_id': '%s_%s' % (bitrate, ftype),
-                    'url': file_el.text.strip(),
-                    # The bitrate may not be a number (for example: 'iphone')
-                    'tbr': int(bitrate) if bitrate.isdigit() else None,
-                    'quality': 1 if ftype == 'hd' else -1
-                })
+                media_url = file_el.text
+                if determine_ext(media_url) == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        media_url, segment_title, 'mp4', preference=0, m3u8_id='hls'))
+                else:
+                    formats.append({
+                        'format_id': '%s_%s' % (bitrate, ftype),
+                        'url': file_el.text.strip(),
+                        # The bitrate may not be a number (for example: 'iphone')
+                        'tbr': int(bitrate) if bitrate.isdigit() else None,
+                    })
  
              self._sort_formats(formats)
  
  
              self._sort_formats(formats)