]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/adultswim.py
   2 from __future__ 
import unicode_literals
 
   7 from .common 
import InfoExtractor
 
  15 class AdultSwimIE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://(?:www\.)?adultswim\.com/videos/(?P<is_playlist>playlists/)?(?P<show_path>[^/]+)/(?P<episode_path>[^/?#]+)/?' 
  19         'url': 'http://adultswim.com/videos/rick-and-morty/pilot', 
  22                 'md5': '247572debc75c7652f253c8daa51a14d', 
  24                     'id': 'rQxZvXQ4ROaSOqq-or2Mow-0', 
  26                     'title': 'Rick and Morty - Pilot Part 1', 
  27                     'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " 
  31                 'md5': '77b0e037a4b20ec6b98671c4c379f48d', 
  33                     'id': 'rQxZvXQ4ROaSOqq-or2Mow-3', 
  35                     'title': 'Rick and Morty - Pilot Part 4', 
  36                     'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " 
  41             'title': 'Rick and Morty - Pilot', 
  42             'description': "Rick moves in with his daughter's family and establishes himself as a bad influence on his grandson, Morty. " 
  45         'url': 'http://www.adultswim.com/videos/playlists/american-parenting/putting-francine-out-of-business/', 
  48                 'md5': '2eb5c06d0f9a1539da3718d897f13ec5', 
  50                     'id': '-t8CamQlQ2aYZ49ItZCFog-0', 
  52                     'title': 'American Dad - Putting Francine Out of Business', 
  53                     'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' 
  58             'title': 'American Dad - Putting Francine Out of Business', 
  59             'description': 'Stan hatches a plan to get Francine out of the real estate business.Watch more American Dad on [adult swim].' 
  64     def find_video_info(collection
, slug
): 
  65         for video 
in collection
.get('videos'): 
  66             if video
.get('slug') == slug
: 
  70     def find_collection_by_linkURL(collections
, linkURL
): 
  71         for collection 
in collections
: 
  72             if collection
.get('linkURL') == linkURL
: 
  76     def find_collection_containing_video(collections
, slug
): 
  77         for collection 
in collections
: 
  78             for video 
in collection
.get('videos'): 
  79                 if video
.get('slug') == slug
: 
  80                     return collection
, video
 
  82     def _real_extract(self
, url
): 
  83         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  84         show_path 
= mobj
.group('show_path') 
  85         episode_path 
= mobj
.group('episode_path') 
  86         is_playlist 
= True if mobj
.group('is_playlist') else False 
  88         webpage 
= self
._download
_webpage
(url
, episode_path
) 
  90         # Extract the value of `bootstrappedData` from the Javascript in the page. 
  91         bootstrappedDataJS 
= self
._search
_regex
(r
'var bootstrappedData = ({.*});', webpage
, episode_path
) 
  94             bootstrappedData 
= json
.loads(bootstrappedDataJS
) 
  95         except ValueError as ve
: 
  96             errmsg 
= '%s: Failed to parse JSON ' % episode_path
 
  97             raise ExtractorError(errmsg
, cause
=ve
) 
  99         # Downloading videos from a /videos/playlists/ URL needs to be handled differently. 
 100         # NOTE: We are only downloading one video (the current one) not the playlist 
 102             collections 
= bootstrappedData
['playlists']['collections'] 
 103             collection 
= self
.find_collection_by_linkURL(collections
, show_path
) 
 104             video_info 
= self
.find_video_info(collection
, episode_path
) 
 106             show_title 
= video_info
['showTitle'] 
 107             segment_ids 
= [video_info
['videoPlaybackID']] 
 109             collections 
= bootstrappedData
['show']['collections'] 
 110             collection
, video_info 
= self
.find_collection_containing_video(collections
, episode_path
) 
 112             show 
= bootstrappedData
['show'] 
 113             show_title 
= show
['title'] 
 114             segment_ids 
= [clip
['videoPlaybackID'] for clip 
in video_info
['clips']] 
 116         episode_id 
= video_info
['id'] 
 117         episode_title 
= video_info
['title'] 
 118         episode_description 
= video_info
['description'] 
 119         episode_duration 
= video_info
.get('duration') 
 122         for part_num
, segment_id 
in enumerate(segment_ids
): 
 123             segment_url 
= 'http://www.adultswim.com/videos/api/v0/assets?id=%s&platform=mobile' % segment_id
 
 125             segment_title 
= '%s - %s' % (show_title
, episode_title
) 
 126             if len(segment_ids
) > 1: 
 127                 segment_title 
+= ' Part %d' % (part_num 
+ 1) 
 129             idoc 
= self
._download
_xml
( 
 130                 segment_url
, segment_title
, 
 131                 'Downloading segment information', 'Unable to download segment information') 
 133             segment_duration 
= float_or_none( 
 134                 xpath_text(idoc
, './/trt', 'segment duration').strip()) 
 137             file_els 
= idoc
.findall('.//files/file') 
 139             for file_el 
in file_els
: 
 140                 bitrate 
= file_el
.attrib
.get('bitrate') 
 141                 ftype 
= file_el
.attrib
.get('type') 
 144                     'format_id': '%s_%s' % (bitrate
, ftype
), 
 145                     'url': file_el
.text
.strip(), 
 146                     # The bitrate may not be a number (for example: 'iphone') 
 147                     'tbr': int(bitrate
) if bitrate
.isdigit() else None, 
 148                     'quality': 1 if ftype 
== 'hd' else -1 
 151             self
._sort
_formats
(formats
) 
 155                 'title': segment_title
, 
 157                 'duration': segment_duration
, 
 158                 'description': episode_description
 
 164             'display_id': episode_path
, 
 166             'title': '%s - %s' % (show_title
, episode_title
), 
 167             'description': episode_description
, 
 168             'duration': episode_duration