]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soompi.py
5da66ca9ec346bb1be27a475bb6e1b7a91392d61
   2 from __future__ 
import unicode_literals
 
   6 from .crunchyroll 
import CrunchyrollIE
 
   8 from .common 
import InfoExtractor
 
   9 from ..compat 
import compat_HTTPError
 
  18 class SoompiBaseIE(InfoExtractor
): 
  19     def _get_episodes(self
, webpage
, episode_filter
=None): 
  20         episodes 
= self
._parse
_json
( 
  22                 r
'VIDEOS\s*=\s*(\[.+?\]);', webpage
, 'episodes JSON'), 
  24         return list(filter(episode_filter
, episodes
)) 
  27 class SoompiIE(SoompiBaseIE
, CrunchyrollIE
): 
  29     _VALID_URL 
= r
'https?://tv\.soompi\.com/(?:en/)?watch/(?P<id>[0-9]+)' 
  31         'url': 'http://tv.soompi.com/en/watch/29235', 
  35             'title': 'Episode 1096', 
  36             'description': '2015-05-20' 
  39             'skip_download': True, 
  43     def _get_episode(self
, webpage
, video_id
): 
  44         return self
._get
_episodes
(webpage
, lambda x
: x
['id'] == video_id
)[0] 
  46     def _get_subtitles(self
, config
, video_id
): 
  48         for subtitle 
in config
.findall('./{default}preload/subtitles/subtitle'): 
  49             sub_langs
[subtitle
.attrib
['id']] = subtitle
.attrib
['title'] 
  52         for s 
in config
.findall('./{default}preload/subtitle'): 
  53             lang_code 
= sub_langs
.get(s
.attrib
['id']) 
  57             data 
= xpath_text(s
, './data', 'data') 
  58             iv 
= xpath_text(s
, './iv', 'iv') 
  59             if not id or not iv 
or not data
: 
  61             subtitle 
= self
._decrypt
_subtitles
(data
, iv
, sub_id
).decode('utf-8') 
  62             subtitles
[lang_code
] = self
._extract
_subtitles
(subtitle
) 
  65     def _real_extract(self
, url
): 
  66         video_id 
= self
._match
_id
(url
) 
  69             webpage 
= self
._download
_webpage
( 
  70                 url
, video_id
, 'Downloading episode page') 
  71         except ExtractorError 
as ee
: 
  72             if isinstance(ee
.cause
, compat_HTTPError
) and ee
.cause
.code 
== 403: 
  73                 webpage 
= ee
.cause
.read() 
  74                 block_message 
= self
._html
_search
_regex
( 
  75                     r
'(?s)<div class="block-message">(.+?)</div>', webpage
, 
  76                     'block message', default
=None) 
  78                     raise ExtractorError(block_message
, expected
=True) 
  83         for format_id 
in re
.findall(r
'\?quality=([0-9a-zA-Z]+)', webpage
): 
  84             config 
= self
._download
_xml
( 
  85                 'http://tv.soompi.com/en/show/_/%s-config.xml?mode=hls&quality=%s' % (video_id
, format_id
), 
  86                 video_id
, 'Downloading %s XML' % format_id
) 
  87             m3u8_url 
= xpath_text( 
  88                 config
, './{default}preload/stream_info/file', 
  89                 '%s m3u8 URL' % format_id
) 
  92             formats
.extend(self
._extract
_m
3u8_formats
( 
  93                 m3u8_url
, video_id
, 'mp4', m3u8_id
=format_id
)) 
  94         self
._sort
_formats
(formats
) 
  96         episode 
= self
._get
_episode
(webpage
, video_id
) 
  98         title 
= episode
['name'] 
  99         description 
= episode
.get('description') 
 100         duration 
= int_or_none(episode
.get('duration')) 
 104             'url': thumbnail_url
, 
 105         } for thumbnail_id
, thumbnail_url 
in episode
.get('img_url', {}).items()] 
 107         subtitles 
= self
.extract_subtitles(config
, video_id
) 
 112             'description': description
, 
 113             'thumbnails': thumbnails
, 
 114             'duration': duration
, 
 116             'subtitles': subtitles
 
 120 class SoompiShowIE(SoompiBaseIE
): 
 121     IE_NAME 
= 'soompi:show' 
 122     _VALID_URL 
= r
'https?://tv\.soompi\.com/en/shows/(?P<id>[0-9a-zA-Z\-_]+)' 
 124         'url': 'http://tv.soompi.com/en/shows/liar-game', 
 127             'title': 'Liar Game', 
 128             'description': 'md5:52c02bce0c1a622a95823591d0589b66', 
 130         'playlist_count': 14, 
 133     def _real_extract(self
, url
): 
 134         show_id 
= self
._match
_id
(url
) 
 136         webpage 
= self
._download
_webpage
( 
 137             url
, show_id
, 'Downloading show page') 
 139         title 
= remove_start(self
._og
_search
_title
(webpage
), 'SoompiTV | ') 
 140         description 
= self
._og
_search
_description
(webpage
) 
 143             self
.url_result('http://tv.soompi.com/en/watch/%s' % episode
['id'], 'Soompi') 
 144             for episode 
in self
._get
_episodes
(webpage
)] 
 146         return self
.playlist_result(entries
, show_id
, title
, description
)