]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/asiancrush.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from .kaltura
import KalturaIE
8 from ..utils
import extract_attributes
11 class AsianCrushIE(InfoExtractor
):
12 _VALID_URL_BASE
= r
'https?://(?:www\.)?(?P<host>(?:(?:asiancrush|yuyutv|midnightpulp)\.com|cocoro\.tv))'
13 _VALID_URL
= r
'%s/video/(?:[^/]+/)?0+(?P<id>\d+)v\b' % _VALID_URL_BASE
15 'url': 'https://www.asiancrush.com/video/012869v/women-who-flirt/',
16 'md5': 'c3b740e48d0ba002a42c0b72857beae6',
20 'title': 'Women Who Flirt',
21 'description': 'md5:7e986615808bcfb11756eb503a751487',
22 'timestamp': 1496936429,
23 'upload_date': '20170608',
24 'uploader_id': 'craig@crifkin.com',
27 'url': 'https://www.asiancrush.com/video/she-was-pretty/011886v-pretty-episode-3/',
28 'only_matching': True,
30 'url': 'https://www.yuyutv.com/video/013886v/the-act-of-killing/',
31 'only_matching': True,
33 'url': 'https://www.yuyutv.com/video/peep-show/013922v-warring-factions/',
34 'only_matching': True,
36 'url': 'https://www.midnightpulp.com/video/010400v/drifters/',
37 'only_matching': True,
39 'url': 'https://www.midnightpulp.com/video/mononoke/016378v-zashikiwarashi-part-1/',
40 'only_matching': True,
42 'url': 'https://www.cocoro.tv/video/the-wonderful-wizard-of-oz/008878v-the-wonderful-wizard-of-oz-ep01/',
43 'only_matching': True,
46 def _real_extract(self
, url
):
47 mobj
= re
.match(self
._VALID
_URL
, url
)
48 host
= mobj
.group('host')
49 video_id
= mobj
.group('id')
51 webpage
= self
._download
_webpage
(url
, video_id
)
53 entry_id
, partner_id
, title
= [None] * 3
55 vars = self
._parse
_json
(
57 r
'iEmbedVars\s*=\s*({.+?})', webpage
, 'embed vars',
58 default
='{}'), video_id
, fatal
=False)
60 entry_id
= vars.get('entry_id')
61 partner_id
= vars.get('partner_id')
62 title
= vars.get('vid_label')
65 entry_id
= self
._search
_regex
(
66 r
'\bentry_id["\']\s
*:\s
*["\'](\d+)', webpage, 'entry id')
68 player = self._download_webpage(
69 'https://api.%s/embeddedVideoPlayer' % host, video_id,
70 query={'id': entry_id})
72 kaltura_id = self._search_regex(
73 r'entry_id["\']\s
*:\s
*(["\'])(?P<id>(?:(?!\1).)+)\1', player,
74 'kaltura id', group='id')
77 partner_id = self._search_regex(
78 r'/p(?:artner_id)?/(\d+)', player, 'partner id',
81 description = self._html_search_regex(
82 r'(?s)<div[^>]+\bclass=["\']description
["\'][^>]*>(.+?)</div>',
83 webpage, 'description', fatal=False)
86 '_type': 'url_transparent',
87 'url': 'kaltura:%s:%s' % (partner_id, kaltura_id),
88 'ie_key': KalturaIE.ie_key(),
91 'description': description,
95 class AsianCrushPlaylistIE(InfoExtractor):
96 _VALID_URL = r'%s/series/0+(?P<id>\d+)s\b' % AsianCrushIE._VALID_URL_BASE
98 'url': 'https://www.asiancrush.com/series/012481s/scholar-walks-night/',
101 'title': 'Scholar Who Walks the Night',
102 'description': 'md5:7addd7c5132a09fd4741152d96cce886',
104 'playlist_count': 20,
106 'url': 'https://www.yuyutv.com/series/013920s/peep-show/',
107 'only_matching': True,
109 'url': 'https://www.midnightpulp.com/series/016375s/mononoke/',
110 'only_matching': True,
112 'url': 'https://www.cocoro.tv/series/008549s/the-wonderful-wizard-of-oz/',
113 'only_matching': True,
116 def _real_extract(self, url):
117 playlist_id = self._match_id(url)
119 webpage = self._download_webpage(url, playlist_id)
123 for mobj in re.finditer(
124 r'<a[^>]+href=(["\'])(?P
<url
>%s.*?
)\
1[^
>]*>' % AsianCrushIE._VALID_URL,
126 attrs = extract_attributes(mobj.group(0))
127 if attrs.get('class') == 'clearfix
':
128 entries.append(self.url_result(
129 mobj.group('url
'), ie=AsianCrushIE.ie_key()))
131 title = self._html_search_regex(
132 r'(?s
)<h1
\b[^
>]\bid
=["\']movieTitle[^>]+>(.+?)</h1>', webpage,
133 'title', default=None) or self._og_search_title(
134 webpage, default=None) or self._html_search_meta(
135 'twitter:title', webpage, 'title',
136 default=None) or self._search_regex(
137 r'<title>([^<]+)</title>', webpage, 'title', fatal=False)
139 title = re.sub(r'\s*\|\s*.+?$', '', title)
141 description = self._og_search_description(
142 webpage, default=None) or self._html_search_meta(
143 'twitter:description', webpage, 'description', fatal=False)
145 return self.playlist_result(entries, playlist_id, title, description)