Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/soulanime.py
debian/control: Add recommends on aria2 | wget | curl to use external downloaders.
[youtubedl] / youtube_dl / extractor / soulanime.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import InfoExtractor
6 from ..utils import (
7 HEADRequest,
8 urlhandle_detect_ext,
9 )
10
11
class SoulAnimeWatchingIE(InfoExtractor):
    """Extractor for a single soul-anime episode ("watching") page.

    The episode slug from the URL is used as the video id, and the media
    URL is taken from the download link found in the page markup.
    """

    IE_NAME = "soulanime:watching"
    IE_DESC = "SoulAnime video"
    # Captures the top-level domain suffix and the episode slug.
    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/watch[^/]*/(?P<id>[^/]+)'
    _TEST = {
        'url': 'http://www.soul-anime.net/watching/seirei-tsukai-no-blade-dance-episode-9/',
        'md5': '05fae04abf72298098b528e98abf4298',
        'info_dict': {
            'id': 'seirei-tsukai-no-blade-dance-episode-9',
            'ext': 'mp4',
            'title': 'seirei-tsukai-no-blade-dance-episode-9',
            'description': 'seirei-tsukai-no-blade-dance-episode-9'
        }
    }

    def _real_extract(self, url):
        """Return the info dict for the episode page at *url*.

        The page is downloaded, the href of the link inside the
        ``<div id="download">`` element is read, and that href is appended
        to the site root built from the URL's domain suffix.  The file
        extension is then detected from the response to a ``HEADRequest``
        for the resulting media URL.
        """
        match = re.match(self._VALID_URL, url)
        domain = match.group('domain')
        video_id = match.group('id')

        webpage = self._download_webpage(url, video_id)

        # The href is appended directly to the site root below, so it is
        # treated as a site-relative path.
        download_path = self._html_search_regex(
            r'<div id="download">[^<]*<a href="(?P<url>[^"]+)"', webpage, 'url')
        video_url = ''.join(("http://www.soul-anime.", domain, download_path))

        # Ask for the headers only and let the helper pick the extension
        # from the response handle.
        head_response = self._request_webpage(
            HEADRequest(video_url), video_id, note='Determining extension')
        extension = urlhandle_detect_ext(head_response)

        # Nothing but the slug is scraped from the page, so it doubles as
        # both the title and the description.
        info = {
            'id': video_id,
            'url': video_url,
            'ext': extension,
            'title': video_id,
            'description': video_id
        }
        return info
50
51
class SoulAnimeSeriesIE(InfoExtractor):
    """Extractor for a soul-anime series page, returned as a playlist.

    Each episode listed in the page's ``<option>`` elements becomes one
    playlist entry pointing at the corresponding episode page.
    """

    IE_NAME = "soulanime:series"
    IE_DESC = "SoulAnime Series"

    # Captures the top-level domain suffix and the series slug.
    _VALID_URL = r'http://[w.]*soul-anime\.(?P<domain>[^/]+)/anime./(?P<id>[^/]+)'

    # Captures the site-relative episode path from each <option> value.
    _EPISODE_REGEX = r'<option value="(/watch[^/]*/[^"]+)">[^<]*</option>'

    _TEST = {
        'url': 'http://www.soul-anime.net/anime1/black-rock-shooter-tv/',
        'info_dict': {
            'id': 'black-rock-shooter-tv'
        },
        'playlist_count': 8
    }

    def _real_extract(self, url):
        """Return a playlist result holding every episode linked from *url*.

        The series slug from the URL is used as the playlist id.  Episode
        paths matched by ``_EPISODE_REGEX`` are appended to the site root
        built from the URL's domain suffix and wrapped with ``url_result``.
        """
        match = re.match(self._VALID_URL, url)
        domain = match.group('domain')
        series_id = match.group('id')

        webpage = self._download_webpage(url, series_id, "Downloading series page")

        site_root = "http://www.soul-anime." + domain
        entries = []
        for episode_path in re.findall(self._EPISODE_REGEX, webpage):
            entries.append(self.url_result(site_root + episode_path))

        return self.playlist_result(entries, series_id)