X-Git-Url: https://git.rapsys.eu/youtubedl/blobdiff_plain/f19349c26118065acbe21509383c63465df794fe..291f6705b350d9f813b12efb37b7963555758994:/youtube_dl/extractor/dramafever.py diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index d836c1a..db1de69 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -1,26 +1,26 @@ -# encoding: utf-8 +# coding: utf-8 from __future__ import unicode_literals import itertools +import json from .common import InfoExtractor from ..compat import ( compat_HTTPError, - compat_urllib_parse, compat_urlparse, ) from ..utils import ( - ExtractorError, clean_html, - determine_ext, + ExtractorError, int_or_none, - parse_iso8601, - sanitized_Request, + parse_age_limit, + parse_duration, + unified_timestamp, + url_or_none, ) class DramaFeverBaseIE(InfoExtractor): - _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' _NETRC_MACHINE = 'dramafever' _CONSUMER_SECRET = 'DA59dtVXYLxajktV' @@ -38,11 +38,11 @@ class DramaFeverBaseIE(InfoExtractor): 'consumer secret', default=self._CONSUMER_SECRET) def _real_initialize(self): - self._login() self._consumer_secret = self._get_consumer_secret() + self._login() def _login(self): - (username, password) = self._get_login_info() + username, password = self._get_login_info() if username is None: return @@ -51,113 +51,163 @@ class DramaFeverBaseIE(InfoExtractor): 'password': password, } - request = sanitized_Request( - self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) - response = self._download_webpage( - request, None, 'Logging in as %s' % username) + try: + response = self._download_json( + 'https://www.dramafever.com/api/users/login', None, 'Logging in', + data=json.dumps(login_form).encode('utf-8'), headers={ + 'x-consumer-key': self._consumer_secret, + }) + except ExtractorError as e: + if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, 404): + response = self._parse_json( + 
e.cause.read().decode('utf-8'), None) + else: + raise - if all(logout_pattern not in response - for logout_pattern in ['href="/accounts/logout/"', '>Log out<']): - error = self._html_search_regex( - r'(?s)class="hidden-xs prompt"[^>]*>(.+?)<', - response, 'error message', default=None) - if error: - raise ExtractorError('Unable to login: %s' % error, expected=True) - raise ExtractorError('Unable to log in') + # Successful login + if response.get('result') or response.get('guid') or response.get('user_guid'): + return + + errors = response.get('errors') + if errors and isinstance(errors, list): + error = errors[0] + message = error.get('message') or error['reason'] + raise ExtractorError('Unable to login: %s' % message, expected=True) + raise ExtractorError('Unable to log in') class DramaFeverIE(DramaFeverBaseIE): IE_NAME = 'dramafever' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)' - _TEST = { - 'url': 'http://www.dramafever.com/drama/4512/1/Cooking_with_Shin/', + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+/[0-9]+)(?:/|$)' + _TESTS = [{ + 'url': 'https://www.dramafever.com/drama/4274/1/Heirs/', 'info_dict': { - 'id': '4512.1', + 'id': '4274.1', + 'ext': 'wvm', + 'title': 'Heirs - Episode 1', + 'description': 'md5:362a24ba18209f6276e032a651c50bc2', + 'thumbnail': r're:^https?://.*\.jpg', + 'duration': 3783, + 'timestamp': 1381354993, + 'upload_date': '20131009', + 'series': 'Heirs', + 'season_number': 1, + 'episode': 'Episode 1', + 'episode_number': 1, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'http://www.dramafever.com/drama/4826/4/Mnet_Asian_Music_Awards_2015/?ap=1', + 'info_dict': { + 'id': '4826.4', 'ext': 'flv', - 'title': 'Cooking with Shin 4512.1', - 'description': 'md5:a8eec7942e1664a6896fcd5e1287bfd0', - 'thumbnail': 're:^https?://.*\.jpg', - 'timestamp': 1404336058, - 'upload_date': '20140702', - 'duration': 343, - } - } + 'title': 'Mnet Asian
Music Awards 2015', + 'description': 'md5:3ff2ee8fedaef86e076791c909cf2e91', + 'episode': 'Mnet Asian Music Awards 2015 - Part 3', + 'episode_number': 4, + 'thumbnail': r're:^https?://.*\.jpg', + 'timestamp': 1450213200, + 'upload_date': '20151215', + 'duration': 5359, + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + }, { + 'url': 'https://www.dramafever.com/zh-cn/drama/4972/15/Doctor_Romantic/', + 'only_matching': True, + }] + + def _call_api(self, path, video_id, note, fatal=False): + return self._download_json( + 'https://www.dramafever.com/api/5/' + path, + video_id, note=note, headers={ + 'x-consumer-key': self._consumer_secret, + }, fatal=fatal) + + def _get_subtitles(self, video_id): + subtitles = {} + subs = self._call_api( + 'video/%s/subtitles/webvtt/' % video_id, video_id, + 'Downloading subtitles JSON', fatal=False) + if not subs or not isinstance(subs, list): + return subtitles + for sub in subs: + if not isinstance(sub, dict): + continue + sub_url = url_or_none(sub.get('url')) + if not sub_url: + continue + subtitles.setdefault( + sub.get('code') or sub.get('language') or 'en', []).append({ + 'url': sub_url + }) + return subtitles def _real_extract(self, url): video_id = self._match_id(url).replace('/', '.') - try: - feed = self._download_json( - 'http://www.dramafever.com/amp/episode/feed.json?guid=%s' % video_id, - video_id, 'Downloading episode JSON')['channel']['item'] - except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): - raise ExtractorError( - 'Currently unavailable in your country.', expected=True) - raise + series_id, episode_number = video_id.split('.') - media_group = feed.get('media-group', {}) + video = self._call_api( + 'series/%s/episodes/%s/' % (series_id, episode_number), video_id, + 'Downloading video JSON') formats = [] - for media_content in media_group['media-content']: - src = media_content.get('@attributes', {}).get('url') - if not src: - continue - ext = determine_ext(src) - if ext == 
'f4m': - formats.extend(self._extract_f4m_formats( - src, video_id, f4m_id='hds')) - elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - src, video_id, 'mp4', m3u8_id='hls')) - else: + download_assets = video.get('download_assets') + if download_assets and isinstance(download_assets, dict): + for format_id, format_dict in download_assets.items(): + if not isinstance(format_dict, dict): + continue + format_url = url_or_none(format_dict.get('url')) + if not format_url: + continue formats.append({ - 'url': src, + 'url': format_url, + 'format_id': format_id, + 'filesize': int_or_none(video.get('filesize')), }) + + stream = self._call_api( + 'video/%s/stream/' % video_id, video_id, 'Downloading stream JSON', + fatal=False) + if stream: + stream_url = stream.get('stream_url') + if stream_url: + formats.extend(self._extract_m3u8_formats( + stream_url, video_id, 'mp4', entry_protocol='m3u8_native', + m3u8_id='hls', fatal=False)) self._sort_formats(formats) - title = media_group.get('media-title') - description = media_group.get('media-description') - duration = int_or_none(media_group['media-content'][0].get('@attributes', {}).get('duration')) - thumbnail = self._proto_relative_url( - media_group.get('media-thumbnail', {}).get('@attributes', {}).get('url')) - timestamp = parse_iso8601(feed.get('pubDate'), ' ') + title = video.get('title') or 'Episode %s' % episode_number + description = video.get('description') + thumbnail = video.get('thumbnail') + timestamp = unified_timestamp(video.get('release_date')) + duration = parse_duration(video.get('duration')) + age_limit = parse_age_limit(video.get('tv_rating')) + series = video.get('series_title') + season_number = int_or_none(video.get('season')) - subtitles = {} - for media_subtitle in media_group.get('media-subTitle', []): - lang = media_subtitle.get('@attributes', {}).get('lang') - href = media_subtitle.get('@attributes', {}).get('href') - if not lang or not href: - continue - subtitles[lang] = [{ - 'ext': 
'ttml', - 'url': href, - }] + if series: + title = '%s - %s' % (series, title) - series_id, episode_number = video_id.split('.') - episode_info = self._download_json( - # We only need a single episode info, so restricting page size to one episode - # and dealing with page number as with episode number - r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1' - % (self._consumer_secret, series_id, episode_number), - video_id, 'Downloading episode info JSON', fatal=False) - if episode_info: - value = episode_info.get('value') - if value: - subfile = value[0].get('subfile') or value[0].get('new_subfile') - if subfile and subfile != 'http://www.dramafever.com/st/': - subtitles.setdefault('English', []).append({ - 'ext': 'srt', - 'url': subfile, - }) + subtitles = self.extract_subtitles(video_id) return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, - 'timestamp': timestamp, 'duration': duration, + 'timestamp': timestamp, + 'age_limit': age_limit, + 'series': series, + 'season_number': season_number, + 'episode_number': int_or_none(episode_number), 'formats': formats, 'subtitles': subtitles, } @@ -165,7 +215,7 @@ class DramaFeverSeriesIE(DramaFeverBaseIE): class DramaFeverSeriesIE(DramaFeverBaseIE): IE_NAME = 'dramafever:series' - _VALID_URL = r'https?://(?:www\.)?dramafever\.com/drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' + _VALID_URL = r'https?://(?:www\.)?dramafever\.com/(?:[^/]+/)?drama/(?P<id>[0-9]+)(?:/(?:(?!\d+(?:/|$)).+)?)?$' _TESTS = [{ 'url': 'http://www.dramafever.com/drama/4512/Cooking_with_Shin/', 'info_dict': {