Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/puhutv.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..compat import (
   6     compat_HTTPError,
   7     compat_str,
   8 )
   9 from ..utils import (
  10     ExtractorError,
  11     int_or_none,
  12     float_or_none,
  13     parse_resolution,
  14     str_or_none,
  15     try_get,
  16     unified_timestamp,
  17     url_or_none,
  18     urljoin,
  19 )
  20
  21
  22 class PuhuTVIE(InfoExtractor):
  23     _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
  24     IE_NAME = 'puhutv'
  25     _TESTS = [{
  26         # film
  27         'url': 'https://puhutv.com/sut-kardesler-izle',
  28         'md5': 'a347470371d56e1585d1b2c8dab01c96',
  29         'info_dict': {
  30             'id': '5085',
  31             'display_id': 'sut-kardesler',
  32             'ext': 'mp4',
  33             'title': 'Süt Kardeşler',
  34             'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
  35             'thumbnail': r're:^https?://.*\.jpg$',
  36             'duration': 4832.44,
  37             'creator': 'Arzu Film',
  38             'timestamp': 1561062602,
  39             'upload_date': '20190620',
  40             'release_year': 1976,
  41             'view_count': int,
  42             'tags': list,
  43         },
  44     }, {
  45         # episode, geo restricted, bypassable with --geo-verification-proxy
  46         'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
  47         'only_matching': True,
  48     }, {
  49         # 4k, with subtitles
  50         'url': 'https://puhutv.com/dip-1-bolum-izle',
  51         'only_matching': True,
  52     }]
  53     _SUBTITLE_LANGS = {
  54         'English': 'en',
  55         'Deutsch': 'de',
  56         'عربى': 'ar'
  57     }
  58
  59     def _real_extract(self, url):
  60         display_id = self._match_id(url)
  61
  62         info = self._download_json(
  63             urljoin(url, '/api/slug/%s-izle' % display_id),
  64             display_id)['data']
  65
  66         video_id = compat_str(info['id'])
  67         show = info.get('title') or {}
  68         title = info.get('name') or show['name']
  69         if info.get('display_name'):
  70             title = '%s %s' % (title, info['display_name'])
  71
  72         try:
  73             videos = self._download_json(
  74                 'https://puhutv.com/api/assets/%s/videos' % video_id,
  75                 display_id, 'Downloading video JSON',
  76                 headers=self.geo_verification_headers())
  77         except ExtractorError as e:
  78             if isinstance(e.cause, compat_HTTPError) and e.cause.code == 403:
  79                 self.raise_geo_restricted()
  80             raise
  81
  82         urls = []
  83         formats = []
  84
  85         def add_http_from_hls(m3u8_f):
  86             http_url = m3u8_f['url'].replace('/hls/', '/mp4/').replace('/chunklist.m3u8', '.mp4')
  87             if http_url != m3u8_f['url']:
  88                 f = m3u8_f.copy()
  89                 f.update({
  90                     'format_id': f['format_id'].replace('hls', 'http'),
  91                     'protocol': 'http',
  92                     'url': http_url,
  93                 })
  94                 formats.append(f)
  95
  96         for video in videos['data']['videos']:
  97             media_url = url_or_none(video.get('url'))
  98             if not media_url or media_url in urls:
  99                 continue
 100             urls.append(media_url)
 101
 102             playlist = video.get('is_playlist')
 103             if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
 104                 m3u8_formats = self._extract_m3u8_formats(
 105                     media_url, video_id, 'mp4', entry_protocol='m3u8_native',
 106                     m3u8_id='hls', fatal=False)
 107                 for m3u8_f in m3u8_formats:
 108                     formats.append(m3u8_f)
 109                     add_http_from_hls(m3u8_f)
 110                 continue
 111
 112             quality = int_or_none(video.get('quality'))
 113             f = {
 114                 'url': media_url,
 115                 'ext': 'mp4',
 116                 'height': quality
 117             }
 118             video_format = video.get('video_format')
 119             is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
 120             if is_hls:
 121                 format_id = 'hls'
 122                 f['protocol'] = 'm3u8_native'
 123             elif video_format == 'mp4':
 124                 format_id = 'http'
 125             else:
 126                 continue
 127             if quality:
 128                 format_id += '-%sp' % quality
 129             f['format_id'] = format_id
 130             formats.append(f)
 131             if is_hls:
 132                 add_http_from_hls(f)
 133         self._sort_formats(formats)
 134
 135         creator = try_get(
 136             show, lambda x: x['producer']['name'], compat_str)
 137
 138         content = info.get('content') or {}
 139
 140         images = try_get(
 141             content, lambda x: x['images']['wide'], dict) or {}
 142         thumbnails = []
 143         for image_id, image_url in images.items():
 144             if not isinstance(image_url, compat_str):
 145                 continue
 146             if not image_url.startswith(('http', '//')):
 147                 image_url = 'https://%s' % image_url
 148             t = parse_resolution(image_id)
 149             t.update({
 150                 'id': image_id,
 151                 'url': image_url
 152             })
 153             thumbnails.append(t)
 154
 155         tags = []
 156         for genre in show.get('genres') or []:
 157             if not isinstance(genre, dict):
 158                 continue
 159             genre_name = genre.get('name')
 160             if genre_name and isinstance(genre_name, compat_str):
 161                 tags.append(genre_name)
 162
 163         subtitles = {}
 164         for subtitle in content.get('subtitles') or []:
 165             if not isinstance(subtitle, dict):
 166                 continue
 167             lang = subtitle.get('language')
 168             sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
 169             if not lang or not isinstance(lang, compat_str) or not sub_url:
 170                 continue
 171             subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
 172                 'url': sub_url
 173             }]
 174
 175         return {
 176             'id': video_id,
 177             'display_id': display_id,
 178             'title': title,
 179             'description': info.get('description') or show.get('description'),
 180             'season_id': str_or_none(info.get('season_id')),
 181             'season_number': int_or_none(info.get('season_number')),
 182             'episode_number': int_or_none(info.get('episode_number')),
 183             'release_year': int_or_none(show.get('released_at')),
 184             'timestamp': unified_timestamp(info.get('created_at')),
 185             'creator': creator,
 186             'view_count': int_or_none(content.get('watch_count')),
 187             'duration': float_or_none(content.get('duration_in_ms'), 1000),
 188             'tags': tags,
 189             'subtitles': subtitles,
 190             'thumbnails': thumbnails,
 191             'formats': formats
 192         }
 193
 194
 195 class PuhuTVSerieIE(InfoExtractor):
 196     _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
 197     IE_NAME = 'puhutv:serie'
 198     _TESTS = [{
 199         'url': 'https://puhutv.com/deniz-yildizi-detay',
 200         'info_dict': {
 201             'title': 'Deniz Yıldızı',
 202             'id': 'deniz-yildizi',
 203         },
 204         'playlist_mincount': 205,
 205     }, {
 206         # a film detail page which is using same url with serie page
 207         'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
 208         'only_matching': True,
 209     }]
 210
 211     def _extract_entries(self, seasons):
 212         for season in seasons:
 213             season_id = season.get('id')
 214             if not season_id:
 215                 continue
 216             page = 1
 217             has_more = True
 218             while has_more is True:
 219                 season = self._download_json(
 220                     'https://galadriel.puhutv.com/seasons/%s' % season_id,
 221                     season_id, 'Downloading page %s' % page, query={
 222                         'page': page,
 223                         'per': 40,
 224                     })
 225                 episodes = season.get('episodes')
 226                 if isinstance(episodes, list):
 227                     for ep in episodes:
 228                         slug_path = str_or_none(ep.get('slugPath'))
 229                         if not slug_path:
 230                             continue
 231                         video_id = str_or_none(int_or_none(ep.get('id')))
 232                         yield self.url_result(
 233                             'https://puhutv.com/%s' % slug_path,
 234                             ie=PuhuTVIE.ie_key(), video_id=video_id,
 235                             video_title=ep.get('name') or ep.get('eventLabel'))
 236                 page += 1
 237                 has_more = season.get('hasMore')
 238
 239     def _real_extract(self, url):
 240         playlist_id = self._match_id(url)
 241
 242         info = self._download_json(
 243             urljoin(url, '/api/slug/%s-detay' % playlist_id),
 244             playlist_id)['data']
 245
 246         seasons = info.get('seasons')
 247         if seasons:
 248             return self.playlist_result(
 249                 self._extract_entries(seasons), playlist_id, info.get('name'))
 250
 251         # For films, these are using same url with series
 252         video_id = info.get('slug') or info['assets'][0]['slug']
 253         return self.url_result(
 254             'https://puhutv.com/%s-izle' % video_id,
 255             PuhuTVIE.ie_key(), video_id)