Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/picarto.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import time
   5
   6 from .common import InfoExtractor
   7 from ..compat import compat_str
   8 from ..utils import (
   9     ExtractorError,
  10     js_to_json,
  11     try_get,
  12     update_url_query,
  13     urlencode_postdata,
  14 )
  15
  16
  17 class PicartoIE(InfoExtractor):
  18     _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
  19     _TEST = {
  20         'url': 'https://picarto.tv/Setz',
  21         'info_dict': {
  22             'id': 'Setz',
  23             'ext': 'mp4',
  24             'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  25             'timestamp': int,
  26             'is_live': True
  27         },
  28         'skip': 'Stream is offline',
  29     }
  30
  31     @classmethod
  32     def suitable(cls, url):
  33         return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
  34
  35     def _real_extract(self, url):
  36         channel_id = self._match_id(url)
  37         stream_page = self._download_webpage(url, channel_id)
  38
  39         if '>This channel does not exist' in stream_page:
  40             raise ExtractorError(
  41                 'Channel %s does not exist' % channel_id, expected=True)
  42
  43         player = self._parse_json(
  44             self._search_regex(
  45                 r'(?s)playerSettings\[\d+\]\s*=\s*(\{.+?\}\s*\n)', stream_page,
  46                 'player settings'),
  47             channel_id, transform_source=js_to_json)
  48
  49         if player.get('online') is False:
  50             raise ExtractorError('Stream is offline', expected=True)
  51
  52         cdn_data = self._download_json(
  53             'https://picarto.tv/process/channel', channel_id,
  54             data=urlencode_postdata({'loadbalancinginfo': channel_id}),
  55             note='Downloading load balancing info')
  56
  57         def get_event(key):
  58             return try_get(player, lambda x: x['event'][key], compat_str) or ''
  59
  60         params = {
  61             'token': player.get('token') or '',
  62             'ticket': get_event('ticket'),
  63             'con': int(time.time() * 1000),
  64             'type': get_event('ticket'),
  65             'scope': get_event('scope'),
  66         }
  67
  68         prefered_edge = cdn_data.get('preferedEdge')
  69         default_tech = player.get('defaultTech')
  70
  71         formats = []
  72
  73         for edge in cdn_data['edges']:
  74             edge_ep = edge.get('ep')
  75             if not edge_ep or not isinstance(edge_ep, compat_str):
  76                 continue
  77             edge_id = edge.get('id')
  78             for tech in cdn_data['techs']:
  79                 tech_label = tech.get('label')
  80                 tech_type = tech.get('type')
  81                 preference = 0
  82                 if edge_id == prefered_edge:
  83                     preference += 1
  84                 if tech_type == default_tech:
  85                     preference += 1
  86                 format_id = []
  87                 if edge_id:
  88                     format_id.append(edge_id)
  89                 if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
  90                     format_id.append('hls')
  91                     formats.extend(self._extract_m3u8_formats(
  92                         update_url_query(
  93                             'https://%s/hls/%s/index.m3u8'
  94                             % (edge_ep, channel_id), params),
  95                         channel_id, 'mp4', preference=preference,
  96                         m3u8_id='-'.join(format_id), fatal=False))
  97                     continue
  98                 elif tech_type == 'video/mp4' or tech_label == 'MP4':
  99                     format_id.append('mp4')
 100                     formats.append({
 101                         'url': update_url_query(
 102                             'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
 103                             params),
 104                         'format_id': '-'.join(format_id),
 105                         'preference': preference,
 106                     })
 107                 else:
 108                     # rtmp format does not seem to work
 109                     continue
 110         self._sort_formats(formats)
 111
 112         mature = player.get('mature')
 113         if mature is None:
 114             age_limit = None
 115         else:
 116             age_limit = 18 if mature is True else 0
 117
 118         return {
 119             'id': channel_id,
 120             'title': self._live_title(channel_id),
 121             'is_live': True,
 122             'thumbnail': player.get('vodThumb'),
 123             'age_limit': age_limit,
 124             'formats': formats,
 125         }
 126
 127
 128 class PicartoVodIE(InfoExtractor):
 129     _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
 130     _TESTS = [{
 131         'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
 132         'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
 133         'info_dict': {
 134             'id': 'ArtofZod_2017.12.12.00.13.23.flv',
 135             'ext': 'mp4',
 136             'title': 'ArtofZod_2017.12.12.00.13.23.flv',
 137             'thumbnail': r're:^https?://.*\.jpg'
 138         },
 139     }, {
 140         'url': 'https://picarto.tv/videopopout/Plague',
 141         'only_matching': True,
 142     }]
 143
 144     def _real_extract(self, url):
 145         video_id = self._match_id(url)
 146
 147         webpage = self._download_webpage(url, video_id)
 148
 149         vod_info = self._parse_json(
 150             self._search_regex(
 151                 r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
 152                 video_id),
 153             video_id, transform_source=js_to_json)
 154
 155         formats = self._extract_m3u8_formats(
 156             vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
 157             m3u8_id='hls')
 158         self._sort_formats(formats)
 159
 160         return {
 161             'id': video_id,
 162             'title': video_id,
 163             'thumbnail': vod_info.get('vodThumb'),
 164             'formats': formats,
 165         }