Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/picarto.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6
   7 from .common import InfoExtractor
   8 from ..compat import compat_str
   9 from ..utils import (
  10     ExtractorError,
  11     js_to_json,
  12     try_get,
  13     update_url_query,
  14     urlencode_postdata,
  15 )
  16
  17
  18 class PicartoIE(InfoExtractor):
  19     _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)(?:/(?P<token>[a-zA-Z0-9]+))?'
  20     _TEST = {
  21         'url': 'https://picarto.tv/Setz',
  22         'info_dict': {
  23             'id': 'Setz',
  24             'ext': 'mp4',
  25             'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
  26             'timestamp': int,
  27             'is_live': True
  28         },
  29         'skip': 'Stream is offline',
  30     }
  31
  32     @classmethod
  33     def suitable(cls, url):
  34         return False if PicartoVodIE.suitable(url) else super(PicartoIE, cls).suitable(url)
  35
  36     def _real_extract(self, url):
  37         mobj = re.match(self._VALID_URL, url)
  38         channel_id = mobj.group('id')
  39
  40         metadata = self._download_json(
  41             'https://api.picarto.tv/v1/channel/name/' + channel_id,
  42             channel_id)
  43
  44         if metadata.get('online') is False:
  45             raise ExtractorError('Stream is offline', expected=True)
  46
  47         cdn_data = self._download_json(
  48             'https://picarto.tv/process/channel', channel_id,
  49             data=urlencode_postdata({'loadbalancinginfo': channel_id}),
  50             note='Downloading load balancing info')
  51
  52         token = mobj.group('token') or 'public'
  53         params = {
  54             'con': int(time.time() * 1000),
  55             'token': token,
  56         }
  57
  58         prefered_edge = cdn_data.get('preferedEdge')
  59         formats = []
  60
  61         for edge in cdn_data['edges']:
  62             edge_ep = edge.get('ep')
  63             if not edge_ep or not isinstance(edge_ep, compat_str):
  64                 continue
  65             edge_id = edge.get('id')
  66             for tech in cdn_data['techs']:
  67                 tech_label = tech.get('label')
  68                 tech_type = tech.get('type')
  69                 preference = 0
  70                 if edge_id == prefered_edge:
  71                     preference += 1
  72                 format_id = []
  73                 if edge_id:
  74                     format_id.append(edge_id)
  75                 if tech_type == 'application/x-mpegurl' or tech_label == 'HLS':
  76                     format_id.append('hls')
  77                     formats.extend(self._extract_m3u8_formats(
  78                         update_url_query(
  79                             'https://%s/hls/%s/index.m3u8'
  80                             % (edge_ep, channel_id), params),
  81                         channel_id, 'mp4', preference=preference,
  82                         m3u8_id='-'.join(format_id), fatal=False))
  83                     continue
  84                 elif tech_type == 'video/mp4' or tech_label == 'MP4':
  85                     format_id.append('mp4')
  86                     formats.append({
  87                         'url': update_url_query(
  88                             'https://%s/mp4/%s.mp4' % (edge_ep, channel_id),
  89                             params),
  90                         'format_id': '-'.join(format_id),
  91                         'preference': preference,
  92                     })
  93                 else:
  94                     # rtmp format does not seem to work
  95                     continue
  96         self._sort_formats(formats)
  97
  98         mature = metadata.get('adult')
  99         if mature is None:
 100             age_limit = None
 101         else:
 102             age_limit = 18 if mature is True else 0
 103
 104         return {
 105             'id': channel_id,
 106             'title': self._live_title(metadata.get('title') or channel_id),
 107             'is_live': True,
 108             'thumbnail': try_get(metadata, lambda x: x['thumbnails']['web']),
 109             'channel': channel_id,
 110             'channel_url': 'https://picarto.tv/%s' % channel_id,
 111             'age_limit': age_limit,
 112             'formats': formats,
 113         }
 114
 115
 116 class PicartoVodIE(InfoExtractor):
 117     _VALID_URL = r'https?://(?:www.)?picarto\.tv/videopopout/(?P<id>[^/?#&]+)'
 118     _TESTS = [{
 119         'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
 120         'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
 121         'info_dict': {
 122             'id': 'ArtofZod_2017.12.12.00.13.23.flv',
 123             'ext': 'mp4',
 124             'title': 'ArtofZod_2017.12.12.00.13.23.flv',
 125             'thumbnail': r're:^https?://.*\.jpg'
 126         },
 127     }, {
 128         'url': 'https://picarto.tv/videopopout/Plague',
 129         'only_matching': True,
 130     }]
 131
 132     def _real_extract(self, url):
 133         video_id = self._match_id(url)
 134
 135         webpage = self._download_webpage(url, video_id)
 136
 137         vod_info = self._parse_json(
 138             self._search_regex(
 139                 r'(?s)#vod-player["\']\s*,\s*(\{.+?\})\s*\)', webpage,
 140                 video_id),
 141             video_id, transform_source=js_to_json)
 142
 143         formats = self._extract_m3u8_formats(
 144             vod_info['vod'], video_id, 'mp4', entry_protocol='m3u8_native',
 145             m3u8_id='hls')
 146         self._sort_formats(formats)
 147
 148         return {
 149             'id': video_id,
 150             'title': video_id,
 151             'thumbnail': vod_info.get('vodThumb'),
 152             'formats': formats,
 153         }