Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rbmaradio.py

   1 import json
   2 import re
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     compat_urllib_parse_urlparse,
   7
   8     ExtractorError,
   9 )
  10
  11
  12 class RBMARadioIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?rbmaradio\.com/shows/(?P<videoID>[^/]+)$'
  14
  15     def _real_extract(self, url):
  16         m = re.match(self._VALID_URL, url)
  17         video_id = m.group('videoID')
  18
  19         webpage = self._download_webpage(url, video_id)
  20
  21         json_data = self._search_regex(r'window\.gon.*?gon\.show=(.+?);$',
  22             webpage, u'json data', flags=re.MULTILINE)
  23
  24         try:
  25             data = json.loads(json_data)
  26         except ValueError as e:
  27             raise ExtractorError(u'Invalid JSON: ' + str(e))
  28
  29         video_url = data['akamai_url'] + '&cbr=256'
  30         url_parts = compat_urllib_parse_urlparse(video_url)
  31         video_ext = url_parts.path.rpartition('.')[2]
  32         info = {
  33                 'id': video_id,
  34                 'url': video_url,
  35                 'ext': video_ext,
  36                 'title': data['title'],
  37                 'description': data.get('teaser_text'),
  38                 'location': data.get('country_of_origin'),
  39                 'uploader': data.get('host', {}).get('name'),
  40                 'uploader_id': data.get('host', {}).get('slug'),
  41                 'thumbnail': data.get('image', {}).get('large_url_2x'),
  42                 'duration': data.get('duration'),
  43         }
  44         return [info]