Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tunein.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import json
   5 import re
   6
   7 from .common import InfoExtractor
   8 from ..utils import ExtractorError
   9
  10
  11 class TuneInIE(InfoExtractor):
  12     _VALID_URL = r'''(?x)https?://(?:www\.)?
  13     (?:
  14         tunein\.com/
  15         (?:
  16             radio/.*?-s|
  17             station/.*?StationId\=
  18         )(?P<id>[0-9]+)
  19         |tun\.in/(?P<redirect_id>[A-Za-z0-9]+)
  20     )
  21     '''
  22     _API_URL_TEMPLATE = 'http://tunein.com/tuner/tune/?stationId={0:}&tuneType=Station'
  23
  24     _INFO_DICT = {
  25         'id': '34682',
  26         'title': 'Jazz 24 on 88.5 Jazz24 - KPLU-HD2',
  27         'ext': 'AAC',
  28         'thumbnail': 're:^https?://.*\.png$',
  29         'location': 'Tacoma, WA',
  30     }
  31     _TESTS = [
  32         {
  33             'url': 'http://tunein.com/radio/Jazz24-885-s34682/',
  34             'info_dict': _INFO_DICT,
  35             'params': {
  36                 'skip_download': True,  # live stream
  37             },
  38         },
  39         {  # test redirection
  40             'url': 'http://tun.in/ser7s',
  41             'info_dict': _INFO_DICT,
  42             'params': {
  43                 'skip_download': True,  # live stream
  44             },
  45         },
  46     ]
  47
  48     def _real_extract(self, url):
  49         mobj = re.match(self._VALID_URL, url)
  50         redirect_id = mobj.group('redirect_id')
  51         if redirect_id:
  52             # The server doesn't support HEAD requests
  53             urlh = self._request_webpage(
  54                 url, redirect_id, note='Downloading redirect page')
  55             url = urlh.geturl()
  56             self.to_screen('Following redirect: %s' % url)
  57             mobj = re.match(self._VALID_URL, url)
  58         station_id = mobj.group('id')
  59
  60         station_info = self._download_json(
  61             self._API_URL_TEMPLATE.format(station_id),
  62             station_id, note='Downloading station JSON')
  63
  64         title = station_info['Title']
  65         thumbnail = station_info.get('Logo')
  66         location = station_info.get('Location')
  67         streams_url = station_info.get('StreamUrl')
  68         if not streams_url:
  69             raise ExtractorError('No downloadable streams found',
  70                                  expected=True)
  71         stream_data = self._download_webpage(
  72             streams_url, station_id, note='Downloading stream data')
  73         streams = json.loads(self._search_regex(
  74             r'\((.*)\);', stream_data, 'stream info'))['Streams']
  75
  76         is_live = None
  77         formats = []
  78         for stream in streams:
  79             if stream.get('Type') == 'Live':
  80                 is_live = True
  81             formats.append({
  82                 'abr': stream.get('Bandwidth'),
  83                 'ext': stream.get('MediaType'),
  84                 'acodec': stream.get('MediaType'),
  85                 'vcodec': 'none',
  86                 'url': stream.get('Url'),
  87                 # Sometimes streams with the highest quality do not exist
  88                 'preference': stream.get('Reliability'),
  89             })
  90         self._sort_formats(formats)
  91
  92         return {
  93             'id': station_id,
  94             'title': title,
  95             'formats': formats,
  96             'thumbnail': thumbnail,
  97             'location': location,
  98             'is_live': is_live,
  99         }