Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/streamcz.py

   1 # -*- coding: utf-8 -*-
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import json
   6
   7 from .common import InfoExtractor
   8 from ..utils import int_or_none
   9
  10
  11 class StreamCZIE(InfoExtractor):
  12     _VALID_URL = r'https?://(?:www\.)?stream\.cz/.+/(?P<videoid>.+)'
  13
  14     _TEST = {
  15         'url': 'http://www.stream.cz/peklonataliri/765767-ecka-pro-deti',
  16         'md5': '6d3ca61a8d0633c9c542b92fcb936b0c',
  17         'info_dict': {
  18             'id': '765767',
  19             'ext': 'mp4',
  20             'title': 'Peklo na talíři: Éčka pro děti',
  21             'description': 'md5:49ace0df986e95e331d0fe239d421519',
  22             'thumbnail': 'http://im.stream.cz/episode/52961d7e19d423f8f06f0100',
  23             'duration': 256,
  24         },
  25     }
  26
  27     def _real_extract(self, url):
  28         mobj = re.match(self._VALID_URL, url)
  29         video_id = mobj.group('videoid')
  30
  31         webpage = self._download_webpage(url, video_id)
  32
  33         data = self._html_search_regex(r'Stream\.Data\.Episode\((.+?)\);', webpage, 'stream data')
  34
  35         jsonData = json.loads(data)
  36
  37         formats = []
  38         for video in jsonData['instances']:
  39             for video_format in video['instances']:
  40                 format_id = video_format['quality']
  41
  42                 if format_id == '240p':
  43                     quality = 0
  44                 elif format_id == '360p':
  45                     quality = 1
  46                 elif format_id == '480p':
  47                     quality = 2
  48                 elif format_id == '720p':
  49                     quality = 3
  50
  51                 formats.append({
  52                     'format_id': '%s-%s' % (video_format['type'].split('/')[1], format_id),
  53                     'url': video_format['source'],
  54                     'quality': quality,
  55                 })
  56
  57         self._sort_formats(formats)
  58
  59         return {
  60             'id': str(jsonData['id']),
  61             'title': self._og_search_title(webpage),
  62             'thumbnail': jsonData['episode_image_original_url'].replace('//', 'http://'),
  63             'formats': formats,
  64             'description': self._og_search_description(webpage),
  65             'duration': int_or_none(jsonData['duration']),
  66             'view_count': int_or_none(jsonData['stats_total']),
  67         }