Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/gamersyde.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     js_to_json,
   8     parse_duration,
   9     remove_start,
  10 )
  11
  12
  13 class GamersydeIE(InfoExtractor):
  14     _VALID_URL = r'https?://(?:www\.)?gamersyde\.com/hqstream_(?P<display_id>[\da-z_]+)-(?P<id>\d+)_[a-z]{2}\.html'
  15     _TEST = {
  16         'url': 'http://www.gamersyde.com/hqstream_bloodborne_birth_of_a_hero-34371_en.html',
  17         'md5': 'f38d400d32f19724570040d5ce3a505f',
  18         'info_dict': {
  19             'id': '34371',
  20             'ext': 'mp4',
  21             'duration': 372,
  22             'title': 'Bloodborne - Birth of a hero',
  23             'thumbnail': r're:^https?://.*\.jpg$',
  24         }
  25     }
  26
  27     def _real_extract(self, url):
  28         mobj = re.match(self._VALID_URL, url)
  29         video_id = mobj.group('id')
  30         display_id = mobj.group('display_id')
  31
  32         webpage = self._download_webpage(url, display_id)
  33
  34         playlist = self._parse_json(
  35             self._search_regex(
  36                 r'(?s)playlist: \[({.+?})\]\s*}\);', webpage, 'files'),
  37             display_id, transform_source=js_to_json)
  38
  39         formats = []
  40         for source in playlist['sources']:
  41             video_url = source.get('file')
  42             if not video_url:
  43                 continue
  44             format_id = source.get('label')
  45             f = {
  46                 'url': video_url,
  47                 'format_id': format_id,
  48             }
  49             m = re.search(r'^(?P<height>\d+)[pP](?P<fps>\d+)fps', format_id)
  50             if m:
  51                 f.update({
  52                     'height': int(m.group('height')),
  53                     'fps': int(m.group('fps')),
  54                 })
  55             formats.append(f)
  56         self._sort_formats(formats)
  57
  58         title = remove_start(playlist['title'], '%s - ' % video_id)
  59         thumbnail = playlist.get('image')
  60         duration = parse_duration(self._search_regex(
  61             r'Length:</label>([^<]+)<', webpage, 'duration', fatal=False))
  62
  63         return {
  64             'id': video_id,
  65             'display_id': display_id,
  66             'title': title,
  67             'thumbnail': thumbnail,
  68             'duration': duration,
  69             'formats': formats,
  70         }