Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/iprima.py

   1 # -*- coding: utf-8 -*-
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import time
   6
   7 from .common import InfoExtractor
   8 from ..utils import (
   9     sanitized_Request,
  10 )
  11
  12
  13 class IPrimaIE(InfoExtractor):
  14     _VALID_URL = r'https?://play\.iprima\.cz/(?:.+/)?(?P<id>[^?#]+)'
  15
  16     _TESTS = [{
  17         'url': 'http://play.iprima.cz/gondici-s-r-o-33',
  18         'info_dict': {
  19             'id': 'p136534',
  20             'ext': 'mp4',
  21             'title': 'Gondíci s. r. o. (34)',
  22             'description': 'md5:16577c629d006aa91f59ca8d8e7f99bd',
  23         },
  24         'params': {
  25             'skip_download': True,  # m3u8 download
  26         },
  27     }, {
  28         'url': 'http://play.iprima.cz/particka/particka-92',
  29         'only_matching': True,
  30     }]
  31
  32     def _real_extract(self, url):
  33         mobj = re.match(self._VALID_URL, url)
  34         video_id = mobj.group('id')
  35
  36         webpage = self._download_webpage(url, video_id)
  37
  38         video_id = self._search_regex(r'data-product="([^"]+)">', webpage, 'real id')
  39
  40         req = sanitized_Request(
  41             'http://play.iprima.cz/prehravac/init?_infuse=1'
  42             '&_ts=%s&productId=%s' % (round(time.time()), video_id))
  43         req.add_header('Referer', url)
  44         playerpage = self._download_webpage(req, video_id, note='Downloading player')
  45
  46         m3u8_url = self._search_regex(r"'src': '([^']+\.m3u8)'", playerpage, 'm3u8 url')
  47
  48         formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4')
  49
  50         self._sort_formats(formats)
  51
  52         return {
  53             'id': video_id,
  54             'title': self._og_search_title(webpage),
  55             'thumbnail': self._og_search_thumbnail(webpage),
  56             'formats': formats,
  57             'description': self._og_search_description(webpage),
  58         }