Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/cliprs.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     ExtractorError,
   7     float_or_none,
   8     int_or_none,
   9     parse_iso8601,
  10 )
  11
  12
  13 class ClipRsIE(InfoExtractor):
  14     _VALID_URL = r'https?://(?:www\.)?clip\.rs/(?P<id>[^/]+)/\d+'
  15     _TEST = {
  16         'url': 'http://www.clip.rs/premijera-frajle-predstavljaju-novi-spot-za-pesmu-moli-me-moli/3732',
  17         'md5': 'c412d57815ba07b56f9edc7b5d6a14e5',
  18         'info_dict': {
  19             'id': '1488842.1399140381',
  20             'ext': 'mp4',
  21             'title': 'PREMIJERA Frajle predstavljaju novi spot za pesmu Moli me, moli',
  22             'description': 'md5:56ce2c3b4ab31c5a2e0b17cb9a453026',
  23             'duration': 229,
  24             'timestamp': 1459850243,
  25             'upload_date': '20160405',
  26         }
  27     }
  28
  29     def _real_extract(self, url):
  30         video_id = self._match_id(url)
  31
  32         webpage = self._download_webpage(url, video_id)
  33
  34         video_id = self._search_regex(
  35             r'id=(["\'])mvp:(?P<id>.+?)\1', webpage, 'mvp id', group='id')
  36
  37         response = self._download_json(
  38             'http://qi.ckm.onetapi.pl/', video_id,
  39             query={
  40                 'body[id]': video_id,
  41                 'body[jsonrpc]': '2.0',
  42                 'body[method]': 'get_asset_detail',
  43                 'body[params][ID_Publikacji]': video_id,
  44                 'body[params][Service]': 'www.onet.pl',
  45                 'content-type': 'application/jsonp',
  46                 'x-onet-app': 'player.front.onetapi.pl',
  47             })
  48
  49         error = response.get('error')
  50         if error:
  51             raise ExtractorError(
  52                 '%s said: %s' % (self.IE_NAME, error['message']), expected=True)
  53
  54         video = response['result'].get('0')
  55
  56         formats = []
  57         for _, formats_dict in video['formats'].items():
  58             if not isinstance(formats_dict, dict):
  59                 continue
  60             for format_id, format_list in formats_dict.items():
  61                 if not isinstance(format_list, list):
  62                     continue
  63                 for f in format_list:
  64                     if not f.get('url'):
  65                         continue
  66                     formats.append({
  67                         'url': f['url'],
  68                         'format_id': format_id,
  69                         'height': int_or_none(f.get('vertical_resolution')),
  70                         'width': int_or_none(f.get('horizontal_resolution')),
  71                         'abr': float_or_none(f.get('audio_bitrate')),
  72                         'vbr': float_or_none(f.get('video_bitrate')),
  73                     })
  74         self._sort_formats(formats)
  75
  76         meta = video.get('meta', {})
  77
  78         title = self._og_search_title(webpage, default=None) or meta['title']
  79         description = self._og_search_description(webpage, default=None) or meta.get('description')
  80         duration = meta.get('length') or meta.get('lenght')
  81         timestamp = parse_iso8601(meta.get('addDate'), ' ')
  82
  83         return {
  84             'id': video_id,
  85             'title': title,
  86             'description': description,
  87             'duration': duration,
  88             'timestamp': timestamp,
  89             'formats': formats,
  90         }