Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yapfiles.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..compat import compat_str
   8 from ..utils import (
   9     ExtractorError,
  10     int_or_none,
  11     qualities,
  12     unescapeHTML,
  13 )
  14
  15
  16 class YapFilesIE(InfoExtractor):
  17     _YAPFILES_URL = r'//(?:(?:www|api)\.)?yapfiles\.ru/get_player/*\?.*?\bv=(?P<id>\w+)'
  18     _VALID_URL = r'https?:%s' % _YAPFILES_URL
  19     _TESTS = [{
  20         # with hd
  21         'url': 'http://www.yapfiles.ru/get_player/?v=vMDE1NjcyNDUt0413',
  22         'md5': '2db19e2bfa2450568868548a1aa1956c',
  23         'info_dict': {
  24             'id': 'vMDE1NjcyNDUt0413',
  25             'ext': 'mp4',
  26             'title': 'Самый худший пароль WIFI',
  27             'thumbnail': r're:^https?://.*\.jpg$',
  28             'duration': 72,
  29         },
  30     }, {
  31         # without hd
  32         'url': 'https://api.yapfiles.ru/get_player/?uid=video_player_1872528&plroll=1&adv=1&v=vMDE4NzI1Mjgt690b',
  33         'only_matching': True,
  34     }]
  35
  36     @staticmethod
  37     def _extract_urls(webpage):
  38         return [unescapeHTML(mobj.group('url')) for mobj in re.finditer(
  39             r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?%s.*?)\1'
  40             % YapFilesIE._YAPFILES_URL, webpage)]
  41
  42     def _real_extract(self, url):
  43         video_id = self._match_id(url)
  44
  45         webpage = self._download_webpage(url, video_id, fatal=False)
  46
  47         player_url = None
  48         query = {}
  49         if webpage:
  50             player_url = self._search_regex(
  51                 r'player\.init\s*\(\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
  52                 'player url', default=None, group='url')
  53
  54         if not player_url:
  55             player_url = 'http://api.yapfiles.ru/load/%s/' % video_id
  56             query = {
  57                 'md5': 'ded5f369be61b8ae5f88e2eeb2f3caff',
  58                 'type': 'json',
  59                 'ref': url,
  60             }
  61
  62         player = self._download_json(
  63             player_url, video_id, query=query)['player']
  64
  65         playlist_url = player['playlist']
  66         title = player['title']
  67         thumbnail = player.get('poster')
  68
  69         if title == 'Ролик удален' or 'deleted.jpg' in (thumbnail or ''):
  70             raise ExtractorError(
  71                 'Video %s has been removed' % video_id, expected=True)
  72
  73         playlist = self._download_json(
  74             playlist_url, video_id)['player']['main']
  75
  76         hd_height = int_or_none(player.get('hd'))
  77
  78         QUALITIES = ('sd', 'hd')
  79         quality_key = qualities(QUALITIES)
  80         formats = []
  81         for format_id in QUALITIES:
  82             is_hd = format_id == 'hd'
  83             format_url = playlist.get(
  84                 'file%s' % ('_hd' if is_hd else ''))
  85             if not format_url or not isinstance(format_url, compat_str):
  86                 continue
  87             formats.append({
  88                 'url': format_url,
  89                 'format_id': format_id,
  90                 'quality': quality_key(format_id),
  91                 'height': hd_height if is_hd else None,
  92             })
  93         self._sort_formats(formats)
  94
  95         return {
  96             'id': video_id,
  97             'title': title,
  98             'thumbnail': thumbnail,
  99             'duration': int_or_none(player.get('length')),
 100             'formats': formats,
 101         }