Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/cloudy.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     ExtractorError,
   9     compat_parse_qs,
  10     compat_urllib_parse,
  11     remove_end,
  12     HEADRequest,
  13     compat_HTTPError,
  14 )
  15
  16
  17 class CloudyIE(InfoExtractor):
  18     _IE_DESC = 'cloudy.ec and videoraj.ch'
  19     _VALID_URL = r'''(?x)
  20         https?://(?:www\.)?(?P<host>cloudy\.ec|videoraj\.ch)/
  21         (?:v/|embed\.php\?id=)
  22         (?P<id>[A-Za-z0-9]+)
  23         '''
  24     _EMBED_URL = 'http://www.%s/embed.php?id=%s'
  25     _API_URL = 'http://www.%s/api/player.api.php?%s'
  26     _MAX_TRIES = 2
  27     _TESTS = [
  28         {
  29             'url': 'https://www.cloudy.ec/v/af511e2527aac',
  30             'md5': '5cb253ace826a42f35b4740539bedf07',
  31             'info_dict': {
  32                 'id': 'af511e2527aac',
  33                 'ext': 'flv',
  34                 'title': 'Funny Cats and Animals Compilation june 2013',
  35             }
  36         },
  37         {
  38             'url': 'http://www.videoraj.ch/v/47f399fd8bb60',
  39             'md5': '7d0f8799d91efd4eda26587421c3c3b0',
  40             'info_dict': {
  41                 'id': '47f399fd8bb60',
  42                 'ext': 'flv',
  43                 'title': 'Burning a New iPhone 5 with Gasoline - Will it Survive?',
  44             }
  45         }
  46     ]
  47
  48     def _extract_video(self, video_host, video_id, file_key, error_url=None, try_num=0):
  49
  50         if try_num > self._MAX_TRIES - 1:
  51             raise ExtractorError('Unable to extract video URL', expected=True)
  52
  53         form = {
  54             'file': video_id,
  55             'key': file_key,
  56         }
  57
  58         if error_url:
  59             form.update({
  60                 'numOfErrors': try_num,
  61                 'errorCode': '404',
  62                 'errorUrl': error_url,
  63             })
  64
  65         data_url = self._API_URL % (video_host, compat_urllib_parse.urlencode(form))
  66         player_data = self._download_webpage(
  67             data_url, video_id, 'Downloading player data')
  68         data = compat_parse_qs(player_data)
  69
  70         try_num += 1
  71
  72         if 'error' in data:
  73             raise ExtractorError(
  74                 '%s error: %s' % (self.IE_NAME, ' '.join(data['error_msg'])),
  75                 expected=True)
  76
  77         title = data.get('title', [None])[0]
  78         if title:
  79             title = remove_end(title, '&asdasdas').strip()
  80
  81         video_url = data.get('url', [None])[0]
  82
  83         if video_url:
  84             try:
  85                 self._request_webpage(HEADRequest(video_url), video_id, 'Checking video URL')
  86             except ExtractorError as e:
  87                 if isinstance(e.cause, compat_HTTPError) and e.cause.code in [404, 410]:
  88                     self.report_warning('Invalid video URL, requesting another', video_id)
  89                     return self._extract_video(video_host, video_id, file_key, video_url, try_num)
  90
  91         return {
  92             'id': video_id,
  93             'url': video_url,
  94             'title': title,
  95         }
  96
  97     def _real_extract(self, url):
  98         mobj = re.match(self._VALID_URL, url)
  99         video_host = mobj.group('host')
 100         video_id = mobj.group('id')
 101
 102         url = self._EMBED_URL % (video_host, video_id)
 103         webpage = self._download_webpage(url, video_id)
 104
 105         file_key = self._search_regex(
 106             r'filekey\s*=\s*"([^"]+)"', webpage, 'file_key')
 107
 108         return self._extract_video(video_host, video_id, file_key)