Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/viki.py

   1 import re
   2
   3 from ..utils import (
   4     ExtractorError,
   5     unescapeHTML,
   6     unified_strdate,
   7 )
   8 from .subtitles import SubtitlesInfoExtractor
   9
  10
  11 class VikiIE(SubtitlesInfoExtractor):
  12     IE_NAME = u'viki'
  13
  14     _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
  15     _TEST = {
  16         u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
  17         u'file': u'1023585v.mp4',
  18         u'md5': u'a21454021c2646f5433514177e2caa5f',
  19         u'info_dict': {
  20             u'title': u'Heirs Episode 14',
  21             u'uploader': u'SBS',
  22             u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
  23             u'upload_date': u'20131121',
  24             u'age_limit': 13,
  25         },
  26         u'skip': u'Blocked in the US',
  27     }
  28
  29     def _real_extract(self, url):
  30         mobj = re.match(self._VALID_URL, url)
  31         video_id = mobj.group(1)
  32
  33         webpage = self._download_webpage(url, video_id)
  34         title = self._og_search_title(webpage)
  35         description = self._og_search_description(webpage)
  36         thumbnail = self._og_search_thumbnail(webpage)
  37
  38         uploader_m = re.search(
  39             r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
  40         if uploader_m is None:
  41             uploader = None
  42         else:
  43             uploader = uploader_m.group(1).strip()
  44
  45         rating_str = self._html_search_regex(
  46             r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
  47             u'rating information', default='').strip()
  48         RATINGS = {
  49             'G': 0,
  50             'PG': 10,
  51             'PG-13': 13,
  52             'R': 16,
  53             'NC': 18,
  54         }
  55         age_limit = RATINGS.get(rating_str)
  56
  57         info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
  58         info_webpage = self._download_webpage(
  59             info_url, video_id, note=u'Downloading info page')
  60         if re.match(r'\s*<div\s+class="video-error', info_webpage):
  61             raise ExtractorError(
  62                 u'Video %s is blocked from your location.' % video_id,
  63                 expected=True)
  64         video_url = self._html_search_regex(
  65             r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')
  66
  67         upload_date_str = self._html_search_regex(
  68             r'"created_at":"([^"]+)"', info_webpage, u'upload date')
  69         upload_date = (
  70             unified_strdate(upload_date_str)
  71             if upload_date_str is not None
  72             else None
  73         )
  74
  75         # subtitles
  76         video_subtitles = self.extract_subtitles(video_id, info_webpage)
  77         if self._downloader.params.get('listsubtitles', False):
  78             self._list_available_subtitles(video_id, info_webpage)
  79             return
  80
  81         return {
  82             'id': video_id,
  83             'title': title,
  84             'url': video_url,
  85             'description': description,
  86             'thumbnail': thumbnail,
  87             'age_limit': age_limit,
  88             'uploader': uploader,
  89             'subtitles': video_subtitles,
  90             'upload_date': upload_date,
  91         }
  92
  93     def _get_available_subtitles(self, video_id, info_webpage):
  94         res = {}
  95         for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
  96             sturl = unescapeHTML(sturl_html)
  97             m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
  98             if not m:
  99                 continue
 100             res[m.group('lang')] = sturl
 101         return res