Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/worldstarhiphop.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4
   5
   6 class WorldStarHipHopIE(InfoExtractor):
   7     _VALID_URL = r'https?://(?:www|m)\.worldstar(?:candy|hiphop)\.com/(?:videos|android)/video\.php\?.*?\bv=(?P<id>[^&]+)'
   8     _TESTS = [{
   9         'url': 'http://www.worldstarhiphop.com/videos/video.php?v=wshh6a7q1ny0G34ZwuIO',
  10         'md5': '9d04de741161603bf7071bbf4e883186',
  11         'info_dict': {
  12             'id': 'wshh6a7q1ny0G34ZwuIO',
  13             'ext': 'mp4',
  14             'title': 'KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!'
  15         }
  16     }, {
  17         'url': 'http://m.worldstarhiphop.com/android/video.php?v=wshh6a7q1ny0G34ZwuIO',
  18         'only_matching': True,
  19     }]
  20
  21     def _real_extract(self, url):
  22         video_id = self._match_id(url)
  23         webpage = self._download_webpage(url, video_id)
  24
  25         entries = self._parse_html5_media_entries(url, webpage, video_id)
  26
  27         if not entries:
  28             return self.url_result(url, 'Generic')
  29
  30         title = self._html_search_regex(
  31             [r'(?s)<div class="content-heading">\s*<h1>(.*?)</h1>',
  32              r'<span[^>]+class="tc-sp-pinned-title">(.*)</span>'],
  33             webpage, 'title')
  34
  35         info = entries[0]
  36         info.update({
  37             'id': video_id,
  38             'title': title,
  39         })
  40         return info