Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ringtv.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6
   7
   8 class RingTVIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'
  10     _TEST = {
  11         'url': 'http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30',
  12         'md5': 'd25945f5df41cdca2d2587165ac28720',
  13         'info_dict': {
  14             'id': '857645',
  15             'ext': 'mp4',
  16             'title': 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',
  17             'description': 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',
  18         }
  19     }
  20
  21     def _real_extract(self, url):
  22         mobj = re.match(self._VALID_URL, url)
  23         video_id = mobj.group('id').split('-')[0]
  24         webpage = self._download_webpage(url, video_id)
  25
  26         if mobj.group('type') == 'news':
  27             video_id = self._search_regex(
  28                 r'''(?x)<iframe[^>]+src="http://cms\.springboardplatform\.com/
  29                         embed_iframe/[0-9]+/video/([0-9]+)/''',
  30                 webpage, 'real video ID')
  31         title = self._og_search_title(webpage)
  32         description = self._html_search_regex(
  33             r'addthis:description="([^"]+)"',
  34             webpage, 'description', fatal=False)
  35         final_url = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/conversion/%s.mp4' % video_id
  36         thumbnail_url = 'http://ringtv.craveonline.springboardplatform.com/storage/ringtv.craveonline.com/snapshots/%s.jpg' % video_id
  37
  38         return {
  39             'id': video_id,
  40             'url': final_url,
  41             'title': title,
  42             'thumbnail': thumbnail_url,
  43             'description': description,
  44         }