Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/redtube.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6
   7
   8 class RedTubeIE(InfoExtractor):
   9     _VALID_URL = r'http://(?:www\.)?redtube\.com/(?P<id>[0-9]+)'
  10     _TEST = {
  11         'url': 'http://www.redtube.com/66418',
  12         'file': '66418.mp4',
  13         # md5 varies from time to time, as in
  14         # https://travis-ci.org/rg3/youtube-dl/jobs/14052463#L295
  15         #'md5': u'7b8c22b5e7098a3e1c09709df1126d2d',
  16         'info_dict': {
  17             "title": "Sucked on a toilet",
  18             "age_limit": 18,
  19         }
  20     }
  21
  22     def _real_extract(self, url):
  23         mobj = re.match(self._VALID_URL, url)
  24
  25         video_id = mobj.group('id')
  26         video_extension = 'mp4'
  27         webpage = self._download_webpage(url, video_id)
  28
  29         self.report_extraction(video_id)
  30
  31         video_url = self._html_search_regex(
  32             r'<source src="(.+?)" type="video/mp4">', webpage, u'video URL')
  33
  34         video_title = self._html_search_regex(
  35             r'<h1 class="videoTitle[^"]*">(.+?)</h1>',
  36             webpage, u'title')
  37
  38         video_thumbnail = self._html_search_regex(
  39             r'playerInnerHTML.+?<img\s+src="(.+?)"',
  40             webpage, u'thumbnail', fatal=False)
  41
  42         # No self-labeling, but they describe themselves as
  43         # "Home of Videos Porno"
  44         age_limit = 18
  45
  46         return {
  47             'id': video_id,
  48             'url': video_url,
  49             'ext': video_extension,
  50             'title': video_title,
  51             'thumbnail': video_thumbnail,
  52             'age_limit': age_limit,
  53         }