Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/extremetube.py

   1 from __future__ import unicode_literals
   2
   3 from ..utils import str_to_int
   4 from .keezmovies import KeezMoviesIE
   5
   6
   7 class ExtremeTubeIE(KeezMoviesIE):
   8     _VALID_URL = r'https?://(?:www\.)?extremetube\.com/(?:[^/]+/)?video/(?:(?P<display_id>[^/]+)-)(?P<id>\d+)'
   9     _TESTS = [{
  10         'url': 'http://www.extremetube.com/video/music-video-14-british-euro-brit-european-cumshots-swallow-652431',
  11         'md5': '1fb9228f5e3332ec8c057d6ac36f33e0',
  12         'info_dict': {
  13             'id': '652431',
  14             'display_id': 'music-video-14-british-euro-brit-european-cumshots-swallow',
  15             'ext': 'mp4',
  16             'title': 'Music Video 14 british euro brit european cumshots swallow',
  17             'uploader': 'unknown',
  18             'view_count': int,
  19             'age_limit': 18,
  20         }
  21     }, {
  22         'url': 'http://www.extremetube.com/gay/video/abcde-1234',
  23         'only_matching': True,
  24     }, {
  25         'url': 'http://www.extremetube.com/video/latina-slut-fucked-by-fat-black-dick',
  26         'only_matching': True,
  27     }, {
  28         'url': 'http://www.extremetube.com/video/652431',
  29         'only_matching': True,
  30     }]
  31
  32     def _real_extract(self, url):
  33         webpage, info = self._extract_info(url)
  34
  35         if not info['title']:
  36             info['title'] = self._search_regex(
  37                 r'<h1[^>]+title="([^"]+)"[^>]*>', webpage, 'title')
  38
  39         uploader = self._html_search_regex(
  40             r'Uploaded by:\s*</strong>\s*(.+?)\s*</div>',
  41             webpage, 'uploader', fatal=False)
  42         view_count = str_to_int(self._search_regex(
  43             r'Views:\s*</strong>\s*<span>([\d,\.]+)</span>',
  44             webpage, 'view count', fatal=False))
  45
  46         info.update({
  47             'uploader': uploader,
  48             'view_count': view_count,
  49         })
  50
  51         return info