Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vporn.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     parse_duration,
   8     str_to_int,
   9 )
  10
  11
  12 class VpornIE(InfoExtractor):
  13     _VALID_URL = r'https?://(?:www\.)?vporn\.com/[^/]+/(?P<display_id>[^/]+)/(?P<id>\d+)'
  14     _TESTS = [
  15         {
  16             'url': 'http://www.vporn.com/masturbation/violet-on-her-th-birthday/497944/',
  17             'md5': 'facf37c1b86546fa0208058546842c55',
  18             'info_dict': {
  19                 'id': '497944',
  20                 'display_id': 'violet-on-her-th-birthday',
  21                 'ext': 'mp4',
  22                 'title': 'Violet on her 19th birthday',
  23                 'description': 'Violet dances in front of the camera which is sure to get you horny.',
  24                 'thumbnail': 're:^https?://.*\.jpg$',
  25                 'uploader': 'kileyGrope',
  26                 'categories': ['Masturbation', 'Teen'],
  27                 'duration': 393,
  28                 'age_limit': 18,
  29                 'view_count': int,
  30             }
  31         },
  32         {
  33             'url': 'http://www.vporn.com/female/hana-shower/523564/',
  34             'md5': 'ced35a4656198a1664cf2cda1575a25f',
  35             'info_dict': {
  36                 'id': '523564',
  37                 'display_id': 'hana-shower',
  38                 'ext': 'mp4',
  39                 'title': 'Hana Shower',
  40                 'description': 'Hana showers at the bathroom.',
  41                 'thumbnail': 're:^https?://.*\.jpg$',
  42                 'uploader': 'Hmmmmm',
  43                 'categories': ['Big Boobs', 'Erotic', 'Teen', 'Female'],
  44                 'duration': 588,
  45                 'age_limit': 18,
  46                 'view_count': int,
  47             }
  48         },
  49     ]
  50
  51     def _real_extract(self, url):
  52         mobj = re.match(self._VALID_URL, url)
  53         video_id = mobj.group('id')
  54         display_id = mobj.group('display_id')
  55
  56         webpage = self._download_webpage(url, display_id)
  57
  58         title = self._html_search_regex(
  59             r'videoname\s*=\s*\'([^\']+)\'', webpage, 'title').strip()
  60         description = self._html_search_regex(
  61             r'class="(?:descr|description_txt)">(.*?)</div>',
  62             webpage, 'description', fatal=False)
  63         thumbnail = self._html_search_regex(
  64             r'flashvars\.imageUrl\s*=\s*"([^"]+)"', webpage, 'description', fatal=False, default=None)
  65         if thumbnail:
  66             thumbnail = 'http://www.vporn.com' + thumbnail
  67
  68         uploader = self._html_search_regex(
  69             r'(?s)Uploaded by:.*?<a href="/user/[^"]+"[^>]*>(.+?)</a>',
  70             webpage, 'uploader', fatal=False)
  71
  72         categories = re.findall(r'<a href="/cat/[^"]+"[^>]*>([^<]+)</a>', webpage)
  73
  74         duration = parse_duration(self._search_regex(
  75             r'Runtime:\s*</span>\s*(\d+ min \d+ sec)',
  76             webpage, 'duration', fatal=False))
  77
  78         view_count = str_to_int(self._search_regex(
  79             r'class="views">([\d,\.]+) [Vv]iews<',
  80             webpage, 'view count', fatal=False))
  81         comment_count = str_to_int(self._html_search_regex(
  82             r"'Comments \(([\d,\.]+)\)'",
  83             webpage, 'comment count', default=None))
  84
  85         formats = []
  86
  87         for video in re.findall(r'flashvars\.videoUrl([^=]+?)\s*=\s*"(https?://[^"]+)"', webpage):
  88             video_url = video[1]
  89             fmt = {
  90                 'url': video_url,
  91                 'format_id': video[0],
  92             }
  93             m = re.search(r'_(?P<width>\d+)x(?P<height>\d+)_(?P<vbr>\d+)k\.mp4$', video_url)
  94             if m:
  95                 fmt.update({
  96                     'width': int(m.group('width')),
  97                     'height': int(m.group('height')),
  98                     'vbr': int(m.group('vbr')),
  99                 })
 100             formats.append(fmt)
 101
 102         self._sort_formats(formats)
 103
 104         return {
 105             'id': video_id,
 106             'display_id': display_id,
 107             'title': title,
 108             'description': description,
 109             'thumbnail': thumbnail,
 110             'uploader': uploader,
 111             'categories': categories,
 112             'duration': duration,
 113             'view_count': view_count,
 114             'comment_count': comment_count,
 115             'age_limit': 18,
 116             'formats': formats,
 117         }