Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/beeg.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..compat import (
   5     compat_str,
   6     compat_urlparse,
   7 )
   8 from ..utils import (
   9     int_or_none,
  10     unified_timestamp,
  11 )
  12
  13
  14 class BeegIE(InfoExtractor):
  15     _VALID_URL = r'https?://(?:www\.)?beeg\.(?:com|porn(?:/video)?)/(?P<id>\d+)'
  16     _TESTS = [{
  17         # api/v6 v1
  18         'url': 'http://beeg.com/5416503',
  19         'md5': 'a1a1b1a8bc70a89e49ccfd113aed0820',
  20         'info_dict': {
  21             'id': '5416503',
  22             'ext': 'mp4',
  23             'title': 'Sultry Striptease',
  24             'description': 'md5:d22219c09da287c14bed3d6c37ce4bc2',
  25             'timestamp': 1391813355,
  26             'upload_date': '20140207',
  27             'duration': 383,
  28             'tags': list,
  29             'age_limit': 18,
  30         }
  31     }, {
  32         # api/v6 v2
  33         'url': 'https://beeg.com/1941093077?t=911-1391',
  34         'only_matching': True,
  35     }, {
  36         'url': 'https://beeg.porn/video/5416503',
  37         'only_matching': True,
  38     }, {
  39         'url': 'https://beeg.porn/5416503',
  40         'only_matching': True,
  41     }]
  42
  43     def _real_extract(self, url):
  44         video_id = self._match_id(url)
  45
  46         webpage = self._download_webpage(url, video_id)
  47
  48         beeg_version = self._search_regex(
  49             r'beeg_version\s*=\s*([\da-zA-Z_-]+)', webpage, 'beeg version',
  50             default='1546225636701')
  51
  52         qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
  53         t = qs.get('t', [''])[0].split('-')
  54         if len(t) > 1:
  55             query = {
  56                 'v': 2,
  57                 's': t[0],
  58                 'e': t[1],
  59             }
  60         else:
  61             query = {'v': 1}
  62
  63         for api_path in ('', 'api.'):
  64             video = self._download_json(
  65                 'https://%sbeeg.com/api/v6/%s/video/%s'
  66                 % (api_path, beeg_version, video_id), video_id,
  67                 fatal=api_path == 'api.', query=query)
  68             if video:
  69                 break
  70
  71         formats = []
  72         for format_id, video_url in video.items():
  73             if not video_url:
  74                 continue
  75             height = self._search_regex(
  76                 r'^(\d+)[pP]$', format_id, 'height', default=None)
  77             if not height:
  78                 continue
  79             formats.append({
  80                 'url': self._proto_relative_url(
  81                     video_url.replace('{DATA_MARKERS}', 'data=pc_XX__%s_0' % beeg_version), 'https:'),
  82                 'format_id': format_id,
  83                 'height': int(height),
  84             })
  85         self._sort_formats(formats)
  86
  87         title = video['title']
  88         video_id = compat_str(video.get('id') or video_id)
  89         display_id = video.get('code')
  90         description = video.get('desc')
  91         series = video.get('ps_name')
  92
  93         timestamp = unified_timestamp(video.get('date'))
  94         duration = int_or_none(video.get('duration'))
  95
  96         tags = [tag.strip() for tag in video['tags'].split(',')] if video.get('tags') else None
  97
  98         return {
  99             'id': video_id,
 100             'display_id': display_id,
 101             'title': title,
 102             'description': description,
 103             'series': series,
 104             'timestamp': timestamp,
 105             'duration': duration,
 106             'tags': tags,
 107             'formats': formats,
 108             'age_limit': self._rta_search(webpage),
 109         }