]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tube8.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   6 from ..compat 
import compat_str
 
  12 from ..aes 
import aes_decrypt_text
 
  15 class Tube8IE(InfoExtractor
): 
  16     _VALID_URL 
= r
'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)' 
  18         'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', 
  19         'md5': '65e20c48e6abff62ed0c3965fff13a39', 
  22             'display_id': 'kasia-music-video', 
  24             'description': 'hot teen Kasia grinding', 
  25             'uploader': 'unknown', 
  26             'title': 'Kasia music video', 
  31         'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', 
  32         'only_matching': True, 
  35     def _real_extract(self
, url
): 
  36         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  37         video_id 
= mobj
.group('id') 
  38         display_id 
= mobj
.group('display_id') 
  40         req 
= sanitized_Request(url
) 
  41         req
.add_header('Cookie', 'age_verified=1') 
  42         webpage 
= self
._download
_webpage
(req
, display_id
) 
  44         flashvars 
= self
._parse
_json
( 
  46                 r
'flashvars\s*=\s*({.+?});\r?\n', webpage
, 'flashvars'), 
  50         for key
, video_url 
in flashvars
.items(): 
  51             if not isinstance(video_url
, compat_str
) or not video_url
.startswith('http'): 
  53             height 
= self
._search
_regex
( 
  54                 r
'quality_(\d+)[pP]', key
, 'height', default
=None) 
  57             if flashvars
.get('encrypted') is True: 
  58                 video_url 
= aes_decrypt_text( 
  59                     video_url
, flashvars
['video_title'], 32).decode('utf-8') 
  62                 'format_id': '%sp' % height
, 
  63                 'height': int(height
), 
  65         self
._sort
_formats
(formats
) 
  67         thumbnail 
= flashvars
.get('image_url') 
  69         title 
= self
._html
_search
_regex
( 
  70             r
'videoTitle\s*=\s*"([^"]+)', webpage
, 'title') 
  71         description 
= self
._html
_search
_regex
( 
  72             r
'>Description:</strong>\s*(.+?)\s*<', webpage
, 'description', fatal
=False) 
  73         uploader 
= self
._html
_search
_regex
( 
  74             r
'<span class="username">\s*(.+?)\s*<', 
  75             webpage
, 'uploader', fatal
=False) 
  76         duration 
= int_or_none(flashvars
.get('video_duration')) 
  78         like_count 
= int_or_none(self
._search
_regex
( 
  79             r
'rupVar\s*=\s*"(\d+)"', webpage
, 'like count', fatal
=False)) 
  80         dislike_count 
= int_or_none(self
._search
_regex
( 
  81             r
'rdownVar\s*=\s*"(\d+)"', webpage
, 'dislike count', fatal
=False)) 
  82         view_count 
= str_to_int(self
._search
_regex
( 
  83             r
'<strong>Views: </strong>([\d,\.]+)\s*</li>', 
  84             webpage
, 'view count', fatal
=False)) 
  85         comment_count 
= str_to_int(self
._search
_regex
( 
  86             r
'<span id="allCommentsCount">(\d+)</span>', 
  87             webpage
, 'comment count', fatal
=False)) 
  91             'display_id': display_id
, 
  93             'description': description
, 
  94             'thumbnail': thumbnail
, 
  97             'view_count': view_count
, 
  98             'like_count': like_count
, 
  99             'dislike_count': dislike_count
, 
 100             'comment_count': comment_count
,