]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/tube8.py
1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
6 from ..compat
import compat_str
12 from ..aes
import aes_decrypt_text
15 class Tube8IE(InfoExtractor
):
16 _VALID_URL
= r
'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P<display_id>[^/]+)/(?P<id>\d+)'
18 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/',
19 'md5': '65e20c48e6abff62ed0c3965fff13a39',
22 'display_id': 'kasia-music-video',
24 'description': 'hot teen Kasia grinding',
25 'uploader': 'unknown',
26 'title': 'Kasia music video',
31 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/',
32 'only_matching': True,
35 def _real_extract(self
, url
):
36 mobj
= re
.match(self
._VALID
_URL
, url
)
37 video_id
= mobj
.group('id')
38 display_id
= mobj
.group('display_id')
40 req
= sanitized_Request(url
)
41 req
.add_header('Cookie', 'age_verified=1')
42 webpage
= self
._download
_webpage
(req
, display_id
)
44 flashvars
= self
._parse
_json
(
46 r
'flashvars\s*=\s*({.+?});\r?\n', webpage
, 'flashvars'),
50 for key
, video_url
in flashvars
.items():
51 if not isinstance(video_url
, compat_str
) or not video_url
.startswith('http'):
53 height
= self
._search
_regex
(
54 r
'quality_(\d+)[pP]', key
, 'height', default
=None)
57 if flashvars
.get('encrypted') is True:
58 video_url
= aes_decrypt_text(
59 video_url
, flashvars
['video_title'], 32).decode('utf-8')
62 'format_id': '%sp' % height
,
63 'height': int(height
),
65 self
._sort
_formats
(formats
)
67 thumbnail
= flashvars
.get('image_url')
69 title
= self
._html
_search
_regex
(
70 r
'videoTitle\s*=\s*"([^"]+)', webpage
, 'title')
71 description
= self
._html
_search
_regex
(
72 r
'>Description:</strong>\s*(.+?)\s*<', webpage
, 'description', fatal
=False)
73 uploader
= self
._html
_search
_regex
(
74 r
'<span class="username">\s*(.+?)\s*<',
75 webpage
, 'uploader', fatal
=False)
76 duration
= int_or_none(flashvars
.get('video_duration'))
78 like_count
= int_or_none(self
._search
_regex
(
79 r
'rupVar\s*=\s*"(\d+)"', webpage
, 'like count', fatal
=False))
80 dislike_count
= int_or_none(self
._search
_regex
(
81 r
'rdownVar\s*=\s*"(\d+)"', webpage
, 'dislike count', fatal
=False))
82 view_count
= str_to_int(self
._search
_regex
(
83 r
'<strong>Views: </strong>([\d,\.]+)\s*</li>',
84 webpage
, 'view count', fatal
=False))
85 comment_count
= str_to_int(self
._search
_regex
(
86 r
'<span id="allCommentsCount">(\d+)</span>',
87 webpage
, 'comment count', fatal
=False))
91 'display_id': display_id
,
93 'description': description
,
94 'thumbnail': thumbnail
,
97 'view_count': view_count
,
98 'like_count': like_count
,
99 'dislike_count': dislike_count
,
100 'comment_count': comment_count
,