]> Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/youjizz.py
b50f34e9bb30e47c679940ca1577ea8cc6683934
[youtubedl] / youtube_dl / extractor / youjizz.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4
5
class YouJizzIE(InfoExtractor):
    """Extractor for youjizz.com video pages.

    The numeric id is taken from the end of the page URL; formats come from
    the HTML5 media markup found in the downloaded page.
    """

    # Matches .../videos/<optional-slug>-<digits>.html, optionally followed
    # by a query string or fragment; the digits are captured as ``id``.
    _VALID_URL = r'https?://(?:\w+\.)?youjizz\.com/videos/(?:[^/#?]+)?-(?P<id>[0-9]+)\.html(?:$|[?#])'
    _TESTS = [{
        'url': 'http://www.youjizz.com/videos/zeichentrick-1-2189178.html',
        'md5': '78fc1901148284c69af12640e01c6310',
        'info_dict': {
            'id': '2189178',
            'ext': 'mp4',
            'title': 'Zeichentrick 1',
            'age_limit': 18,
        }
    }, {
        # Same video id, but with the slug part of the path left empty.
        'url': 'http://www.youjizz.com/videos/-2189178.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        Downloads the page, reads the age limit and the <title> text from
        it, and merges them (together with the id) into the first entry
        returned by ``_parse_html5_media_entries``.
        """
        vid = self._match_id(url)

        # The page emits the ``controls`` attribute glued to the preceding
        # closing quote; insert the missing space before the markup is
        # handed to the HTML5 media parser.
        page = self._download_webpage(url, vid).replace(
            '"controls', '" controls')

        rating = self._rta_search(page)
        title = self._html_search_regex(
            r'<title>\s*(.*)\s*</title>', page, 'title')

        # Only the first media entry found on the page is used.
        entry = self._parse_html5_media_entries(url, page, vid)[0]
        entry['id'] = vid
        entry['title'] = title
        entry['age_limit'] = rating
        return entry