Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/videobam.py
Imported Upstream version 2014.12.01
[youtubedl] / youtube_dl / extractor / videobam.py
1 from __future__ import unicode_literals
2
3 import re
4 import json
5
6 from .common import InfoExtractor
7 from ..utils import int_or_none
8
9
class VideoBamIE(InfoExtractor):
    """Extractor for video pages on videobam.com.

    Accepts both the short page URL (``/<id>``) and the
    ``/videos/download/<id>`` form; the id is a run of ASCII letters.
    """

    _VALID_URL = r'http://(?:www\.)?videobam\.com/(?:videos/download/)?(?P<id>[a-zA-Z]+)'

    _TESTS = [
        {
            'url': 'http://videobam.com/OiJQM',
            'md5': 'db471f27763a531f10416a0c58b5a1e0',
            'info_dict': {
                'id': 'OiJQM',
                'ext': 'mp4',
                'title': 'Is Alcohol Worse Than Ecstasy?',
                'description': 'md5:d25b96151515c91debc42bfbb3eb2683',
                'uploader': 'frihetsvinge',
            },
        },
        {
            'url': 'http://videobam.com/pqLvq',
            'md5': 'd9a565b5379a99126ef94e1d7f9a383e',
            'note': 'HD video',
            'info_dict': {
                'id': 'pqLvq',
                'ext': 'mp4',
                'title': '_',
            }
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for the video page addressed by *url*.

        The canonical page ``http://videobam.com/<id>`` is downloaded and
        scanned for ``low: '...'`` / ``high: '...'`` stream URLs. If neither
        is present, the ``player_config`` JSON object embedded in the page is
        parsed and its playlist entries are used instead.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail``, ``uploader``, ``view_count``, ``formats`` and
        ``age_limit``.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        # Always fetch the short page URL, whichever URL form was supplied.
        webpage = self._download_webpage(
            'http://videobam.com/%s' % video_id, video_id, 'Downloading page')

        formats = []
        # enumerate() gives 'low' preference 0 and 'high' preference 1.
        for rank, quality in enumerate(['low', 'high']):
            found = re.search(r"%s: '(?P<url>[^']+)'" % quality, webpage)
            if found:
                formats.append({
                    'url': found.group('url'),
                    'ext': 'mp4',
                    'format_id': quality,
                    'preference': rank,
                })

        if not formats:
            # Fallback: read stream URLs from the embedded player config.
            config_json = self._html_search_regex(
                r'var player_config = ({.+?});', webpage, 'player config')
            player_config = json.loads(config_json)
            # Only playlist entries carrying an 'autoPlay' key are kept
            # (presumably those are the actual video clips -- TODO confirm).
            for entry in player_config['playlist']:
                if 'autoPlay' not in entry:
                    continue
                formats.append({
                    'url': entry['url'],
                    'ext': 'mp4',
                })

        self._sort_formats(formats)

        # Metadata lookups; the title falls back to '_' and the description
        # to None when the page does not provide them.
        title = self._og_search_title(webpage, default='_', fatal=False)
        description = self._og_search_description(webpage, default=None)
        thumbnail = self._og_search_thumbnail(webpage)
        uploader = self._html_search_regex(
            r'Upload by ([^<]+)</a>', webpage, 'uploader',
            fatal=False, default=None)
        raw_views = self._html_search_regex(
            r'<strong>Views:</strong> (\d+) ', webpage, 'view count',
            fatal=False)
        view_count = int_or_none(raw_views)

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'uploader': uploader,
            'view_count': view_count,
            'formats': formats,
            # Every video from this site is reported with an 18+ age limit.
            'age_limit': 18,
        }
81 }