Update list of supported sites
[youtubedl] / youtube_dl / extractor / closertotruth.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 import re
5
6 from .common import InfoExtractor
7
8
class CloserToTruthIE(InfoExtractor):
    """Extractor for closertotruth.com pages whose videos are hosted on Kaltura.

    A page either offers several versions through a
    ``<select id="select-version">`` element, which yields a playlist, or
    carries a single ``embed-kaltura`` anchor, which yields one video.
    In both cases the actual media extraction is delegated to the Kaltura
    extractor through ``url_transparent`` results.
    """

    _VALID_URL = r'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688',
        'info_dict': {
            'id': '0_zof1ktre',
            'display_id': 'solutions-the-mind-body-problem',
            'ext': 'mov',
            'title': 'Solutions to the Mind-Body Problem?',
            'upload_date': '20140221',
            'timestamp': 1392956007,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/episodes/how-do-brains-work',
        'info_dict': {
            'id': '0_iuxai6g6',
            'display_id': 'how-do-brains-work',
            'ext': 'mov',
            'title': 'How do Brains Work?',
            'upload_date': '20140221',
            'timestamp': 1392956024,
            'uploader_id': 'CTTXML'
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://closertotruth.com/interviews/1725',
        'info_dict': {
            'id': '1725',
            'title': 'AyaFr-002',
        },
        'playlist_mincount': 2,
    }]

    def _real_extract(self, url):
        """Build the extraction result for a closertotruth.com page.

        Returns a playlist (one ``url_transparent`` entry per distinct
        option of the version selector) when the selector is present and
        contains at least one usable option; otherwise returns a single
        ``url_transparent`` result for the ``embed-kaltura`` anchor.

        The partner id, the page title and (in the single-video case) the
        entry id are looked up without a default, so a missing match is
        left to the base-class helper to report.
        """
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The Kaltura partner id is taken from the URL of an embedded
        # <script> (".../partner_id/<digits>" or ".../p/<digits>").
        partner_id = self._search_regex(
            r'<script[^>]+src=["\'].*?\b(?:partner_id|p)/(\d+)',
            webpage, 'kaltura partner_id')

        # Page title is the part of <title> before the first " | ".
        title = self._search_regex(
            r'<title>(.+?)\s*\|\s*.+?</title>', webpage, 'video title')

        # Optional version selector; its inner markup lists the entries.
        select = self._search_regex(
            r'(?s)<select[^>]+id="select-version"[^>]*>(.+?)</select>',
            webpage, 'select version', default=None)
        if select:
            entry_ids = set()
            entries = []
            # Scan only the selector's own markup: matching against the
            # whole page would also pick up <option> elements belonging to
            # unrelated <select> controls and turn them into bogus entries.
            for mobj in re.finditer(
                    r'<option[^>]+value=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)',
                    select):
                entry_id = mobj.group('id')
                # The same entry id may appear in several options (the
                # value allows an optional "#..." suffix); keep the first.
                if entry_id in entry_ids:
                    continue
                entry_ids.add(entry_id)
                entries.append({
                    '_type': 'url_transparent',
                    'url': 'kaltura:%s:%s' % (partner_id, entry_id),
                    'ie_key': 'Kaltura',
                    'title': mobj.group('title'),
                })
            # An empty selector falls through to the single-video path.
            if entries:
                return self.playlist_result(entries, display_id, title)

        entry_id = self._search_regex(
            r'<a[^>]+id=(["\'])embed-kaltura\1[^>]+data-kaltura=(["\'])(?P<id>[0-9a-z_]+)\2',
            webpage, 'kaltura entry_id', group='id')

        return {
            '_type': 'url_transparent',
            'display_id': display_id,
            'url': 'kaltura:%s:%s' % (partner_id, entry_id),
            'ie_key': 'Kaltura',
            'title': title
        }