]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/closertotruth.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   9 class CloserToTruthIE(InfoExtractor
): 
  10     _VALID_URL 
= r
'https?://(?:www\.)?closertotruth\.com/(?:[^/]+/)*(?P<id>[^/?#&]+)' 
  12         'url': 'http://closertotruth.com/series/solutions-the-mind-body-problem#video-3688', 
  15             'display_id': 'solutions-the-mind-body-problem', 
  17             'title': 'Solutions to the Mind-Body Problem?', 
  18             'upload_date': '20140221', 
  19             'timestamp': 1392956007, 
  20             'uploader_id': 'CTTXML' 
  23             'skip_download': True, 
  26         'url': 'http://closertotruth.com/episodes/how-do-brains-work', 
  29             'display_id': 'how-do-brains-work', 
  31             'title': 'How do Brains Work?', 
  32             'upload_date': '20140221', 
  33             'timestamp': 1392956024, 
  34             'uploader_id': 'CTTXML' 
  37             'skip_download': True, 
  40         'url': 'http://closertotruth.com/interviews/1725', 
  45         'playlist_mincount': 2, 
  48     def _real_extract(self
, url
): 
  49         display_id 
= self
._match
_id
(url
) 
  51         webpage 
= self
._download
_webpage
(url
, display_id
) 
  53         partner_id 
= self
._search
_regex
( 
  54             r
'<script[^>]+src=["\'].*?
\b(?
:partner_id|p
)/(\d
+)', 
  55             webpage, 'kaltura partner_id
') 
  57         title = self._search_regex( 
  58             r'<title
>(.+?
)\s
*\|\s
*.+?
</title
>', webpage, 'video title
') 
  60         select = self._search_regex( 
  61             r'(?s
)<select
[^
>]+id="select-version"[^
>]*>(.+?
)</select
>', 
  62             webpage, 'select version
', default=None) 
  66             for mobj in re.finditer( 
  67                     r'<option
[^
>]+value
=(["\'])(?P<id>[0-9a-z_]+)(?:#.+?)?\1[^>]*>(?P<title>[^<]+)', 
  69                 entry_id = mobj.group('id') 
  70                 if entry_id in entry_ids: 
  72                 entry_ids.add(entry_id) 
  74                     '_type': 'url_transparent', 
  75                     'url': 'kaltura:%s:%s' % (partner_id, entry_id), 
  77                     'title': mobj.group('title'), 
  80                 return self.playlist_result(entries, display_id, title) 
  82         entry_id = self._search_regex( 
  83             r'<a[^>]+id=(["\'])embed
-kaltura\
1[^
>]+data
-kaltura
=(["\'])(?P<id>[0-9a-z_]+)\2', 
  84             webpage, 'kaltura entry_id', group='id') 
  87             '_type': 'url_transparent', 
  88             'display_id': display_id, 
  89             'url': 'kaltura:%s:%s' % (partner_id, entry_id),