]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/inc.py
   1 from __future__ 
import unicode_literals
 
   3 from .common 
import InfoExtractor
 
   4 from .kaltura 
import KalturaIE
 
   7 class IncIE(InfoExtractor
): 
   8     _VALID_URL 
= r
'https?://(?:www\.)?inc\.com/(?:[^/]+/)+(?P<id>[^.]+).html' 
  10         'url': 'http://www.inc.com/tip-sheet/bill-gates-says-these-5-books-will-make-you-smarter.html', 
  11         'md5': '7416739c9c16438c09fa35619d6ba5cb', 
  15             'title': 'Bill Gates Says These 5 Books Will Make You Smarter', 
  16             'description': 'md5:bea7ff6cce100886fc1995acb743237e', 
  17             'timestamp': 1474414430, 
  18             'upload_date': '20160920', 
  19             'uploader_id': 'video@inc.com', 
  22             'skip_download': True, 
  25         # div with id=kaltura_player_1_kqs38cgm 
  26         'url': 'https://www.inc.com/oscar-raymundo/richard-branson-young-entrepeneurs.html', 
  30             'title': 'Branson: "In the end, you have to say, Screw it. Just do it."', 
  31             'description': 'md5:21b832d034f9af5191ca5959da5e9cb6', 
  32             'timestamp': 1364403232, 
  33             'upload_date': '20130327', 
  34             'uploader_id': 'incdigital@inc.com', 
  37             'skip_download': True, 
  40         'url': 'http://www.inc.com/video/david-whitford/founders-forum-tripadvisor-steve-kaufer-most-enjoyable-moment-for-entrepreneur.html', 
  41         'only_matching': True, 
  44     def _real_extract(self
, url
): 
  45         display_id 
= self
._match
_id
(url
) 
  46         webpage 
= self
._download
_webpage
(url
, display_id
) 
  48         partner_id 
= self
._search
_regex
( 
  49             r
'var\s+_?bizo_data_partner_id\s*=\s*["\'](\d
+)', webpage, 
  50             'partner 
id', default='1034971') 
  52         kaltura_id = self._search_regex( 
  53             r'id=(["\'])kaltura_player_(?P<id>.+?)\1', webpage, 'kaltura id', 
  54             default=None, group='id') or self._parse_json(self._search_regex( 
  55                 r'pageInfo\.videos\s*=\s*\[(.+)\];', webpage, 'kaltura id'), 
  56             display_id)['vid_kaltura_id'] 
  58         return self.url_result( 
  59             'kaltura:%s:%s' % (partner_id, kaltura_id), KalturaIE.ie_key())