]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/businessinsider.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
   5 from .jwplatform 
import JWPlatformIE
 
   8 class BusinessInsiderIE(InfoExtractor
): 
   9     _VALID_URL 
= r
'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)' 
  11         'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6', 
  12         'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e', 
  16             'title': "Here's how much radiation you're exposed to in everyday life", 
  17             'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd', 
  18             'upload_date': '20170709', 
  19             'timestamp': 1499606400, 
  22             'skip_download': True, 
  25         'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/', 
  26         'only_matching': True, 
  28         'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T', 
  29         'only_matching': True, 
  32     def _real_extract(self
, url
): 
  33         video_id 
= self
._match
_id
(url
) 
  34         webpage 
= self
._download
_webpage
(url
, video_id
) 
  35         jwplatform_id 
= self
._search
_regex
( 
  36             (r
'data-media-id=["\']([a
-zA
-Z0
-9]{8}
)', 
  37              r'id=["\']jwplayer_([a-zA-Z0-9]{8})', 
  38              r'id["\']?\s
*:\s
*["\']?([a-zA-Z0-9]{8})'), 
  39             webpage, 'jwplatform id') 
  40         return self.url_result( 
  41             'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),