]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/businessinsider.py
2 from __future__
import unicode_literals
4 from .common
import InfoExtractor
5 from .jwplatform
import JWPlatformIE
8 class BusinessInsiderIE(InfoExtractor
):
9 _VALID_URL
= r
'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
11 'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
12 'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
16 'title': "Here's how much radiation you're exposed to in everyday life",
17 'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
18 'upload_date': '20170709',
19 'timestamp': 1499606400,
22 'skip_download': True,
25 'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
26 'only_matching': True,
28 'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
29 'only_matching': True,
32 def _real_extract(self
, url
):
33 video_id
= self
._match
_id
(url
)
34 webpage
= self
._download
_webpage
(url
, video_id
)
35 jwplatform_id
= self
._search
_regex
(
36 (r
'data-media-id=["\']([a
-zA
-Z0
-9]{8}
)',
37 r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
38 r'id["\']?\s
*:\s
*["\']?([a-zA-Z0-9]{8})'),
39 webpage, 'jwplatform id')
40 return self.url_result(
41 'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),