Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/businessinsider.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from .jwplatform import JWPlatformIE
   6
   7
   8 class BusinessInsiderIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:[^/]+\.)?businessinsider\.(?:com|nl)/(?:[^/]+/)*(?P<id>[^/?#&]+)'
  10     _TESTS = [{
  11         'url': 'http://uk.businessinsider.com/how-much-radiation-youre-exposed-to-in-everyday-life-2016-6',
  12         'md5': 'ca237a53a8eb20b6dc5bd60564d4ab3e',
  13         'info_dict': {
  14             'id': 'hZRllCfw',
  15             'ext': 'mp4',
  16             'title': "Here's how much radiation you're exposed to in everyday life",
  17             'description': 'md5:9a0d6e2c279948aadaa5e84d6d9b99bd',
  18             'upload_date': '20170709',
  19             'timestamp': 1499606400,
  20         },
  21         'params': {
  22             'skip_download': True,
  23         },
  24     }, {
  25         'url': 'https://www.businessinsider.nl/5-scientifically-proven-things-make-you-less-attractive-2017-7/',
  26         'only_matching': True,
  27     }, {
  28         'url': 'http://www.businessinsider.com/excel-index-match-vlookup-video-how-to-2015-2?IR=T',
  29         'only_matching': True,
  30     }]
  31
  32     def _real_extract(self, url):
  33         video_id = self._match_id(url)
  34         webpage = self._download_webpage(url, video_id)
  35         jwplatform_id = self._search_regex(
  36             (r'data-media-id=["\']([a-zA-Z0-9]{8})',
  37              r'id=["\']jwplayer_([a-zA-Z0-9]{8})',
  38              r'id["\']?\s*:\s*["\']?([a-zA-Z0-9]{8})'),
  39             webpage, 'jwplatform id')
  40         return self.url_result(
  41             'jwplatform:%s' % jwplatform_id, ie=JWPlatformIE.ie_key(),
  42             video_id=video_id)