]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/morningstar.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   9 class MorningstarIE(InfoExtractor
): 
  10     IE_DESC 
= 'morningstar.com' 
  11     _VALID_URL 
= r
'https?://(?:(?:www|news)\.)morningstar\.com/[cC]over/video[cC]enter\.aspx\?id=(?P<id>[0-9]+)' 
  13         'url': 'http://www.morningstar.com/cover/videocenter.aspx?id=615869', 
  14         'md5': '6c0acface7a787aadc8391e4bbf7b0f5', 
  18             'title': 'Get Ahead of the Curve on 2013 Taxes', 
  19             'description': "Vanguard's Joel Dickson on managing higher tax rates for high-income earners and fund capital-gain distributions in 2013.", 
  20             'thumbnail': r
're:^https?://.*m(?:orning)?star\.com/.+thumb\.jpg$' 
  23         'url': 'http://news.morningstar.com/cover/videocenter.aspx?id=825556', 
  24         'only_matching': True, 
  27     def _real_extract(self
, url
): 
  28         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  29         video_id 
= mobj
.group('id') 
  31         webpage 
= self
._download
_webpage
(url
, video_id
) 
  32         title 
= self
._html
_search
_regex
( 
  33             r
'<h1 id="titleLink">(.*?)</h1>', webpage
, 'title') 
  34         video_url 
= self
._html
_search
_regex
( 
  35             r
'<input type="hidden" id="hidVideoUrl" value="([^"]+)"', 
  37         thumbnail 
= self
._html
_search
_regex
( 
  38             r
'<input type="hidden" id="hidSnapshot" value="([^"]+)"', 
  39             webpage
, 'thumbnail', fatal
=False) 
  40         description 
= self
._html
_search
_regex
( 
  41             r
'<div id="mstarDeck".*?>(.*?)</div>', 
  42             webpage
, 'description', fatal
=False) 
  48             'thumbnail': thumbnail
, 
  49             'description': description
,