]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ruv.py
   2 from __future__ 
import unicode_literals
 
   4 from .common 
import InfoExtractor
 
  11 class RuvIE(InfoExtractor
): 
  12     _VALID_URL 
= r
'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)' 
  15         'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516', 
  16         'md5': '66347652f4e13e71936817102acc1724', 
  19             'display_id': 'fh-valur/20170516', 
  21             'title': 'FH - Valur', 
  22             'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.', 
  23             'timestamp': 1494963600, 
  24             'upload_date': '20170516', 
  28         'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619', 
  29         'md5': '395ea250c8a13e5fdb39d4670ef85378', 
  32             'display_id': 'morgunutvarpid/20170619', 
  34             'title': 'Morgunútvarpið', 
  35             'description': 'md5:a4cf1202c0a1645ca096b06525915418', 
  36             'timestamp': 1497855000, 
  37             'upload_date': '20170619', 
  40         'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', 
  41         'only_matching': True, 
  43         'url': 'http://www.ruv.is/node/1151854', 
  44         'only_matching': True, 
  46         'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun', 
  47         'only_matching': True, 
  49         'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619', 
  50         'only_matching': True, 
  53     def _real_extract(self
, url
): 
  54         display_id 
= self
._match
_id
(url
) 
  56         webpage 
= self
._download
_webpage
(url
, display_id
) 
  58         title 
= self
._og
_search
_title
(webpage
) 
  60         FIELD_RE 
= r
'video\.%s\s*=\s*(["\'])(?P
<url
>(?
:(?
!\
1).)+)\
1' 
  62         media_url = self._html_search_regex( 
  63             FIELD_RE % 'src
', webpage, 'video URL
', group='url
') 
  65         video_id = self._search_regex( 
  66             r'<link
\b[^
>]+\bhref
=["\']https?://www\.ruv\.is/node/(\d+)', 
  67             webpage, 'video id', default=display_id) 
  69         ext = determine_ext(media_url) 
  72             formats = self._extract_m3u8_formats( 
  73                 media_url, video_id, 'mp4', entry_protocol='m3u8_native', 
  86         description = self._og_search_description(webpage, default=None) 
  87         thumbnail = self._og_search_thumbnail( 
  88             webpage, default=None) or self._search_regex( 
  89             FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False) 
  90         timestamp = unified_timestamp(self._html_search_meta( 
  91             'article:published_time', webpage, 'timestamp', fatal=False)) 
  95             'display_id': display_id, 
  97             'description': description, 
  98             'thumbnail': thumbnail, 
  99             'timestamp': timestamp,