# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vrt.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  16 class VRTIE(InfoExtractor
): 
  17     IE_DESC 
= 'VRT NWS, Flanders News, Flandern Info and Sporza' 
  18     _VALID_URL 
= r
'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)' 
  20         'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/', 
  21         'md5': 'e1663accf5cf13f375f3cd0d10476669', 
  23             'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd', 
  25             'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand', 
  26             'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.', 
  27             'timestamp': 1557924660, 
  28             'upload_date': '20190515', 
  32         'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/', 
  33         'md5': '910bba927566e9ab992278f647eb4b75', 
  35             'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818', 
  37             'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters', 
  38             'timestamp': 1557923760, 
  39             'upload_date': '20190515', 
  43         'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/', 
  44         'only_matching': True, 
  46         'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/', 
  47         'only_matching': True, 
  50         'vrt.be/vrtnws': 'vrtnieuws', 
  51         'sporza.be': 'sporza', 
  54     def _real_extract(self
, url
): 
  55         site
, display_id 
= re
.match(self
._VALID
_URL
, url
).groups() 
  56         webpage 
= self
._download
_webpage
(url
, display_id
) 
  57         attrs 
= extract_attributes(self
._search
_regex
( 
  58             r
'(<[^>]+class="vrtvideo"[^>]*>)', webpage
, 'vrt video')) 
  60         asset_id 
= attrs
['data-videoid'] 
  61         publication_id 
= attrs
.get('data-publicationid') 
  63             asset_id 
= publication_id 
+ '$' + asset_id
 
  64         client 
= attrs
.get('data-client') or self
._CLIENT
_MAP
[site
] 
  66         title 
= strip_or_none(get_element_by_class( 
  67             'vrt-title', webpage
) or self
._html
_search
_meta
( 
  68             ['og:title', 'twitter:title', 'name'], webpage
)) 
  69         description 
= self
._html
_search
_meta
( 
  70             ['og:description', 'twitter:description', 'description'], webpage
) 
  71         if description 
== '…': 
  73         timestamp 
= unified_timestamp(self
._html
_search
_meta
( 
  74             'article:published_time', webpage
)) 
  77             '_type': 'url_transparent', 
  79             'display_id': display_id
, 
  81             'description': description
, 
  82             'thumbnail': attrs
.get('data-posterimage'), 
  83             'timestamp': timestamp
, 
  84             'duration': float_or_none(attrs
.get('data-duration'), 1000), 
  85             'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client
, asset_id
),