]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vrt.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
16 class VRTIE(InfoExtractor
):
17 IE_DESC
= 'VRT NWS, Flanders News, Flandern Info and Sporza'
18 _VALID_URL
= r
'https?://(?:www\.)?(?P<site>vrt\.be/vrtnws|sporza\.be)/[a-z]{2}/\d{4}/\d{2}/\d{2}/(?P<id>[^/?&#]+)'
20 'url': 'https://www.vrt.be/vrtnws/nl/2019/05/15/beelden-van-binnenkant-notre-dame-een-maand-na-de-brand/',
21 'md5': 'e1663accf5cf13f375f3cd0d10476669',
23 'id': 'pbs-pub-7855fc7b-1448-49bc-b073-316cb60caa71$vid-2ca50305-c38a-4762-9890-65cbd098b7bd',
25 'title': 'Beelden van binnenkant Notre-Dame, één maand na de brand',
26 'description': 'Op maandagavond 15 april ging een deel van het dakgebinte van de Parijse kathedraal in vlammen op.',
27 'timestamp': 1557924660,
28 'upload_date': '20190515',
32 'url': 'https://sporza.be/nl/2019/05/15/de-belgian-cats-zijn-klaar-voor-het-ek/',
33 'md5': '910bba927566e9ab992278f647eb4b75',
35 'id': 'pbs-pub-f2c86a46-8138-413a-a4b9-a0015a16ce2c$vid-1f112b31-e58e-4379-908d-aca6d80f8818',
37 'title': 'De Belgian Cats zijn klaar voor het EK mét Ann Wauters',
38 'timestamp': 1557923760,
39 'upload_date': '20190515',
43 'url': 'https://www.vrt.be/vrtnws/en/2019/05/15/belgium_s-eurovision-entry-falls-at-the-first-hurdle/',
44 'only_matching': True,
46 'url': 'https://www.vrt.be/vrtnws/de/2019/05/15/aus-fuer-eliott-im-halbfinale-des-eurosongfestivals/',
47 'only_matching': True,
50 'vrt.be/vrtnws': 'vrtnieuws',
51 'sporza.be': 'sporza',
54 def _real_extract(self
, url
):
55 site
, display_id
= re
.match(self
._VALID
_URL
, url
).groups()
56 webpage
= self
._download
_webpage
(url
, display_id
)
57 attrs
= extract_attributes(self
._search
_regex
(
58 r
'(<[^>]+class="vrtvideo"[^>]*>)', webpage
, 'vrt video'))
60 asset_id
= attrs
['data-videoid']
61 publication_id
= attrs
.get('data-publicationid')
63 asset_id
= publication_id
+ '$' + asset_id
64 client
= attrs
.get('data-client') or self
._CLIENT
_MAP
[site
]
66 title
= strip_or_none(get_element_by_class(
67 'vrt-title', webpage
) or self
._html
_search
_meta
(
68 ['og:title', 'twitter:title', 'name'], webpage
))
69 description
= self
._html
_search
_meta
(
70 ['og:description', 'twitter:description', 'description'], webpage
)
71 if description
== '…':
73 timestamp
= unified_timestamp(self
._html
_search
_meta
(
74 'article:published_time', webpage
))
77 '_type': 'url_transparent',
79 'display_id': display_id
,
81 'description': description
,
82 'thumbnail': attrs
.get('data-posterimage'),
83 'timestamp': timestamp
,
84 'duration': float_or_none(attrs
.get('data-duration'), 1000),
85 'url': 'https://mediazone.vrt.be/api/v1/%s/assets/%s' % (client
, asset_id
),