2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
9 compat_urllib_parse_unquote
,
class PolskieRadioIE(InfoExtractor):
    """Extractor for polskieradio.pl article pages.

    An article page is returned as a playlist whose entries are the media
    items embedded in the page through ``data-media`` attributes.
    """

    _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/\d+/\d+/Artykul/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943,Prof-Andrzej-Nowak-o-historii-nie-da-sie-myslec-beznamietnie',
        'info_dict': {
            'id': '1587943',
            'title': 'Prof. Andrzej Nowak: o historii nie da się myśleć beznamiętnie',
            'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5',
        },
        'playlist': [{
            'md5': '2984ee6ce9046d91fc233bc1a864a09a',
            'info_dict': {
                'id': '1540576',
                'ext': 'mp3',
                'title': 'md5:d4623290d4ac983bf924061c75c23a0d',
                'timestamp': 1456594200,
                'upload_date': '20160227',
                'duration': 2364,
                # Raw string: the pattern contains regex escapes (``\.``) that
                # are invalid escape sequences in a normal string literal.
                'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$'
            },
        }],
    }, {
        'url': 'http://www.polskieradio.pl/265/5217/Artykul/1635803,Euro-2016-nie-ma-miejsca-na-blad-Polacy-graja-ze-Szwajcaria-o-cwiercfinal',
        'info_dict': {
            'id': '1635803',
            'title': 'Euro 2016: nie ma miejsca na błąd. Polacy grają ze Szwajcarią o ćwierćfinał',
            'description': 'md5:01cb7d0cad58664095d72b51a1ebada2',
        },
        'playlist_mincount': 12,
    }, {
        'url': 'http://polskieradio.pl/9/305/Artykul/1632955,Bardzo-popularne-slowo-remis',
        'only_matching': True,
    }, {
        'url': 'http://www.polskieradio.pl/7/5102/Artykul/1587943',
        'only_matching': True,
    }, {
        'url': 'http://www.polskieradio.pl/9/299/Artykul/1634903,Brexit-Leszek-Miller-swiat-sie-nie-zawali-Europa-bedzie-trwac-dalej',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build a playlist from the media embedded in an article page.

        The numeric article id matched by ``_VALID_URL`` is used as the
        playlist id.  Every entry shares the article's timestamp and
        Open Graph thumbnail; the playlist title and description come from
        the page's Open Graph metadata.
        """
        playlist_id = self._match_id(url)

        webpage = self._download_webpage(url, playlist_id)

        # Only the article's audio container is scanned for media.
        content = self._search_regex(
            r'(?s)<div[^>]+class="audio atarticle"[^>]*>(.+?)<script>',
            webpage, 'content')

        timestamp = unified_timestamp(self._html_search_regex(
            r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>',
            webpage, 'timestamp', fatal=False))

        thumbnail_url = self._og_search_thumbnail(webpage)

        entries = []

        # URLs already emitted, so the same file is not listed twice.
        media_urls = set()

        for data_media in re.findall(r'<[^>]+data-media=({[^>]+})', content):
            media = self._parse_json(data_media, playlist_id, fatal=False)
            # With fatal=False a malformed attribute yields no object at all;
            # skip it instead of failing on ``.get`` and losing the playlist.
            if not media or not media.get('file') or not media.get('desc'):
                continue
            media_url = self._proto_relative_url(media['file'], 'http:')
            if media_url in media_urls:
                continue
            media_urls.add(media_url)
            entries.append({
                'id': compat_str(media['id']),
                'url': media_url,
                # 'desc' is unquoted (percent-decoded) before use as title.
                'title': compat_urllib_parse_unquote(media['desc']),
                'duration': int_or_none(media.get('length')),
                'vcodec': 'none' if media.get('provider') == 'audio' else None,
                'timestamp': timestamp,
                'thumbnail': thumbnail_url
            })

        title = self._og_search_title(webpage).strip()
        description = strip_or_none(self._og_search_description(webpage))

        return self.playlist_result(entries, playlist_id, title, description)