]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/libsyn.py
9ab1416f55e29d69681d0ccf3678957482a3e80c
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..utils
import unified_strdate
10 class LibsynIE(InfoExtractor
):
11 _VALID_URL
= r
'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)'
14 'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
15 'md5': '443360ee1b58007bc3dcf09b41d093bb',
19 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
20 'description': 'md5:601cb790edd05908957dae8aaa866465',
21 'upload_date': '20150220',
25 def _real_extract(self
, url
):
26 video_id
= self
._match
_id
(url
)
28 webpage
= self
._download
_webpage
(url
, video_id
)
32 } for media_url
in set(re
.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage
))]
34 podcast_title
= self
._search
_regex
(
35 r
'<h2>([^<]+)</h2>', webpage
, 'title')
36 episode_title
= self
._search
_regex
(
37 r
'<h3>([^<]+)</h3>', webpage
, 'title', default
=None)
39 title
= '%s - %s' % (podcast_title
, episode_title
) if podcast_title
else episode_title
41 description
= self
._html
_search
_regex
(
42 r
'<div id="info_text_body">(.+?)</div>', webpage
,
43 'description', fatal
=False)
45 thumbnail
= self
._search
_regex
(
46 r
'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
47 webpage
, 'thumbnail', fatal
=False)
49 release_date
= unified_strdate(self
._search
_regex
(
50 r
'<div class="release_date">Released: ([^<]+)<', webpage
, 'release date', fatal
=False))
55 'description': description
,
56 'thumbnail': thumbnail
,
57 'upload_date': release_date
,