]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/libsyn.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..utils
import unified_strdate
10 class LibsynIE(InfoExtractor
):
11 _VALID_URL
= r
'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))'
14 'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/',
15 'md5': '443360ee1b58007bc3dcf09b41d093bb',
19 'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart",
20 'description': 'md5:601cb790edd05908957dae8aaa866465',
21 'upload_date': '20150220',
22 'thumbnail': 're:^https?://.*',
25 'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/',
26 'md5': '6c5cb21acd622d754d3b1a92b582ce42',
30 'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career',
31 'upload_date': '20150818',
32 'thumbnail': 're:^https?://.*',
36 def _real_extract(self
, url
):
37 m
= re
.match(self
._VALID
_URL
, url
)
38 video_id
= m
.group('id')
39 url
= m
.group('mainurl')
40 webpage
= self
._download
_webpage
(url
, video_id
)
44 } for media_url
in set(re
.findall(r
'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage
))]
46 podcast_title
= self
._search
_regex
(
47 r
'<h2>([^<]+)</h2>', webpage
, 'podcast title', default
=None)
48 episode_title
= self
._search
_regex
(
49 r
'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage
, 'episode title')
51 title
= '%s - %s' % (podcast_title
, episode_title
) if podcast_title
else episode_title
53 description
= self
._html
_search
_regex
(
54 r
'<div id="info_text_body">(.+?)</div>', webpage
,
55 'description', default
=None)
56 thumbnail
= self
._search
_regex
(
57 r
'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"',
58 webpage
, 'thumbnail', fatal
=False)
59 release_date
= unified_strdate(self
._search
_regex
(
60 r
'<div class="release_date">Released: ([^<]+)<', webpage
, 'release date', fatal
=False))
65 'description': description
,
66 'thumbnail': thumbnail
,
67 'upload_date': release_date
,