]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/libsyn.py
4750b03a3fb2f47818858338b7eb9a8b4889c012
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..utils 
import unified_strdate
 
  10 class LibsynIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'(?P<mainurl>https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+))' 
  14         'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/', 
  15         'md5': '443360ee1b58007bc3dcf09b41d093bb', 
  19             'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart", 
  20             'description': 'md5:601cb790edd05908957dae8aaa866465', 
  21             'upload_date': '20150220', 
  22             'thumbnail': 're:^https?://.*', 
  25         'url': 'https://html5-player.libsyn.com/embed/episode/id/3727166/height/75/width/200/theme/standard/direction/no/autoplay/no/autonext/no/thumbnail/no/preload/no/no_addthis/no/', 
  26         'md5': '6c5cb21acd622d754d3b1a92b582ce42', 
  30             'title': 'Clients From Hell Podcast - How a Sex Toy Company Kickstarted my Freelance Career', 
  31             'upload_date': '20150818', 
  32             'thumbnail': 're:^https?://.*', 
  36     def _real_extract(self
, url
): 
  37         m 
= re
.match(self
._VALID
_URL
, url
) 
  38         video_id 
= m
.group('id') 
  39         url 
= m
.group('mainurl') 
  40         webpage 
= self
._download
_webpage
(url
, video_id
) 
  44         } for media_url 
in set(re
.findall(r
'var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage
))] 
  46         podcast_title 
= self
._search
_regex
( 
  47             r
'<h2>([^<]+)</h2>', webpage
, 'podcast title', default
=None) 
  48         episode_title 
= self
._search
_regex
( 
  49             r
'(?:<div class="episode-title">|<h3>)([^<]+)</', webpage
, 'episode title') 
  51         title 
= '%s - %s' % (podcast_title
, episode_title
) if podcast_title 
else episode_title
 
  53         description 
= self
._html
_search
_regex
( 
  54             r
'<div id="info_text_body">(.+?)</div>', webpage
, 
  55             'description', default
=None) 
  56         thumbnail 
= self
._search
_regex
( 
  57             r
'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"', 
  58             webpage
, 'thumbnail', fatal
=False) 
  59         release_date 
= unified_strdate(self
._search
_regex
( 
  60             r
'<div class="release_date">Released: ([^<]+)<', webpage
, 'release date', fatal
=False)) 
  65             'description': description
, 
  66             'thumbnail': thumbnail
, 
  67             'upload_date': release_date
,