]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/libsyn.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..utils 
import unified_strdate
 
  10 class LibsynIE(InfoExtractor
): 
  11     _VALID_URL 
= r
'https?://html5-player\.libsyn\.com/embed/episode/id/(?P<id>[0-9]+)' 
  14         'url': 'http://html5-player.libsyn.com/embed/episode/id/3377616/', 
  15         'md5': '443360ee1b58007bc3dcf09b41d093bb', 
  19             'title': "The Daily Show Podcast without Jon Stewart - Episode 12: Bassem Youssef: Egypt's Jon Stewart", 
  20             'description': 'md5:601cb790edd05908957dae8aaa866465', 
  21             'upload_date': '20150220', 
  25     def _real_extract(self
, url
): 
  26         video_id 
= self
._match
_id
(url
) 
  28         webpage 
= self
._download
_webpage
(url
, video_id
) 
  32         } for media_url 
in set(re
.findall('var\s+mediaURL(?:Libsyn)?\s*=\s*"([^"]+)"', webpage
))] 
  34         podcast_title 
= self
._search
_regex
( 
  35             r
'<h2>([^<]+)</h2>', webpage
, 'title') 
  36         episode_title 
= self
._search
_regex
( 
  37             r
'<h3>([^<]+)</h3>', webpage
, 'title', default
=None) 
  39         title 
= '%s - %s' % (podcast_title
, episode_title
) if podcast_title 
else episode_title
 
  41         description 
= self
._html
_search
_regex
( 
  42             r
'<div id="info_text_body">(.+?)</div>', webpage
, 
  43             'description', fatal
=False) 
  45         thumbnail 
= self
._search
_regex
( 
  46             r
'<img[^>]+class="info-show-icon"[^>]+src="([^"]+)"', 
  47             webpage
, 'thumbnail', fatal
=False) 
  49         release_date 
= unified_strdate(self
._search
_regex
( 
  50             r
'<div class="release_date">Released: ([^<]+)<', webpage
, 'release date', fatal
=False)) 
  55             'description': description
, 
  56             'thumbnail': thumbnail
, 
  57             'upload_date': release_date
,