]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/stitcher.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  14 class StitcherIE(InfoExtractor
): 
  15     _VALID_URL 
= r
'https?://(?:www\.)?stitcher\.com/podcast/(?:[^/]+/)+e/(?:(?P<display_id>[^/#?&]+?)-)?(?P<id>\d+)(?:[/#?&]|$)' 
  17         'url': 'http://www.stitcher.com/podcast/the-talking-machines/e/40789481?autoplay=true', 
  18         'md5': '391dd4e021e6edeb7b8e68fbf2e9e940', 
  22             'title': 'Machine Learning Mastery and Cancer Clusters', 
  23             'description': 'md5:55163197a44e915a14a1ac3a1de0f2d3', 
  25             'thumbnail': r
're:^https?://.*\.jpg', 
  28         'url': 'http://www.stitcher.com/podcast/panoply/vulture-tv/e/the-rare-hourlong-comedy-plus-40846275?autoplay=true', 
  31             'display_id': 'the-rare-hourlong-comedy-plus', 
  33             'title': "The CW's 'Crazy Ex-Girlfriend'", 
  34             'description': 'md5:04f1e2f98eb3f5cbb094cea0f9e19b17', 
  36             'thumbnail': r
're:^https?://.*\.jpg', 
  39             'skip_download': True, 
  43         'url': 'http://www.stitcher.com/podcast/marketplace-on-stitcher/e/40910226?autoplay=true', 
  44         'only_matching': True, 
  46         'url': 'http://www.stitcher.com/podcast/panoply/getting-in/e/episode-2a-how-many-extracurriculars-should-i-have-40876278?autoplay=true', 
  47         'only_matching': True, 
  50     def _real_extract(self
, url
): 
  51         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  52         audio_id 
= mobj
.group('id') 
  53         display_id 
= mobj
.group('display_id') or audio_id
 
  55         webpage 
= self
._download
_webpage
(url
, display_id
) 
  57         episode 
= self
._parse
_json
( 
  58             js_to_json(self
._search
_regex
( 
  59                 r
'(?s)var\s+stitcher(?:Config)?\s*=\s*({.+?});\n', webpage
, 'episode config')), 
  60             display_id
)['config']['episode'] 
  62         title 
= unescapeHTML(episode
['title']) 
  64             'url': episode
[episode_key
], 
  65             'ext': determine_ext(episode
[episode_key
]) or 'mp3', 
  67         } for episode_key 
in ('episodeURL',) if episode
.get(episode_key
)] 
  68         description 
= self
._search
_regex
( 
  69             r
'Episode Info:\s*</span>([^<]+)<', webpage
, 'description', fatal
=False) 
  70         duration 
= int_or_none(episode
.get('duration')) 
  71         thumbnail 
= episode
.get('episodeImage') 
  75             'display_id': display_id
, 
  77             'description': description
, 
  79             'thumbnail': thumbnail
,