Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hgtv.py
d/p/disable-autoupdate-mechanism.patch: Extend to clean up errant import and README...
[youtubedl] / youtube_dl / extractor / hgtv.py
1 # coding: utf-8
2 from __future__ import unicode_literals
3
4 from .common import InfoExtractor
5
6
class HGTVComShowIE(InfoExtractor):
    """Extract a playlist of videos from an hgtv.com show page.

    The page is expected to embed a JSON configuration inside a
    ``<script type="text/x-config">`` element that follows a
    ``data-module="video"`` (or ``data-deferred-module="video"``) element.
    Each video of the first channel in that configuration that carries a
    ``releaseUrl`` becomes one playlist entry.
    """

    IE_NAME = 'hgtv.com:show'
    _VALID_URL = r'https?://(?:www\.)?hgtv\.com/shows/[^/]+/(?P<id>[^/?#&]+)'
    _TESTS = [{
        # data-module="video"
        'url': 'http://www.hgtv.com/shows/flip-or-flop/flip-or-flop-full-episodes-season-4-videos',
        'info_dict': {
            'id': 'flip-or-flop-full-episodes-season-4-videos',
            'title': 'Flip or Flop Full Episodes',
        },
        'playlist_mincount': 15,
    }, {
        # data-deferred-module="video"
        'url': 'http://www.hgtv.com/shows/good-bones/episodes/an-old-victorian-house-gets-a-new-facelift',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist result built from the page's video config.

        The last path component of the URL (the ``id`` group of
        ``_VALID_URL``) is used as the playlist id.
        """
        playlist_id = self._match_id(url)
        page = self._download_webpage(url, playlist_id)

        # Grab the contents of the first text/x-config <script> that
        # follows the (possibly deferred) video module element.
        config_pattern = r'(?s)data-(?:deferred-)?module=["\']video["\'][^>]*>.*?<script[^>]+type=["\']text/x-config["\'][^>]*>(.+?)</script'
        raw_config = self._search_regex(config_pattern, page, 'video config')

        # Only the first channel of the configuration is used.
        channel = self._parse_json(raw_config, playlist_id)['channels'][0]

        entries = []
        for item in channel['videos']:
            # Videos without a release URL cannot be resolved; skip them.
            if not item.get('releaseUrl'):
                continue
            entries.append(self.url_result(item['releaseUrl']))

        playlist_title = channel.get('title')
        playlist_description = channel.get('description')
        return self.playlist_result(
            entries, playlist_id, playlist_title, playlist_description)