Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/discovery.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     parse_iso8601,
   6     int_or_none,
   7 )
   8
   9
  10 class DiscoveryIE(InfoExtractor):
  11     _VALID_URL = r'http://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
  12     _TEST = {
  13         'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
  14         'md5': '3c69d77d9b0d82bfd5e5932a60f26504',
  15         'info_dict': {
  16             'id': 'mission-impossible-outtakes',
  17             'ext': 'flv',
  18             'title': 'Mission Impossible Outtakes',
  19             'description': ('Watch Jamie Hyneman and Adam Savage practice being'
  20                             ' each other -- to the point of confusing Jamie\'s dog -- and '
  21                             'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
  22                             ' back.'),
  23             'duration': 156,
  24             'timestamp': 1303099200,
  25             'upload_date': '20110418',
  26         },
  27     }
  28
  29     def _real_extract(self, url):
  30         video_id = self._match_id(url)
  31         webpage = self._download_webpage(url, video_id)
  32
  33         info = self._parse_json(self._search_regex(
  34             r'(?s)<script type="application/ld\+json">(.*?)</script>',
  35             webpage, 'video info'), video_id)
  36
  37         return {
  38             'id': video_id,
  39             'title': info['name'],
  40             'url': info['contentURL'],
  41             'description': info.get('description'),
  42             'thumbnail': info.get('thumbnailUrl'),
  43             'timestamp': parse_iso8601(info.get('uploadDate')),
  44             'duration': int_or_none(info.get('duration')),
  45         }