Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/comcarcoff.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..compat import compat_str
   6 from ..utils import (
   7     int_or_none,
   8     parse_duration,
   9     parse_iso8601,
  10 )
  11
  12
  13 class ComCarCoffIE(InfoExtractor):
  14     _VALID_URL = r'http://(?:www\.)?comediansincarsgettingcoffee\.com/(?P<id>[a-z0-9\-]*)'
  15     _TESTS = [{
  16         'url': 'http://comediansincarsgettingcoffee.com/miranda-sings-happy-thanksgiving-miranda/',
  17         'info_dict': {
  18             'id': '2494164',
  19             'ext': 'mp4',
  20             'upload_date': '20141127',
  21             'timestamp': 1417107600,
  22             'duration': 1232,
  23             'title': 'Happy Thanksgiving Miranda',
  24             'description': 'Jerry Seinfeld and his special guest Miranda Sings cruise around town in search of coffee, complaining and apologizing along the way.',
  25         },
  26         'params': {
  27             'skip_download': 'requires ffmpeg',
  28         }
  29     }]
  30
  31     def _real_extract(self, url):
  32         display_id = self._match_id(url)
  33         if not display_id:
  34             display_id = 'comediansincarsgettingcoffee.com'
  35         webpage = self._download_webpage(url, display_id)
  36
  37         full_data = self._parse_json(
  38             self._search_regex(
  39                 r'window\.app\s*=\s*({.+?});\n', webpage, 'full data json'),
  40             display_id)['videoData']
  41
  42         display_id = full_data['activeVideo']['video']
  43         video_data = full_data.get('videos', {}).get(display_id) or full_data['singleshots'][display_id]
  44         video_id = compat_str(video_data['mediaId'])
  45         thumbnails = [{
  46             'url': video_data['images']['thumb'],
  47         }, {
  48             'url': video_data['images']['poster'],
  49         }]
  50
  51         timestamp = int_or_none(video_data.get('pubDateTime')) or parse_iso8601(
  52             video_data.get('pubDate'))
  53         duration = int_or_none(video_data.get('durationSeconds')) or parse_duration(
  54             video_data.get('duration'))
  55
  56         return {
  57             '_type': 'url_transparent',
  58             'url': 'crackle:%s' % video_id,
  59             'id': video_id,
  60             'display_id': display_id,
  61             'title': video_data['title'],
  62             'description': video_data.get('description'),
  63             'timestamp': timestamp,
  64             'duration': duration,
  65             'thumbnails': thumbnails,
  66             'season_number': int_or_none(video_data.get('season')),
  67             'episode_number': int_or_none(video_data.get('episode')),
  68             'webpage_url': 'http://comediansincarsgettingcoffee.com/%s' % (video_data.get('urlSlug', video_data.get('slug'))),
  69         }