Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/reddit.py

   1 from __future__ import unicode_literals
   2
   3 import re
   4
   5 from .common import InfoExtractor
   6 from ..utils import (
   7     ExtractorError,
   8     int_or_none,
   9     float_or_none,
  10     url_or_none,
  11 )
  12
  13
  14 class RedditIE(InfoExtractor):
  15     _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
  16     _TEST = {
  17         # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
  18         'url': 'https://v.redd.it/zv89llsvexdz',
  19         'md5': '0a070c53eba7ec4534d95a5a1259e253',
  20         'info_dict': {
  21             'id': 'zv89llsvexdz',
  22             'ext': 'mp4',
  23             'title': 'zv89llsvexdz',
  24         },
  25         'params': {
  26             'format': 'bestvideo',
  27         },
  28     }
  29
  30     def _real_extract(self, url):
  31         video_id = self._match_id(url)
  32
  33         formats = self._extract_m3u8_formats(
  34             'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
  35             'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
  36
  37         formats.extend(self._extract_mpd_formats(
  38             'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
  39             mpd_id='dash', fatal=False))
  40
  41         self._sort_formats(formats)
  42
  43         return {
  44             'id': video_id,
  45             'title': video_id,
  46             'formats': formats,
  47         }
  48
  49
  50 class RedditRIE(InfoExtractor):
  51     _VALID_URL = r'(?P<url>https?://(?:[^/]+\.)?reddit\.com/r/[^/]+/comments/(?P<id>[^/?#&]+))'
  52     _TESTS = [{
  53         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
  54         'info_dict': {
  55             'id': 'zv89llsvexdz',
  56             'ext': 'mp4',
  57             'title': 'That small heart attack.',
  58             'thumbnail': r're:^https?://.*\.jpg$',
  59             'timestamp': 1501941939,
  60             'upload_date': '20170805',
  61             'uploader': 'Antw87',
  62             'like_count': int,
  63             'dislike_count': int,
  64             'comment_count': int,
  65             'age_limit': 0,
  66         },
  67         'params': {
  68             'format': 'bestvideo',
  69             'skip_download': True,
  70         },
  71     }, {
  72         'url': 'https://www.reddit.com/r/videos/comments/6rrwyj',
  73         'only_matching': True,
  74     }, {
  75         # imgur
  76         'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
  77         'only_matching': True,
  78     }, {
  79         # imgur @ old reddit
  80         'url': 'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/',
  81         'only_matching': True,
  82     }, {
  83         # streamable
  84         'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/',
  85         'only_matching': True,
  86     }, {
  87         # youtube
  88         'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/',
  89         'only_matching': True,
  90     }, {
  91         # reddit video @ nm reddit
  92         'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/',
  93         'only_matching': True,
  94     }]
  95
  96     def _real_extract(self, url):
  97         mobj = re.match(self._VALID_URL, url)
  98         url, video_id = mobj.group('url', 'id')
  99
 100         video_id = self._match_id(url)
 101
 102         data = self._download_json(
 103             url + '/.json', video_id)[0]['data']['children'][0]['data']
 104
 105         video_url = data['url']
 106
 107         # Avoid recursing into the same reddit URL
 108         if 'reddit.com/' in video_url and '/%s/' % video_id in video_url:
 109             raise ExtractorError('No media found', expected=True)
 110
 111         over_18 = data.get('over_18')
 112         if over_18 is True:
 113             age_limit = 18
 114         elif over_18 is False:
 115             age_limit = 0
 116         else:
 117             age_limit = None
 118
 119         return {
 120             '_type': 'url_transparent',
 121             'url': video_url,
 122             'title': data.get('title'),
 123             'thumbnail': url_or_none(data.get('thumbnail')),
 124             'timestamp': float_or_none(data.get('created_utc')),
 125             'uploader': data.get('author'),
 126             'like_count': int_or_none(data.get('ups')),
 127             'dislike_count': int_or_none(data.get('downs')),
 128             'comment_count': int_or_none(data.get('num_comments')),
 129             'age_limit': age_limit,
 130         }