Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/patreon.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     js_to_json,
   7 )
   8
   9
  10 class PatreonIE(InfoExtractor):
  11     _VALID_URL = r'https?://(?:www\.)?patreon\.com/creation\?hid=(?P<id>[^&#]+)'
  12     _TESTS = [
  13         {
  14             'url': 'http://www.patreon.com/creation?hid=743933',
  15             'md5': 'e25505eec1053a6e6813b8ed369875cc',
  16             'info_dict': {
  17                 'id': '743933',
  18                 'ext': 'mp3',
  19                 'title': 'Episode 166: David Smalley of Dogma Debate',
  20                 'uploader': 'Cognitive Dissonance Podcast',
  21                 'thumbnail': 're:^https?://.*$',
  22             },
  23         },
  24         {
  25             'url': 'http://www.patreon.com/creation?hid=754133',
  26             'md5': '3eb09345bf44bf60451b8b0b81759d0a',
  27             'info_dict': {
  28                 'id': '754133',
  29                 'ext': 'mp3',
  30                 'title': 'CD 167 Extra',
  31                 'uploader': 'Cognitive Dissonance Podcast',
  32                 'thumbnail': 're:^https?://.*$',
  33             },
  34         },
  35         {
  36             'url': 'https://www.patreon.com/creation?hid=1682498',
  37             'info_dict': {
  38                 'id': 'SU4fj_aEMVw',
  39                 'ext': 'mp4',
  40                 'title': 'I\'m on Patreon!',
  41                 'uploader': 'TraciJHines',
  42                 'thumbnail': 're:^https?://.*$',
  43                 'upload_date': '20150211',
  44                 'description': 'md5:c5a706b1f687817a3de09db1eb93acd4',
  45                 'uploader_id': 'TraciJHines',
  46             },
  47             'params': {
  48                 'noplaylist': True,
  49                 'skip_download': True,
  50             }
  51         }
  52     ]
  53
  54     # Currently Patreon exposes download URL via hidden CSS, so login is not
  55     # needed. Keeping this commented for when this inevitably changes.
  56     '''
  57     def _login(self):
  58         (username, password) = self._get_login_info()
  59         if username is None:
  60             return
  61
  62         login_form = {
  63             'redirectUrl': 'http://www.patreon.com/',
  64             'email': username,
  65             'password': password,
  66         }
  67
  68         request = compat_urllib_request.Request(
  69             'https://www.patreon.com/processLogin',
  70             compat_urllib_parse.urlencode(login_form).encode('utf-8')
  71         )
  72         login_page = self._download_webpage(request, None, note='Logging in as %s' % username)
  73
  74         if re.search(r'onLoginFailed', login_page):
  75             raise ExtractorError('Unable to login, incorrect username and/or password', expected=True)
  76
  77     def _real_initialize(self):
  78         self._login()
  79     '''
  80
  81     def _real_extract(self, url):
  82         video_id = self._match_id(url)
  83         webpage = self._download_webpage(url, video_id)
  84         title = self._og_search_title(webpage).strip()
  85
  86         attach_fn = self._html_search_regex(
  87             r'<div class="attach"><a target="_blank" href="([^"]+)">',
  88             webpage, 'attachment URL', default=None)
  89         embed = self._html_search_regex(
  90             r'<div[^>]+id="watchCreation"[^>]*>\s*<iframe[^>]+src="([^"]+)"',
  91             webpage, 'embedded URL', default=None)
  92
  93         if attach_fn is not None:
  94             video_url = 'http://www.patreon.com' + attach_fn
  95             thumbnail = self._og_search_thumbnail(webpage)
  96             uploader = self._html_search_regex(
  97                 r'<strong>(.*?)</strong> is creating', webpage, 'uploader')
  98         elif embed is not None:
  99             return self.url_result(embed)
 100         else:
 101             playlist = self._parse_json(self._search_regex(
 102                 r'(?s)new\s+jPlayerPlaylist\(\s*\{\s*[^}]*},\s*(\[.*?,?\s*\])',
 103                 webpage, 'playlist JSON'),
 104                 video_id, transform_source=js_to_json)
 105             data = playlist[0]
 106             video_url = self._proto_relative_url(data['mp3'])
 107             thumbnail = self._proto_relative_url(data.get('cover'))
 108             uploader = data.get('artist')
 109
 110         return {
 111             'id': video_id,
 112             'url': video_url,
 113             'ext': 'mp3',
 114             'title': title,
 115             'uploader': uploader,
 116             'thumbnail': thumbnail,
 117         }