Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/newgrounds.py

   1 from __future__ import unicode_literals
   2
   3 import json
   4 import re
   5
   6 from .common import InfoExtractor
   7
   8
   9 class NewgroundsIE(InfoExtractor):
  10     _VALID_URL = r'https?://(?:www\.)?newgrounds\.com/(?:audio/listen|portal/view)/(?P<id>[0-9]+)'
  11     _TESTS = [{
  12         'url': 'http://www.newgrounds.com/audio/listen/549479',
  13         'md5': 'fe6033d297591288fa1c1f780386f07a',
  14         'info_dict': {
  15             'id': '549479',
  16             'ext': 'mp3',
  17             'title': 'B7 - BusMode',
  18             'uploader': 'Burn7',
  19         }
  20     }, {
  21         'url': 'http://www.newgrounds.com/portal/view/673111',
  22         'md5': '3394735822aab2478c31b1004fe5e5bc',
  23         'info_dict': {
  24             'id': '673111',
  25             'ext': 'mp4',
  26             'title': 'Dancin',
  27             'uploader': 'Squirrelman82',
  28         },
  29     }]
  30
  31     def _real_extract(self, url):
  32         mobj = re.match(self._VALID_URL, url)
  33         music_id = mobj.group('id')
  34         webpage = self._download_webpage(url, music_id)
  35
  36         title = self._html_search_regex(
  37             r'<title>([^>]+)</title>', webpage, 'title')
  38
  39         uploader = self._html_search_regex(
  40             [r',"artist":"([^"]+)",', r'[\'"]owner[\'"]\s*:\s*[\'"]([^\'"]+)[\'"],'],
  41             webpage, 'uploader')
  42
  43         music_url_json_string = self._html_search_regex(
  44             r'({"url":"[^"]+"),', webpage, 'music url') + '}'
  45         music_url_json = json.loads(music_url_json_string)
  46         music_url = music_url_json['url']
  47
  48         return {
  49             'id': music_id,
  50             'title': title,
  51             'url': music_url,
  52             'uploader': uploader,
  53         }