Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hotnewhiphop.py
Update upstream source from tag 'upstream/2018.03.14'
[youtubedl] / youtube_dl / extractor / hotnewhiphop.py
1 from __future__ import unicode_literals
2
3 from .common import InfoExtractor
4 from ..compat import compat_b64decode
5 from ..utils import (
6 ExtractorError,
7 HEADRequest,
8 sanitized_Request,
9 urlencode_postdata,
10 )
11
12
class HotNewHipHopIE(InfoExtractor):
    # Extractor for hotnewhiphop.com pages whose URL ends in ".<id>.html".
    # The part between the last two dots of the page name is used as the id.
    _VALID_URL = r'https?://(?:www\.)?hotnewhiphop\.com/.*\.(?P<id>.*)\.html'
    _TEST = {
        'url': 'http://www.hotnewhiphop.com/freddie-gibbs-lay-it-down-song.1435540.html',
        'md5': '2c2cd2f76ef11a9b3b581e8b232f3d96',
        'info_dict': {
            'id': '1435540',
            'ext': 'mp3',
            'title': 'Freddie Gibbs - Lay It Down'
        }
    }

    def _real_extract(self, url):
        """Build the info dict for a single hotnewhiphop.com page.

        The page is searched for a base64-encoded ``data-path`` attribute.
        When it is absent, the page's ``contentUrl`` value is handed off to
        the Youtube extractor instead.  Otherwise a media key is requested
        from the site's AJAX endpoint (only its presence is checked), the
        decoded path is followed with a HEAD request, and the URL that the
        request ends up at is returned as the media URL.

        Raises ExtractorError when the AJAX response has no ``mediaKey``
        entry or when the resolved URL still ends in ``.html``.
        """
        media_id = self._match_id(url)
        page = self._download_webpage(url, media_id)

        # Non-fatal lookup: a missing attribute selects the fallback below.
        encoded_path = self._search_regex(
            r'data-path="(.*?)"', page, 'video URL', default=None)

        if encoded_path is None:
            # No data-path on the page: delegate the contentUrl target.
            return self.url_result(
                self._search_regex(
                    r'"contentUrl" content="(.*?)"', page, 'content URL'),
                ie='Youtube')

        # Ask the site for a media key; the value itself is not used
        # afterwards, the response is only checked for the key's presence.
        key_request = sanitized_Request(
            'http://www.hotnewhiphop.com/ajax/media/getActions/',
            data=urlencode_postdata([
                ('mediaType', 's'),
                ('mediaId', media_id),
            ]))
        key_request.add_header(
            'Content-Type', 'application/x-www-form-urlencoded')
        key_response = self._download_json(
            key_request, media_id, note='Requesting media key',
            errnote='Could not download media key')
        if 'mediaKey' not in key_response:
            raise ExtractorError('Did not get a media key')

        # data-path holds a base64-encoded URL; a HEAD request is enough to
        # learn where it finally leads.
        head_response = self._request_webpage(
            HEADRequest(compat_b64decode(encoded_path).decode('utf-8')),
            media_id,
            note='Resolving final URL', errnote='Could not resolve final URL')
        final_url = head_response.geturl()
        if final_url.endswith('.html'):
            # Ending up on an HTML page is treated as a failed redirect.
            raise ExtractorError('Redirect failed')

        info = {'id': media_id, 'url': final_url}
        info['title'] = self._og_search_title(page).strip()
        info['thumbnail'] = self._og_search_thumbnail(page)
        return info