]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hearthisat.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_urlparse
 
  17 class HearThisAtIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'https?://(?:www\.)?hearthis\.at/(?P<artist>[^/]+)/(?P<title>[A-Za-z0-9\-]+)/?$' 
  19     _PLAYLIST_URL 
= 'https://hearthis.at/playlist.php' 
  21         'url': 'https://hearthis.at/moofi/dr-kreep', 
  22         'md5': 'ab6ec33c8fed6556029337c7885eb4e0', 
  26             'title': 'Moofi - Dr. Kreep', 
  27             'thumbnail': 're:^https?://.*\.jpg$', 
  28             'timestamp': 1421564134, 
  29             'description': 'Creepy Patch. Mutable Instruments Braids Vowel + Formant Mode.', 
  30             'upload_date': '20150118', 
  35             'categories': ['Experimental'], 
  39     def _real_extract(self
, url
): 
  40         m 
= re
.match(self
._VALID
_URL
, url
) 
  41         display_id 
= '{artist:s} - {title:s}'.format(**m
.groupdict()) 
  43         webpage 
= self
._download
_webpage
(url
, display_id
) 
  44         track_id 
= self
._search
_regex
( 
  45             r
'intTrackId\s*=\s*(\d+)', webpage
, 'track ID') 
  47         payload 
= urlencode_postdata({'tracks[]': track_id
}) 
  48         req 
= sanitized_Request(self
._PLAYLIST
_URL
, payload
) 
  49         req
.add_header('Content-type', 'application/x-www-form-urlencoded') 
  51         track 
= self
._download
_json
(req
, track_id
, 'Downloading playlist')[0] 
  52         title 
= '{artist:s} - {title:s}'.format(**track
) 
  55         if track
.get('category'): 
  56             categories 
= [track
['category']] 
  58         description 
= self
._og
_search
_description
(webpage
) 
  59         thumbnail 
= self
._og
_search
_thumbnail
(webpage
) 
  61         meta_span 
= r
'<span[^>]+class="%s".*?</i>([^<]+)</span>' 
  62         view_count 
= str_to_int(self
._search
_regex
( 
  63             meta_span 
% 'plays_count', webpage
, 'view count', fatal
=False)) 
  64         like_count 
= str_to_int(self
._search
_regex
( 
  65             meta_span 
% 'likes_count', webpage
, 'like count', fatal
=False)) 
  66         comment_count 
= str_to_int(self
._search
_regex
( 
  67             meta_span 
% 'comment_count', webpage
, 'comment count', fatal
=False)) 
  68         duration 
= str_to_int(self
._search
_regex
( 
  69             r
'data-length="(\d+)', webpage
, 'duration', fatal
=False)) 
  70         timestamp 
= str_to_int(self
._search
_regex
( 
  71             r
'<span[^>]+class="calctime"[^>]+data-time="(\d+)', webpage
, 'timestamp', fatal
=False)) 
  74         mp3_url 
= self
._search
_regex
( 
  75             r
'(?s)<a class="player-link"\s+(?:[a-zA-Z0-9_:-]+="[^"]+"\s+)*?data-mp3="([^"]+)"', 
  76             webpage
, 'mp3 URL', fatal
=False) 
  84         download_path 
= self
._search
_regex
( 
  85             r
'<a class="[^"]*download_fct[^"]*"\s+href="([^"]+)"', 
  86             webpage
, 'download URL', default
=None) 
  88             download_url 
= compat_urlparse
.urljoin(url
, download_path
) 
  89             ext_req 
= HEADRequest(download_url
) 
  90             ext_handle 
= self
._request
_webpage
( 
  91                 ext_req
, display_id
, note
='Determining extension') 
  92             ext 
= urlhandle_detect_ext(ext_handle
) 
  94                 'format_id': 'download', 
  98                 'preference': 2,  # Usually better quality 
 100         self
._sort
_formats
(formats
) 
 104             'display_id': display_id
, 
 107             'thumbnail': thumbnail
, 
 108             'description': description
, 
 109             'duration': duration
, 
 110             'timestamp': timestamp
, 
 111             'view_count': view_count
, 
 112             'comment_count': comment_count
, 
 113             'like_count': like_count
, 
 114             'categories': categories
,