]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/jamendo.py
   2 from __future__ 
import unicode_literals
 
   6 from ..compat 
import compat_urlparse
 
   7 from .common 
import InfoExtractor
 
   8 from ..utils 
import parse_duration
 
  11 class JamendoBaseIE(InfoExtractor
): 
  12     def _extract_meta(self
, webpage
, fatal
=True): 
  13         title 
= self
._og
_search
_title
( 
  14             webpage
, default
=None) or self
._search
_regex
( 
  15             r
'<title>([^<]+)', webpage
, 
  16             'title', default
=None) 
  18             title 
= self
._search
_regex
( 
  19                 r
'(.+?)\s*\|\s*Jamendo Music', title
, 'title', default
=None) 
  21             title 
= self
._html
_search
_meta
( 
  22                 'name', webpage
, 'title', fatal
=fatal
) 
  23         mobj 
= re
.search(r
'(.+) - (.+)', title 
or '') 
  24         artist
, second 
= mobj
.groups() if mobj 
else [None] * 2 
  25         return title
, artist
, second
 
  28 class JamendoIE(JamendoBaseIE
): 
  29     _VALID_URL 
= r
'https?://(?:www\.)?jamendo\.com/track/(?P<id>[0-9]+)/(?P<display_id>[^/?#&]+)' 
  31         'url': 'https://www.jamendo.com/track/196219/stories-from-emona-i', 
  32         'md5': '6e9e82ed6db98678f171c25a8ed09ffd', 
  35             'display_id': 'stories-from-emona-i', 
  37             'title': 'Maya Filipič - Stories from Emona I', 
  38             'artist': 'Maya Filipič', 
  39             'track': 'Stories from Emona I', 
  41             'thumbnail': r
're:^https?://.*\.jpg' 
  45     def _real_extract(self
, url
): 
  46         mobj 
= self
._VALID
_URL
_RE
.match(url
) 
  47         track_id 
= mobj
.group('id') 
  48         display_id 
= mobj
.group('display_id') 
  50         webpage 
= self
._download
_webpage
(url
, display_id
) 
  52         title
, artist
, track 
= self
._extract
_meta
(webpage
) 
  55             'url': 'https://%s.jamendo.com/?trackid=%s&format=%s&from=app-97dab294' 
  56                    % (sub_domain
, track_id
, format_id
), 
  57             'format_id': format_id
, 
  60         } for quality
, (format_id
, sub_domain
, ext
) in enumerate(( 
  61             ('mp31', 'mp3l', 'mp3'), 
  62             ('mp32', 'mp3d', 'mp3'), 
  63             ('ogg1', 'ogg', 'ogg'), 
  64             ('flac', 'flac', 'flac'), 
  66         self
._sort
_formats
(formats
) 
  68         thumbnail 
= self
._html
_search
_meta
( 
  69             'image', webpage
, 'thumbnail', fatal
=False) 
  70         duration 
= parse_duration(self
._search
_regex
( 
  71             r
'<span[^>]+itemprop=["\']duration
["\'][^>]+content=["\'](.+?
)["\']', 
  72             webpage, 'duration', fatal=False)) 
  76             'display_id': display_id, 
  77             'thumbnail': thumbnail, 
  86 class JamendoAlbumIE(JamendoBaseIE): 
  87     _VALID_URL = r'https?://(?:www\.)?jamendo\.com/album/(?P<id>[0-9]+)/(?P<display_id>[\w-]+)' 
  89         'url': 'https://www.jamendo.com/album/121486/duck-on-cover', 
  92             'title': 'Shearer - Duck On Cover' 
  95             'md5': 'e1a2fcb42bda30dfac990212924149a8', 
  99                 'title': 'Shearer - Warmachine', 
 101                 'track': 'Warmachine', 
 104             'md5': '1f358d7b2f98edfe90fd55dac0799d50', 
 108                 'title': 'Shearer - Without Your Ghost', 
 110                 'track': 'Without Your Ghost', 
 118     def _real_extract(self, url): 
 119         mobj = self._VALID_URL_RE.match(url) 
 120         album_id = mobj.group('id') 
 122         webpage = self._download_webpage(url, mobj.group('display_id')) 
 124         title, artist, album = self._extract_meta(webpage, fatal=False) 
 127             '_type': 'url_transparent', 
 128             'url': compat_urlparse.urljoin(url, m.group('path')), 
 129             'ie_key': JamendoIE.ie_key(), 
 130             'id': self._search_regex( 
 131                 r'/track/(\d+)', m.group('path'), 'track id', default=None), 
 134         } for m in re.finditer( 
 135             r'<a[^>]+href=(["\'])(?P
<path
>(?
:(?
!\
1).)+)\
1[^
>]+class=["\'][^>]*js-trackrow-albumpage-link', 
 138         return self.playlist_result(entries, album_id, title)