]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/musicvault.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
   8 class MusicVaultIE(InfoExtractor
): 
   9     _VALID_URL 
= r
'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html' 
  11         'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html', 
  12         'md5': '3adcbdb3dcc02d647539e53f284ba171', 
  16             'uploader_id': 'the-allman-brothers-band', 
  17             'title': 'Straight from the Heart', 
  19             'uploader': 'The Allman Brothers Band', 
  20             'thumbnail': 're:^https?://.*/thumbnail/.*', 
  21             'upload_date': '20131219', 
  22             'location': 'Capitol Theatre (Passaic, NJ)', 
  23             'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981', 
  28     def _real_extract(self
, url
): 
  29         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  30         display_id 
= mobj
.group('display_id') 
  31         webpage 
= self
._download
_webpage
(url
, display_id
) 
  33         thumbnail 
= self
._search
_regex
( 
  34             r
'<meta itemprop="thumbnail" content="([^"]+)"', 
  35             webpage
, 'thumbnail', fatal
=False) 
  37         data_div 
= self
._search
_regex
( 
  38             r
'(?s)<div class="data">(.*?)</div>', webpage
, 'data fields') 
  39         uploader 
= self
._html
_search
_regex
( 
  40             r
'<h1.*?>(.*?)</h1>', data_div
, 'uploader', fatal
=False) 
  41         title 
= self
._html
_search
_regex
( 
  42             r
'<h2.*?>(.*?)</h2>', data_div
, 'title') 
  43         location 
= self
._html
_search
_regex
( 
  44             r
'<h4.*?>(.*?)</h4>', data_div
, 'location', fatal
=False) 
  46         kaltura_id 
= self
._search
_regex
( 
  47             r
'<div id="video-detail-player" data-kaltura-id="([^"]+)"', 
  48             webpage
, 'kaltura ID') 
  49         wid 
= self
._search
_regex
(r
'/wid/_([0-9]+)/', webpage
, 'wid') 
  52             'id': mobj
.group('id'), 
  53             '_type': 'url_transparent', 
  54             'url': 'kaltura:%s:%s' % (wid
, kaltura_id
), 
  56             'display_id': display_id
, 
  57             'uploader_id': mobj
.group('uploader_id'), 
  58             'thumbnail': thumbnail
, 
  59             'description': self
._html
_search
_meta
('description', webpage
),