]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/musicvault.py
   1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  12 class MusicVaultIE(InfoExtractor
): 
  13     _VALID_URL 
= r
'https?://www\.musicvault\.com/(?P<uploader_id>[^/?#]*)/video/(?P<display_id>[^/?#]*)_(?P<id>[0-9]+)\.html' 
  15         'url': 'http://www.musicvault.com/the-allman-brothers-band/video/straight-from-the-heart_1010863.html', 
  16         'md5': '2cdbb3ae75f7fb3519821507d2fb3c15', 
  20             'uploader_id': 'the-allman-brothers-band', 
  21             'title': 'Straight from the Heart', 
  23             'uploader': 'The Allman Brothers Band', 
  24             'thumbnail': 're:^https?://.*/thumbnail/.*', 
  25             'upload_date': '19811216', 
  26             'location': 'Capitol Theatre (Passaic, NJ)', 
  27             'description': 'Listen to The Allman Brothers Band perform Straight from the Heart at Capitol Theatre (Passaic, NJ) on Dec 16, 1981', 
  31     def _real_extract(self
, url
): 
  32         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  33         display_id 
= mobj
.group('display_id') 
  34         webpage 
= self
._download
_webpage
(url
, display_id
) 
  36         thumbnail 
= self
._search
_regex
( 
  37             r
'<meta itemprop="thumbnail" content="([^"]+)"', 
  38             webpage
, 'thumbnail', fatal
=False) 
  40         data_div 
= self
._search
_regex
( 
  41             r
'(?s)<div class="data">(.*?)</div>', webpage
, 'data fields') 
  42         uploader 
= self
._html
_search
_regex
( 
  43             r
'<h1.*?>(.*?)</h1>', data_div
, 'uploader', fatal
=False) 
  44         title 
= self
._html
_search
_regex
( 
  45             r
'<h2.*?>(.*?)</h2>', data_div
, 'title') 
  46         upload_date 
= unified_strdate(self
._html
_search
_regex
( 
  47             r
'<h3.*?>(.*?)</h3>', data_div
, 'uploader', fatal
=False)) 
  48         location 
= self
._html
_search
_regex
( 
  49             r
'<h4.*?>(.*?)</h4>', data_div
, 'location', fatal
=False) 
  51         duration 
= parse_duration(self
._html
_search
_meta
('duration', webpage
)) 
  53         VIDEO_URL_TEMPLATE 
= 'http://cdnapi.kaltura.com/p/%(uid)s/sp/%(wid)s/playManifest/entryId/%(entry_id)s/format/url/protocol/http' 
  54         kaltura_id 
= self
._search
_regex
( 
  55             r
'<div id="video-detail-player" data-kaltura-id="([^"]+)"', 
  56             webpage
, 'kaltura ID') 
  57         video_url 
= VIDEO_URL_TEMPLATE 
% { 
  58             'entry_id': kaltura_id
, 
  59             'wid': self
._search
_regex
(r
'/wid/_([0-9]+)/', webpage
, 'wid'), 
  60             'uid': self
._search
_regex
(r
'uiconf_id/([0-9]+)/', webpage
, 'uid'), 
  64             'id': mobj
.group('id'), 
  67             'display_id': display_id
, 
  68             'uploader_id': mobj
.group('uploader_id'), 
  69             'thumbnail': thumbnail
, 
  70             'description': self
._html
_search
_meta
('description', webpage
), 
  71             'upload_date': upload_date
,