Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/myspace.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import json
   6
   7 from .common import InfoExtractor
   8 from ..compat import (
   9     compat_str,
  10 )
  11 from ..utils import ExtractorError
  12
  13
  14 class MySpaceIE(InfoExtractor):
  15     _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
  16
  17     _TESTS = [
  18         {
  19             'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
  20             'info_dict': {
  21                 'id': '109594919',
  22                 'ext': 'flv',
  23                 'title': 'Little Big Town',
  24                 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
  25                 'uploader': 'Five Minutes to the Stage',
  26                 'uploader_id': 'fiveminutestothestage',
  27             },
  28             'params': {
  29                 # rtmp download
  30                 'skip_download': True,
  31             },
  32         },
  33         # songs
  34         {
  35             'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
  36             'info_dict': {
  37                 'id': '93388656',
  38                 'ext': 'flv',
  39                 'title': 'Of weakened soul...',
  40                 'uploader': 'Killsorrow',
  41                 'uploader_id': 'killsorrow',
  42             },
  43             'params': {
  44                 # rtmp download
  45                 'skip_download': True,
  46             },
  47         }, {
  48             'add_ie': ['Vevo'],
  49             'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
  50             'info_dict': {
  51                 'id': 'USZM20600099',
  52                 'ext': 'mp4',
  53                 'title': 'Animal I Have Become',
  54                 'uploader': 'Three Days Grace',
  55                 'timestamp': int,
  56                 'upload_date': '20060502',
  57             },
  58             'skip': 'VEVO is only available in some countries',
  59         }, {
  60             'add_ie': ['Youtube'],
  61             'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
  62             'info_dict': {
  63                 'id': 'ypWvQgnJrSU',
  64                 'ext': 'mp4',
  65                 'title': 'Starset - First Light',
  66                 'description': 'md5:2d5db6c9d11d527683bcda818d332414',
  67                 'uploader': 'Jacob Soren',
  68                 'uploader_id': 'SorenPromotions',
  69                 'upload_date': '20140725',
  70             }
  71         },
  72     ]
  73
  74     def _real_extract(self, url):
  75         mobj = re.match(self._VALID_URL, url)
  76         video_id = mobj.group('id')
  77         webpage = self._download_webpage(url, video_id)
  78         player_url = self._search_regex(
  79             r'playerSwf":"([^"?]*)', webpage, 'player URL')
  80
  81         if mobj.group('mediatype').startswith('music/song'):
  82             # songs don't store any useful info in the 'context' variable
  83             song_data = self._search_regex(
  84                 r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
  85                 webpage, 'song_data', default=None, group=0)
  86             if song_data is None:
  87                 # some songs in an album are not playable
  88                 self.report_warning(
  89                     '%s: No downloadable song on this page' % video_id)
  90                 return
  91
  92             def search_data(name):
  93                 return self._search_regex(
  94                     r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
  95                     song_data, name, default='', group='data')
  96             streamUrl = search_data('stream-url')
  97             if not streamUrl:
  98                 vevo_id = search_data('vevo-id')
  99                 youtube_id = search_data('youtube-id')
 100                 if vevo_id:
 101                     self.to_screen('Vevo video detected: %s' % vevo_id)
 102                     return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
 103                 elif youtube_id:
 104                     self.to_screen('Youtube video detected: %s' % youtube_id)
 105                     return self.url_result(youtube_id, ie='Youtube')
 106                 else:
 107                     raise ExtractorError(
 108                         'Found song but don\'t know how to download it')
 109             info = {
 110                 'id': video_id,
 111                 'title': self._og_search_title(webpage),
 112                 'uploader': search_data('artist-name'),
 113                 'uploader_id': search_data('artist-username'),
 114                 'thumbnail': self._og_search_thumbnail(webpage),
 115             }
 116         else:
 117             context = json.loads(self._search_regex(
 118                 r'context = ({.*?});', webpage, 'context'))
 119             video = context['video']
 120             streamUrl = video['streamUrl']
 121             info = {
 122                 'id': compat_str(video['mediaId']),
 123                 'title': video['title'],
 124                 'description': video['description'],
 125                 'thumbnail': video['imageUrl'],
 126                 'uploader': video['artistName'],
 127                 'uploader_id': video['artistUsername'],
 128             }
 129
 130         rtmp_url, play_path = streamUrl.split(';', 1)
 131         info.update({
 132             'url': rtmp_url,
 133             'play_path': play_path,
 134             'player_url': player_url,
 135             'ext': 'flv',
 136         })
 137         return info
 138
 139
 140 class MySpaceAlbumIE(InfoExtractor):
 141     IE_NAME = 'MySpace:album'
 142     _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
 143
 144     _TESTS = [{
 145         'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
 146         'info_dict': {
 147             'title': 'Transmissions',
 148             'id': '19455773',
 149         },
 150         'playlist_count': 14,
 151         'skip': 'this album is only available in some countries',
 152     }, {
 153         'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
 154         'info_dict': {
 155             'title': 'The Demo',
 156             'id': '18596029',
 157         },
 158         'playlist_count': 5,
 159     }]
 160
 161     def _real_extract(self, url):
 162         mobj = re.match(self._VALID_URL, url)
 163         playlist_id = mobj.group('id')
 164         display_id = mobj.group('title') + playlist_id
 165         webpage = self._download_webpage(url, display_id)
 166         tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
 167         if not tracks_paths:
 168             raise ExtractorError(
 169                 '%s: No songs found, try using proxy' % display_id,
 170                 expected=True)
 171         entries = [
 172             self.url_result(t_path, ie=MySpaceIE.ie_key())
 173             for t_path in tracks_paths]
 174         return {
 175             '_type': 'playlist',
 176             'id': playlist_id,
 177             'display_id': display_id,
 178             'title': self._og_search_title(webpage),
 179             'entries': entries,
 180         }