Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/myspace.py

   1 # encoding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5 import json
   6
   7 from .common import InfoExtractor
   8 from ..compat import (
   9     compat_str,
  10 )
  11 from ..utils import ExtractorError
  12
  13
  14 class MySpaceIE(InfoExtractor):
  15     _VALID_URL = r'https?://myspace\.com/([^/]+)/(?P<mediatype>video/[^/]+/|music/song/.*?)(?P<id>\d+)'
  16
  17     _TESTS = [
  18         {
  19             'url': 'https://myspace.com/fiveminutestothestage/video/little-big-town/109594919',
  20             'info_dict': {
  21                 'id': '109594919',
  22                 'ext': 'flv',
  23                 'title': 'Little Big Town',
  24                 'description': 'This country quartet was all smiles while playing a sold out show at the Pacific Amphitheatre in Orange County, California.',
  25                 'uploader': 'Five Minutes to the Stage',
  26                 'uploader_id': 'fiveminutestothestage',
  27             },
  28             'params': {
  29                 # rtmp download
  30                 'skip_download': True,
  31             },
  32         },
  33         # songs
  34         {
  35             'url': 'https://myspace.com/killsorrow/music/song/of-weakened-soul...-93388656-103880681',
  36             'info_dict': {
  37                 'id': '93388656',
  38                 'ext': 'flv',
  39                 'title': 'Of weakened soul...',
  40                 'uploader': 'Killsorrow',
  41                 'uploader_id': 'killsorrow',
  42             },
  43             'params': {
  44                 # rtmp download
  45                 'skip_download': True,
  46             },
  47         }, {
  48             'add_ie': ['Vevo'],
  49             'url': 'https://myspace.com/threedaysgrace/music/song/animal-i-have-become-28400208-28218041',
  50             'info_dict': {
  51                 'id': 'USZM20600099',
  52                 'ext': 'mp4',
  53                 'title': 'Animal I Have Become',
  54                 'uploader': 'Three Days Grace',
  55                 'timestamp': int,
  56                 'upload_date': '20060502',
  57             },
  58             'skip': 'VEVO is only available in some countries',
  59         }, {
  60             'add_ie': ['Youtube'],
  61             'url': 'https://myspace.com/starset2/music/song/first-light-95799905-106964426',
  62             'info_dict': {
  63                 'id': 'ypWvQgnJrSU',
  64                 'ext': 'mp4',
  65                 'title': 'Starset - First Light',
  66                 'description': 'md5:2d5db6c9d11d527683bcda818d332414',
  67                 'uploader': 'Jacob Soren',
  68                 'uploader_id': 'SorenPromotions',
  69                 'upload_date': '20140725',
  70             }
  71         },
  72     ]
  73
  74     def _real_extract(self, url):
  75         mobj = re.match(self._VALID_URL, url)
  76         video_id = mobj.group('id')
  77         webpage = self._download_webpage(url, video_id)
  78         player_url = self._search_regex(
  79             r'playerSwf":"([^"?]*)', webpage, 'player URL')
  80
  81         if mobj.group('mediatype').startswith('music/song'):
  82             # songs don't store any useful info in the 'context' variable
  83             song_data = self._search_regex(
  84                 r'''<button.*data-song-id=(["\'])%s\1.*''' % video_id,
  85                 webpage, 'song_data', default=None, group=0)
  86             if song_data is None:
  87                 # some songs in an album are not playable
  88                 self.report_warning(
  89                     '%s: No downloadable song on this page' % video_id)
  90                 return
  91             def search_data(name):
  92                 return self._search_regex(
  93                     r'''data-%s=([\'"])(?P<data>.*?)\1''' % name,
  94                     song_data, name, default='', group='data')
  95             streamUrl = search_data('stream-url')
  96             if not streamUrl:
  97                 vevo_id = search_data('vevo-id')
  98                 youtube_id = search_data('youtube-id')
  99                 if vevo_id:
 100                     self.to_screen('Vevo video detected: %s' % vevo_id)
 101                     return self.url_result('vevo:%s' % vevo_id, ie='Vevo')
 102                 elif youtube_id:
 103                     self.to_screen('Youtube video detected: %s' % youtube_id)
 104                     return self.url_result(youtube_id, ie='Youtube')
 105                 else:
 106                     raise ExtractorError(
 107                         'Found song but don\'t know how to download it')
 108             info = {
 109                 'id': video_id,
 110                 'title': self._og_search_title(webpage),
 111                 'uploader': search_data('artist-name'),
 112                 'uploader_id': search_data('artist-username'),
 113                 'thumbnail': self._og_search_thumbnail(webpage),
 114             }
 115         else:
 116             context = json.loads(self._search_regex(
 117                 r'context = ({.*?});', webpage, 'context'))
 118             video = context['video']
 119             streamUrl = video['streamUrl']
 120             info = {
 121                 'id': compat_str(video['mediaId']),
 122                 'title': video['title'],
 123                 'description': video['description'],
 124                 'thumbnail': video['imageUrl'],
 125                 'uploader': video['artistName'],
 126                 'uploader_id': video['artistUsername'],
 127             }
 128
 129         rtmp_url, play_path = streamUrl.split(';', 1)
 130         info.update({
 131             'url': rtmp_url,
 132             'play_path': play_path,
 133             'player_url': player_url,
 134             'ext': 'flv',
 135         })
 136         return info
 137
 138
 139 class MySpaceAlbumIE(InfoExtractor):
 140     IE_NAME = 'MySpace:album'
 141     _VALID_URL = r'https?://myspace\.com/([^/]+)/music/album/(?P<title>.*-)(?P<id>\d+)'
 142
 143     _TESTS = [{
 144         'url': 'https://myspace.com/starset2/music/album/transmissions-19455773',
 145         'info_dict': {
 146             'title': 'Transmissions',
 147             'id': '19455773',
 148         },
 149         'playlist_count': 14,
 150         'skip': 'this album is only available in some countries',
 151     }, {
 152         'url': 'https://myspace.com/killsorrow/music/album/the-demo-18596029',
 153         'info_dict': {
 154             'title': 'The Demo',
 155             'id': '18596029',
 156         },
 157         'playlist_count': 5,
 158     }]
 159
 160     def _real_extract(self, url):
 161         mobj = re.match(self._VALID_URL, url)
 162         playlist_id = mobj.group('id')
 163         display_id = mobj.group('title') + playlist_id
 164         webpage = self._download_webpage(url, display_id)
 165         tracks_paths = re.findall(r'"music:song" content="(.*?)"', webpage)
 166         if not tracks_paths:
 167             raise ExtractorError(
 168                 '%s: No songs found, try using proxy' % display_id,
 169                 expected=True)
 170         entries = [
 171             self.url_result(t_path, ie=MySpaceIE.ie_key())
 172             for t_path in tracks_paths]
 173         return {
 174             '_type': 'playlist',
 175             'id': playlist_id,
 176             'display_id': display_id,
 177             'title': self._og_search_title(webpage),
 178             'entries': entries,
 179         }