2 from __future__ 
import unicode_literals
 
   8 from .common 
import InfoExtractor
 
   9 from ..compat 
import compat_urllib_parse_unquote
 
  18 class MailRuIE(InfoExtractor
): 
  20     IE_DESC 
= 'Видео@Mail.Ru' 
  23                         (?:(?:www|m)\.)?my\.mail\.ru/+ 
  25                             video/.*\#video=/?(?P<idv1>(?:[^/]+/){3}\d+)| 
  26                             (?:(?P<idv2prefix>(?:[^/]+/+){2})video/(?P<idv2suffix>[^/]+/\d+))\.html| 
  27                             (?:video/embed|\+/video/meta)/(?P<metaid>\d+) 
  32             'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', 
  33             'md5': 'dea205f03120046894db4ebb6159879a', 
  37                 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', 
  38                 'timestamp': 1393235077, 
  39                 'upload_date': '20140224', 
  40                 'uploader': 'sonypicturesrus', 
  41                 'uploader_id': 'sonypicturesrus@mail.ru', 
  44             'skip': 'Not accessible from Travis CI server', 
  47             'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html', 
  48             'md5': '00a91a58c3402204dcced523777b475f', 
  50                 'id': '46843144_1263', 
  52                 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', 
  53                 'timestamp': 1397039888, 
  54                 'upload_date': '20140409', 
  56                 'uploader_id': 'hitech@corp.mail.ru', 
  59             'skip': 'Not accessible from Travis CI server', 
  62             # only available via metaUrl API 
  63             'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html', 
  64             'md5': '3b26d2491c6949d031a32b96bd97c096', 
  69                 'timestamp': 1449094163, 
  70                 'upload_date': '20151202', 
  71                 'uploader': '720pizle@mail.ru', 
  72                 'uploader_id': '720pizle@mail.ru', 
  75             'skip': 'Not accessible from Travis CI server', 
  78             'url': 'http://m.my.mail.ru/mail/3sktvtr/video/_myvideo/138.html', 
  79             'only_matching': True, 
  82             'url': 'https://my.mail.ru/video/embed/7949340477499637815', 
  83             'only_matching': True, 
  86             'url': 'http://my.mail.ru/+/video/meta/7949340477499637815', 
  87             'only_matching': True, 
  90             'url': 'https://my.mail.ru//list/sinyutin10/video/_myvideo/4.html', 
  91             'only_matching': True, 
  94             'url': 'https://my.mail.ru//list//sinyutin10/video/_myvideo/4.html', 
  95             'only_matching': True, 
  99     def _real_extract(self
, url
): 
 100         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 101         meta_id 
= mobj
.group('metaid') 
 105             meta_url 
= 'https://my.mail.ru/+/video/meta/%s' % meta_id
 
 107             video_id 
= mobj
.group('idv1') 
 109                 video_id 
= mobj
.group('idv2prefix') + mobj
.group('idv2suffix') 
 110             webpage 
= self
._download
_webpage
(url
, video_id
) 
 111             page_config 
= self
._parse
_json
(self
._search
_regex
( 
 112                 r
'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', 
 113                 webpage
, 'page config', default
='{}'), video_id
, fatal
=False) 
 115                 meta_url 
= page_config
.get('metaUrl') or page_config
.get('video', {}).get('metaUrl') 
 121             video_data 
= self
._download
_json
( 
 122                 meta_url
, video_id 
or meta_id
, 'Downloading video meta JSON', 
 125         # Fallback old approach 
 127             video_data 
= self
._download
_json
( 
 128                 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id
, 
 129                 video_id
, 'Downloading video JSON') 
 132         for f 
in video_data
['videos']: 
 133             video_url 
= f
.get('url') 
 136             format_id 
= f
.get('key') 
 137             height 
= int_or_none(self
._search
_regex
( 
 138                 r
'^(\d+)[pP]$', format_id
, 'height', default
=None)) if format_id 
else None 
 141                 'format_id': format_id
, 
 144         self
._sort
_formats
(formats
) 
 146         meta_data 
= video_data
['meta'] 
 147         title 
= remove_end(meta_data
['title'], '.mp4') 
 149         author 
= video_data
.get('author') 
 150         uploader 
= author
.get('name') 
 151         uploader_id 
= author
.get('id') or author
.get('email') 
 152         view_count 
= int_or_none(video_data
.get('viewsCount') or video_data
.get('views_count')) 
 154         acc_id 
= meta_data
.get('accId') 
 155         item_id 
= meta_data
.get('itemId') 
 156         content_id 
= '%s_%s' % (acc_id
, item_id
) if acc_id 
and item_id 
else video_id
 
 158         thumbnail 
= meta_data
.get('poster') 
 159         duration 
= int_or_none(meta_data
.get('duration')) 
 160         timestamp 
= int_or_none(meta_data
.get('timestamp')) 
 165             'thumbnail': thumbnail
, 
 166             'timestamp': timestamp
, 
 167             'uploader': uploader
, 
 168             'uploader_id': uploader_id
, 
 169             'duration': duration
, 
 170             'view_count': view_count
, 
 175 class MailRuMusicSearchBaseIE(InfoExtractor
): 
 176     def _search(self
, query
, url
, audio_id
, limit
=100, offset
=0): 
 177         search 
= self
._download
_json
( 
 178             'https://my.mail.ru/cgi-bin/my/ajax', audio_id
, 
 179             'Downloading songs JSON page %d' % (offset 
// limit 
+ 1), 
 182                 'X-Requested-With': 'XMLHttpRequest', 
 186                 'func_name': 'music.search', 
 191                 'arg_search_params': json
.dumps({ 
 198                 'arg_offset': offset
, 
 200         return next(e 
for e 
in search 
if isinstance(e
, dict)) 
 203     def _extract_track(t
, fatal
=True): 
 204         audio_url 
= t
['URL'] if fatal 
else t
.get('URL') 
 208         audio_id 
= t
['File'] if fatal 
else t
.get('File') 
 212         thumbnail 
= t
.get('AlbumCoverURL') or t
.get('FiledAlbumCover') 
 213         uploader 
= t
.get('OwnerName') or t
.get('OwnerName_Text_HTML') 
 214         uploader_id 
= t
.get('UploaderID') 
 215         duration 
= int_or_none(t
.get('DurationInSeconds')) or parse_duration( 
 216             t
.get('Duration') or t
.get('DurationStr')) 
 217         view_count 
= int_or_none(t
.get('PlayCount') or t
.get('PlayCount_hr')) 
 219         track 
= t
.get('Name') or t
.get('Name_Text_HTML') 
 220         artist 
= t
.get('Author') or t
.get('Author_Text_HTML') 
 223             title 
= '%s - %s' % (artist
, track
) if artist 
else track
 
 228             'extractor_key': MailRuMusicIE
.ie_key(), 
 231             'thumbnail': thumbnail
, 
 232             'uploader': uploader
, 
 233             'uploader_id': uploader_id
, 
 234             'duration': duration
, 
 235             'view_count': view_count
, 
 237             'abr': int_or_none(t
.get('BitRate')), 
 240             'album': t
.get('Album'), 
 245 class MailRuMusicIE(MailRuMusicSearchBaseIE
): 
 246     IE_NAME 
= 'mailru:music' 
 247     IE_DESC 
= 'Музыка@Mail.Ru' 
 248     _VALID_URL 
= r
'https?://my\.mail\.ru/+music/+songs/+[^/?#&]+-(?P<id>[\da-f]+)' 
 250         'url': 'https://my.mail.ru/music/songs/%D0%BC8%D0%BB8%D1%82%D1%85-l-a-h-luciferian-aesthetics-of-herrschaft-single-2017-4e31f7125d0dfaef505d947642366893', 
 251         'md5': '0f8c22ef8c5d665b13ac709e63025610', 
 253             'id': '4e31f7125d0dfaef505d947642366893', 
 255             'title': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017 - М8Л8ТХ', 
 256             'uploader': 'Игорь Мудрый', 
 257             'uploader_id': '1459196328', 
 262             'track': 'L.A.H. (Luciferian Aesthetics of Herrschaft) single, 2017', 
 267     def _real_extract(self
, url
): 
 268         audio_id 
= self
._match
_id
(url
) 
 270         webpage 
= self
._download
_webpage
(url
, audio_id
) 
 272         title 
= self
._og
_search
_title
(webpage
) 
 273         music_data 
= self
._search
(title
, url
, audio_id
)['MusicData'] 
 274         t 
= next(t 
for t 
in music_data 
if t
.get('File') == audio_id
) 
 276         info 
= self
._extract
_track
(t
) 
 277         info
['title'] = title
 
 281 class MailRuMusicSearchIE(MailRuMusicSearchBaseIE
): 
 282     IE_NAME 
= 'mailru:music:search' 
 283     IE_DESC 
= 'Музыка@Mail.Ru' 
 284     _VALID_URL 
= r
'https?://my\.mail\.ru/+music/+search/+(?P<id>[^/?#&]+)' 
 286         'url': 'https://my.mail.ru/music/search/black%20shadow', 
 288             'id': 'black shadow', 
 290         'playlist_mincount': 532, 
 293     def _real_extract(self
, url
): 
 294         query 
= compat_urllib_parse_unquote(self
._match
_id
(url
)) 
 301         for _ 
in itertools
.count(1): 
 302             search 
= self
._search
(query
, url
, query
, LIMIT
, offset
) 
 304             music_data 
= search
.get('MusicData') 
 305             if not music_data 
or not isinstance(music_data
, list): 
 309                 track 
= self
._extract
_track
(t
, fatal
=False) 
 311                     entries
.append(track
) 
 314                 search
, lambda x
: x
['Results']['music']['Total'], int) 
 316             if total 
is not None: 
 322         return self
.playlist_result(entries
, query
)