]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mailru.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  13 class MailRuIE(InfoExtractor
): 
  15     IE_DESC 
= 'Видео@Mail.Ru' 
  16     _VALID_URL 
= r
'http://(?:www\.)?my\.mail\.ru/(?:video/.*#video=/?(?P<idv1>(?:[^/]+/){3}\d+)|(?:(?P<idv2prefix>(?:[^/]+/){2})video/(?P<idv2suffix>[^/]+/\d+))\.html)' 
  20             'url': 'http://my.mail.ru/video/top#video=/mail/sonypicturesrus/75/76', 
  21             'md5': 'dea205f03120046894db4ebb6159879a', 
  25                 'title': 'Новый Человек-Паук. Высокое напряжение. Восстание Электро', 
  26                 'timestamp': 1393232740, 
  27                 'upload_date': '20140224', 
  28                 'uploader': 'sonypicturesrus', 
  29                 'uploader_id': 'sonypicturesrus@mail.ru', 
  32             'skip': 'Not accessible from Travis CI server', 
  35             'url': 'http://my.mail.ru/corp/hitech/video/news_hi-tech_mail_ru/1263.html', 
  36             'md5': '00a91a58c3402204dcced523777b475f', 
  38                 'id': '46843144_1263', 
  40                 'title': 'Samsung Galaxy S5 Hammer Smash Fail Battery Explosion', 
  41                 'timestamp': 1397039888, 
  42                 'upload_date': '20140409', 
  43                 'uploader': 'hitech@corp.mail.ru', 
  44                 'uploader_id': 'hitech@corp.mail.ru', 
  47             'skip': 'Not accessible from Travis CI server', 
  50             # only available via metaUrl API 
  51             'url': 'http://my.mail.ru/mail/720pizle/video/_myvideo/502.html', 
  52             'md5': '3b26d2491c6949d031a32b96bd97c096', 
  57                 'timestamp': 1449094163, 
  58                 'upload_date': '20151202', 
  59                 'uploader': '720pizle@mail.ru', 
  60                 'uploader_id': '720pizle@mail.ru', 
  63             'skip': 'Not accessible from Travis CI server', 
  67     def _real_extract(self
, url
): 
  68         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  69         video_id 
= mobj
.group('idv1') 
  72             video_id 
= mobj
.group('idv2prefix') + mobj
.group('idv2suffix') 
  74         webpage 
= self
._download
_webpage
(url
, video_id
) 
  78         page_config 
= self
._parse
_json
(self
._search
_regex
( 
  79             r
'(?s)<script[^>]+class="sp-video__page-config"[^>]*>(.+?)</script>', 
  80             webpage
, 'page config', default
='{}'), video_id
, fatal
=False) 
  82             meta_url 
= page_config
.get('metaUrl') or page_config
.get('video', {}).get('metaUrl') 
  84                 video_data 
= self
._download
_json
( 
  85                     meta_url
, video_id
, 'Downloading video meta JSON', fatal
=False) 
  87         # Fallback old approach 
  89             video_data 
= self
._download
_json
( 
  90                 'http://api.video.mail.ru/videos/%s.json?new=1' % video_id
, 
  91                 video_id
, 'Downloading video JSON') 
  94         for f 
in video_data
['videos']: 
  95             video_url 
= f
.get('url') 
  98             format_id 
= f
.get('key') 
  99             height 
= int_or_none(self
._search
_regex
( 
 100                 r
'^(\d+)[pP]$', format_id
, 'height', default
=None)) if format_id 
else None 
 103                 'format_id': format_id
, 
 106         self
._sort
_formats
(formats
) 
 108         meta_data 
= video_data
['meta'] 
 109         title 
= remove_end(meta_data
['title'], '.mp4') 
 111         author 
= video_data
.get('author') 
 112         uploader 
= author
.get('name') 
 113         uploader_id 
= author
.get('id') or author
.get('email') 
 114         view_count 
= int_or_none(video_data
.get('viewsCount') or video_data
.get('views_count')) 
 116         acc_id 
= meta_data
.get('accId') 
 117         item_id 
= meta_data
.get('itemId') 
 118         content_id 
= '%s_%s' % (acc_id
, item_id
) if acc_id 
and item_id 
else video_id
 
 120         thumbnail 
= meta_data
.get('poster') 
 121         duration 
= int_or_none(meta_data
.get('duration')) 
 122         timestamp 
= int_or_none(meta_data
.get('timestamp')) 
 127             'thumbnail': thumbnail
, 
 128             'timestamp': timestamp
, 
 129             'uploader': uploader
, 
 130             'uploader_id': uploader_id
, 
 131             'duration': duration
, 
 132             'view_count': view_count
,