]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/videomore.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_str
 
  20 class VideomoreIE(InfoExtractor
): 
  23                     videomore:(?P<sid>\d+)$| 
  24                     https?://(?:player\.)?videomore\.ru/ 
  33                         (?:[/?#&]|\.(?:xml|json)|$) 
  36         'url': 'http://videomore.ru/kino_v_detalayah/5_sezon/367617', 
  37         'md5': '44455a346edc0d509ac5b5a5b531dc35', 
  41             'title': 'Кино в деталях 5 сезон В гостях Алексей Чумаков и Юлия Ковальчук', 
  42             'series': 'Кино в деталях', 
  43             'episode': 'В гостях Алексей Чумаков и Юлия Ковальчук', 
  44             'thumbnail': r
're:^https?://.*\.jpg', 
  51         'url': 'http://videomore.ru/embed/259974', 
  55             'title': 'Молодежка 2 сезон 40 серия', 
  56             'series': 'Молодежка', 
  57             'episode': '40 серия', 
  58             'thumbnail': r
're:^https?://.*\.jpg', 
  65             'skip_download': True, 
  68         'url': 'http://videomore.ru/molodezhka/sezon_promo/341073', 
  72             'title': 'Промо Команда проиграла из-за Бакина?', 
  73             'episode': 'Команда проиграла из-за Бакина?', 
  74             'thumbnail': r
're:^https?://.*\.jpg', 
  80             'skip_download': True, 
  83         'url': 'http://videomore.ru/elki_3?track_id=364623', 
  84         'only_matching': True, 
  86         'url': 'http://videomore.ru/embed/364623', 
  87         'only_matching': True, 
  89         'url': 'http://videomore.ru/video/tracks/364623.xml', 
  90         'only_matching': True, 
  92         'url': 'http://videomore.ru/video/tracks/364623.json', 
  93         'only_matching': True, 
  95         'url': 'http://videomore.ru/video/tracks/158031/quotes/33248', 
  96         'only_matching': True, 
  98         'url': 'videomore:367617', 
  99         'only_matching': True, 
 101         'url': 'https://player.videomore.ru/?partner_id=97&track_id=736234&autoplay=0&userToken=', 
 102         'only_matching': True, 
 106     def _extract_url(webpage
): 
 108             r
'<object[^>]+data=(["\'])https?
://videomore\
.ru
/player\
.swf
\?.*config
=(?P
<url
>https?
://videomore\
.ru
/(?
:[^
/]+/)+\d
+\
.xml
).*\
1', 
 112                 r'<iframe
[^
>]+src
=([\'"])(?P<url>https?://videomore\.ru/embed/\d+)', 
 116             return mobj.group('url') 
 118     def _real_extract(self, url): 
 119         mobj = re.match(self._VALID_URL, url) 
 120         video_id = mobj.group('sid') or mobj.group('id') 
 122         video = self._download_xml( 
 123             'http://videomore.ru/video/tracks/%s.xml' % video_id, 
 124             video_id, 'Downloading video XML') 
 126         item = xpath_element(video, './/playlist/item', fatal=True) 
 129             item, ('./title', './episode_name'), 'title', fatal=True) 
 131         video_url = xpath_text(item, './video_url', 'video url', fatal=True) 
 132         formats = self._extract_f4m_formats(video_url, video_id, f4m_id='hds') 
 133         self._sort_formats(formats) 
 135         thumbnail = xpath_text(item, './thumbnail_url') 
 136         duration = int_or_none(xpath_text(item, './duration')) 
 137         view_count = int_or_none(xpath_text(item, './views')) 
 138         comment_count = int_or_none(xpath_text(item, './count_comments')) 
 139         age_limit = int_or_none(xpath_text(item, './min_age')) 
 141         series = xpath_text(item, './project_name') 
 142         episode = xpath_text(item, './episode_name') 
 149             'thumbnail': thumbnail, 
 150             'duration': duration, 
 151             'view_count': view_count, 
 152             'comment_count': comment_count, 
 153             'age_limit': age_limit, 
 158 class VideomoreVideoIE(InfoExtractor): 
 159     IE_NAME = 'videomore:video' 
 160     _VALID_URL = r'https?://videomore\.ru/(?:(?:[^/]+/){2})?(?P<id>[^/?#&]+)(?:/*|[?#&].*?)$' 
 162         # single video with og:video:iframe 
 163         'url': 'http://videomore.ru/elki_3', 
 169             'thumbnail': r're:^https?://.*\.jpg', 
 175             'skip_download': True, 
 178         # season single series with og:video:iframe 
 179         'url': 'http://videomore.ru/poslednii_ment/1_sezon/14_seriya', 
 180         'only_matching': True, 
 182         'url': 'http://videomore.ru/sejchas_v_seti/serii_221-240/226_vypusk', 
 183         'only_matching': True, 
 185         # single video without og:video:iframe 
 186         'url': 'http://videomore.ru/marin_i_ego_druzya', 
 190             'title': '1 серия. Здравствуй, Аквавилль!', 
 191             'description': 'md5:c6003179538b5d353e7bcd5b1372b2d7', 
 192             'thumbnail': r're:^https?://.*\.jpg', 
 198             'skip_download': True, 
 201         'url': 'https://videomore.ru/molodezhka/6_sezon/29_seriya?utm_so', 
 202         'only_matching': True, 
 206     def suitable(cls, url): 
 207         return False if VideomoreIE.suitable(url) else super(VideomoreVideoIE, cls).suitable(url) 
 209     def _real_extract(self, url): 
 210         display_id = self._match_id(url) 
 212         webpage = self._download_webpage(url, display_id) 
 214         video_url = self._og_search_property( 
 215             'video:iframe', webpage, 'video url', default=None) 
 218             video_id = self._search_regex( 
 219                 (r'config\s*:\s*["\']https?
://videomore\
.ru
/video
/tracks
/(\d
+)\
.xml
', 
 220                  r'track
-id=["\'](\d+)', 
 221                  r'xcnt_product_id\s*=\s*(\d+)'), webpage, 'video id') 
 222             video_url = 'videomore:%s' % video_id 
 226         return self.url_result( 
 227             video_url, ie=VideomoreIE.ie_key(), video_id=video_id) 
 230 class VideomoreSeasonIE(InfoExtractor): 
 231     IE_NAME = 'videomore:season' 
 232     _VALID_URL = r'https?://videomore\.ru/(?!embed)(?P<id>[^/]+/[^/?#&]+)(?:/*|[?#&].*?)$' 
 234         'url': 'http://videomore.ru/molodezhka/sezon_promo', 
 236             'id': 'molodezhka/sezon_promo', 
 237             'title': 'Молодежка Промо', 
 239         'playlist_mincount': 12, 
 241         'url': 'http://videomore.ru/molodezhka/sezon_promo?utm_so', 
 242         'only_matching': True, 
 246     def suitable(cls, url): 
 247         return (False if (VideomoreIE.suitable(url) or VideomoreVideoIE.suitable(url)) 
 248                 else super(VideomoreSeasonIE, cls).suitable(url)) 
 250     def _real_extract(self, url): 
 251         display_id = self._match_id(url) 
 253         webpage = self._download_webpage(url, display_id) 
 255         title = self._og_search_title(webpage) 
 257         data = self._parse_json( 
 258             self._html_search_regex( 
 259                 r'\bclass=["\']seasons
-tracks
["\'][^>]+\bdata-custom-data=(["\'])(?P
<value
>{.+?
})\
1', 
 260                 webpage, 'data
', default='{}', group='value
'), 
 261             display_id, fatal=False) 
 266             episodes = data.get('episodes
') 
 267             if isinstance(episodes, list): 
 269                     if not isinstance(ep, dict): 
 271                     ep_id = int_or_none(ep.get('id')) 
 272                     ep_url = url_or_none(ep.get('url
')) 
 275                             'url
': 'videomore
:%s' % ep_id, 
 276                             'id': compat_str(ep_id), 
 284                         'ie_key
': VideomoreIE.ie_key(), 
 285                         'title
': str_or_none(ep.get('title
')), 
 286                         'thumbnail
': url_or_none(ep.get('image
')), 
 287                         'duration
': parse_duration(ep.get('duration
')), 
 288                         'episode_number
': int_or_none(ep.get('number
')), 
 289                         'upload_date
': unified_strdate(ep.get('date
')), 
 296                     'videomore
:%s' % video_id, ie=VideomoreIE.ie_key(), 
 298                 for video_id in orderedSet(re.findall( 
 299                     r':(?
:id|key
)=["\'](\d+)["\']', webpage))] 
 303                 self.url_result(item) for item in re.findall( 
 304                     r'<a
[^
>]+href
="((?:https?:)?//videomore\.ru/%s/[^/]+)"[^
>]+class="widget-item-desc"' 
 305                     % display_id, webpage)] 
 307         return self.playlist_result(entries, display_id, title)