Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/twentyfourvideo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
)
   9
  10
  11 class TwentyFourVideoIE(InfoExtractor):
  12     IE_NAME = '24video'
  13     _VALID_URL = r'https?://(?:www\.)?24video\.net/(?:video/(?:view|xml)/|player/new24_play\.swf\?id=)(?P<id>\d+)'
  14
  15     _TESTS = [
  16         {
  17             'url': 'http://www.24video.net/video/view/1044982',
  18             'md5': 'd041af8b5b4246ea466226a0d6693345',
  19             'info_dict': {
  20                 'id': '1044982',
  21                 'ext': 'mp4',
  22                 'title': 'Эротика каменного века',
  23                 'description': 'Как смотрели порно в каменном веке.',
  24                 'thumbnail': 're:^https?://.*\.jpg$',
  25                 'uploader': 'SUPERTELO',
  26                 'duration': 31,
  27                 'timestamp': 1275937857,
  28                 'upload_date': '20100607',
  29                 'age_limit': 18,
  30                 'like_count': int,
  31                 'dislike_count': int,
  32             },
  33         },
  34         {
  35             'url': 'http://www.24video.net/player/new24_play.swf?id=1044982',
  36             'only_matching': True,
  37         }
  38     ]
  39
  40     def _real_extract(self, url):
  41         video_id = self._match_id(url)
  42
  43         webpage = self._download_webpage(
  44             'http://www.24video.net/video/view/%s' % video_id, video_id)
  45
  46         title = self._og_search_title(webpage)
  47         description = self._html_search_regex(
  48             r'<span itemprop="description">([^<]+)</span>', webpage, 'description', fatal=False)
  49         thumbnail = self._og_search_thumbnail(webpage)
  50         duration = int_or_none(self._og_search_property(
  51             'duration', webpage, 'duration', fatal=False))
  52         timestamp = parse_iso8601(self._search_regex(
  53             r'<time id="video-timeago" datetime="([^"]+)" itemprop="uploadDate">',
  54             webpage, 'upload date'))
  55
  56         uploader = self._html_search_regex(
  57             r'class="video-uploaded"[^>]*>\s*<a href="/jsecUser/movies/[^"]+"[^>]*>([^<]+)</a>',
  58             webpage, 'uploader', fatal=False)
  59
  60         view_count = int_or_none(self._html_search_regex(
  61             r'<span class="video-views">(\d+) просмотр',
  62             webpage, 'view count', fatal=False))
  63         comment_count = int_or_none(self._html_search_regex(
  64             r'<div class="comments-title" id="comments-count">(\d+) комментари',
  65             webpage, 'comment count', fatal=False))
  66
  67         formats = []
  68
  69         pc_video = self._download_xml(
  70             'http://www.24video.net/video/xml/%s?mode=play' % video_id,
  71             video_id, 'Downloading PC video URL').find('.//video')
  72
  73         formats.append({
  74             'url': pc_video.attrib['url'],
  75             'format_id': 'pc',
  76             'quality': 1,
  77         })
  78
  79         like_count = int_or_none(pc_video.get('ratingPlus'))
  80         dislike_count = int_or_none(pc_video.get('ratingMinus'))
  81         age_limit = 18 if pc_video.get('adult') == 'true' else 0
  82
  83         mobile_video = self._download_xml(
  84             'http://www.24video.net/video/xml/%s' % video_id,
  85             video_id, 'Downloading mobile video URL').find('.//video')
  86
  87         formats.append({
  88             'url': mobile_video.attrib['url'],
  89             'format_id': 'mobile',
  90             'quality': 0,
  91         })
  92
  93         self._sort_formats(formats)
  94
  95         return {
  96             'id': video_id,
  97             'title': title,
  98             'description': description,
  99             'thumbnail': thumbnail,
 100             'uploader': uploader,
 101             'duration': duration,
 102             'timestamp': timestamp,
 103             'view_count': view_count,
 104             'comment_count': comment_count,
 105             'like_count': like_count,
 106             'dislike_count': dislike_count,
 107             'age_limit': age_limit,
 108             'formats': formats,
 109         }