Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hitrecord.py

   1 from __future__ import unicode_literals
   2
   3 from .common import InfoExtractor
   4 from ..compat import compat_str
   5 from ..utils import (
   6     clean_html,
   7     float_or_none,
   8     int_or_none,
   9     try_get,
  10 )
  11
  12
  13 class HitRecordIE(InfoExtractor):
  14     _VALID_URL = r'https?://(?:www\.)?hitrecord\.org/records/(?P<id>\d+)'
  15     _TEST = {
  16         'url': 'https://hitrecord.org/records/2954362',
  17         'md5': 'fe1cdc2023bce0bbb95c39c57426aa71',
  18         'info_dict': {
  19             'id': '2954362',
  20             'ext': 'mp4',
  21             'title': 'A Very Different World (HITRECORD x ACLU)',
  22             'description': 'md5:e62defaffab5075a5277736bead95a3d',
  23             'duration': 139.327,
  24             'timestamp': 1471557582,
  25             'upload_date': '20160818',
  26             'uploader': 'Zuzi.C12',
  27             'uploader_id': '362811',
  28             'view_count': int,
  29             'like_count': int,
  30             'comment_count': int,
  31             'tags': list,
  32         }
  33     }
  34
  35     def _real_extract(self, url):
  36         video_id = self._match_id(url)
  37
  38         video = self._download_json(
  39             'https://hitrecord.org/api/web/records/%s' % video_id, video_id)
  40
  41         title = video['title']
  42         video_url = video['source_url']['mp4_url']
  43
  44         tags = None
  45         tags_list = try_get(video, lambda x: x['tags'], list)
  46         if tags_list:
  47             tags = [
  48                 t['text']
  49                 for t in tags_list
  50                 if isinstance(t, dict) and t.get('text')
  51                 and isinstance(t['text'], compat_str)]
  52
  53         return {
  54             'id': video_id,
  55             'url': video_url,
  56             'title': title,
  57             'description': clean_html(video.get('body')),
  58             'duration': float_or_none(video.get('duration'), 1000),
  59             'timestamp': int_or_none(video.get('created_at_i')),
  60             'uploader': try_get(
  61                 video, lambda x: x['user']['username'], compat_str),
  62             'uploader_id': try_get(
  63                 video, lambda x: compat_str(x['user']['id'])),
  64             'view_count': int_or_none(video.get('total_views_count')),
  65             'like_count': int_or_none(video.get('hearts_count')),
  66             'comment_count': int_or_none(video.get('comments_count')),
  67             'tags': tags,
  68         }