Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/huajiao.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     parse_duration,
   7     parse_iso8601,
   8 )
   9
  10
  11 class HuajiaoIE(InfoExtractor):
  12     IE_DESC = '花椒直播'
  13     _VALID_URL = r'https?://(?:www\.)?huajiao\.com/l/(?P<id>[0-9]+)'
  14     _TEST = {
  15         'url': 'http://www.huajiao.com/l/38941232',
  16         'md5': 'd08bf9ac98787d24d1e4c0283f2d372d',
  17         'info_dict': {
  18             'id': '38941232',
  19             'ext': 'mp4',
  20             'title': '#新人求关注#',
  21             'description': 're:.*',
  22             'duration': 2424.0,
  23             'thumbnail': 're:^https?://.*\.jpg$',
  24             'timestamp': 1475866459,
  25             'upload_date': '20161007',
  26             'uploader': 'Penny_余姿昀',
  27             'uploader_id': '75206005',
  28         }
  29     }
  30
  31     def _real_extract(self, url):
  32         video_id = self._match_id(url)
  33         webpage = self._download_webpage(url, video_id)
  34
  35         feed_json = self._search_regex(
  36             r'var\s+feed\s*=\s*({.+})', webpage, 'feed json')
  37         feed = self._parse_json(feed_json, video_id)
  38
  39         description = self._html_search_meta(
  40             'description', webpage, 'description', fatal=False)
  41
  42         def get(section, field):
  43             return feed.get(section, {}).get(field)
  44
  45         return {
  46             'id': video_id,
  47             'title': feed['feed']['formated_title'],
  48             'description': description,
  49             'duration': parse_duration(get('feed', 'duration')),
  50             'thumbnail': get('feed', 'image'),
  51             'timestamp': parse_iso8601(feed.get('creatime'), ' '),
  52             'uploader': get('author', 'nickname'),
  53             'uploader_id': get('author', 'uid'),
  54             'formats': self._extract_m3u8_formats(
  55                 feed['feed']['m3u8'], video_id, 'mp4', 'm3u8_native'),
  56         }