Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/vlive.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 from .common import InfoExtractor
   5 from ..utils import (
   6     dict_get,
   7     float_or_none,
   8     int_or_none,
   9 )
  10 from ..compat import compat_urllib_parse
  11
  12
  13 class VLiveIE(InfoExtractor):
  14     IE_NAME = 'vlive'
  15     _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
  16     _TEST = {
  17         'url': 'http://www.vlive.tv/video/1326',
  18         'md5': 'cc7314812855ce56de70a06a27314983',
  19         'info_dict': {
  20             'id': '1326',
  21             'ext': 'mp4',
  22             'title': "[V] Girl's Day's Broadcast",
  23             'creator': "Girl's Day",
  24             'view_count': int,
  25         },
  26     }
  27
  28     def _real_extract(self, url):
  29         video_id = self._match_id(url)
  30
  31         webpage = self._download_webpage(
  32             'http://www.vlive.tv/video/%s' % video_id, video_id)
  33
  34         long_video_id = self._search_regex(
  35             r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"([^"]+)"',
  36             webpage, 'long video id')
  37
  38         key = self._search_regex(
  39             r'vlive\.tv\.video\.ajax\.request\.handler\.init\(\s*"[0-9]+"\s*,\s*"[^"]*"\s*,\s*"[^"]+"\s*,\s*"([^"]+)"',
  40             webpage, 'key')
  41
  42         title = self._og_search_title(webpage)
  43
  44         playinfo = self._download_json(
  45             'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
  46             % compat_urllib_parse.urlencode({
  47                 'videoId': long_video_id,
  48                 'key': key,
  49                 'ptc': 'http',
  50                 'doct': 'json',  # document type (xml or json)
  51                 'cpt': 'vtt',  # captions type (vtt or ttml)
  52             }), video_id)
  53
  54         formats = [{
  55             'url': vid['source'],
  56             'format_id': vid.get('encodingOption', {}).get('name'),
  57             'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
  58             'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
  59             'width': int_or_none(vid.get('encodingOption', {}).get('width')),
  60             'height': int_or_none(vid.get('encodingOption', {}).get('height')),
  61             'filesize': int_or_none(vid.get('size')),
  62         } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
  63         self._sort_formats(formats)
  64
  65         thumbnail = self._og_search_thumbnail(webpage)
  66         creator = self._html_search_regex(
  67             r'<div[^>]+class="info_area"[^>]*>\s*<strong[^>]+class="name"[^>]*>([^<]+)</strong>',
  68             webpage, 'creator', fatal=False)
  69
  70         view_count = int_or_none(playinfo.get('meta', {}).get('count'))
  71
  72         subtitles = {}
  73         for caption in playinfo.get('captions', {}).get('list', []):
  74             lang = dict_get(caption, ('language', 'locale', 'country', 'label'))
  75             if lang and caption.get('source'):
  76                 subtitles[lang] = [{
  77                     'ext': 'vtt',
  78                     'url': caption['source']}]
  79
  80         return {
  81             'id': video_id,
  82             'title': title,
  83             'creator': creator,
  84             'thumbnail': thumbnail,
  85             'view_count': view_count,
  86             'formats': formats,
  87             'subtitles': subtitles,
  88         }