Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/cctv.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import float_or_none
   8
   9
  10 class CCTVIE(InfoExtractor):
  11     _VALID_URL = r'''(?x)https?://(?:.+?\.)?
  12         (?:
  13             cctv\.(?:com|cn)|
  14             cntv\.cn
  15         )/
  16         (?:
  17             video/[^/]+/(?P<id>[0-9a-f]{32})|
  18             \d{4}/\d{2}/\d{2}/(?P<display_id>VID[0-9A-Za-z]+)
  19         )'''
  20     _TESTS = [{
  21         'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
  22         'md5': '819c7b49fc3927d529fb4cd555621823',
  23         'info_dict': {
  24             'id': '454368eb19ad44a1925bf1eb96140a61',
  25             'ext': 'mp4',
  26             'title': 'Portrait of Real Current Life 09/03/2016 Modern Inventors Part 1',
  27         }
  28     }, {
  29         'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
  30         'only_matching': True,
  31     }, {
  32         'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
  33         'only_matching': True
  34     }]
  35
  36     def _real_extract(self, url):
  37         video_id, display_id = re.match(self._VALID_URL, url).groups()
  38         if not video_id:
  39             webpage = self._download_webpage(url, display_id)
  40             video_id = self._search_regex(
  41                 r'(?:fo\.addVariable\("videoCenterId",\s*|guid\s*=\s*)"([0-9a-f]{32})',
  42                 webpage, 'video_id')
  43         api_data = self._download_json(
  44             'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do?pid=' + video_id, video_id)
  45         m3u8_url = re.sub(r'maxbr=\d+&?', '', api_data['hls_url'])
  46
  47         return {
  48             'id': video_id,
  49             'title': api_data['title'],
  50             'formats': self._extract_m3u8_formats(
  51                 m3u8_url, video_id, 'mp4', 'm3u8_native', fatal=False),
  52             'duration': float_or_none(api_data.get('video', {}).get('totalLength')),
  53         }