Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hbo.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import re
   5
   6 from .common import InfoExtractor
   7 from ..utils import (
   8     xpath_text,
   9     xpath_element,
  10     int_or_none,
  11     parse_duration,
  12 )
  13
  14
  15 class HBOIE(InfoExtractor):
  16     _VALID_URL = r'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
  17     _TEST = {
  18         'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
  19         'md5': '1c33253f0c7782142c993c0ba62a8753',
  20         'info_dict': {
  21             'id': '1437839',
  22             'ext': 'mp4',
  23             'title': 'Ep. 64 Clip: Encryption',
  24         }
  25     }
  26     _FORMATS_INFO = {
  27         '1920': {
  28             'width': 1280,
  29             'height': 720,
  30         },
  31         '640': {
  32             'width': 768,
  33             'height': 432,
  34         },
  35         'highwifi': {
  36             'width': 640,
  37             'height': 360,
  38         },
  39         'high3g': {
  40             'width': 640,
  41             'height': 360,
  42         },
  43         'medwifi': {
  44             'width': 400,
  45             'height': 224,
  46         },
  47         'med3g': {
  48             'width': 400,
  49             'height': 224,
  50         },
  51     }
  52
  53     def _real_extract(self, url):
  54         video_id = self._match_id(url)
  55         video_data = self._download_xml(
  56             'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id, video_id)
  57         title = xpath_text(video_data, 'title', 'title', True)
  58
  59         formats = []
  60         for source in xpath_element(video_data, 'videos', 'sources', True):
  61             if source.tag == 'size':
  62                 path = xpath_text(source, './/path')
  63                 if not path:
  64                     continue
  65                 width = source.attrib.get('width')
  66                 format_info = self._FORMATS_INFO.get(width, {})
  67                 height = format_info.get('height')
  68                 fmt = {
  69                     'url': path,
  70                     'format_id': 'http%s' % ('-%dp' % height if height else ''),
  71                     'width': format_info.get('width'),
  72                     'height': height,
  73                 }
  74                 rtmp = re.search(r'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path)
  75                 if rtmp:
  76                     fmt.update({
  77                         'url': rtmp.group('url'),
  78                         'play_path': rtmp.group('playpath'),
  79                         'app': rtmp.group('app'),
  80                         'ext': 'flv',
  81                         'format_id': fmt['format_id'].replace('http', 'rtmp'),
  82                     })
  83                 formats.append(fmt)
  84             else:
  85                 video_url = source.text
  86                 if not video_url:
  87                     continue
  88                 if source.tag == 'tarball':
  89                     formats.extend(self._extract_m3u8_formats(
  90                         video_url.replace('.tar', '/base_index_w8.m3u8'),
  91                         video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))
  92                 else:
  93                     format_info = self._FORMATS_INFO.get(source.tag, {})
  94                     formats.append({
  95                         'format_id': 'http-%s' % source.tag,
  96                         'url': video_url,
  97                         'width': format_info.get('width'),
  98                         'height': format_info.get('height'),
  99                     })
 100         self._sort_formats(formats, ('width', 'height', 'tbr', 'format_id'))
 101
 102         thumbnails = []
 103         card_sizes = xpath_element(video_data, 'titleCardSizes')
 104         if card_sizes is not None:
 105             for size in card_sizes:
 106                 path = xpath_text(size, 'path')
 107                 if not path:
 108                     continue
 109                 width = int_or_none(size.get('width'))
 110                 thumbnails.append({
 111                     'id': width,
 112                     'url': path,
 113                     'width': width,
 114                 })
 115
 116         return {
 117             'id': video_id,
 118             'title': title,
 119             'duration': parse_duration(xpath_element(video_data, 'duration/tv14')),
 120             'formats': formats,
 121             'thumbnails': thumbnails,
 122         }