]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hbo.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
  15 class HBOBaseIE(InfoExtractor
): 
  43     def _extract_from_id(self
, video_id
): 
  44         video_data 
= self
._download
_xml
( 
  45             'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id
, video_id
) 
  46         title 
= xpath_text(video_data
, 'title', 'title', True) 
  49         for source 
in xpath_element(video_data
, 'videos', 'sources', True): 
  50             if source
.tag 
== 'size': 
  51                 path 
= xpath_text(source
, './/path') 
  54                 width 
= source
.attrib
.get('width') 
  55                 format_info 
= self
._FORMATS
_INFO
.get(width
, {}) 
  56                 height 
= format_info
.get('height') 
  59                     'format_id': 'http%s' % ('-%dp' % height 
if height 
else ''), 
  60                     'width': format_info
.get('width'), 
  63                 rtmp 
= re
.search(r
'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path
) 
  66                         'url': rtmp
.group('url'), 
  67                         'play_path': rtmp
.group('playpath'), 
  68                         'app': rtmp
.group('app'), 
  70                         'format_id': fmt
['format_id'].replace('http', 'rtmp'), 
  74                 video_url 
= source
.text
 
  77                 if source
.tag 
== 'tarball': 
  78                     formats
.extend(self
._extract
_m
3u8_formats
( 
  79                         video_url
.replace('.tar', '/base_index_w8.m3u8'), 
  80                         video_id
, 'mp4', 'm3u8_native', m3u8_id
='hls', fatal
=False)) 
  82                     format_info 
= self
._FORMATS
_INFO
.get(source
.tag
, {}) 
  84                         'format_id': 'http-%s' % source
.tag
, 
  86                         'width': format_info
.get('width'), 
  87                         'height': format_info
.get('height'), 
  89         self
._sort
_formats
(formats
, ('width', 'height', 'tbr', 'format_id')) 
  92         card_sizes 
= xpath_element(video_data
, 'titleCardSizes') 
  93         if card_sizes 
is not None: 
  94             for size 
in card_sizes
: 
  95                 path 
= xpath_text(size
, 'path') 
  98                 width 
= int_or_none(size
.get('width')) 
 108             'duration': parse_duration(xpath_text(video_data
, 'duration/tv14')), 
 110             'thumbnails': thumbnails
, 
 114 class HBOIE(HBOBaseIE
): 
 115     _VALID_URL 
= r
'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)' 
 117         'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839', 
 118         'md5': '1c33253f0c7782142c993c0ba62a8753', 
 122             'title': 'Ep. 64 Clip: Encryption', 
 123             'thumbnail': 're:https?://.*\.jpg$', 
 128     def _real_extract(self
, url
): 
 129         video_id 
= self
._match
_id
(url
) 
 130         return self
._extract
_from
_id
(video_id
) 
 133 class HBOEpisodeIE(HBOBaseIE
): 
 134     _VALID_URL 
= r
'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html' 
 137         'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true', 
 138         'md5': '689132b253cc0ab7434237fc3a293210', 
 141             'display_id': 'ep-52-inside-the-episode', 
 143             'title': 'Ep. 52: Inside the Episode', 
 144             'thumbnail': 're:https?://.*\.jpg$', 
 148         'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true', 
 149         'only_matching': True, 
 152     def _real_extract(self
, url
): 
 153         display_id 
= self
._match
_id
(url
) 
 155         webpage 
= self
._download
_webpage
(url
, display_id
) 
 157         video_id 
= self
._search
_regex
( 
 158             r
'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P
<video_id
>\d
+)(?P
=q2
)', 
 159             webpage, 'video ID
', group='video_id
') 
 161         info_dict = self._extract_from_id(video_id) 
 162         info_dict['display_id
'] = display_id