]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/hbo.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
15 class HBOBaseIE(InfoExtractor
):
43 def _extract_from_id(self
, video_id
):
44 video_data
= self
._download
_xml
(
45 'http://render.lv3.hbo.com/data/content/global/videos/data/%s.xml' % video_id
, video_id
)
46 title
= xpath_text(video_data
, 'title', 'title', True)
49 for source
in xpath_element(video_data
, 'videos', 'sources', True):
50 if source
.tag
== 'size':
51 path
= xpath_text(source
, './/path')
54 width
= source
.attrib
.get('width')
55 format_info
= self
._FORMATS
_INFO
.get(width
, {})
56 height
= format_info
.get('height')
59 'format_id': 'http%s' % ('-%dp' % height
if height
else ''),
60 'width': format_info
.get('width'),
63 rtmp
= re
.search(r
'^(?P<url>rtmpe?://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', path
)
66 'url': rtmp
.group('url'),
67 'play_path': rtmp
.group('playpath'),
68 'app': rtmp
.group('app'),
70 'format_id': fmt
['format_id'].replace('http', 'rtmp'),
74 video_url
= source
.text
77 if source
.tag
== 'tarball':
78 formats
.extend(self
._extract
_m
3u8_formats
(
79 video_url
.replace('.tar', '/base_index_w8.m3u8'),
80 video_id
, 'mp4', 'm3u8_native', m3u8_id
='hls', fatal
=False))
82 format_info
= self
._FORMATS
_INFO
.get(source
.tag
, {})
84 'format_id': 'http-%s' % source
.tag
,
86 'width': format_info
.get('width'),
87 'height': format_info
.get('height'),
89 self
._sort
_formats
(formats
, ('width', 'height', 'tbr', 'format_id'))
92 card_sizes
= xpath_element(video_data
, 'titleCardSizes')
93 if card_sizes
is not None:
94 for size
in card_sizes
:
95 path
= xpath_text(size
, 'path')
98 width
= int_or_none(size
.get('width'))
108 'duration': parse_duration(xpath_text(video_data
, 'duration/tv14')),
110 'thumbnails': thumbnails
,
114 class HBOIE(HBOBaseIE
):
115 _VALID_URL
= r
'https?://(?:www\.)?hbo\.com/video/video\.html\?.*vid=(?P<id>[0-9]+)'
117 'url': 'http://www.hbo.com/video/video.html?autoplay=true&g=u&vid=1437839',
118 'md5': '1c33253f0c7782142c993c0ba62a8753',
122 'title': 'Ep. 64 Clip: Encryption',
123 'thumbnail': 're:https?://.*\.jpg$',
128 def _real_extract(self
, url
):
129 video_id
= self
._match
_id
(url
)
130 return self
._extract
_from
_id
(video_id
)
133 class HBOEpisodeIE(HBOBaseIE
):
134 _VALID_URL
= r
'https?://(?:www\.)?hbo\.com/(?!video)([^/]+/)+video/(?P<id>[0-9a-z-]+)\.html'
137 'url': 'http://www.hbo.com/girls/episodes/5/52-i-love-you-baby/video/ep-52-inside-the-episode.html?autoplay=true',
138 'md5': '689132b253cc0ab7434237fc3a293210',
141 'display_id': 'ep-52-inside-the-episode',
143 'title': 'Ep. 52: Inside the Episode',
144 'thumbnail': 're:https?://.*\.jpg$',
148 'url': 'http://www.hbo.com/game-of-thrones/about/video/season-5-invitation-to-the-set.html?autoplay=true',
149 'only_matching': True,
152 def _real_extract(self
, url
):
153 display_id
= self
._match
_id
(url
)
155 webpage
= self
._download
_webpage
(url
, display_id
)
157 video_id
= self
._search
_regex
(
158 r
'(?P<q1>[\'"])videoId(?P=q1)\s*:\s*(?P<q2>[\'"])(?P
<video_id
>\d
+)(?P
=q2
)',
159 webpage, 'video ID
', group='video_id
')
161 info_dict = self._extract_from_id(video_id)
162 info_dict['display_id
'] = display_id