]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rice.py
2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..compat
import compat_parse_qs
17 class RICEIE(InfoExtractor
):
18 _VALID_URL
= r
'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)'
20 'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw',
21 'md5': '9b83b4a2eead4912dc3b7fac7c449b6a',
23 'id': 'YEWIvbhb40aqdjMD1ALSqw',
25 'title': 'Active Learning in Archeology',
26 'upload_date': '20140616',
27 'timestamp': 1402926346,
30 _NS
= 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config'
32 def _real_extract(self
, url
):
33 qs
= compat_parse_qs(re
.match(self
._VALID
_URL
, url
).group('query'))
34 if not qs
.get('PortalID') or not qs
.get('DestinationID') or not qs
.get('ContentID'):
35 raise ExtractorError('Invalid URL', expected
=True)
37 portal_id
= qs
['PortalID'][0]
38 playlist_id
= qs
['DestinationID'][0]
39 content_id
= qs
['ContentID'][0]
41 content_data
= self
._download
_xml
('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id
, query
={
42 'portalId': portal_id
,
43 'playlistId': playlist_id
,
44 'contentId': content_id
46 metadata
= xpath_element(content_data
, './/metaData', fatal
=True)
47 title
= xpath_text(metadata
, 'primaryTitle', fatal
=True)
48 encodings
= xpath_element(content_data
, './/encodings', fatal
=True)
49 player_data
= self
._download
_xml
('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id
, query
={
50 'temporaryLinkId': xpath_text(encodings
, 'temporaryLinkId', fatal
=True),
51 'contentId': content_id
,
55 dimensions
= xpath_text(encodings
, 'dimensions')
57 wh
= dimensions
.split('x')
60 'width': int_or_none(wh
[0]),
61 'height': int_or_none(wh
[1]),
65 rtsp_path
= xpath_text(player_data
, self
._xpath
_ns
('RtspPath', self
._NS
))
71 fmt
.update(common_fmt
)
73 for source
in player_data
.findall(self
._xpath
_ns
('.//Source', self
._NS
)):
74 video_url
= xpath_text(source
, self
._xpath
_ns
('File', self
._NS
))
77 if '.m3u8' in video_url
:
78 formats
.extend(self
._extract
_m
3u8_formats
(video_url
, content_id
, 'mp4', 'm3u8_native', m3u8_id
='hls', fatal
=False))
82 'format_id': video_url
.split(':')[0],
84 fmt
.update(common_fmt
)
85 rtmp
= re
.search(r
'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url
)
88 'url': rtmp
.group('url'),
89 'play_path': rtmp
.group('playpath'),
90 'app': rtmp
.group('app'),
94 self
._sort
_formats
(formats
)
97 for content_asset
in content_data
.findall('.//contentAssets'):
98 asset_type
= xpath_text(content_asset
, 'type')
99 if asset_type
== 'image':
100 image_url
= xpath_text(content_asset
, 'httpPath')
104 'id': xpath_text(content_asset
, 'ID'),
111 'description': xpath_text(metadata
, 'abstract'),
112 'duration': int_or_none(xpath_text(metadata
, 'duration')),
113 'timestamp': parse_iso8601(xpath_text(metadata
, 'dateUpdated')),
114 'thumbnails': thumbnails
,