]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/rice.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_parse_qs
 
  17 class RICEIE(InfoExtractor
): 
  18     _VALID_URL 
= r
'https?://mediahub\.rice\.edu/app/[Pp]ortal/video\.aspx\?(?P<query>.+)' 
  20         'url': 'https://mediahub.rice.edu/app/Portal/video.aspx?PortalID=25ffd62c-3d01-4b29-8c70-7c94270efb3e&DestinationID=66bc9434-03bd-4725-b47e-c659d8d809db&ContentID=YEWIvbhb40aqdjMD1ALSqw', 
  21         'md5': '9b83b4a2eead4912dc3b7fac7c449b6a', 
  23             'id': 'YEWIvbhb40aqdjMD1ALSqw', 
  25             'title': 'Active Learning in Archeology', 
  26             'upload_date': '20140616', 
  27             'timestamp': 1402926346, 
  30     _NS 
= 'http://schemas.datacontract.org/2004/07/ensembleVideo.Data.Service.Contracts.Models.Player.Config' 
  32     def _real_extract(self
, url
): 
  33         qs 
= compat_parse_qs(re
.match(self
._VALID
_URL
, url
).group('query')) 
  34         if not qs
.get('PortalID') or not qs
.get('DestinationID') or not qs
.get('ContentID'): 
  35             raise ExtractorError('Invalid URL', expected
=True) 
  37         portal_id 
= qs
['PortalID'][0] 
  38         playlist_id 
= qs
['DestinationID'][0] 
  39         content_id 
= qs
['ContentID'][0] 
  41         content_data 
= self
._download
_xml
('https://mediahub.rice.edu/api/portal/GetContentTitle', content_id
, query
={ 
  42             'portalId': portal_id
, 
  43             'playlistId': playlist_id
, 
  44             'contentId': content_id
 
  46         metadata 
= xpath_element(content_data
, './/metaData', fatal
=True) 
  47         title 
= xpath_text(metadata
, 'primaryTitle', fatal
=True) 
  48         encodings 
= xpath_element(content_data
, './/encodings', fatal
=True) 
  49         player_data 
= self
._download
_xml
('https://mediahub.rice.edu/api/player/GetPlayerConfig', content_id
, query
={ 
  50             'temporaryLinkId': xpath_text(encodings
, 'temporaryLinkId', fatal
=True), 
  51             'contentId': content_id
, 
  55         dimensions 
= xpath_text(encodings
, 'dimensions') 
  57             wh 
= dimensions
.split('x') 
  60                     'width': int_or_none(wh
[0]), 
  61                     'height': int_or_none(wh
[1]), 
  65         rtsp_path 
= xpath_text(player_data
, self
._xpath
_ns
('RtspPath', self
._NS
)) 
  71             fmt
.update(common_fmt
) 
  73         for source 
in player_data
.findall(self
._xpath
_ns
('.//Source', self
._NS
)): 
  74             video_url 
= xpath_text(source
, self
._xpath
_ns
('File', self
._NS
)) 
  77             if '.m3u8' in video_url
: 
  78                 formats
.extend(self
._extract
_m
3u8_formats
(video_url
, content_id
, 'mp4', 'm3u8_native', m3u8_id
='hls', fatal
=False)) 
  82                     'format_id': video_url
.split(':')[0], 
  84                 fmt
.update(common_fmt
) 
  85                 rtmp 
= re
.search(r
'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>mp4:.+)$', video_url
) 
  88                         'url': rtmp
.group('url'), 
  89                         'play_path': rtmp
.group('playpath'), 
  90                         'app': rtmp
.group('app'), 
  94         self
._sort
_formats
(formats
) 
  97         for content_asset 
in content_data
.findall('.//contentAssets'): 
  98             asset_type 
= xpath_text(content_asset
, 'type') 
  99             if asset_type 
== 'image': 
 100                 image_url 
= xpath_text(content_asset
, 'httpPath') 
 104                     'id': xpath_text(content_asset
, 'ID'), 
 111             'description': xpath_text(metadata
, 'abstract'), 
 112             'duration': int_or_none(xpath_text(metadata
, 'duration')), 
 113             'timestamp': parse_iso8601(xpath_text(metadata
, 'dateUpdated')), 
 114             'thumbnails': thumbnails
,