]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mediasite.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   7  from  . common 
import  InfoExtractor
 
  22  class  MediasiteIE ( InfoExtractor
):  
  23      _VALID_URL 
=  r
'(?xi)https?://[^/]+/Mediasite/Play/(?P<id>[0-9a-f]{32,34})(?P<query>\?[^#]+|)'  
  26              'url' :  'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d' ,  
  28                  'id' :  '2db6c271681e4f199af3c60d1f82869b1d' ,  
  30                  'title' :  'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles' ,  
  31                  'description' :  'Sir Andrew Wiles: “Equations in arithmetic” \\ n \\ nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers \\ u0027.' ,  
  32                  'timestamp' :  1474268400.0 ,  
  33                  'upload_date' :  '20160919' ,  
  37              'url' :  'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb' ,  
  39                  'id' :  '90bb363295d945d6b548c867d01181361d' ,  
  41                  'upload_date' :  '20150429' ,  
  42                  'title' :  '5) IT-forum 2015-Dag 1  - Dungbeetle -  How and why Rain created a tiny bug tracker for Unity' ,  
  43                  'timestamp' :  1430311380.0 ,  
  47              'url' :  'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d' ,  
  48              'md5' :  '481fda1c11f67588c0d9d8fbdced4e39' ,  
  50                  'id' :  '585a43626e544bdd97aeb71a0ec907a01d' ,  
  52                  'title' :  'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.' ,  
  54                  'thumbnail' :  r
're:^https?://.*\.jpg(?:\?.*)?$' ,  
  56                  'timestamp' :  1413309600 ,  
  57                  'upload_date' :  '20141014' ,  
  61              'url' :  'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4' ,  
  62              'md5' :  'ef1fdded95bdf19b12c5999949419c92' ,  
  64                  'id' :  '86a9ea9f53e149079fbdb4202b521ed21d' ,  
  66                  'title' :  '64ste Vakantiecursus: Afvalwater' ,  
  67                  'description' :  'md5:7fd774865cc69d972f542b157c328305' ,  
  68                  'thumbnail' :  r
're:^https?://.*\.jpg(?:\?.*?)?$' ,  
  70                  'timestamp' :  1326446400 ,  
  71                  'upload_date' :  '20120113' ,  
  75              'url' :  'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d' ,  
  76              'md5' :  '9422edc9b9a60151727e4b6d8bef393d' ,  
  78                  'id' :  '24aace4429fc450fb5b38cdbf424a66e1d' ,  
  80                  'title' :  'Xyce Software Training - Section 1' ,  
  81                  'description' :  r
're:(?s)SAND Number: SAND 2013-7800.{200,}' ,  
  82                  'upload_date' :  '20120409' ,  
  83                  'timestamp' :  1333983600 ,  
  89      # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])  
  91          0 :  'video1' ,   # the main video  
  94          4 :  'video2' ,   # screencast?  
  99      def  _extract_urls ( webpage
):  
 101              unescapeHTML ( mobj
. group ( 'url' ))  
 102              for  mobj 
in  re
. finditer (  
 103                  r
'(?xi)<iframe\b[^>]+\bsrc=(["\' ])( ?P
< url
>( ?
:( ?
: https?
:) ?
//[ ^
/]+) ?
/ Mediasite
/ Play
/[ 0 - 9 a
- f
]{ 32 , 34 }( ?
: \? .* ?
) ?
) \
1 ',  
 106      def _real_extract(self, url):  
 107          url, data = unsmuggle_url(url, {})  
 108          mobj = re.match(self._VALID_URL, url)  
 109          resource_id = mobj.group(' id ')  
 110          query = mobj.group(' query
')  
 112          webpage, urlh = self._download_webpage_handle(url, resource_id)  # XXX: add UrlReferrer?  
 113          redirect_url = compat_str(urlh.geturl())  
 115          # XXX: might have also extracted UrlReferrer and QueryString from the html  
 116          service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(  
 117              r' < div
[ ^
>]+ \b id
=[ " \' ]ServicePath[^>]+>(.+?)</div>', webpage, resource_id,  
 118              default='/Mediasite/PlayerService/PlayerService.svc/json'))  
 120          player_options = self._download_json(  
 121              ' %s /GetPlayerOptions' % service_path, resource_id,  
 123                  'Content-type': 'application/json; charset=utf-8',  
 124                  'X-Requested-With': 'XMLHttpRequest',  
 127                  'getPlayerOptionsRequest': {  
 128                      'ResourceId': resource_id,  
 129                      'QueryString': query,  
 130                      'UrlReferrer': data.get('UrlReferrer', ''),  
 131                      'UseScreenReader': False,  
 133              }).encode('utf-8'))['d']  
 135          presentation = player_options['Presentation']  
 136          title = presentation['Title']  
 138          if presentation is None:  
 139              raise ExtractorError(  
 140                  'Mediasite says:  %s ' % player_options['PlayerPresentationStatusMessage'],  
 145          for snum, Stream in enumerate(presentation['Streams']):  
 146              stream_type = Stream.get('StreamType')  
 147              if stream_type is None:  
 150              video_urls = Stream.get('VideoUrls')  
 151              if not isinstance(video_urls, list):  
 154              stream_id = self._STREAM_TYPES.get(  
 155                  stream_type, 'type %u ' % stream_type)  
 158              for unum, VideoUrl in enumerate(video_urls):  
 159                  video_url = VideoUrl.get('Location')  
 160                  if not video_url or not isinstance(video_url, compat_str):  
 162                  # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS  
 164                  media_type = VideoUrl.get('MediaType')  
 165                  if media_type == 'SS':  
 166                      stream_formats.extend(self._extract_ism_formats(  
 167                          video_url, resource_id,  
 168                          ism_id=' %s-%u . %u ' % (stream_id, snum, unum),  
 170                  elif media_type == 'Dash':  
 171                      stream_formats.extend(self._extract_mpd_formats(  
 172                          video_url, resource_id,  
 173                          mpd_id=' %s-%u . %u ' % (stream_id, snum, unum),  
 176                      stream_formats.append({  
 177                          'format_id': ' %s-%u . %u ' % (stream_id, snum, unum),  
 179                          'ext': mimetype2ext(VideoUrl.get('MimeType')),  
 182              # TODO: if Stream['HasSlideContent']:  
 183              # synthesise an MJPEG video stream ' %s-%u .slides' % (stream_type, snum)  
 184              # from Stream['Slides']  
 185              # this will require writing a custom downloader...  
 187              # disprefer 'secondary' streams  
 189                  for fmt in stream_formats:  
 190                      fmt['preference'] = -1  
 192              thumbnail_url = Stream.get('ThumbnailUrl')  
 195                      'id': ' %s-%u ' % (stream_id, snum),  
 196                      'url': urljoin(redirect_url, thumbnail_url),  
 197                      'preference': -1 if stream_type != 0 else 0,  
 199              formats.extend(stream_formats)  
 201          self._sort_formats(formats)  
 203          # XXX: Presentation['Presenters']  
 204          # XXX: Presentation['Transcript']  
 209              'description': presentation.get('Description'),  
 210              'duration': float_or_none(presentation.get('Duration'), 1000),  
 211              'timestamp': float_or_none(presentation.get('UnixTime'), 1000),  
 213              'thumbnails': thumbnails,