]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/mediasite.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   7  from  . common 
import  InfoExtractor
 
  25  _ID_RE 
=  r
'(?:[0-9a-f]{32,34}|[0-9a-f] {8} -[0-9a-f] {4} -[0-9a-f] {4} -[0-9a-f] {4} -[0-9a-f]{12,14})'  
  28  class  MediasiteIE ( InfoExtractor
):  
  29      _VALID_URL 
=  r
'(?xi)https?://[^/]+/Mediasite/(?:Play|Showcase/(?:default|livebroadcast)/Presentation)/(?P<id> %s )(?P<query>\?[^#]+|)'  %  _ID_RE
 
  32              'url' :  'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271681e4f199af3c60d1f82869b1d' ,  
  34                  'id' :  '2db6c271681e4f199af3c60d1f82869b1d' ,  
  36                  'title' :  'Lecture: Tuesday, September 20, 2016 - Sir Andrew Wiles' ,  
  37                  'description' :  'Sir Andrew Wiles: “Equations in arithmetic” \\ n \\ nI will describe some of the interactions between modern number theory and the problem of solving equations in rational numbers or integers \\ u0027.' ,  
  38                  'timestamp' :  1474268400.0 ,  
  39                  'upload_date' :  '20160919' ,  
  43              'url' :  'http://mediasite.uib.no/Mediasite/Play/90bb363295d945d6b548c867d01181361d?catalog=a452b7df-9ae1-46b7-a3ba-aceeb285f3eb' ,  
  45                  'id' :  '90bb363295d945d6b548c867d01181361d' ,  
  47                  'upload_date' :  '20150429' ,  
  48                  'title' :  '5) IT-forum 2015-Dag 1  - Dungbeetle -  How and why Rain created a tiny bug tracker for Unity' ,  
  49                  'timestamp' :  1430311380.0 ,  
  53              'url' :  'https://collegerama.tudelft.nl/Mediasite/Play/585a43626e544bdd97aeb71a0ec907a01d' ,  
  54              'md5' :  '481fda1c11f67588c0d9d8fbdced4e39' ,  
  56                  'id' :  '585a43626e544bdd97aeb71a0ec907a01d' ,  
  58                  'title' :  'Een nieuwe wereld: waarden, bewustzijn en techniek van de mensheid 2.0.' ,  
  60                  'thumbnail' :  r
're:^https?://.*\.jpg(?:\?.*)?$' ,  
  62                  'timestamp' :  1413309600 ,  
  63                  'upload_date' :  '20141014' ,  
  67              'url' :  'https://collegerama.tudelft.nl/Mediasite/Play/86a9ea9f53e149079fbdb4202b521ed21d?catalog=fd32fd35-6c99-466c-89d4-cd3c431bc8a4' ,  
  68              'md5' :  'ef1fdded95bdf19b12c5999949419c92' ,  
  70                  'id' :  '86a9ea9f53e149079fbdb4202b521ed21d' ,  
  72                  'title' :  '64ste Vakantiecursus: Afvalwater' ,  
  73                  'description' :  'md5:7fd774865cc69d972f542b157c328305' ,  
  74                  'thumbnail' :  r
're:^https?://.*\.jpg(?:\?.*?)?$' ,  
  76                  'timestamp' :  1326446400 ,  
  77                  'upload_date' :  '20120113' ,  
  81              'url' :  'http://digitalops.sandia.gov/Mediasite/Play/24aace4429fc450fb5b38cdbf424a66e1d' ,  
  82              'md5' :  '9422edc9b9a60151727e4b6d8bef393d' ,  
  84                  'id' :  '24aace4429fc450fb5b38cdbf424a66e1d' ,  
  86                  'title' :  'Xyce Software Training - Section 1' ,  
  87                  'description' :  r
're:(?s)SAND Number: SAND 2013-7800.{200,}' ,  
  88                  'upload_date' :  '20120409' ,  
  89                  'timestamp' :  1333983600 ,  
  94              'url' :  'https://collegerama.tudelft.nl/Mediasite/Showcase/livebroadcast/Presentation/ada7020854f743c49fbb45c9ec7dbb351d' ,  
  95              'only_matching' :  True ,  
  98              'url' :  'https://mediasite.ntnu.no/Mediasite/Showcase/default/Presentation/7d8b913259334b688986e970fae6fcb31d' ,  
  99              'only_matching' :  True ,  
 103              'url' :  'https://hitsmediaweb.h-its.org/mediasite/Play/2db6c271-681e-4f19-9af3-c60d1f82869b1d' ,  
 104              'only_matching' :  True ,  
 108      # look in Mediasite.Core.js (Mediasite.ContentStreamType[*])  
 110          0 :  'video1' ,   # the main video  
 113          4 :  'video2' ,   # screencast?  
 118      def  _extract_urls ( webpage
):  
 120              unescapeHTML ( mobj
. group ( 'url' ))  
 121              for  mobj 
in  re
. finditer (  
 122                  r
'(?xi)<iframe\b[^>]+\bsrc=(["\' ])( ?P
< url
>( ?
:( ?
: https?
:) ?
//[ ^
/]+) ?
/ Mediasite
/ Play
/ %s( ?
: \? .* ?
) ?
) \
1 ' % _ID_RE,  
 125      def _real_extract(self, url):  
 126          url, data = unsmuggle_url(url, {})  
 127          mobj = re.match(self._VALID_URL, url)  
 128          resource_id = mobj.group(' id ')  
 129          query = mobj.group(' query
')  
 131          webpage, urlh = self._download_webpage_handle(url, resource_id)  # XXX: add UrlReferrer?  
 132          redirect_url = compat_str(urlh.geturl())  
 134          # XXX: might have also extracted UrlReferrer and QueryString from the html  
 135          service_path = compat_urlparse.urljoin(redirect_url, self._html_search_regex(  
 136              r' < div
[ ^
>]+ \b id
=[ " \' ]ServicePath[^>]+>(.+?)</div>', webpage, resource_id,  
 137              default='/Mediasite/PlayerService/PlayerService.svc/json'))  
 139          player_options = self._download_json(  
 140              ' %s /GetPlayerOptions' % service_path, resource_id,  
 142                  'Content-type': 'application/json; charset=utf-8',  
 143                  'X-Requested-With': 'XMLHttpRequest',  
 146                  'getPlayerOptionsRequest': {  
 147                      'ResourceId': resource_id,  
 148                      'QueryString': query,  
 149                      'UrlReferrer': data.get('UrlReferrer', ''),  
 150                      'UseScreenReader': False,  
 152              }).encode('utf-8'))['d']  
 154          presentation = player_options['Presentation']  
 155          title = presentation['Title']  
 157          if presentation is None:  
 158              raise ExtractorError(  
 159                  'Mediasite says:  %s ' % player_options['PlayerPresentationStatusMessage'],  
 164          for snum, Stream in enumerate(presentation['Streams']):  
 165              stream_type = Stream.get('StreamType')  
 166              if stream_type is None:  
 169              video_urls = Stream.get('VideoUrls')  
 170              if not isinstance(video_urls, list):  
 173              stream_id = self._STREAM_TYPES.get(  
 174                  stream_type, 'type %u ' % stream_type)  
 177              for unum, VideoUrl in enumerate(video_urls):  
 178                  video_url = url_or_none(VideoUrl.get('Location'))  
 181                  # XXX: if Stream.get('CanChangeScheme', False), switch scheme to HTTP/HTTPS  
 183                  media_type = VideoUrl.get('MediaType')  
 184                  if media_type == 'SS':  
 185                      stream_formats.extend(self._extract_ism_formats(  
 186                          video_url, resource_id,  
 187                          ism_id=' %s-%u . %u ' % (stream_id, snum, unum),  
 189                  elif media_type == 'Dash':  
 190                      stream_formats.extend(self._extract_mpd_formats(  
 191                          video_url, resource_id,  
 192                          mpd_id=' %s-%u . %u ' % (stream_id, snum, unum),  
 195                      stream_formats.append({  
 196                          'format_id': ' %s-%u . %u ' % (stream_id, snum, unum),  
 198                          'ext': mimetype2ext(VideoUrl.get('MimeType')),  
 201              # TODO: if Stream['HasSlideContent']:  
 202              # synthesise an MJPEG video stream ' %s-%u .slides' % (stream_type, snum)  
 203              # from Stream['Slides']  
 204              # this will require writing a custom downloader...  
 206              # disprefer 'secondary' streams  
 208                  for fmt in stream_formats:  
 209                      fmt['preference'] = -1  
 211              thumbnail_url = Stream.get('ThumbnailUrl')  
 214                      'id': ' %s-%u ' % (stream_id, snum),  
 215                      'url': urljoin(redirect_url, thumbnail_url),  
 216                      'preference': -1 if stream_type != 0 else 0,  
 218              formats.extend(stream_formats)  
 220          self._sort_formats(formats)  
 222          # XXX: Presentation['Presenters']  
 223          # XXX: Presentation['Transcript']  
 228              'description': presentation.get('Description'),  
 229              'duration': float_or_none(presentation.get('Duration'), 1000),  
 230              'timestamp': float_or_none(presentation.get('UnixTime'), 1000),  
 232              'thumbnails': thumbnails,  
 236  class MediasiteCatalogIE(InfoExtractor):  
 237      _VALID_URL = r'''(?xi)  
 238                          (?P<url>https?://[^/]+/Mediasite)  
 242                              /(?P<current_folder_id> {0} )  
 243                              /(?P<root_dynamic_folder_id> {0} )  
 247          'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48530d454381549f955d08c75e21',  
 249              'id': '631f9e48530d454381549f955d08c75e21',  
 250              'title': 'WCET Summit: Adaptive Learning in Higher Ed: Improving Outcomes Dynamically',  
 253          'expected_warnings': ['is not a supported codec'],  
 255          # with CurrentFolderId and RootDynamicFolderId  
 256          'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',  
 258              'id': '9518c4a6c5cf4993b21cbd53e828a92521',  
 259              'title': 'IUSM Family and Friends Sessions',  
 263          'url': 'http://uipsyc.mediasite.com/mediasite/Catalog/Full/d5d79287c75243c58c50fef50174ec1b21',  
 264          'only_matching': True,  
 266          # no AntiForgeryToken  
 267          'url': 'https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21',  
 268          'only_matching': True,  
 270          'url': 'https://medaudio.medicine.iu.edu/Mediasite/Catalog/Full/9518c4a6c5cf4993b21cbd53e828a92521/97a9db45f7ab47428c77cd2ed74bb98f14/9518c4a6c5cf4993b21cbd53e828a92521',  
 271          'only_matching': True,  
 274          'url': 'http://events7.mediasite.com/Mediasite/Catalog/Full/631f9e48-530d-4543-8154-9f955d08c75e',  
 275          'only_matching': True,  
 278      def _real_extract(self, url):  
 279          mobj = re.match(self._VALID_URL, url)  
 280          mediasite_url = mobj.group('url')  
 281          catalog_id = mobj.group('catalog_id')  
 282          current_folder_id = mobj.group('current_folder_id') or catalog_id  
 283          root_dynamic_folder_id = mobj.group('root_dynamic_folder_id')  
 285          webpage = self._download_webpage(url, catalog_id)  
 287          # AntiForgeryToken is optional (e.g. [1])  
 288          # 1. https://live.libraries.psu.edu/Mediasite/Catalog/Full/8376d4b24dd1457ea3bfe4cf9163feda21  
 289          anti_forgery_token = self._search_regex(  
 290              r'AntiForgeryToken\s*:\s*([" \' ])( ?P
< value
>( ?
:( ?
! \
1 ).)+) \
1 ',  
 291              webpage, ' anti forgery token
', default=None, group=' value
')  
 292          if anti_forgery_token:  
 293              anti_forgery_header = self._search_regex(  
 294                  r' AntiForgeryHeaderName\s
*: \s
*([ " \' ])(?P<value>(?:(?!\1).)+)\1',  
 295                  webpage, 'anti forgery header name',  
 296                  default='X-SOFO-AntiForgeryHeader', group='value')  
 302              'CatalogId': catalog_id,  
 303              'CurrentFolderId': current_folder_id,  
 304              'RootDynamicFolderId': root_dynamic_folder_id,  
 305              'ItemsPerPage': 1000,  
 307              'PermissionMask': 'Execute',  
 308              'CatalogSearchType': 'SearchInFolder',  
 310              'SortDirection': 'Descending',  
 313              'StatusFilterList': None,  
 319              'Content-Type': 'application/json; charset=UTF-8',  
 321              'X-Requested-With': 'XMLHttpRequest',  
 323          if anti_forgery_token:  
 324              headers[anti_forgery_header] = anti_forgery_token  
 326          catalog = self._download_json(  
 327              ' %s /Catalog/Data/GetPresentationsForFolder' % mediasite_url,  
 328              catalog_id, data=json.dumps(data).encode(), headers=headers)  
 331          for video in catalog['PresentationDetailsList']:  
 332              if not isinstance(video, dict):  
 334              video_id = str_or_none(video.get('Id'))  
 337              entries.append(self.url_result(  
 338                  ' %s /Play/ %s ' % (mediasite_url, video_id),  
 339                  ie=MediasiteIE.ie_key(), video_id=video_id))  
 342              catalog, lambda x: x['CurrentFolder']['Name'], compat_str)  
 344          return self.playlist_result(entries, catalog_id, title,)  
 347  class MediasiteNamedCatalogIE(InfoExtractor):  
 348      _VALID_URL = r'(?xi)(?P<url>https?://[^/]+/Mediasite)/Catalog/catalogs/(?P<catalog_name>[^/?#&]+)'  
 350          'url': 'https://msite.misis.ru/Mediasite/Catalog/catalogs/2016-industrial-management-skriabin-o-o',  
 351          'only_matching': True,  
 354      def _real_extract(self, url):  
 355          mobj = re.match(self._VALID_URL, url)  
 356          mediasite_url = mobj.group('url')  
 357          catalog_name = mobj.group('catalog_name')  
 359          webpage = self._download_webpage(url, catalog_name)  
 361          catalog_id = self._search_regex(  
 362              r'CatalogId\s*:\s*[" \' ]( %s) ' % _ID_RE, webpage, ' catalog 
id ')  
 364          return self.url_result(  
 365              ' %s/ Catalog
/ Full
/ %s ' % (mediasite_url, catalog_id),  
 366              ie=MediasiteCatalogIE.ie_key(), video_id=catalog_id)