1 from __future__ 
import unicode_literals
 
   5 from .common 
import InfoExtractor
 
  18 class MicrosoftVirtualAcademyBaseIE(InfoExtractor
): 
  19     def _extract_base_url(self
, course_id
, display_id
): 
  20         return self
._download
_json
( 
  21             'https://api-mlxprod.microsoft.com/services/products/anonymous/%s' % course_id
, 
  22             display_id
, 'Downloading course base URL') 
  24     def _extract_chapter_and_title(self
, title
): 
  27         m 
= re
.search(r
'(?P<chapter>\d+)\s*\|\s*(?P<title>.+)', title
) 
  28         return (int(m
.group('chapter')), m
.group('title')) if m 
else (None, title
) 
  31 class MicrosoftVirtualAcademyIE(MicrosoftVirtualAcademyBaseIE
): 
         # Extractor for a single Microsoft Virtual Academy video.
         # NOTE(review): this view of the class is a lossy excerpt; IE_NAME,
         # the test-list scaffolding, the bodies of several guards below and
         # the returned dict are not visible here. Confirm against the full file.
  33     IE_DESC 
= 'Microsoft Virtual Academy videos' 
         # Accepts either the '<IE_NAME>:<course_id>:<id>' shorthand or a
         # training-course page URL ending in '-<course_id>?l=<id>'.
         # Named groups: 'course_id' (digits) and 'id' (alphanumerics, '_', digits).
  34     _VALID_URL 
= r
'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/[^/?#&]+-)(?P<course_id>\d+)(?::|\?l=)(?P<id>[\da-zA-Z]+_\d+)' % IE_NAME
 
             # Test fixture entries; presumably members of the extractor's
             # test list, whose enclosing structure is not visible - TODO confirm.
  37         'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788?l=gfVXISmEB_6804984382', 
  38         'md5': '7826c44fc31678b12ad8db11f6b5abb9', 
  40             'id': 'gfVXISmEB_6804984382', 
  42             'title': 'Course Introduction', 
  43             'formats': 'mincount:3', 
  51         'url': 'mva:11788:gfVXISmEB_6804984382', 
  52         'only_matching': True, 
         # Resolve one video: split off smuggled data, parse the ids from the
         # URL, download videosettings.xml below the course base URL, then
         # gather formats and subtitles from it.
  55     def _real_extract(self
, url
): 
             # Separate any smuggled data from the URL; defaults to {}.
  56         url
, smuggled_data 
= unsmuggle_url(url
, {}) 
  58         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  59         course_id 
= mobj
.group('course_id') 
  60         video_id 
= mobj
.group('id') 
             # Reuse a base URL handed over via smuggled data when present;
             # otherwise look it up for this course.
  62         base_url 
= smuggled_data
.get('base_url') or self
._extract
_base
_url
(course_id
, video_id
) 
  64         settings 
= self
._download
_xml
( 
  65             '%s/content/content_%s/videosettings.xml?v=1' % (base_url
, video_id
), 
  66             video_id
, 'Downloading video settings XML') 
             # The <Title> text is required (fatal=True); the chapter number
             # returned alongside the title is discarded.
  68         _
, title 
= self
._extract
_chapter
_and
_title
(xpath_text( 
  69             settings
, './/Title', 'title', fatal
=True)) 
             # Walk every <MediaSources> group. Groups with
             # videoType == 'smoothstreaming' are special-cased; the branch
             # body is not visible in this excerpt.
  73         for sources 
in settings
.findall(compat_xpath('.//MediaSources')): 
  74             if sources
.get('videoType') == 'smoothstreaming': 
  76             for source 
in sources
.findall(compat_xpath('./MediaSource')): 
  77                 video_url 
= source
.text
 
                     # Guard for empty or non-http URLs (body not visible here).
  78                 if not video_url 
or not video_url
.startswith('http'): 
  80                 video_mode 
= source
.get('videoMode') 
                     # Height is taken from a videoMode of the form '<digits>p'
                     # (or 'P'); anything else yields None.
  81                 height 
= int_or_none(self
._search
_regex
( 
  82                     r
'^(\d+)[pP]$', video_mode 
or '', 'height', default
=None)) 
  83                 codec 
= source
.get('codec') 
                     # Both codecs default to None; the 'codec' attribute is
                     # split on ',' and a two-element result unpacks as
                     # (acodec, vcodec). The single-element branch body is
                     # not visible here.
  84                 acodec
, vcodec 
= [None] * 2 
  86                     codecs 
= codec
.split(',') 
  88                         acodec
, vcodec 
= codecs
 
  89                     elif len(codecs
) == 1: 
  93                     'format_id': video_mode
, 
  98         self
._sort
_formats
(formats
) 
             # Each <MarkerResourceSource> text is joined onto base_url and
             # filed under the 'en' subtitle key, with the element's 'type'
             # attribute as the extension.
 101         for source 
in settings
.findall(compat_xpath('.//MarkerResourceSource')): 
 102             subtitle_url 
= source
.text
 
 105             subtitles
.setdefault('en', []).append({ 
 106                 'url': '%s/%s' % (base_url
, subtitle_url
), 
 107                 'ext': source
.get('type'), 
 113             'subtitles': subtitles
, 
 118 class MicrosoftVirtualAcademyCourseIE(MicrosoftVirtualAcademyBaseIE
): 
         # Extractor for a whole Microsoft Virtual Academy course, returned as
         # a playlist of its video items.
         # NOTE(review): this view of the class is a lossy excerpt; the
         # test-list scaffolding, any decorator on suitable(), the 'entries'
         # initialisation and the bodies of some guards are not visible here.
 119     IE_NAME 
= 'mva:course' 
 120     IE_DESC 
= 'Microsoft Virtual Academy courses' 
         # Accepts the '<IE_NAME>:<id>' shorthand or a training-course page
         # URL ending in '<display_id>-<id>'. The 'display_id' group only
         # exists in the URL alternative, so it is None for the shorthand.
 121     _VALID_URL 
= r
'(?:%s:|https?://(?:mva\.microsoft|(?:www\.)?microsoftvirtualacademy)\.com/[^/]+/training-courses/(?P<display_id>[^/?#&]+)-)(?P<id>\d+)' % IE_NAME
 
             # Test fixture entries; presumably members of the extractor's
             # test list, whose enclosing structure is not visible - TODO confirm.
 124         'url': 'https://mva.microsoft.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', 
 127             'title': 'Microsoft Azure Fundamentals: Virtual Machines', 
 129         'playlist_count': 36, 
 131         # with emphasized chapters 
 132         'url': 'https://mva.microsoft.com/en-US/training-courses/developing-windows-10-games-with-construct-2-16335', 
 135             'title': 'Developing Windows 10 Games with Construct 2', 
 137         'playlist_count': 10, 
 139         'url': 'https://www.microsoftvirtualacademy.com/en-US/training-courses/microsoft-azure-fundamentals-virtual-machines-11788', 
 140         'only_matching': True, 
 142         'url': 'mva:course:11788', 
 143         'only_matching': True, 
         # Never claim a URL that MicrosoftVirtualAcademyIE accepts; otherwise
         # fall back to the inherited suitable() check.
 147     def suitable(cls
, url
): 
 148         return False if MicrosoftVirtualAcademyIE
.suitable(url
) else super( 
 149             MicrosoftVirtualAcademyCourseIE
, cls
).suitable(url
) 
         # Build the course playlist: look up the base URL, download
         # imsmanifestlite.json below it, then walk the chapters of the first
         # organization and their items.
 151     def _real_extract(self
, url
): 
 152         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 153         course_id 
= mobj
.group('id') 
 154         display_id 
= mobj
.group('display_id') 
 156         base_url 
= self
._extract
_base
_url
(course_id
, display_id
) 
             # Only the value under the top-level 'manifest' key is kept.
 158         manifest 
= self
._download
_json
( 
 159             '%s/imsmanifestlite.json' % base_url
, 
 160             display_id
, 'Downloading course manifest JSON')['manifest'] 
             # Only the first organization in the manifest is used.
 162         organization 
= manifest
['organizations']['organization'][0] 
 165         for chapter 
in organization
['item']: 
                 # Chapter titles are split into (number, title) by the
                 # shared helper.
 166             chapter_number
, chapter_title 
= self
._extract
_chapter
_and
_title
(chapter
.get('title')) 
 167             chapter_id 
= chapter
.get('@identifier') 
 168             for item 
in chapter
.get('item', []): 
 169                 item_id 
= item
.get('@identifier') 
                     # 'or {}' keeps the lookups below safe when the
                     # resource has no metadata (missing or falsy).
 172                 metadata 
= item
.get('resource', {}).get('metadata') or {} 
                     # Guard on items whose learningresourcetype is not
                     # 'Video' (branch body not visible in this excerpt).
 173                 if metadata
.get('learningresourcetype') != 'Video': 
                     # For items the numeric prefix is discarded; only the
                     # bare title is kept.
 175                 _
, title 
= self
._extract
_chapter
_and
_title
(item
.get('title')) 
 176                 duration 
= parse_duration(metadata
.get('duration')) 
 177                 description 
= metadata
.get('description') 
                         # Each entry is a 'url_transparent' result targeting
                         # the 'mva:<course_id>:<item_id>' shorthand, with
                         # base_url attached as extra data. The call wrapping
                         # these arguments is not visible here; presumably it
                         # pairs with unsmuggle_url() in the video extractor
                         # - TODO confirm.
 179                     '_type': 'url_transparent', 
 181                         'mva:%s:%s' % (course_id
, item_id
), {'base_url': base_url
}), 
 183                     'description': description
, 
 184                     'duration': duration
, 
 185                     'chapter': chapter_title
, 
 186                     'chapter_number': chapter_number
, 
 187                     'chapter_id': chapter_id
, 
             # Playlist title: the organization's title, falling back to the
             # manifest metadata title.
 190         title 
= organization
.get('title') or manifest
.get('metadata', {}).get('title') 
 192         return self
.playlist_result(entries
, course_id
, title
)