1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from .youtube 
import YoutubeIE
 
  16 class TechTVMITIE(InfoExtractor
): 
  17     IE_NAME 
= 'techtv.mit.edu' 
  18     _VALID_URL 
= r
'https?://techtv\.mit\.edu/(videos|embeds)/(?P<id>\d+)' 
  21         'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', 
  22         'md5': '1f8cb3e170d41fd74add04d3c9330e5f', 
  26             'title': 'MIT DNA Learning Center Set', 
  27             'description': 'md5:82313335e8a8a3f243351ba55bc1b474', 
  31     def _real_extract(self
, url
): 
  32         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  33         video_id 
= mobj
.group('id') 
  34         raw_page 
= self
._download
_webpage
( 
  35             'http://techtv.mit.edu/videos/%s' % video_id
, video_id
) 
  36         clean_page 
= re
.compile(r
'<!--.*?-->', re
.S
).sub('', raw_page
) 
  38         base_url 
= self
._search
_regex
( 
  39             r
'ipadUrl: \'(.+?cloudfront
.net
/)', raw_page, 'base url
') 
  40         formats_json = self._search_regex( 
  41             r'bitrates
: (\
[.+?\
])', raw_page, 'video formats
') 
  42         formats_mit = json.loads(formats_json) 
  45                 'format_id
': f['label
'], 
  46                 'url
': base_url + f['url
'].partition(':')[2], 
  47                 'ext
': f['url
'].partition(':')[0], 
  55         title = get_element_by_id('edit
-title
', clean_page) 
  56         description = clean_html(get_element_by_id('edit
-description
', clean_page)) 
  57         thumbnail = self._search_regex( 
  58             r'playlist
:.*?url
: \'(.+?
)\'', 
  59             raw_page, 'thumbnail
', flags=re.DOTALL) 
  65             'description
': description, 
  66             'thumbnail
': thumbnail, 
  70 class MITIE(TechTVMITIE): 
  71     IE_NAME = 'video
.mit
.edu
' 
  72     _VALID_URL = r'https?
://video\
.mit\
.edu
/watch
/(?P
<title
>[^
/]+)' 
  75         'url
': 'http
://video
.mit
.edu
/watch
/the
-government
-is-profiling
-you
-13222/', 
  76         'md5
': '7db01d5ccc1895fc5010e9c9e13648da
', 
  80             'title
': 'The Government 
is Profiling You
', 
  81             'description
': 'md5
:ad5795fe1e1623b73620dbfd47df9afd
', 
  85     def _real_extract(self, url): 
  86         mobj = re.match(self._VALID_URL, url) 
  87         page_title = mobj.group('title
') 
  88         webpage = self._download_webpage(url, page_title) 
  89         embed_url = self._search_regex( 
  90             r'<iframe 
.*?src
="(.+?)"', webpage, 'embed url
') 
  91         return self.url_result(embed_url, ie='TechTVMIT
') 
  94 class OCWMITIE(InfoExtractor): 
  95     IE_NAME = 'ocw
.mit
.edu
' 
  96     _VALID_URL = r'^http
://ocw\
.mit\
.edu
/courses
/(?P
<topic
>[a
-z0
-9\
-]+)' 
  97     _BASE_URL = 'http
://ocw
.mit
.edu
/' 
 101             'url
': 'http
://ocw
.mit
.edu
/courses
/electrical
-engineering
-and-computer
-science
/6-041-probabilistic
-systems
-analysis
-and-applied
-probability
-fall
-2010/video
-lectures
/lecture
-7-multiple
-variables
-expectations
-independence
/', 
 105                 'title
': 'Lecture 
7: Multiple Discrete Random Variables
: Expectations
, Conditioning
, Independence
', 
 106                 'description
': 'In this lecture
, the professor discussed multiple random variables
, expectations
, and binomial distribution
.', 
 107                 #'subtitles
': 'http
://ocw
.mit
.edu
/courses
/electrical
-engineering
-and-computer
-science
/6-041-probabilistic
-systems
-analysis
-and-applied
-probability
-fall
-2010/video
-lectures
/lecture
-7-multiple
-variables
-expectations
-independence
/MIT6_041F11_lec07_300k
.mp4
.srt
' 
 111             'url
': 'http
://ocw
.mit
.edu
/courses
/mathematics
/18-01sc
-single
-variable
-calculus
-fall
-2010/1.-differentiation
/part
-a
-definition
-and-basic
-rules
/session
-1-introduction
-to
-derivatives
/', 
 115                 'title
': 'Session 
1: Introduction to Derivatives
', 
 116                 'description
': 'This section contains lecture video excerpts
, lecture notes
, an interactive mathlet 
with supporting documents
, and problem solving videos
.', 
 117                 #'subtitles
': 'http
://ocw
.mit
.edu
//courses
/mathematics
/18-01sc
-single
-variable
-calculus
-fall
-2010/ocw
-18.01-f07
-lec01_300k
.SRT
' 
 122     def _real_extract(self, url): 
 123         mobj = re.match(self._VALID_URL, url) 
 124         topic = mobj.group('topic
') 
 126         webpage = self._download_webpage(url, topic) 
 127         title = self._html_search_meta('WT
.cg_s
', webpage) 
 128         description = self._html_search_meta('Description
', webpage) 
 130         # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file) 
 131         embed_chapter_media = re.search(r'ocw_embed_chapter_media\
((.+?
)\
)', webpage) 
 132         if embed_chapter_media: 
 133             metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1)) 
 134             metadata = re.split(r', ?', metadata) 
 136             subs = compat_urlparse.urljoin(self._BASE_URL, metadata[7]) 
 138             # search for call to ocw_embed_media(container_id, media_url, provider, page_url, image_url, captions_file) 
 139             embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage) 
 141                 metadata = re.sub(r'[\'"]', '', embed_media.group(1)) 
 142                 metadata = re.split(r', ?
', metadata) 
 144                 subs = compat_urlparse.urljoin(self._BASE_URL, metadata[5]) 
 146                 raise ExtractorError('Unable to find embedded YouTube video
.') 
 147         video_id = YoutubeIE.extract_id(yt) 
 150             '_type
': 'url_transparent
', 
 153             'description
': description,