1 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from .youtube 
import YoutubeIE
 
  15 class TechTVMITIE(InfoExtractor
): 
  16     IE_NAME 
= 'techtv.mit.edu' 
  17     _VALID_URL 
= r
'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)' 
  20         'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set', 
  21         'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7', 
  25             'title': 'MIT DNA and Protein Sets', 
  26             'description': 'md5:46f5c69ce434f0a97e7c628cc142802d', 
  30     def _real_extract(self
, url
): 
  31         video_id 
= self
._match
_id
(url
) 
  32         raw_page 
= self
._download
_webpage
( 
  33             'http://techtv.mit.edu/videos/%s' % video_id
, video_id
) 
  34         clean_page 
= re
.compile(r
'<!--.*?-->', re
.S
).sub('', raw_page
) 
  36         base_url 
= self
._proto
_relative
_url
(self
._search
_regex
( 
  37             r
'ipadUrl: \'(.+?cloudfront
.net
/)', raw_page, 'base url
'), 'http
:') 
  38         formats_json = self._search_regex( 
  39             r'bitrates
: (\
[.+?\
])', raw_page, 'video formats
') 
  40         formats_mit = json.loads(formats_json) 
  43                 'format_id
': f['label
'], 
  44                 'url
': base_url + f['url
'].partition(':')[2], 
  45                 'ext
': f['url
'].partition(':')[0], 
  53         title = get_element_by_id('edit
-title
', clean_page) 
  54         description = clean_html(get_element_by_id('edit
-description
', clean_page)) 
  55         thumbnail = self._search_regex( 
  56             r'playlist
:.*?url
: \'(.+?
)\'', 
  57             raw_page, 'thumbnail
', flags=re.DOTALL) 
  63             'description
': description, 
  64             'thumbnail
': thumbnail, 
  68 class MITIE(TechTVMITIE): 
  69     IE_NAME = 'video
.mit
.edu
' 
  70     _VALID_URL = r'https?
://video\
.mit\
.edu
/watch
/(?P
<title
>[^
/]+)' 
  73         'url
': 'http
://video
.mit
.edu
/watch
/the
-government
-is-profiling
-you
-13222/', 
  74         'md5
': '7db01d5ccc1895fc5010e9c9e13648da
', 
  78             'title
': 'The Government 
is Profiling You
', 
  79             'description
': 'md5
:ad5795fe1e1623b73620dbfd47df9afd
', 
  83     def _real_extract(self, url): 
  84         mobj = re.match(self._VALID_URL, url) 
  85         page_title = mobj.group('title
') 
  86         webpage = self._download_webpage(url, page_title) 
  87         embed_url = self._search_regex( 
  88             r'<iframe 
.*?src
="(.+?)"', webpage, 'embed url
') 
  89         return self.url_result(embed_url) 
  92 class OCWMITIE(InfoExtractor): 
  93     IE_NAME = 'ocw
.mit
.edu
' 
  94     _VALID_URL = r'^https?
://ocw\
.mit\
.edu
/courses
/(?P
<topic
>[a
-z0
-9\
-]+)' 
  95     _BASE_URL = 'http
://ocw
.mit
.edu
/' 
  99             'url
': 'http
://ocw
.mit
.edu
/courses
/electrical
-engineering
-and-computer
-science
/6-041-probabilistic
-systems
-analysis
-and-applied
-probability
-fall
-2010/video
-lectures
/lecture
-7-multiple
-variables
-expectations
-independence
/', 
 103                 'title
': 'Lecture 
7: Multiple Discrete Random Variables
: Expectations
, Conditioning
, Independence
', 
 104                 'description
': 'In this lecture
, the professor discussed multiple random variables
, expectations
, and binomial distribution
.', 
 105                 'upload_date
': '20121109', 
 106                 'uploader_id
': 'MIT
', 
 107                 'uploader
': 'MIT OpenCourseWare
', 
 111             'url
': 'http
://ocw
.mit
.edu
/courses
/mathematics
/18-01sc
-single
-variable
-calculus
-fall
-2010/1.-differentiation
/part
-a
-definition
-and-basic
-rules
/session
-1-introduction
-to
-derivatives
/', 
 115                 'title
': 'Session 
1: Introduction to Derivatives
', 
 116                 'upload_date
': '20090818', 
 117                 'uploader_id
': 'MIT
', 
 118                 'uploader
': 'MIT OpenCourseWare
', 
 119                 'description
': 'This section contains lecture video excerpts
, lecture notes
, an interactive mathlet 
with supporting documents
, and problem solving videos
.', 
 124     def _real_extract(self, url): 
 125         mobj = re.match(self._VALID_URL, url) 
 126         topic = mobj.group('topic
') 
 128         webpage = self._download_webpage(url, topic) 
 129         title = self._html_search_meta('WT
.cg_s
', webpage) 
 130         description = self._html_search_meta('Description
', webpage) 
 132         # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file) 
 133         embed_chapter_media = re.search(r'ocw_embed_chapter_media\
((.+?
)\
)', webpage) 
 134         if embed_chapter_media: 
 135             metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1)) 
 136             metadata = re.split(r', ?', metadata) 
 139             # search for call to ocw_embed_media(container_id, media_url, provider, page_url, image_url, captions_file) 
 140             embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage) 
 142                 metadata = re.sub(r'[\'"]', '', embed_media.group(1)) 
 143                 metadata = re.split(r', ?
', metadata) 
 146                 raise ExtractorError('Unable to find embedded YouTube video
.') 
 147         video_id = YoutubeIE.extract_id(yt) 
 150             '_type
': 'url_transparent
', 
 153             'description
': description,