1 from __future__
import unicode_literals
5 from .common
import InfoExtractor
7 get_element_by_attribute
,
12 class TechTalksIE(InfoExtractor
):
13 _VALID_URL
= r
'https?://techtalks\.tv/talks/[^/]*/(?P<id>\d+)/'
16 'url': 'http://techtalks.tv/talks/learning-topic-models-going-beyond-svd/57758/',
19 'title': 'Learning Topic Models --- Going beyond SVD',
26 'title': 'Learning Topic Models --- Going beyond SVD',
33 'title': 'Learning Topic Models --- Going beyond SVD',
39 'skip_download': True,
43 def _real_extract(self
, url
):
44 mobj
= re
.match(self
._VALID
_URL
, url
)
45 talk_id
= mobj
.group('id')
46 webpage
= self
._download
_webpage
(url
, talk_id
)
47 rtmp_url
= self
._search
_regex
(
48 r
'netConnectionUrl: \'(.*?
)\'', webpage, 'rtmp url
')
49 play_path = self._search_regex(
50 r'href
=\'(.*?
)\' [^
>]*id="flowplayer_presenter"',
51 webpage, 'presenter play path
')
52 title = clean_html(get_element_by_attribute('class', 'title
', webpage))
57 'play_path
': play_path,
60 m_slides = re.search(r'<a
class="slides" href
=\'(.*?
)\'', webpage)
72 'id': talk_id + '-slides
',
75 'play_path
': m_slides.group(1),