]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ted.py
   4 from .common 
import InfoExtractor
 
   7 class TEDIE(InfoExtractor
): 
   8     _VALID_URL
=r
'''http://www\.ted\.com/ 
  10                         ((?P<type_playlist>playlists)/(?P<playlist_id>\d+)) # We have a playlist 
  12                         ((?P<type_talk>talks)) # We have a simple talk 
  14                    (/lang/(.*?))? # The url may contain the language 
  15                    /(?P<name>\w+) # Here goes the name and then ".html" 
  19     def suitable(cls
, url
): 
  20         """Receives a URL and returns True if suitable for this IE.""" 
  21         return re
.match(cls
._VALID
_URL
, url
, re
.VERBOSE
) is not None 
  23     def _real_extract(self
, url
): 
  24         m
=re
.match(self
._VALID
_URL
, url
, re
.VERBOSE
) 
  25         if m
.group('type_talk'): 
  26             return [self
._talk
_info
(url
)] 
  28             playlist_id
=m
.group('playlist_id') 
  30             self
.to_screen(u
'Getting info of playlist %s: "%s"' % (playlist_id
,name
)) 
  31             return [self
._playlist
_videos
_info
(url
,name
,playlist_id
)] 
  33     def _playlist_videos_info(self
,url
,name
,playlist_id
=0): 
  34         '''Returns the videos of the playlist''' 
  36                      <li\ id="talk_(\d+)"([.\s]*?)data-id="(?P<video_id>\d+)" 
  37                      ([.\s]*?)data-playlist_item_id="(\d+)" 
  38                      ([.\s]*?)data-mediaslug="(?P<mediaSlug>.+?)" 
  40         video_name_RE
=r
'<p\ class="talk-title"><a href="(?P<talk_url>/talks/(.+).html)">(?P<fullname>.+?)</a></p>' 
  41         webpage
=self
._download
_webpage
(url
, playlist_id
, 'Downloading playlist webpage') 
  42         m_videos
=re
.finditer(video_RE
,webpage
,re
.VERBOSE
) 
  43         m_names
=re
.finditer(video_name_RE
,webpage
) 
  45         playlist_title 
= self
._html
_search
_regex
(r
'div class="headline">\s*?<h1>\s*?<span>(.*?)</span>', 
  46                                                  webpage
, 'playlist title') 
  49         for m_video
, m_name 
in zip(m_videos
,m_names
): 
  50             talk_url
='http://www.ted.com%s' % m_name
.group('talk_url') 
  51             playlist_entries
.append(self
.url_result(talk_url
, 'TED')) 
  52         return self
.playlist_result(playlist_entries
, playlist_id 
= playlist_id
, playlist_title 
= playlist_title
) 
  54     def _talk_info(self
, url
, video_id
=0): 
  55         """Return the video for the talk in the url""" 
  56         m 
= re
.match(self
._VALID
_URL
, url
,re
.VERBOSE
) 
  57         video_name 
= m
.group('name') 
  58         webpage 
= self
._download
_webpage
(url
, video_id
, 'Downloading \"%s\" page' % video_name
) 
  59         self
.report_extraction(video_name
) 
  60         # If the url includes the language we get the title translated 
  61         title 
= self
._html
_search
_regex
(r
'<span id="altHeadline" >(?P<title>.*)</span>', 
  63         json_data 
= self
._search
_regex
(r
'<script.*?>var talkDetails = ({.*?})</script>', 
  65         info 
= json
.loads(json_data
) 
  66         desc 
= self
._html
_search
_regex
(r
'<div class="talk-intro">.*?<p.*?>(.*?)</p>', 
  67                                        webpage
, 'description', flags 
= re
.DOTALL
) 
  69         thumbnail 
= self
._search
_regex
(r
'</span>[\s.]*</div>[\s.]*<img src="(.*?)"', 
  73                 'url': info
['htmlStreams'][-1]['file'], 
  76                 'thumbnail': thumbnail
,