]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ted.py 
10b3b706a9c82ef8398d408a948e72b6c52b31c3
   1  from  __future__ 
import  unicode_literals
   6  from  . subtitles 
import  SubtitlesInfoExtractor
  13  class  TEDIE ( SubtitlesInfoExtractor
):   16          (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/   18              (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist   20              ((?P<type_talk>talks)) # We have a simple talk   22              (?P<type_watch>watch)/[^/]+/[^/]+   24          (/lang/(.*?))? # The url may contain the language   25          /(?P<name>[\w-]+) # Here goes the name and then ".html"   29          'url' :  'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' ,   30          'md5' :  'fc94ac279feebbce69f21c0c6ee82810' ,   34              'title' :  'The illusion of consciousness' ,   35              'description' : ( 'Philosopher Dan Dennett makes a compelling '   36                              'argument that not only don \' t we understand our own '   37                              'consciousness, but that half the time our brains are '   38                              'actively fooling us.' ),   39              'uploader' :  'Dan Dennett' ,   44          'url' :  'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms' ,   45          'md5' :  '226f4fb9c62380d11b7995efa4c87994' ,   47              'id' :  'vishal-sikka-the-beauty-and-power-of-algorithms' ,   49              'title' :  'Vishal Sikka: The beauty and power of algorithms' ,   50              'thumbnail' :  're:^https?://.+\.jpg' ,   51              'description' :  'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.' ,   54          'url' :  'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best' ,   58              'title' :  'Be passionate. Be courageous. Be your best.' ,   59              'uploader' :  'Gabby Giffords and Mark Kelly' ,   60              'description' :  'md5:5174aed4d0f16021b704120360f72b92' ,   64          'url' :  'http://www.ted.com/playlists/who_are_the_hackers' ,   67              'title' :  'Who are the hackers?' ,   69          'playlist_mincount' :  6 ,   71          # contains a youtube video   72          'url' :  'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything' ,   73          'add_ie' : [ 'Youtube' ],   77              'title' :  'Douglas Adams: Parrots the Universe and Everything' ,   78              'description' :  'md5:01ad1e199c49ac640cb1196c0e9016af' ,   79              'uploader' :  'University of California Television (UCTV)' ,   80              'uploader_id' :  'UCtelevision' ,   81              'upload_date' :  '20080522' ,   84              'skip_download' :  True ,   89          'low' : { 'preference' :  1 ,  'width' :  320 ,  'height' :  180 },   90          'medium' : { 'preference' :  2 ,  'width' :  512 ,  'height' :  288 },   91          'high' : { 'preference' :  3 ,  'width' :  854 ,  'height' :  480 },   94      def  _extract_info ( self
,  webpage
):   95          info_json 
=  self
._ search
_ regex
( r
'q\("\w+.init",({.+})\)</script>' ,   97          return  json
. loads ( info_json
)   99      def  _real_extract ( self
,  url
):  100          m 
=  re
. match ( self
._ VALID
_U RL
,  url
,  re
. VERBOSE
)  101          if  m
. group ( 'type' ). startswith ( 'embed' ):  102              desktop_url 
=  m
. group ( 'proto' ) +  'www'  +  m
. group ( 'urlmain' )  103              return  self
. url_result ( desktop_url
,  'TED' )  104          name 
=  m
. group ( 'name' )  105          if  m
. group ( 'type_talk' ):  106              return  self
._ talk
_ info
( url
,  name
)  107          elif  m
. group ( 'type_watch' ):  108              return  self
._ watch
_ info
( url
,  name
)  110              return  self
._ playlist
_ videos
_ info
( url
,  name
)  112      def  _playlist_videos_info ( self
,  url
,  name
):  113          '''Returns the videos of the playlist'''  115          webpage 
=  self
._ download
_ webpage
( url
,  name
,  116                                           'Downloading playlist webpage' )  117          info 
=  self
._ extract
_ info
( webpage
)  118          playlist_info 
=  info
[ 'playlist' ]  121              self
. url_result ( 'http://www.ted.com/talks/'  +  talk
[ 'slug' ],  self
. ie_key ())  122              for  talk 
in  info
[ 'talks' ]  124          return  self
. playlist_result (  126              playlist_id
= compat_str ( playlist_info
[ 'id' ]),  127              playlist_title
= playlist_info
[ 'title' ])  129      def  _talk_info ( self
,  url
,  video_name
):  130          webpage 
=  self
._ download
_ webpage
( url
,  video_name
)  131          self
. report_extraction ( video_name
)  133          talk_info 
=  self
._ extract
_ info
( webpage
)[ 'talks' ][ 0 ]  135          if  talk_info
. get ( 'external' )  is not None :  136              self
. to_screen ( 'Found video from  %s '  %  talk_info
[ 'external' ][ 'service' ])  139                  'url' :  talk_info
[ 'external' ][ 'uri' ],  144              'format_id' :  format_id
,  146          }  for  ( format_id
,  format_url
)  in  talk_info
[ 'nativeDownloads' ]. items ()  if  format_url 
is not None ]  149                  finfo 
=  self
._ NATIVE
_ FORMATS
. get ( f
[ 'format_id' ])  155                  'format_id' :  f
[ 'name' ],  156                  'url' :  talk_info
[ 'streamer' ],  157                  'play_path' :  f
[ 'file' ],  160                  'height' :  f
[ 'height' ],  162              }  for  f 
in  talk_info
[ 'resources' ][ 'rtmp' ]]  163          self
._ sort
_ formats
( formats
)  165          video_id 
=  compat_str ( talk_info
[ 'id' ])  167          video_subtitles 
=  self
. extract_subtitles ( video_id
,  talk_info
)  168          if  self
._ downloader
. params
. get ( 'listsubtitles' ,  False ):  169              self
._l ist
_ available
_ subtitles
( video_id
,  talk_info
)  172          thumbnail 
=  talk_info
[ 'thumb' ]  173          if not  thumbnail
. startswith ( 'http' ):  174              thumbnail 
=  'http://'  +  thumbnail
 177              'title' :  talk_info
[ 'title' ]. strip (),  178              'uploader' :  talk_info
[ 'speaker' ],  179              'thumbnail' :  thumbnail
,  180              'description' :  self
._ og
_ search
_ description
( webpage
),  181              'subtitles' :  video_subtitles
,  183              'duration' :  talk_info
. get ( 'duration' ),  186      def  _get_available_subtitles ( self
,  video_id
,  talk_info
):  187          languages 
= [ lang
[ 'languageCode' ]  for  lang 
in  talk_info
. get ( 'languages' , [])]  191                  url 
=  'http://www.ted.com/talks/subtitles/id/ %s /lang/ %s /format/srt'  % ( video_id
,  l
)  192                  sub_lang_list
[ l
] =  url
 195              self
._ downloader
. report_warning ( 'video doesn \' t have subtitles' )  198      def  _watch_info ( self
,  url
,  name
):  199          webpage 
=  self
._ download
_ webpage
( url
,  name
)  201          config_json 
=  self
._ html
_ search
_ regex
(  202              r
'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>' ,  204          config 
=  json
. loads ( config_json
)[ 'config' ]  205          video_url 
=  config
[ 'video' ][ 'url' ]  206          thumbnail 
=  config
. get ( 'image' , {}). get ( 'url' )  208          title 
=  self
._ html
_ search
_ regex
(  209              r
"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>" ,  webpage
,  'title' )  210          description 
=  self
._ html
_ search
_ regex
(  212                  r
'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>' ,  213                  r
'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>' ,  215              webpage
,  'description' ,  fatal
= False )  221              'thumbnail' :  thumbnail
,  222              'description' :  description
,