]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ted.py 
   1  from  __future__ 
import  unicode_literals
   6  from  . common 
import  InfoExtractor
   8  from  .. compat 
import  compat_str
   9  from  .. utils 
import  int_or_none
  12  class  TEDIE ( InfoExtractor
):   16          (?P<type>www|embed(?:-ssl)?)(?P<urlmain>\.ted\.com/   18              (?P<type_playlist>playlists(?:/\d+)?) # We have a playlist   20              ((?P<type_talk>talks)) # We have a simple talk   22              (?P<type_watch>watch)/[^/]+/[^/]+   24          (/lang/(.*?))? # The url may contain the language   25          /(?P<name>[\w-]+) # Here goes the name and then ".html"   29          'url' :  'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' ,   30          'md5' :  'fc94ac279feebbce69f21c0c6ee82810' ,   34              'title' :  'The illusion of consciousness' ,   35              'description' : ( 'Philosopher Dan Dennett makes a compelling '   36                              'argument that not only don \' t we understand our own '   37                              'consciousness, but that half the time our brains are '   38                              'actively fooling us.' ),   39              'uploader' :  'Dan Dennett' ,   44          'url' :  'http://www.ted.com/watch/ted-institute/ted-bcg/vishal-sikka-the-beauty-and-power-of-algorithms' ,   45          'md5' :  '226f4fb9c62380d11b7995efa4c87994' ,   47              'id' :  'vishal-sikka-the-beauty-and-power-of-algorithms' ,   49              'title' :  'Vishal Sikka: The beauty and power of algorithms' ,   50              'thumbnail' :  're:^https?://.+\.jpg' ,   51              'description' :  'Adaptive, intelligent, and consistent, algorithms are emerging as the ultimate app for everything from matching consumers to products to assessing medical diagnoses. Vishal Sikka shares his appreciation for the algorithm, charting both its inherent beauty and its growing power.' ,   54          'url' :  'http://www.ted.com/talks/gabby_giffords_and_mark_kelly_be_passionate_be_courageous_be_your_best' ,   58              'title' :  'Be passionate. Be courageous. Be your best.' ,   59              'uploader' :  'Gabby Giffords and Mark Kelly' ,   60              'description' :  'md5:5174aed4d0f16021b704120360f72b92' ,   64          'url' :  'http://www.ted.com/playlists/who_are_the_hackers' ,   67              'title' :  'Who are the hackers?' ,   69          'playlist_mincount' :  6 ,   71          # contains a youtube video   72          'url' :  'https://www.ted.com/talks/douglas_adams_parrots_the_universe_and_everything' ,   73          'add_ie' : [ 'Youtube' ],   77              'title' :  'Douglas Adams: Parrots the Universe and Everything' ,   78              'description' :  'md5:01ad1e199c49ac640cb1196c0e9016af' ,   79              'uploader' :  'University of California Television (UCTV)' ,   80              'uploader_id' :  'UCtelevision' ,   81              'upload_date' :  '20080522' ,   84              'skip_download' :  True ,   88          'url' :  'http://www.ted.com/talks/jeffrey_kluger_the_sibling_bond' ,   89          'add_ie' : [ 'Youtube' ],   93              'title' :  'The hidden power of siblings: Jeff Kluger at TEDxAsheville' ,   94              'description' :  'md5:3d7a4f50d95ca5dd67104e2a20f43fe1' ,   95              'uploader' :  'TEDx Talks' ,   96              'uploader_id' :  'TEDxTalks' ,   97              'upload_date' :  '20111216' ,  100              'skip_download' :  True ,  105          'low' : { 'preference' :  1 ,  'width' :  320 ,  'height' :  180 },  106          'medium' : { 'preference' :  2 ,  'width' :  512 ,  'height' :  288 },  107          'high' : { 'preference' :  3 ,  'width' :  854 ,  'height' :  480 },  110      def  _extract_info ( self
,  webpage
):  111          info_json 
=  self
._ search
_ regex
( r
'q\("\w+.init",({.+})\)</script>' ,  112                                         webpage
,  'info json' )  113          return  json
. loads ( info_json
)  115      def  _real_extract ( self
,  url
):  116          m 
=  re
. match ( self
._ VALID
_U RL
,  url
,  re
. VERBOSE
)  117          if  m
. group ( 'type' ). startswith ( 'embed' ):  118              desktop_url 
=  m
. group ( 'proto' ) +  'www'  +  m
. group ( 'urlmain' )  119              return  self
. url_result ( desktop_url
,  'TED' )  120          name 
=  m
. group ( 'name' )  121          if  m
. group ( 'type_talk' ):  122              return  self
._ talk
_ info
( url
,  name
)  123          elif  m
. group ( 'type_watch' ):  124              return  self
._ watch
_ info
( url
,  name
)  126              return  self
._ playlist
_ videos
_ info
( url
,  name
)  128      def  _playlist_videos_info ( self
,  url
,  name
):  129          '''Returns the videos of the playlist'''  131          webpage 
=  self
._ download
_ webpage
( url
,  name
,  132                                           'Downloading playlist webpage' )  133          info 
=  self
._ extract
_ info
( webpage
)  134          playlist_info 
=  info
[ 'playlist' ]  137              self
. url_result ( 'http://www.ted.com/talks/'  +  talk
[ 'slug' ],  self
. ie_key ())  138              for  talk 
in  info
[ 'talks' ]  140          return  self
. playlist_result (  142              playlist_id
= compat_str ( playlist_info
[ 'id' ]),  143              playlist_title
= playlist_info
[ 'title' ])  145      def  _talk_info ( self
,  url
,  video_name
):  146          webpage 
=  self
._ download
_ webpage
( url
,  video_name
)  147          self
. report_extraction ( video_name
)  149          talk_info 
=  self
._ extract
_ info
( webpage
)[ 'talks' ][ 0 ]  151          external 
=  talk_info
. get ( 'external' )  153              service 
=  external
[ 'service' ]  154              self
. to_screen ( 'Found video from  %s '  %  service
)  156              if  service
. lower () ==  'youtube' :  157                  ext_url 
=  external
. get ( 'code' )  160                  'url' :  ext_url 
or  external
[ 'uri' ],  165              'format_id' :  format_id
,  167          }  for  ( format_id
,  format_url
)  in  talk_info
[ 'nativeDownloads' ]. items ()  if  format_url 
is not None ]  170                  finfo 
=  self
._ NATIVE
_ FORMATS
. get ( f
[ 'format_id' ])  174          for  format_id
,  resources 
in  talk_info
[ 'resources' ]. items ():  175              if  format_id 
==  'h264' :  176                  for  resource 
in  resources
:  177                      bitrate 
=  int_or_none ( resource
. get ( 'bitrate' ))  179                          'url' :  resource
[ 'file' ],  180                          'format_id' :  ' %s-%s k'  % ( format_id
,  bitrate
),  183              elif  format_id 
==  'rtmp' :  184                  streamer 
=  talk_info
. get ( 'streamer' )  187                  for  resource 
in  resources
:  189                          'format_id' :  ' %s-%s '  % ( format_id
,  resource
. get ( 'name' )),  191                          'play_path' :  resource
[ 'file' ],  193                          'width' :  int_or_none ( resource
. get ( 'width' )),  194                          'height' :  int_or_none ( resource
. get ( 'height' )),  195                          'tbr' :  int_or_none ( resource
. get ( 'bitrate' )),  197              elif  format_id 
==  'hls' :  198                  hls_formats 
=  self
._ extract
_ m
3u8_ formats
(  199                      resources
. get ( 'stream' ),  video_name
,  'mp4' ,  m3u8_id
= format_id
)  200                  for  f 
in  hls_formats
:  201                      if  f
. get ( 'format_id' ) ==  'hls-meta' :  203                      if not  f
. get ( 'height' ):  207                  formats
. extend ( hls_formats
)  209          audio_download 
=  talk_info
. get ( 'audioDownload' )  212                  'url' :  audio_download
,  213                  'format_id' :  'audio' ,  218          self
._ sort
_ formats
( formats
)  220          video_id 
=  compat_str ( talk_info
[ 'id' ])  222          thumbnail 
=  talk_info
[ 'thumb' ]  223          if not  thumbnail
. startswith ( 'http' ):  224              thumbnail 
=  'http://'  +  thumbnail
 227              'title' :  talk_info
[ 'title' ]. strip (),  228              'uploader' :  talk_info
[ 'speaker' ],  229              'thumbnail' :  thumbnail
,  230              'description' :  self
._ og
_ search
_ description
( webpage
),  231              'subtitles' :  self
._ get
_ subtitles
( video_id
,  talk_info
),  233              'duration' :  talk_info
. get ( 'duration' ),  236      def  _get_subtitles ( self
,  video_id
,  talk_info
):  237          languages 
= [ lang
[ 'languageCode' ]  for  lang 
in  talk_info
. get ( 'languages' , [])]  243                          'url' :  'http://www.ted.com/talks/subtitles/id/ %s /lang/ %s /format/ %s '  % ( video_id
,  l
,  ext
),  246                      for  ext 
in  [ 'ted' ,  'srt' ]  252      def  _watch_info ( self
,  url
,  name
):  253          webpage 
=  self
._ download
_ webpage
( url
,  name
)  255          config_json 
=  self
._ html
_ search
_ regex
(  256              r
'"pages\.jwplayer"\s*,\s*({.+?})\s*\)\s*</script>' ,  258          config 
=  json
. loads ( config_json
)[ 'config' ]  259          video_url 
=  config
[ 'video' ][ 'url' ]  260          thumbnail 
=  config
. get ( 'image' , {}). get ( 'url' )  262          title 
=  self
._ html
_ search
_ regex
(  263              r
"(?s)<h1(?:\s+class='[^']+')?>(.+?)</h1>" ,  webpage
,  'title' )  264          description 
=  self
._ html
_ search
_ regex
(  266                  r
'(?s)<h4 class="[^"]+" id="h3--about-this-talk">.*?</h4>(.*?)</div>' ,  267                  r
'(?s)<p><strong>About this talk:</strong>\s+(.*?)</p>' ,  269              webpage
,  'description' ,  fatal
= False )  275              'thumbnail' :  thumbnail
,  276              'description' :  description
,