]>
 
 
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/huffpost.py 
 
 
 
 
 
 
 
 
   1  from  __future__ 
import  unicode_literals
 
   5  from  . common 
import  InfoExtractor
 
  13  class  HuffPostIE ( InfoExtractor
):  
  14      IE_DESC 
=  'Huffington Post'  
  16          https?://(embed\.)?live\.huffingtonpost\.com/  
  19              HPLEmbedPlayer/\?segmentId=  
  24          'url' :  'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677' ,  
  25          'md5' :  '55f5e8981c1c80a64706a44b74833de8' ,  
  27              'id' :  '52dd3e4b02a7602131000677' ,  
  29              'title' :  'Legalese It! with @MikeSacksHP' ,  
  30              'description' :  'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG \' s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC \' s delay of Kenya \' s President and more.  ' ,  
  32              'upload_date' :  '20140124' ,  
  36              'skip_download' :  True ,  
  38          'expected_warnings' : [ 'HTTP Error 404: Not Found' ],  
  41      def  _real_extract ( self
,  url
):  
  42          video_id 
=  self
._ match
_ id
( url
)  
  44          api_url 
=  'http://embed.live.huffingtonpost.com/api/segments/ %s .json'  %  video_id
 
  45          data 
=  self
._ download
_ json
( api_url
,  video_id
)[ 'data' ]  
  47          video_title 
=  data
[ 'title' ]  
  48          duration 
=  parse_duration ( data
. get ( 'running_time' ))  
  49          upload_date 
=  unified_strdate (  
  50              data
. get ( 'schedule' , {}). get ( 'starts_at' )  or  data
. get ( 'segment_start_date_time' ))  
  51          description 
=  data
. get ( 'description' )  
  54          for  url 
in  filter ( None ,  data
[ 'images' ]. values ()):  
  55              m 
=  re
. match ( '.*-([0-9]+x[0-9]+)\.' ,  url
)  
  60                  'resolution' :  m
. group ( 1 ),  
  64          sources 
=  data
. get ( 'sources' , {})  
  65          live_sources 
=  list ( sources
. get ( 'live' , {}). items ()) +  list ( sources
. get ( 'live_again' , {}). items ())  
  66          for  key
,  url 
in  live_sources
:  
  67              ext 
=  determine_ext ( url
)  
  69                  formats
. extend ( self
._ extract
_ m
3u8_ formats
(  
  70                      url
,  video_id
,  ext
= 'mp4' ,  m3u8_id
= 'hls' ,  fatal
= False ))  
  72                  formats
. extend ( self
._ extract
_ f
4 m
_ formats
(  
  73                      url 
+  '?hdcore=2.9.5' ,  video_id
,  f4m_id
= 'hds' ,  fatal
= False ))  
  77                      'format_id' :  key
. replace ( '/' ,  '.' ),  
  80                      'vcodec' :  'none'  if  key
. startswith ( 'audio/' )  else None ,  
  83          if not  formats 
and  data
. get ( 'fivemin_id' ):  
  84              return  self
. url_result ( '5min: %s '  %  data
[ 'fivemin_id' ])  
  86          self
._ sort
_ formats
( formats
)  
  91              'description' :  description
,  
  94              'upload_date' :  upload_date
,  
  95              'thumbnails' :  thumbnails
,