]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/democracynow.py 
 
 
 
 
 
 
 
 
   2  from  __future__ 
import  unicode_literals
 
   7  from  . common 
import  InfoExtractor
 
   8  from  .. compat 
import  compat_urlparse
 
  15  class  DemocracynowIE ( InfoExtractor
):  
  16      _VALID_URL 
=  r
'https?://(?:www\.)?democracynow.org/(?P<id>[^\?]*)'  
  17      IE_NAME 
=  'democracynow'  
  19          'url' :  'http://www.democracynow.org/shows/2015/7/3' ,  
  20          'md5' :  'fbb8fe3d7a56a5e12431ce2f9b2fab0d' ,  
  22              'id' :  '2015-0703-001' ,  
  24              'title' :  'July 03, 2015 - Democracy Now!' ,  
  25              'description' :  'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs' ,  
  28          'url' :  'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree' ,  
  29          'md5' :  'fbb8fe3d7a56a5e12431ce2f9b2fab0d' ,  
  31              'id' :  '2015-0703-001' ,  
  33              'title' :  '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag' ,  
  34              'description' :  'md5:4d2bc4f0d29f5553c2210a4bc7761a21' ,  
  38      def  _real_extract ( self
,  url
):  
  39          display_id 
=  self
._ match
_ id
( url
)  
  40          webpage 
=  self
._ download
_ webpage
( url
,  display_id
)  
  41          description 
=  self
._ og
_ search
_ description
( webpage
)  
  43          json_data 
=  self
._ parse
_ json
( self
._ search
_ regex
(  
  44              r
'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})' ,  webpage
,  'json' ),  
  53          def  add_subtitle_item ( lang
,  info_dict
):  
  54              if  lang 
not in  subtitles
:  
  56              subtitles
[ lang
]. append ( info_dict
)  
  58          # chapter_file are not subtitles  
  59          if  'caption_file'  in  json_data
:  
  60              add_subtitle_item ( default_lang
, {  
  61                  'url' :  compat_urlparse
. urljoin ( url
,  json_data
[ 'caption_file' ]),  
  64          for  subtitle_item 
in  json_data
. get ( 'captions' , []):  
  65              lang 
=  subtitle_item
. get ( 'language' ,  '' ). lower ()  or  default_lang
 
  66              add_subtitle_item ( lang
, {  
  67                  'url' :  compat_urlparse
. urljoin ( url
,  subtitle_item
[ 'url' ]),  
  70          for  key 
in  ( 'file' ,  'audio' ,  'video' ):  
  71              media_url 
=  json_data
. get ( key
,  '' )  
  74              media_url 
=  re
. sub ( r
'\?.*' ,  '' ,  compat_urlparse
. urljoin ( url
,  media_url
))  
  75              video_id 
=  video_id 
or  remove_start ( os
. path
. splitext ( url_basename ( media_url
))[ 0 ],  'dn' )  
  80          self
._ sort
_ formats
( formats
)  
  83              'id' :  video_id 
or  display_id
,  
  84              'title' :  json_data
[ 'title' ],  
  85              'description' :  description
,  
  86              'subtitles' :  subtitles
,