]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/democracynow.py
6cd395e1169d8253c589efe3da2d24f1632b0356
2 from __future__
import unicode_literals
7 from . common
import InfoExtractor
8 from .. compat
import compat_urlparse
15 class DemocracynowIE ( InfoExtractor
):
16 _VALID_URL
= r
'https?://(?:www\.)?democracynow.org/(?P<id>[^\?]*)'
17 IE_NAME
= 'democracynow'
19 'url' : 'http://www.democracynow.org/shows/2015/7/3' ,
20 'md5' : 'fbb8fe3d7a56a5e12431ce2f9b2fab0d' ,
22 'id' : '2015-0703-001' ,
24 'title' : 'July 03, 2015 - Democracy Now!' ,
25 'description' : 'A daily independent global news hour with Amy Goodman & Juan González "What to the Slave is 4th of July?": James Earl Jones Reads Frederick Douglass\u2019 Historic Speech : "This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag : "We Shall Overcome": Remembering Folk Icon, Activist Pete Seeger in His Own Words & Songs' ,
28 'url' : 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree' ,
29 'md5' : 'fbb8fe3d7a56a5e12431ce2f9b2fab0d' ,
31 'id' : '2015-0703-001' ,
33 'title' : '"This Flag Comes Down Today": Bree Newsome Scales SC Capitol Flagpole, Takes Down Confederate Flag' ,
34 'description' : 'md5:4d2bc4f0d29f5553c2210a4bc7761a21' ,
38 def _real_extract ( self
, url
):
39 display_id
= self
._ match
_ id
( url
)
40 webpage
= self
._ download
_ webpage
( url
, display_id
)
41 description
= self
._ og
_ search
_ description
( webpage
)
43 json_data
= self
._ parse
_ json
( self
._ search
_ regex
(
44 r
'<script[^>]+type="text/json"[^>]*>\s*({[^>]+})' , webpage
, 'json' ),
53 def add_subtitle_item ( lang
, info_dict
):
54 if lang
not in subtitles
:
56 subtitles
[ lang
]. append ( info_dict
)
58 # chapter_file are not subtitles
59 if 'caption_file' in json_data
:
60 add_subtitle_item ( default_lang
, {
61 'url' : compat_urlparse
. urljoin ( url
, json_data
[ 'caption_file' ]),
64 for subtitle_item
in json_data
. get ( 'captions' , []):
65 lang
= subtitle_item
. get ( 'language' , '' ). lower () or default_lang
66 add_subtitle_item ( lang
, {
67 'url' : compat_urlparse
. urljoin ( url
, subtitle_item
[ 'url' ]),
70 for key
in ( 'file' , 'audio' , 'video' ):
71 media_url
= json_data
. get ( key
, '' )
74 media_url
= re
. sub ( r
'\?.*' , '' , compat_urlparse
. urljoin ( url
, media_url
))
75 video_id
= video_id
or remove_start ( os
. path
. splitext ( url_basename ( media_url
))[ 0 ], 'dn' )
80 self
._ sort
_ formats
( formats
)
83 'id' : video_id
or display_id
,
84 'title' : json_data
[ 'title' ],
85 'description' : description
,
86 'subtitles' : subtitles
,