]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/washingtonpost.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
12 class WashingtonPostIE ( InfoExtractor
):
13 _VALID_URL
= r
'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
15 'url' : 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/' ,
17 'title' : 'Sinkhole of bureaucracy' ,
20 'md5' : 'c3f4b4922ffa259243f68e928db2db8c' ,
22 'id' : 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f' ,
24 'title' : 'Breaking Points: The Paper Mine' ,
26 'description' : 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.' ,
27 'uploader' : 'The Washington Post' ,
28 'timestamp' : 1395527908 ,
29 'upload_date' : '20140322' ,
32 'md5' : 'f645a07652c2950cd9134bb852c5f5eb' ,
34 'id' : '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f' ,
36 'title' : 'The town bureaucracy sustains' ,
37 'description' : 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it \' s like to do paperwork 230 feet underground.' ,
39 'timestamp' : 1395528005 ,
40 'upload_date' : '20140322' ,
41 'uploader' : 'The Washington Post' ,
46 def _real_extract ( self
, url
):
47 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
48 page_id
= mobj
. group ( 'id' )
50 webpage
= self
._ download
_ webpage
( url
, page_id
)
51 title
= self
._ og
_ search
_ title
( webpage
)
52 uuids
= re
. findall ( r
'data-video-uuid="([^"]+)"' , webpage
)
54 for i
, uuid
in enumerate ( uuids
, start
= 1 ):
55 vinfo_all
= self
._ download
_ json
(
56 'http://www.washingtonpost.com/posttv/c/videojson/ %s ?resType=jsonp' % uuid
,
58 transform_source
= strip_jsonp
,
59 note
= 'Downloading information of video %d / %d ' % ( i
, len ( uuids
))
61 vinfo
= vinfo_all
[ 0 ][ 'contentConfig' ]
62 uploader
= vinfo
. get ( 'credits' , {}). get ( 'source' )
63 timestamp
= int_or_none (
64 vinfo
. get ( 'dateConfig' , {}). get ( 'dateFirstPublished' ), 1000 )
68 ' %s-%s-%s ' % ( s
. get ( 'type' ), s
. get ( 'width' ), s
. get ( 'bitrate' ))
71 'vbr' : s
. get ( 'bitrate' ) if s
. get ( 'width' ) != 0 else None ,
72 'width' : s
. get ( 'width' ),
73 'height' : s
. get ( 'height' ),
74 'acodec' : s
. get ( 'audioCodec' ),
75 'vcodec' : s
. get ( 'videoCodec' ) if s
. get ( 'width' ) != 0 else 'none' ,
76 'filesize' : s
. get ( 'fileSize' ),
83 } for s
in vinfo
. get ( 'streams' , [])]
84 source_media_url
= vinfo
. get ( 'sourceMediaURL' )
87 'format_id' : 'source_media' ,
88 'url' : source_media_url
,
90 self
._ sort
_ formats
( formats
)
93 'title' : vinfo
[ 'title' ],
94 'description' : vinfo
. get ( 'blurb' ),
97 'duration' : int_or_none ( vinfo
. get ( 'videoDuration' ), 100 ),
98 'timestamp' : timestamp
,