]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/washingtonpost.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
12 class WashingtonPostIE ( InfoExtractor
):
13 _VALID_URL
= r
'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
15 'url' : 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/' ,
17 'md5' : 'c3f4b4922ffa259243f68e928db2db8c' ,
19 'id' : 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f' ,
21 'title' : 'Breaking Points: The Paper Mine' ,
23 'description' : 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.' ,
24 'uploader' : 'The Washington Post' ,
25 'timestamp' : 1395527908 ,
26 'upload_date' : '20140322' ,
29 'md5' : 'f645a07652c2950cd9134bb852c5f5eb' ,
31 'id' : '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f' ,
33 'title' : 'The town bureaucracy sustains' ,
34 'description' : 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it \' s like to do paperwork 230 feet underground.' ,
36 'timestamp' : 1395528005 ,
37 'upload_date' : '20140322' ,
38 'uploader' : 'The Washington Post' ,
43 def _real_extract ( self
, url
):
44 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
45 page_id
= mobj
. group ( 'id' )
47 webpage
= self
._ download
_ webpage
( url
, page_id
)
48 title
= self
._ og
_ search
_ title
( webpage
)
49 uuids
= re
. findall ( r
'data-video-uuid="([^"]+)"' , webpage
)
51 for i
, uuid
in enumerate ( uuids
, start
= 1 ):
52 vinfo_all
= self
._ download
_ json
(
53 'http://www.washingtonpost.com/posttv/c/videojson/ %s ?resType=jsonp' % uuid
,
55 transform_source
= strip_jsonp
,
56 note
= 'Downloading information of video %d / %d ' % ( i
, len ( uuids
))
58 vinfo
= vinfo_all
[ 0 ][ 'contentConfig' ]
59 uploader
= vinfo
. get ( 'credits' , {}). get ( 'source' )
60 timestamp
= int_or_none (
61 vinfo
. get ( 'dateConfig' , {}). get ( 'dateFirstPublished' ), 1000 )
65 ' %s-%s-%s ' % ( s
. get ( 'type' ), s
. get ( 'width' ), s
. get ( 'bitrate' ))
68 'vbr' : s
. get ( 'bitrate' ) if s
. get ( 'width' ) != 0 else None ,
69 'width' : s
. get ( 'width' ),
70 'height' : s
. get ( 'height' ),
71 'acodec' : s
. get ( 'audioCodec' ),
72 'vcodec' : s
. get ( 'videoCodec' ) if s
. get ( 'width' ) != 0 else 'none' ,
73 'filesize' : s
. get ( 'fileSize' ),
80 } for s
in vinfo
. get ( 'streams' , [])]
81 source_media_url
= vinfo
. get ( 'sourceMediaURL' )
84 'format_id' : 'source_media' ,
85 'url' : source_media_url
,
87 self
._ sort
_ formats
( formats
)
90 'title' : vinfo
[ 'title' ],
91 'description' : vinfo
. get ( 'blurb' ),
94 'duration' : int_or_none ( vinfo
. get ( 'videoDuration' ), 100 ),
95 'timestamp' : timestamp
,