]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/washingtonpost.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
12 class WashingtonPostIE ( InfoExtractor
):
13 _VALID_URL
= r
'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
15 'url' : 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/' ,
17 'title' : 'Sinkhole of bureaucracy' ,
20 'md5' : '79132cc09ec5309fa590ae46e4cc31bc' ,
22 'id' : 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f' ,
24 'title' : 'Breaking Points: The Paper Mine' ,
26 'description' : 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.' ,
27 'uploader' : 'The Washington Post' ,
28 'timestamp' : 1395527908 ,
29 'upload_date' : '20140322' ,
32 'md5' : 'e1d5734c06865cc504ad99dc2de0d443' ,
34 'id' : '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f' ,
36 'title' : 'The town bureaucracy sustains' ,
37 'description' : 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it \' s like to do paperwork 230 feet underground.' ,
39 'timestamp' : 1395528005 ,
40 'upload_date' : '20140322' ,
41 'uploader' : 'The Washington Post' ,
46 def _real_extract ( self
, url
):
47 page_id
= self
._ match
_ id
( url
)
48 webpage
= self
._ download
_ webpage
( url
, page_id
)
50 title
= self
._ og
_ search
_ title
( webpage
)
51 uuids
= re
. findall ( r
'data-video-uuid="([^"]+)"' , webpage
)
53 for i
, uuid
in enumerate ( uuids
, start
= 1 ):
54 vinfo_all
= self
._ download
_ json
(
55 'http://www.washingtonpost.com/posttv/c/videojson/ %s ?resType=jsonp' % uuid
,
57 transform_source
= strip_jsonp
,
58 note
= 'Downloading information of video %d / %d ' % ( i
, len ( uuids
))
60 vinfo
= vinfo_all
[ 0 ][ 'contentConfig' ]
61 uploader
= vinfo
. get ( 'credits' , {}). get ( 'source' )
62 timestamp
= int_or_none (
63 vinfo
. get ( 'dateConfig' , {}). get ( 'dateFirstPublished' ), 1000 )
67 ' %s-%s-%s ' % ( s
. get ( 'type' ), s
. get ( 'width' ), s
. get ( 'bitrate' ))
70 'vbr' : s
. get ( 'bitrate' ) if s
. get ( 'width' ) != 0 else None ,
71 'width' : s
. get ( 'width' ),
72 'height' : s
. get ( 'height' ),
73 'acodec' : s
. get ( 'audioCodec' ),
74 'vcodec' : s
. get ( 'videoCodec' ) if s
. get ( 'width' ) != 0 else 'none' ,
75 'filesize' : s
. get ( 'fileSize' ),
82 } for s
in vinfo
. get ( 'streams' , [])]
83 source_media_url
= vinfo
. get ( 'sourceMediaURL' )
86 'format_id' : 'source_media' ,
87 'url' : source_media_url
,
89 self
._ sort
_ formats
( formats
)
92 'title' : vinfo
[ 'title' ],
93 'description' : vinfo
. get ( 'blurb' ),
96 'duration' : int_or_none ( vinfo
. get ( 'videoDuration' ), 100 ),
97 'timestamp' : timestamp
,