]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/washingtonpost.py
2 from __future__
import unicode_literals
6 from . common
import InfoExtractor
13 class WashingtonPostIE ( InfoExtractor
):
14 _VALID_URL
= r
'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'
16 'url' : 'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/' ,
18 'id' : 'sinkhole-of-bureaucracy' ,
19 'title' : 'Sinkhole of bureaucracy' ,
22 'md5' : '79132cc09ec5309fa590ae46e4cc31bc' ,
24 'id' : 'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f' ,
26 'title' : 'Breaking Points: The Paper Mine' ,
28 'description' : 'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.' ,
29 'uploader' : 'The Washington Post' ,
30 'timestamp' : 1395527908 ,
31 'upload_date' : '20140322' ,
34 'md5' : 'e1d5734c06865cc504ad99dc2de0d443' ,
36 'id' : '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f' ,
38 'title' : 'The town bureaucracy sustains' ,
39 'description' : 'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it \' s like to do paperwork 230 feet underground.' ,
41 'timestamp' : 1395528005 ,
42 'upload_date' : '20140322' ,
43 'uploader' : 'The Washington Post' ,
47 'url' : 'http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/31/one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear/' ,
49 'id' : 'one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear' ,
50 'title' : 'One airline figured out how to make sure its airplanes never disappear' ,
53 'md5' : 'a7c1b5634ba5e57a6a82cdffa5b1e0d0' ,
55 'id' : '0e4bb54c-9065-11e4-a66f-0ca5037a597d' ,
57 'description' : 'Washington Post transportation reporter Ashley Halsey III explains why a plane \' s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.' ,
58 'upload_date' : '20141230' ,
59 'uploader' : 'The Washington Post' ,
60 'timestamp' : 1419974765 ,
61 'title' : 'Why black boxes don’t transmit data in real time' ,
66 def _real_extract ( self
, url
):
67 page_id
= self
._ match
_ id
( url
)
68 webpage
= self
._ download
_ webpage
( url
, page_id
)
70 title
= self
._ og
_ search
_ title
( webpage
)
72 uuids
= re
. findall ( r
'''(?x)
74 <div\s+class="posttv-video-embed[^>]*?data-uuid=|
76 )"([^"]+)"''' , webpage
)
78 for i
, uuid
in enumerate ( uuids
, start
= 1 ):
79 vinfo_all
= self
._ download
_ json
(
80 'http://www.washingtonpost.com/posttv/c/videojson/ %s ?resType=jsonp' % uuid
,
82 transform_source
= strip_jsonp
,
83 note
= 'Downloading information of video %d / %d ' % ( i
, len ( uuids
))
85 vinfo
= vinfo_all
[ 0 ][ 'contentConfig' ]
86 uploader
= vinfo
. get ( 'credits' , {}). get ( 'source' )
87 timestamp
= int_or_none (
88 vinfo
. get ( 'dateConfig' , {}). get ( 'dateFirstPublished' ), 1000 )
92 ' %s-%s-%s ' % ( s
. get ( 'type' ), s
. get ( 'width' ), s
. get ( 'bitrate' ))
95 'vbr' : s
. get ( 'bitrate' ) if s
. get ( 'width' ) != 0 else None ,
96 'width' : s
. get ( 'width' ),
97 'height' : s
. get ( 'height' ),
98 'acodec' : s
. get ( 'audioCodec' ),
99 'vcodec' : s
. get ( 'videoCodec' ) if s
. get ( 'width' ) != 0 else 'none' ,
100 'filesize' : s
. get ( 'fileSize' ),
103 'preference' : - 100 if s
. get ( 'type' ) == 'smil' else None ,
107 }. get ( s
. get ( 'type' )),
108 } for s
in vinfo
. get ( 'streams' , [])]
109 source_media_url
= vinfo
. get ( 'sourceMediaURL' )
112 'format_id' : 'source_media' ,
113 'url' : source_media_url
,
115 self
._ sort
_ formats
( formats
)
118 'title' : vinfo
[ 'title' ],
119 'description' : vinfo
. get ( 'blurb' ),
120 'uploader' : uploader
,
122 'duration' : int_or_none ( vinfo
. get ( 'videoDuration' ), 100 ),
123 'timestamp' : timestamp
,