]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/washingtonpost.py
   1  from  __future__ 
import  unicode_literals
   5  from  . common 
import  InfoExtractor
  12  class  WashingtonPostIE ( InfoExtractor
):   13      _VALID_URL 
=  r
'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'   15          'url' :  'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/' ,   17              'title' :  'Sinkhole of bureaucracy' ,   20              'md5' :  '79132cc09ec5309fa590ae46e4cc31bc' ,   22                  'id' :  'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f' ,   24                  'title' :  'Breaking Points: The Paper Mine' ,   26                  'description' :  'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.' ,   27                  'uploader' :  'The Washington Post' ,   28                  'timestamp' :  1395527908 ,   29                  'upload_date' :  '20140322' ,   32              'md5' :  'e1d5734c06865cc504ad99dc2de0d443' ,   34                  'id' :  '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f' ,   36                  'title' :  'The town bureaucracy sustains' ,   37                  'description' :  'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it \' s like to do paperwork 230 feet underground.' ,   39                  'timestamp' :  1395528005 ,   40                  'upload_date' :  '20140322' ,   41                  'uploader' :  'The Washington Post' ,   46      def  _real_extract ( self
,  url
):   47          page_id 
=  self
._ match
_ id
( url
)   48          webpage 
=  self
._ download
_ webpage
( url
,  page_id
)   50          title 
=  self
._ og
_ search
_ title
( webpage
)   51          uuids 
=  re
. findall ( r
'data-video-uuid="([^"]+)"' ,  webpage
)   53          for  i
,  uuid 
in  enumerate ( uuids
,  start
= 1 ):   54              vinfo_all 
=  self
._ download
_ json
(   55                  'http://www.washingtonpost.com/posttv/c/videojson/ %s ?resType=jsonp'  %  uuid
,   57                  transform_source
= strip_jsonp
,   58                  note
= 'Downloading information of video  %d / %d '  % ( i
,  len ( uuids
))   60              vinfo 
=  vinfo_all
[ 0 ][ 'contentConfig' ]   61              uploader 
=  vinfo
. get ( 'credits' , {}). get ( 'source' )   62              timestamp 
=  int_or_none (   63                  vinfo
. get ( 'dateConfig' , {}). get ( 'dateFirstPublished' ),  1000 )   67                      ' %s-%s-%s '  % ( s
. get ( 'type' ),  s
. get ( 'width' ),  s
. get ( 'bitrate' ))   70                  'vbr' :  s
. get ( 'bitrate' )  if  s
. get ( 'width' ) !=  0  else None ,   71                  'width' :  s
. get ( 'width' ),   72                  'height' :  s
. get ( 'height' ),   73                  'acodec' :  s
. get ( 'audioCodec' ),   74                  'vcodec' :  s
. get ( 'videoCodec' )  if  s
. get ( 'width' ) !=  0  else  'none' ,   75                  'filesize' :  s
. get ( 'fileSize' ),   82              }  for  s 
in  vinfo
. get ( 'streams' , [])]   83              source_media_url 
=  vinfo
. get ( 'sourceMediaURL' )   86                      'format_id' :  'source_media' ,   87                      'url' :  source_media_url
,   89              self
._ sort
_ formats
( formats
)   92                  'title' :  vinfo
[ 'title' ],   93                  'description' :  vinfo
. get ( 'blurb' ),   96                  'duration' :  int_or_none ( vinfo
. get ( 'videoDuration' ),  100 ),   97                  'timestamp' :  timestamp
,