]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/washingtonpost.py 
 
 
 
 
 
 
 
 
cb8f0887de292c2d36f5f2ac698e7321674e4b2d
   1  from  __future__ 
import  unicode_literals
 
   5  from  . common 
import  InfoExtractor
 
  12  class  WashingtonPostIE ( InfoExtractor
):  
  13      _VALID_URL 
=  r
'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'  
  15          'url' :  'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/' ,  
  17              'md5' :  'c3f4b4922ffa259243f68e928db2db8c' ,  
  19                  'id' :  'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f' ,  
  21                  'title' :  'Breaking Points: The Paper Mine' ,  
  23                  'description' :  'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.' ,  
  24                  'uploader' :  'The Washington Post' ,  
  25                  'timestamp' :  1395527908 ,  
  26                  'upload_date' :  '20140322' ,  
  29              'md5' :  'f645a07652c2950cd9134bb852c5f5eb' ,  
  31                  'id' :  '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f' ,  
  33                  'title' :  'The town bureaucracy sustains' ,  
  34                  'description' :  'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it \' s like to do paperwork 230 feet underground.' ,  
  36                  'timestamp' :  1395528005 ,  
  37                  'upload_date' :  '20140322' ,  
  38                  'uploader' :  'The Washington Post' ,  
  43      def  _real_extract ( self
,  url
):  
  44          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
  45          page_id 
=  mobj
. group ( 'id' )  
  47          webpage 
=  self
._ download
_ webpage
( url
,  page_id
)  
  48          title 
=  self
._ og
_ search
_ title
( webpage
)  
  49          uuids 
=  re
. findall ( r
'data-video-uuid="([^"]+)"' ,  webpage
)  
  51          for  i
,  uuid 
in  enumerate ( uuids
,  start
= 1 ):  
  52              vinfo_all 
=  self
._ download
_ json
(  
  53                  'http://www.washingtonpost.com/posttv/c/videojson/ %s ?resType=jsonp'  %  uuid
,  
  55                  transform_source
= strip_jsonp
,  
  56                  note
= 'Downloading information of video  %d / %d '  % ( i
,  len ( uuids
))  
  58              vinfo 
=  vinfo_all
[ 0 ][ 'contentConfig' ]  
  59              uploader 
=  vinfo
. get ( 'credits' , {}). get ( 'source' )  
  60              timestamp 
=  int_or_none (  
  61                  vinfo
. get ( 'dateConfig' , {}). get ( 'dateFirstPublished' ),  1000 )  
  65                      ' %s-%s-%s '  % ( s
. get ( 'type' ),  s
. get ( 'width' ),  s
. get ( 'bitrate' ))  
  68                  'vbr' :  s
. get ( 'bitrate' )  if  s
. get ( 'width' ) !=  0  else None ,  
  69                  'width' :  s
. get ( 'width' ),  
  70                  'height' :  s
. get ( 'height' ),  
  71                  'acodec' :  s
. get ( 'audioCodec' ),  
  72                  'vcodec' :  s
. get ( 'videoCodec' )  if  s
. get ( 'width' ) !=  0  else  'none' ,  
  73                  'filesize' :  s
. get ( 'fileSize' ),  
  80              }  for  s 
in  vinfo
. get ( 'streams' , [])]  
  81              source_media_url 
=  vinfo
. get ( 'sourceMediaURL' )  
  84                      'format_id' :  'source_media' ,  
  85                      'url' :  source_media_url
,  
  87              self
._ sort
_ formats
( formats
)  
  90                  'title' :  vinfo
[ 'title' ],  
  91                  'description' :  vinfo
. get ( 'blurb' ),  
  94                  'duration' :  int_or_none ( vinfo
. get ( 'videoDuration' ),  100 ),  
  95                  'timestamp' :  timestamp
,