# coding: utf-8
# youtube_dl/extractor/washingtonpost.py
from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    strip_jsonp,
)
  12  class  WashingtonPostIE ( InfoExtractor
):   13      _VALID_URL 
=  r
'^https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'   15          'url' :  'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/' ,   17              'md5' :  'c3f4b4922ffa259243f68e928db2db8c' ,   19                  'id' :  'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f' ,   21                  'title' :  'Breaking Points: The Paper Mine' ,   23                  'description' :  'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.' ,   24                  'uploader' :  'The Washington Post' ,   25                  'timestamp' :  1395527908 ,   26                  'upload_date' :  '20140322' ,   29              'md5' :  'f645a07652c2950cd9134bb852c5f5eb' ,   31                  'id' :  '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f' ,   33                  'title' :  'The town bureaucracy sustains' ,   34                  'description' :  'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it \' s like to do paperwork 230 feet underground.' ,   36                  'timestamp' :  1395528005 ,   37                  'upload_date' :  '20140322' ,   38                  'uploader' :  'The Washington Post' ,   43      def  _real_extract ( self
,  url
):   44          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)   45          page_id 
=  mobj
. group ( 'id' )   47          webpage 
=  self
._ download
_ webpage
( url
,  page_id
)   48          title 
=  self
._ og
_ search
_ title
( webpage
)   49          uuids 
=  re
. findall ( r
'data-video-uuid="([^"]+)"' ,  webpage
)   51          for  i
,  uuid 
in  enumerate ( uuids
,  start
= 1 ):   52              vinfo_all 
=  self
._ download
_ json
(   53                  'http://www.washingtonpost.com/posttv/c/videojson/ %s ?resType=jsonp'  %  uuid
,   55                  transform_source
= strip_jsonp
,   56                  note
= 'Downloading information of video  %d / %d '  % ( i
,  len ( uuids
))   58              vinfo 
=  vinfo_all
[ 0 ][ 'contentConfig' ]   59              uploader 
=  vinfo
. get ( 'credits' , {}). get ( 'source' )   60              timestamp 
=  int_or_none (   61                  vinfo
. get ( 'dateConfig' , {}). get ( 'dateFirstPublished' ),  1000 )   65                      ' %s-%s-%s '  % ( s
. get ( 'type' ),  s
. get ( 'width' ),  s
. get ( 'bitrate' ))   68                  'vbr' :  s
. get ( 'bitrate' )  if  s
. get ( 'width' ) !=  0  else None ,   69                  'width' :  s
. get ( 'width' ),   70                  'height' :  s
. get ( 'height' ),   71                  'acodec' :  s
. get ( 'audioCodec' ),   72                  'vcodec' :  s
. get ( 'videoCodec' )  if  s
. get ( 'width' ) !=  0  else  'none' ,   73                  'filesize' :  s
. get ( 'fileSize' ),   80              }  for  s 
in  vinfo
. get ( 'streams' , [])]   81              source_media_url 
=  vinfo
. get ( 'sourceMediaURL' )   84                      'format_id' :  'source_media' ,   85                      'url' :  source_media_url
,   87              self
._ sort
_ formats
( formats
)   90                  'title' :  vinfo
[ 'title' ],   91                  'description' :  vinfo
. get ( 'blurb' ),   94                  'duration' :  int_or_none ( vinfo
. get ( 'videoDuration' ),  100 ),   95                  'timestamp' :  timestamp
,