]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/washingtonpost.py 
   2  from  __future__ 
import  unicode_literals
   6  from  . common 
import  InfoExtractor
  13  class  WashingtonPostIE ( InfoExtractor
):   14      _VALID_URL 
=  r
'https?://(?:www\.)?washingtonpost\.com/.*?/(?P<id>[^/]+)/(?:$|[?#])'   16          'url' :  'http://www.washingtonpost.com/sf/national/2014/03/22/sinkhole-of-bureaucracy/' ,   18              'id' :  'sinkhole-of-bureaucracy' ,   19              'title' :  'Sinkhole of bureaucracy' ,   22              'md5' :  'b9be794ceb56c7267d410a13f99d801a' ,   24                  'id' :  'fc433c38-b146-11e3-b8b3-44b1d1cd4c1f' ,   26                  'title' :  'Breaking Points: The Paper Mine' ,   28                  'description' :  'Overly complicated paper pushing is nothing new to government bureaucracy. But the way federal retirement applications are filed may be the most outdated. David Fahrenthold explains.' ,   29                  'uploader' :  'The Washington Post' ,   30                  'timestamp' :  1395527908 ,   31                  'upload_date' :  '20140322' ,   34              'md5' :  '1fff6a689d8770966df78c8cb6c8c17c' ,   36                  'id' :  '41255e28-b14a-11e3-b8b3-44b1d1cd4c1f' ,   38                  'title' :  'The town bureaucracy sustains' ,   39                  'description' :  'Underneath the friendly town of Boyers is a sea of government paperwork. In a disused limestone mine, hundreds of locals now track, file and process retirement applications for the federal government. We set out to find out what it \' s like to do paperwork 230 feet underground.' ,   41                  'timestamp' :  1395528005 ,   42                  'upload_date' :  '20140322' ,   43                  'uploader' :  'The Washington Post' ,   47          'url' :  'http://www.washingtonpost.com/blogs/wonkblog/wp/2014/12/31/one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear/' ,   49              'id' :  'one-airline-figured-out-how-to-make-sure-its-airplanes-never-disappear' ,   50              'title' :  'One airline figured out how to make sure its airplanes never disappear' ,   53              'md5' :  'a7c1b5634ba5e57a6a82cdffa5b1e0d0' ,   55                  'id' :  '0e4bb54c-9065-11e4-a66f-0ca5037a597d' ,   57                  'description' :  'Washington Post transportation reporter Ashley Halsey III explains why a plane \' s black box needs to be recovered from a crash site instead of having its information streamed in real time throughout the flight.' ,   58                  'upload_date' :  '20141230' ,   59                  'uploader' :  'The Washington Post' ,   60                  'timestamp' :  1419974765 ,   61                  'title' :  'Why black boxes don’t transmit data in real time' ,   66      def  _real_extract ( self
,  url
):   67          page_id 
=  self
._ match
_ id
( url
)   68          webpage 
=  self
._ download
_ webpage
( url
,  page_id
)   70          title 
=  self
._ og
_ search
_ title
( webpage
)   72          uuids 
=  re
. findall ( r
'''(?x)   74                  <div\s+class="posttv-video-embed[^>]*?data-uuid=|   76              )"([^"]+)"''' ,  webpage
)   78          for  i
,  uuid 
in  enumerate ( uuids
,  start
= 1 ):   79              vinfo_all 
=  self
._ download
_ json
(   80                  'http://www.washingtonpost.com/posttv/c/videojson/ %s ?resType=jsonp'  %  uuid
,   82                  transform_source
= strip_jsonp
,   83                  note
= 'Downloading information of video  %d / %d '  % ( i
,  len ( uuids
))   85              vinfo 
=  vinfo_all
[ 0 ][ 'contentConfig' ]   86              uploader 
=  vinfo
. get ( 'credits' , {}). get ( 'source' )   87              timestamp 
=  int_or_none (   88                  vinfo
. get ( 'dateConfig' , {}). get ( 'dateFirstPublished' ),  1000 )   92                      ' %s-%s-%s '  % ( s
. get ( 'type' ),  s
. get ( 'width' ),  s
. get ( 'bitrate' ))   95                  'vbr' :  s
. get ( 'bitrate' )  if  s
. get ( 'width' ) !=  0  else None ,   96                  'width' :  s
. get ( 'width' ),   97                  'height' :  s
. get ( 'height' ),   98                  'acodec' :  s
. get ( 'audioCodec' ),   99                  'vcodec' :  s
. get ( 'videoCodec' )  if  s
. get ( 'width' ) !=  0  else  'none' ,  100                  'filesize' :  s
. get ( 'fileSize' ),  103                  'preference' : - 100  if  s
. get ( 'type' ) ==  'smil'  else None ,  107                  }. get ( s
. get ( 'type' )),  108              }  for  s 
in  vinfo
. get ( 'streams' , [])]  109              source_media_url 
=  vinfo
. get ( 'sourceMediaURL' )  112                      'format_id' :  'source_media' ,  113                      'url' :  source_media_url
,  115              self
._ sort
_ formats
( formats
)  118                  'title' :  vinfo
[ 'title' ],  119                  'description' :  vinfo
. get ( 'blurb' ),  120                  'uploader' :  uploader
,  122                  'duration' :  int_or_none ( vinfo
. get ( 'videoDuration' ),  100 ),  123                  'timestamp' :  timestamp
,