]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nytimes.py 
 
 
 
 
 
 
 
 
   1  from  __future__ 
import  unicode_literals
 
   3  from  . common 
import  InfoExtractor
 
  11  class  NYTimesBaseIE ( InfoExtractor
):  
  12      def  _extract_video_from_id ( self
,  video_id
):  
  13          video_data 
=  self
._ download
_ json
(  
  14              'http://www.nytimes.com/svc/video/api/v2/video/ %s '  %  video_id
,  
  15              video_id
,  'Downloading video JSON' )  
  17          title 
=  video_data
[ 'headline' ]  
  18          description 
=  video_data
. get ( 'summary' )  
  19          duration 
=  float_or_none ( video_data
. get ( 'duration' ),  1000 )  
  21          uploader 
=  video_data
[ 'byline' ]  
  22          timestamp 
=  parse_iso8601 ( video_data
[ 'publication_date' ][:- 8 ])  
  24          def  get_file_size ( file_size
):  
  25              if  isinstance ( file_size
,  int ):  
  27              elif  isinstance ( file_size
,  dict ):  
  28                  return  int ( file_size
. get ( 'value' ,  0 ))  
  35                  'format_id' :  video
. get ( 'type' ),  
  36                  'vcodec' :  video
. get ( 'video_codec' ),  
  37                  'width' :  int_or_none ( video
. get ( 'width' )),  
  38                  'height' :  int_or_none ( video
. get ( 'height' )),  
  39                  'filesize' :  get_file_size ( video
. get ( 'fileSize' )),  
  40              }  for  video 
in  video_data
[ 'renditions' ]  
  42          self
._ sort
_ formats
( formats
)  
  46                  'url' :  'http://www.nytimes.com/ %s '  %  image
[ 'url' ],  
  47                  'width' :  int_or_none ( image
. get ( 'width' )),  
  48                  'height' :  int_or_none ( image
. get ( 'height' )),  
  49              }  for  image 
in  video_data
[ 'images' ]  
  55              'description' :  description
,  
  56              'timestamp' :  timestamp
,  
  60              'thumbnails' :  thumbnails
,  
  64  class  NYTimesIE ( NYTimesBaseIE
):  
  65      _VALID_URL 
=  r
'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'  
  68          'url' :  'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263' ,  
  69          'md5' :  '18a525a510f942ada2720db5f31644c0' ,  
  71              'id' :  '100000002847155' ,  
  73              'title' :  'Verbatim: What Is a Photocopier?' ,  
  74              'description' :  'md5:93603dada88ddbda9395632fdc5da260' ,  
  75              'timestamp' :  1398631707 ,  
  76              'upload_date' :  '20140427' ,  
  77              'uploader' :  'Brett Weiner' ,  
  81          'url' :  'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html' ,  
  82          'only_matching' :  True ,  
  85      def  _real_extract ( self
,  url
):  
  86          video_id 
=  self
._ match
_ id
( url
)  
  88          return  self
._ extract
_ video
_ from
_ id
( video_id
)  
  91  class  NYTimesArticleIE ( NYTimesBaseIE
):  
  92      _VALID_URL 
=  r
'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'  
  94          'url' :  'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0' ,  
  95          'md5' :  'e2076d58b4da18e6a001d53fd56db3c9' ,  
  97              'id' :  '100000003628438' ,  
  99              'title' :  'New Minimum Wage: $70,000 a Year' ,  
 100              'description' :  'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.' ,  
 101              'timestamp' :  1429033037 ,  
 102              'upload_date' :  '20150414' ,  
 103              'uploader' :  'Matthew Williams' ,  
 106          'url' :  'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1' ,  
 107          'only_matching' :  True ,  
 110      def  _real_extract ( self
,  url
):  
 111          video_id 
=  self
._ match
_ id
( url
)  
 113          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  
 115          video_id 
=  self
._ html
_ search
_ regex
( r
'data-videoid="(\d+)"' ,  webpage
,  'video id' )  
 117          return  self
._ extract
_ video
_ from
_ id
( video_id
)