]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nytimes.py 
   1  from  __future__ 
import  unicode_literals
   3  from  . common 
import  InfoExtractor
  11  class  NYTimesBaseIE ( InfoExtractor
):   12      def  _extract_video_from_id ( self
,  video_id
):   13          video_data 
=  self
._ download
_ json
(   14              'http://www.nytimes.com/svc/video/api/v2/video/ %s '  %  video_id
,   15              video_id
,  'Downloading video JSON' )   17          title 
=  video_data
[ 'headline' ]   18          description 
=  video_data
. get ( 'summary' )   19          duration 
=  float_or_none ( video_data
. get ( 'duration' ),  1000 )   21          uploader 
=  video_data
[ 'byline' ]   22          timestamp 
=  parse_iso8601 ( video_data
[ 'publication_date' ][:- 8 ])   24          def  get_file_size ( file_size
):   25              if  isinstance ( file_size
,  int ):   27              elif  isinstance ( file_size
,  dict ):   28                  return  int ( file_size
. get ( 'value' ,  0 ))   35                  'format_id' :  video
. get ( 'type' ),   36                  'vcodec' :  video
. get ( 'video_codec' ),   37                  'width' :  int_or_none ( video
. get ( 'width' )),   38                  'height' :  int_or_none ( video
. get ( 'height' )),   39                  'filesize' :  get_file_size ( video
. get ( 'fileSize' )),   40              }  for  video 
in  video_data
[ 'renditions' ]   42          self
._ sort
_ formats
( formats
)   46                  'url' :  'http://www.nytimes.com/ %s '  %  image
[ 'url' ],   47                  'width' :  int_or_none ( image
. get ( 'width' )),   48                  'height' :  int_or_none ( image
. get ( 'height' )),   49              }  for  image 
in  video_data
[ 'images' ]   55              'description' :  description
,   56              'timestamp' :  timestamp
,   60              'thumbnails' :  thumbnails
,   64  class  NYTimesIE ( NYTimesBaseIE
):   65      _VALID_URL 
=  r
'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'   68          'url' :  'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263' ,   69          'md5' :  '18a525a510f942ada2720db5f31644c0' ,   71              'id' :  '100000002847155' ,   73              'title' :  'Verbatim: What Is a Photocopier?' ,   74              'description' :  'md5:93603dada88ddbda9395632fdc5da260' ,   75              'timestamp' :  1398631707 ,   76              'upload_date' :  '20140427' ,   77              'uploader' :  'Brett Weiner' ,   81          'url' :  'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html' ,   82          'only_matching' :  True ,   85      def  _real_extract ( self
,  url
):   86          video_id 
=  self
._ match
_ id
( url
)   88          return  self
._ extract
_ video
_ from
_ id
( video_id
)   91  class  NYTimesArticleIE ( NYTimesBaseIE
):   92      _VALID_URL 
=  r
'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'   94          'url' :  'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0' ,   95          'md5' :  'e2076d58b4da18e6a001d53fd56db3c9' ,   97              'id' :  '100000003628438' ,   99              'title' :  'New Minimum Wage: $70,000 a Year' ,  100              'description' :  'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.' ,  101              'timestamp' :  1429033037 ,  102              'upload_date' :  '20150414' ,  103              'uploader' :  'Matthew Williams' ,  106          'url' :  'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1' ,  107          'only_matching' :  True ,  110      def  _real_extract ( self
,  url
):  111          video_id 
=  self
._ match
_ id
( url
)  113          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  115          video_id 
=  self
._ html
_ search
_ regex
( r
'data-videoid="(\d+)"' ,  webpage
,  'video id' )  117          return  self
._ extract
_ video
_ from
_ id
( video_id
)