# Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nytimes.py
from __future__ import unicode_literals

from .common import InfoExtractor
# NOTE(review): float_or_none, int_or_none and parse_iso8601 are used below
# but no import for them is visible in this view - confirm they are imported
# (presumably from the package's utils module).
class NYTimesBaseIE(InfoExtractor):
    """Common base for the NYTimes extractors.

    Holds the logic that turns a numeric NYTimes video id into a result
    dictionary, so that subclasses only have to work out the id.
    """

    def _extract_video_from_id(self, video_id):
        """Download the JSON description of *video_id* and build its info dict.

        The 'headline', 'byline', 'publication_date', 'renditions' and
        'images' entries are read with plain subscripts, so the lookup fails
        if the response lacks any of them; 'summary' and 'duration' are
        optional and read with ``.get``.
        """
        video_data = self._download_json(
            'http://www.nytimes.com/svc/video/api/v2/video/%s' % video_id,
            video_id, 'Downloading video JSON')

        title = video_data['headline']
        description = video_data.get('summary')
        # 1000 is passed as the second argument of float_or_none
        # (presumably a scale turning milliseconds into seconds - confirm
        # against the helper's definition).
        duration = float_or_none(video_data.get('duration'), 1000)

        uploader = video_data['byline']
        # The last 8 characters of the publication date are cut off before
        # parsing (presumably a suffix parse_iso8601 cannot handle).
        timestamp = parse_iso8601(video_data['publication_date'][:-8])

        def get_file_size(file_size):
            """Normalise a rendition's 'fileSize', given as int or dict."""
            if isinstance(file_size, int):
                return file_size
            elif isinstance(file_size, dict):
                return int(file_size.get('value', 0))
            else:
                # NOTE(review): fallback branch restored; 0 mirrors the
                # default used for the dict form above - confirm.
                return 0

        formats = [
            {
                # NOTE(review): key name assumed to parallel image['url']
                # used for the thumbnails below - confirm.
                'url': video['url'],
                'format_id': video.get('type'),
                'vcodec': video.get('video_codec'),
                'width': int_or_none(video.get('width')),
                'height': int_or_none(video.get('height')),
                'filesize': get_file_size(video.get('fileSize')),
            } for video in video_data['renditions']
        ]
        self._sort_formats(formats)

        thumbnails = [
            {
                # Image URLs are joined onto the site root.
                'url': 'http://www.nytimes.com/%s' % image['url'],
                'width': int_or_none(image.get('width')),
                'height': int_or_none(image.get('height')),
            } for image in video_data['images']
        ]

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'uploader': uploader,
            'duration': duration,
            'formats': formats,
            'thumbnails': thumbnails,
        }
class NYTimesIE(NYTimesBaseIE):
    """Extractor for URLs that carry the numeric video id themselves.

    Matches nytimes.com/video/... pages and graphics8.nytimes.com
    bcvideo iframe embeds with a ``videoId=`` query parameter.
    """

    _VALID_URL = r'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'

    # NOTE(review): only the entries visible in this view are listed; the
    # nesting under 'info_dict' follows the usual extractor test layout -
    # confirm no expected fields are missing.
    _TESTS = [{
        'url': 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263',
        'md5': '18a525a510f942ada2720db5f31644c0',
        'info_dict': {
            'id': '100000002847155',
            'title': 'Verbatim: What Is a Photocopier?',
            'description': 'md5:93603dada88ddbda9395632fdc5da260',
            'timestamp': 1398631707,
            'upload_date': '20140427',
            'uploader': 'Brett Weiner',
        },
    }, {
        'url': 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Take the id matched from *url* and delegate to the base class."""
        video_id = self._match_id(url)

        return self._extract_video_from_id(video_id)
class NYTimesArticleIE(NYTimesBaseIE):
    """Extractor for nytimes.com article pages that embed a video.

    The URL only yields a page slug; the numeric video id is read from a
    ``data-videoid`` attribute in the downloaded page.
    """

    # The negative lookbehind stops the leading path from containing
    # "video" (presumably so /video/ URLs are left to the plain video
    # extractor - confirm).
    _VALID_URL = r'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'

    # NOTE(review): only the entries visible in this view are listed; the
    # nesting under 'info_dict' follows the usual extractor test layout -
    # confirm no expected fields are missing.
    _TESTS = [{
        'url': 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0',
        'md5': 'e2076d58b4da18e6a001d53fd56db3c9',
        'info_dict': {
            'id': '100000003628438',
            'title': 'New Minimum Wage: $70,000 a Year',
            'description': 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.',
            'timestamp': 1429033037,
            'upload_date': '20150414',
            'uploader': 'Matthew Williams',
        },
    }, {
        'url': 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the article, find the embedded video id and delegate."""
        # The id matched from the URL is only used for the page download.
        page_id = self._match_id(url)

        webpage = self._download_webpage(url, page_id)

        # The real video id is the digit run captured from data-videoid.
        video_id = self._html_search_regex(
            r'data-videoid="(\d+)"', webpage, 'video id')

        return self._extract_video_from_id(video_id)