]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/nytimes.py
1 from __future__
import unicode_literals
3 from . common
import InfoExtractor
11 class NYTimesBaseIE ( InfoExtractor
):
12 def _extract_video_from_id ( self
, video_id
):
13 video_data
= self
._ download
_ json
(
14 'http://www.nytimes.com/svc/video/api/v2/video/ %s ' % video_id
,
15 video_id
, 'Downloading video JSON' )
17 title
= video_data
[ 'headline' ]
18 description
= video_data
. get ( 'summary' )
19 duration
= float_or_none ( video_data
. get ( 'duration' ), 1000 )
21 uploader
= video_data
. get ( 'byline' )
22 publication_date
= video_data
. get ( 'publication_date' )
23 timestamp
= parse_iso8601 ( publication_date
[:- 8 ]) if publication_date
else None
25 def get_file_size ( file_size
):
26 if isinstance ( file_size
, int ):
28 elif isinstance ( file_size
, dict ):
29 return int ( file_size
. get ( 'value' , 0 ))
36 'format_id' : video
. get ( 'type' ),
37 'vcodec' : video
. get ( 'video_codec' ),
38 'width' : int_or_none ( video
. get ( 'width' )),
39 'height' : int_or_none ( video
. get ( 'height' )),
40 'filesize' : get_file_size ( video
. get ( 'fileSize' )),
41 } for video
in video_data
[ 'renditions' ] if video
. get ( 'url' )
43 self
._ sort
_ formats
( formats
)
47 'url' : 'http://www.nytimes.com/ %s ' % image
[ 'url' ],
48 'width' : int_or_none ( image
. get ( 'width' )),
49 'height' : int_or_none ( image
. get ( 'height' )),
50 } for image
in video_data
. get ( 'images' , []) if image
. get ( 'url' )
56 'description' : description
,
57 'timestamp' : timestamp
,
61 'thumbnails' : thumbnails
,
65 class NYTimesIE ( NYTimesBaseIE
):
66 _VALID_URL
= r
'https?://(?:(?:www\.)?nytimes\.com/video/(?:[^/]+/)+?|graphics8\.nytimes\.com/bcvideo/\d+(?:\.\d+)?/iframe/embed\.html\?videoId=)(?P<id>\d+)'
69 'url' : 'http://www.nytimes.com/video/opinion/100000002847155/verbatim-what-is-a-photocopier.html?playlistId=100000001150263' ,
70 'md5' : '18a525a510f942ada2720db5f31644c0' ,
72 'id' : '100000002847155' ,
74 'title' : 'Verbatim: What Is a Photocopier?' ,
75 'description' : 'md5:93603dada88ddbda9395632fdc5da260' ,
76 'timestamp' : 1398631707 ,
77 'upload_date' : '20140427' ,
78 'uploader' : 'Brett Weiner' ,
82 'url' : 'http://www.nytimes.com/video/travel/100000003550828/36-hours-in-dubai.html' ,
83 'only_matching' : True ,
86 def _real_extract ( self
, url
):
87 video_id
= self
._ match
_ id
( url
)
89 return self
._ extract
_ video
_ from
_ id
( video_id
)
92 class NYTimesArticleIE ( NYTimesBaseIE
):
93 _VALID_URL
= r
'https?://(?:www\.)?nytimes\.com/(.(?<!video))*?/(?:[^/]+/)*(?P<id>[^.]+)(?:\.html)?'
95 'url' : 'http://www.nytimes.com/2015/04/14/business/owner-of-gravity-payments-a-credit-card-processor-is-setting-a-new-minimum-wage-70000-a-year.html?_r=0' ,
96 'md5' : 'e2076d58b4da18e6a001d53fd56db3c9' ,
98 'id' : '100000003628438' ,
100 'title' : 'New Minimum Wage: $70,000 a Year' ,
101 'description' : 'Dan Price, C.E.O. of Gravity Payments, surprised his 120-person staff by announcing that he planned over the next three years to raise the salary of every employee to $70,000 a year.' ,
102 'timestamp' : 1429033037 ,
103 'upload_date' : '20150414' ,
104 'uploader' : 'Matthew Williams' ,
107 'url' : 'http://www.nytimes.com/news/minute/2014/03/17/times-minute-whats-next-in-crimea/?_php=true&_type=blogs&_php=true&_type=blogs&_r=1' ,
108 'only_matching' : True ,
111 def _real_extract ( self
, url
):
112 video_id
= self
._ match
_ id
( url
)
114 webpage
= self
._ download
_ webpage
( url
, video_id
)
116 video_id
= self
._ html
_ search
_ regex
( r
'data-videoid="(\d+)"' , webpage
, 'video id' )
118 return self
._ extract
_ video
_ from
_ id
( video_id
)