]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/ndtv.py
3 from . common
import InfoExtractor
4 from .. utils
import month_by_name
7 class NDTVIE ( InfoExtractor
):
8 _VALID_URL
= r
'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'
11 u
"url" : u
"http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710" ,
12 u
"file" : u
"300710.mp4" ,
13 u
"md5" : u
"39f992dbe5fb531c395d8bbedb1e5e88" ,
15 u
"title" : u
"NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal" ,
16 u
"description" : u
"In an exclusive interview to NDTV, Aam Aadmi Party's Arvind Kejriwal says it makes no difference to him that Rahul Gandhi said the Congress needs to learn from his party." ,
17 u
"upload_date" : u
"20131208" ,
19 u
"thumbnail" : u
"http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg" ,
23 def _real_extract ( self
, url
):
24 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
25 video_id
= mobj
. group ( 'id' )
27 webpage
= self
._ download
_ webpage
( url
, video_id
)
29 filename
= self
._ search
_ regex
(
30 r
"__filename='([^']+)'" , webpage
, u
'video filename' )
31 video_url
= ( u
'http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/ %s ' %
34 duration_str
= filename
= self
._ search
_ regex
(
35 r
"__duration='([^']+)'" , webpage
, u
'duration' , fatal
= False )
36 duration
= None if duration_str
is None else int ( duration_str
)
38 date_m
= re
. search ( r
'''(?x)
39 <p\s+class="vod_dateline">\s*
41 (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
45 if date_m
is not None :
46 month
= month_by_name ( date_m
. group ( 'monthname' ))
48 upload_date
= ' %s%0 2d %0 2d' % (
49 date_m
. group ( 'year' ), month
, int ( date_m
. group ( 'day' )))
51 description
= self
._ og
_ search
_ description
( webpage
)
52 READ_MORE
= u
' (Read more)'
53 if description
. endswith ( READ_MORE
):
54 description
= description
[:- len ( READ_MORE
)]
59 'title' : self
._ og
_ search
_ title
( webpage
),
60 'description' : description
,
61 'thumbnail' : self
._ og
_ search
_ thumbnail
( webpage
),
63 'upload_date' : upload_date
,