]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/huffpost.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
12 class HuffPostIE ( InfoExtractor
):
13 IE_DESC
= 'Huffington Post'
15 https?://(embed\.)?live\.huffingtonpost\.com/
18 HPLEmbedPlayer/\?segmentId=
23 'url' : 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677' ,
24 'md5' : '55f5e8981c1c80a64706a44b74833de8' ,
26 'id' : '52dd3e4b02a7602131000677' ,
28 'title' : 'Legalese It! with @MikeSacksHP' ,
29 'description' : 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG \' s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC \' s delay of Kenya \' s President and more. ' ,
31 'upload_date' : '20140124' ,
35 def _real_extract ( self
, url
):
36 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
37 video_id
= mobj
. group ( 'id' )
39 api_url
= 'http://embed.live.huffingtonpost.com/api/segments/ %s .json' % video_id
40 data
= self
._ download
_ json
( api_url
, video_id
)[ 'data' ]
42 video_title
= data
[ 'title' ]
43 duration
= parse_duration ( data
[ 'running_time' ])
44 upload_date
= unified_strdate ( data
[ 'schedule' ][ 'starts_at' ])
45 description
= data
. get ( 'description' )
48 for url
in data
[ 'images' ]. values ():
49 m
= re
. match ( '.*-([0-9]+x[0-9]+)\.' , url
)
54 'resolution' : m
. group ( 1 ),
59 'format_id' : key
. replace ( '/' , '.' ),
62 'vcodec' : 'none' if key
. startswith ( 'audio/' ) else None ,
63 } for key
, url
in data
[ 'sources' ][ 'live' ]. items ()]
64 if data
. get ( 'fivemin_id' ):
65 fid
= data
[ 'fivemin_id' ]
66 fcat
= str ( int ( fid
) // 100 + 1 )
67 furl
= 'http://avideos.5min.com/2/' + fcat
[- 3 :] + '/' + fcat
+ '/' + fid
+ '.mp4'
73 self
._ sort
_ formats
( formats
)
78 'description' : description
,
81 'upload_date' : upload_date
,
82 'thumbnails' : thumbnails
,