]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/huffpost.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
12 class HuffPostIE ( InfoExtractor
):
13 IE_DESC
= 'Huffington Post'
15 https?://(embed\.)?live\.huffingtonpost\.com/
18 HPLEmbedPlayer/\?segmentId=
23 'url' : 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677' ,
24 'md5' : '55f5e8981c1c80a64706a44b74833de8' ,
26 'id' : '52dd3e4b02a7602131000677' ,
28 'title' : 'Legalese It! with @MikeSacksHP' ,
29 'description' : 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG \' s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC \' s delay of Kenya \' s President and more. ' ,
31 'upload_date' : '20140124' ,
35 def _real_extract ( self
, url
):
36 video_id
= self
._ match
_ id
( url
)
38 api_url
= 'http://embed.live.huffingtonpost.com/api/segments/ %s .json' % video_id
39 data
= self
._ download
_ json
( api_url
, video_id
)[ 'data' ]
41 video_title
= data
[ 'title' ]
42 duration
= parse_duration ( data
. get ( 'running_time' ))
43 upload_date
= unified_strdate (
44 data
. get ( 'schedule' , {}). get ( 'starts_at' ) or data
. get ( 'segment_start_date_time' ))
45 description
= data
. get ( 'description' )
48 for url
in data
[ 'images' ]. values ():
49 m
= re
. match ( '.*-([0-9]+x[0-9]+)\.' , url
)
54 'resolution' : m
. group ( 1 ),
59 'format_id' : key
. replace ( '/' , '.' ),
62 'vcodec' : 'none' if key
. startswith ( 'audio/' ) else None ,
63 } for key
, url
in data
. get ( 'sources' , {}). get ( 'live' , {}). items ()]
65 if not formats
and data
. get ( 'fivemin_id' ):
66 return self
. url_result ( '5min: %s ' % data
[ 'fivemin_id' ])
68 self
._ sort
_ formats
( formats
)
73 'description' : description
,
76 'upload_date' : upload_date
,
77 'thumbnails' : thumbnails
,