]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/huffpost.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
13 class HuffPostIE ( InfoExtractor
):
14 IE_DESC
= 'Huffington Post'
16 https?://(embed\.)?live\.huffingtonpost\.com/
19 HPLEmbedPlayer/\?segmentId=
24 'url' : 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677' ,
25 'md5' : '55f5e8981c1c80a64706a44b74833de8' ,
27 'id' : '52dd3e4b02a7602131000677' ,
29 'title' : 'Legalese It! with @MikeSacksHP' ,
30 'description' : 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG \' s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC \' s delay of Kenya \' s President and more. ' ,
32 'upload_date' : '20140124' ,
36 'skip_download' : True ,
38 'expected_warnings' : [ 'HTTP Error 404: Not Found' ],
41 def _real_extract ( self
, url
):
42 video_id
= self
._ match
_ id
( url
)
44 api_url
= 'http://embed.live.huffingtonpost.com/api/segments/ %s .json' % video_id
45 data
= self
._ download
_ json
( api_url
, video_id
)[ 'data' ]
47 video_title
= data
[ 'title' ]
48 duration
= parse_duration ( data
. get ( 'running_time' ))
49 upload_date
= unified_strdate (
50 data
. get ( 'schedule' , {}). get ( 'starts_at' ) or data
. get ( 'segment_start_date_time' ))
51 description
= data
. get ( 'description' )
54 for url
in filter ( None , data
[ 'images' ]. values ()):
55 m
= re
. match ( r
'.*-([0-9]+x[0-9]+)\.' , url
)
60 'resolution' : m
. group ( 1 ),
64 sources
= data
. get ( 'sources' , {})
65 live_sources
= list ( sources
. get ( 'live' , {}). items ()) + list ( sources
. get ( 'live_again' , {}). items ())
66 for key
, url
in live_sources
:
67 ext
= determine_ext ( url
)
69 formats
. extend ( self
._ extract
_ m
3u8_ formats
(
70 url
, video_id
, ext
= 'mp4' , m3u8_id
= 'hls' , fatal
= False ))
72 formats
. extend ( self
._ extract
_ f
4 m
_ formats
(
73 url
+ '?hdcore=2.9.5' , video_id
, f4m_id
= 'hds' , fatal
= False ))
77 'format_id' : key
. replace ( '/' , '.' ),
80 'vcodec' : 'none' if key
. startswith ( 'audio/' ) else None ,
83 if not formats
and data
. get ( 'fivemin_id' ):
84 return self
. url_result ( '5min: %s ' % data
[ 'fivemin_id' ])
86 self
._ sort
_ formats
( formats
)
91 'description' : description
,
94 'upload_date' : upload_date
,
95 'thumbnails' : thumbnails
,