]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/huffpost.py
0d1ea6802503d60c5ec05033b7d0f3fefa638fbf
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
12 class HuffPostIE ( InfoExtractor
):
13 IE_DESC
= 'Huffington Post'
15 https?://(embed\.)?live\.huffingtonpost\.com/
18 HPLEmbedPlayer/\?segmentId=
23 'url' : 'http://live.huffingtonpost.com/r/segment/legalese-it/52dd3e4b02a7602131000677' ,
24 'file' : '52dd3e4b02a7602131000677.mp4' ,
25 'md5' : '55f5e8981c1c80a64706a44b74833de8' ,
27 'title' : 'Legalese It! with @MikeSacksHP' ,
28 'description' : 'This week on Legalese It, Mike talks to David Bosco about his new book on the ICC, "Rough Justice," he also discusses the Virginia AG \' s historic stance on gay marriage, the execution of Edgar Tamayo, the ICC \' s delay of Kenya \' s President and more. ' ,
30 'upload_date' : '20140124' ,
34 def _real_extract ( self
, url
):
35 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
36 video_id
= mobj
. group ( 'id' )
38 api_url
= 'http://embed.live.huffingtonpost.com/api/segments/ %s .json' % video_id
39 data
= self
._ download
_ json
( api_url
, video_id
)[ 'data' ]
41 video_title
= data
[ 'title' ]
42 duration
= parse_duration ( data
[ 'running_time' ])
43 upload_date
= unified_strdate ( data
[ 'schedule' ][ 'starts_at' ])
44 description
= data
. get ( 'description' )
47 for url
in data
[ 'images' ]. values ():
48 m
= re
. match ( '.*-([0-9]+x[0-9]+)\.' , url
)
53 'resolution' : m
. group ( 1 ),
58 'format_id' : key
. replace ( '/' , '.' ),
61 'vcodec' : 'none' if key
. startswith ( 'audio/' ) else None ,
62 } for key
, url
in data
[ 'sources' ][ 'live' ]. items ()]
63 if data
. get ( 'fivemin_id' ):
64 fid
= data
[ 'fivemin_id' ]
65 fcat
= str ( int ( fid
) // 100 + 1 )
66 furl
= 'http://avideos.5min.com/2/' + fcat
[- 3 :] + '/' + fcat
+ '/' + fid
+ '.mp4'
72 self
._ sort
_ formats
( formats
)
77 'description' : description
,
80 'upload_date' : upload_date
,
81 'thumbnails' : thumbnails
,