]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py
483cc6f9e62da3bc272ba66efc540b95c17116e7
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
15 class InternetVideoArchiveIE ( InfoExtractor
):
16 _VALID_URL
= r
'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
19 'url' : 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247' ,
24 'description' : 'In SKYFALL, Bond \' s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.' ,
30 def _build_url ( query
):
31 return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
def _clean_query(query):
    """Rebuild a player query string with only the parameters that are kept.

    The incoming ``query`` is parsed, every parameter except ``publishedid``
    and ``customerid`` is dropped (only the first value of each kept key is
    used), and ``playerid`` / ``videokbrate`` are then set to fixed values.

    Returns the resulting parameters as a URL-encoded query string.
    """
    NEEDED_ARGS = ['publishedid', 'customerid']
    query_dic = compat_urlparse.parse_qs(query)
    # parse_qs maps each key to a list of values; keep just the first one.
    cleaned_dic = dict(
        (k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
    # Other player ids return m3u8 urls
    cleaned_dic['playerid'] = '247'
    cleaned_dic['videokbrate'] = '100000'
    return compat_urllib_parse.urlencode(cleaned_dic)
43 def _real_extract ( self
, url
):
44 query
= compat_urlparse
. urlparse ( url
). query
45 query_dic
= compat_urlparse
. parse_qs ( query
)
46 video_id
= query_dic
[ 'publishedid' ][ 0 ]
47 url
= self
._ build
_u rl
( query
)
49 flashconfiguration
= self
._ download
_ xml
( url
, video_id
,
50 'Downloading flash configuration' )
51 file_url
= flashconfiguration
. find ( 'file' ). text
52 file_url
= file_url
. replace ( '/playlist.aspx' , '/mrssplaylist.aspx' )
53 # Replace some of the parameters in the query to get the best quality
54 # and http links (no m3u8 manifests)
55 file_url
= re
. sub ( r
'(?<=\?)(.+)$' ,
56 lambda m
: self
._ clean
_ query
( m
. group ()),
58 info
= self
._ download
_ xml
( file_url
, video_id
,
59 'Downloading video info' )
60 item
= info
. find ( 'channel/item' )
66 'media' : 'http://search.yahoo.com/mrss/' ,
67 'jwplayer' : 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' ,
71 for content
in item
. findall ( _bp ( 'media:group/media:content' )):
74 width
= int ( attr
[ 'width' ])
75 bitrate
= int ( attr
[ 'bitrate' ])
76 format_id
= ' %d-%d k' % ( width
, bitrate
)
78 'format_id' : format_id
,
84 self
._ sort
_ formats
( formats
)
88 'title' : item
. find ( 'title' ). text
,
90 'thumbnail' : item
. find ( _bp ( 'media:thumbnail' )). attrib
[ 'url' ],
91 'description' : item
. find ( 'description' ). text
,
92 'duration' : int ( attr
[ 'duration' ]),