]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py
3 from . common
import InfoExtractor
12 class InternetVideoArchiveIE ( InfoExtractor
):
13 _VALID_URL
= r
'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
16 u
'url' : u
'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247' ,
17 u
'file' : u
'452693.mp4' ,
20 u
'description' : u
'In SKYFALL, Bond \' s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.' ,
26 def _build_url ( query
):
27 return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
@staticmethod
def _clean_query(query):
    """Rebuild a query string keeping only the arguments that are needed.

    Declared static because it takes no instance argument, yet it is
    invoked through the instance as ``self._clean_query(...)``.

    query -- the raw query string to filter.

    Only 'publishedid' and 'customerid' are carried over (the first value
    of each); 'playerid' and 'videokbrate' are then set to fixed values
    and the result is url-encoded again.
    """
    NEEDED_ARGS = ['publishedid', 'customerid']
    query_dic = compat_urlparse.parse_qs(query)
    # parse_qs maps every key to a list of values; keep the first one only
    cleaned_dic = dict(
        (k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
    # Other player ids return m3u8 urls
    cleaned_dic['playerid'] = '247'
    cleaned_dic['videokbrate'] = '100000'
    return compat_urllib_parse.urlencode(cleaned_dic)
39 def _real_extract ( self
, url
):
40 query
= compat_urlparse
. urlparse ( url
). query
41 query_dic
= compat_urlparse
. parse_qs ( query
)
42 video_id
= query_dic
[ 'publishedid' ][ 0 ]
43 url
= self
._ build
_u rl
( query
)
45 flashconfiguration
= self
._ download
_ xml
( url
, video_id
,
46 u
'Downloading flash configuration' )
47 file_url
= flashconfiguration
. find ( 'file' ). text
48 file_url
= file_url
. replace ( '/playlist.aspx' , '/mrssplaylist.aspx' )
49 # Replace some of the parameters in the query to get the best quality
50 # and http links (no m3u8 manifests)
51 file_url
= re
. sub ( r
'(?<=\?)(.+)$' ,
52 lambda m
: self
._ clean
_ query
( m
. group ()),
54 info
= self
._ download
_ xml
( file_url
, video_id
,
55 u
'Downloading video info' )
56 item
= info
. find ( 'channel/item' )
59 return xpath_with_ns ( p
,
60 { 'media' : 'http://search.yahoo.com/mrss/' ,
61 'jwplayer' : 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' })
63 for content
in item
. findall ( _bp ( 'media:group/media:content' )):
68 'ext' : determine_ext ( f_url
),
69 'width' : int ( attr
[ 'width' ]),
70 'bitrate' : int ( attr
[ 'bitrate' ]),
72 formats
= sorted ( formats
, key
= lambda f
: f
[ 'bitrate' ])
76 'title' : item
. find ( 'title' ). text
,
78 'thumbnail' : item
. find ( _bp ( 'media:thumbnail' )). attrib
[ 'url' ],
79 'description' : item
. find ( 'description' ). text
,
80 'duration' : int ( attr
[ 'duration' ]),