]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py
3 from . common
import InfoExtractor
11 class InternetVideoArchiveIE ( InfoExtractor
):
# URLs handled by this extractor: any player page under /flash/players/ on
# video.internetvideoarchive.net whose query string mentions "publishedid".
_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
15 u
'url' : u
'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247' ,
16 u
'file' : u
'452693.mp4' ,
19 u
'description' : u
'In SKYFALL, Bond \' s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.' ,
@staticmethod
def _build_url(query):
    """Return the flash-configuration URL for the given query string.

    Declared static because it takes no instance argument, yet the
    extractor invokes it as ``self._build_url(query)``.

    :param query: query string without the leading ``?``; it is appended
        verbatim (no escaping is performed here).
    :returns: the flashconfiguration.aspx URL followed by ``?`` and ``query``.
    """
    return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
@staticmethod
def _clean_query(query):
    """Rebuild a query string keeping only the arguments the site needs.

    Declared static because it takes no instance argument, yet the
    extractor invokes it as ``self._clean_query(...)``.

    Only ``publishedid`` and ``customerid`` are carried over from the
    input (first value of each when repeated); ``playerid`` and
    ``videokbrate`` are then forced to fixed values.

    :param query: query string without the leading ``?``.
    :returns: the url-encoded, cleaned query string.
    """
    NEEDED_ARGS = ['publishedid', 'customerid']
    query_dic = compat_urlparse.parse_qs(query)
    # parse_qs maps every key to a list of values; keep the first one only.
    cleaned_dic = dict(
        (key, values[0])
        for (key, values) in query_dic.items()
        if key in NEEDED_ARGS
    )
    # Other player ids return m3u8 urls
    cleaned_dic['playerid'] = '247'
    # Fixed bitrate value sent with every request; presumably asks for the
    # highest available quality -- confirm against the site's behaviour.
    cleaned_dic['videokbrate'] = '100000'
    return compat_urllib_parse.urlencode(cleaned_dic)
38 def _real_extract ( self
, url
):
39 query
= compat_urlparse
. urlparse ( url
). query
40 query_dic
= compat_urlparse
. parse_qs ( query
)
41 video_id
= query_dic
[ 'publishedid' ][ 0 ]
42 url
= self
._ build
_u rl
( query
)
44 flashconfiguration
= self
._ download
_ xml
( url
, video_id
,
45 u
'Downloading flash configuration' )
46 file_url
= flashconfiguration
. find ( 'file' ). text
47 file_url
= file_url
. replace ( '/playlist.aspx' , '/mrssplaylist.aspx' )
48 # Replace some of the parameters in the query to get the best quality
49 # and http links (no m3u8 manifests)
50 file_url
= re
. sub ( r
'(?<=\?)(.+)$' ,
51 lambda m
: self
._ clean
_ query
( m
. group ()),
53 info
= self
._ download
_ xml
( file_url
, video_id
,
54 u
'Downloading video info' )
55 item
= info
. find ( 'channel/item' )
58 return xpath_with_ns ( p
,
59 { 'media' : 'http://search.yahoo.com/mrss/' ,
60 'jwplayer' : 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' })
62 for content
in item
. findall ( _bp ( 'media:group/media:content' )):
65 width
= int ( attr
[ 'width' ])
66 bitrate
= int ( attr
[ 'bitrate' ])
67 format_id
= ' %d-%d k' % ( width
, bitrate
)
69 'format_id' : format_id
,
75 self
._ sort
_ formats
( formats
)
79 'title' : item
. find ( 'title' ). text
,
81 'thumbnail' : item
. find ( _bp ( 'media:thumbnail' )). attrib
[ 'url' ],
82 'description' : item
. find ( 'description' ). text
,
83 'duration' : int ( attr
[ 'duration' ]),