]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py
1 from __future__
import unicode_literals
5 from . common
import InfoExtractor
13 class InternetVideoArchiveIE ( InfoExtractor
):
# Matches http(s) player URLs under video.internetvideoarchive.net/flash/players/
# whose query string contains "publishedid".
_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
17 'url' : 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247' ,
22 'description' : 'In SKYFALL, Bond \' s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.' ,
def _build_url(query):
    """Return the flash-configuration endpoint URL for ``query``.

    ``query`` is appended verbatim after the ``?``; no encoding or
    validation is applied here.

    NOTE(review): this is invoked as ``self._build_url(query)`` from
    ``_real_extract`` yet takes no ``self``; a ``@staticmethod`` decorator
    is presumably on a line that is not visible here -- confirm.
    """
    return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
def _clean_query(query):
    """Rebuild ``query`` with only the needed arguments plus fixed overrides.

    The query string is parsed, only ``publishedid`` and ``customerid`` are
    kept (first value of each), ``playerid`` and ``videokbrate`` are then
    set to fixed values, and the result is returned url-encoded.

    NOTE(review): this is invoked as ``self._clean_query(...)`` from
    ``_real_extract`` yet takes no ``self``; a ``@staticmethod`` decorator
    is presumably on a line that is not visible here -- confirm.
    """
    NEEDED_ARGS = ['publishedid', 'customerid']
    query_dic = compat_urlparse.parse_qs(query)
    # parse_qs maps each key to a list of values; keep the first one only.
    cleaned_dic = dict(
        (k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
    # Other player ids return m3u8 urls
    cleaned_dic['playerid'] = '247'
    # Part of the "best quality" parameter replacement done by the caller.
    cleaned_dic['videokbrate'] = '100000'
    return compat_urllib_parse.urlencode(cleaned_dic)
41 def _real_extract ( self
, url
):
42 query
= compat_urlparse
. urlparse ( url
). query
43 query_dic
= compat_urlparse
. parse_qs ( query
)
44 video_id
= query_dic
[ 'publishedid' ][ 0 ]
45 url
= self
._ build
_u rl
( query
)
47 flashconfiguration
= self
._ download
_ xml
( url
, video_id
,
48 'Downloading flash configuration' )
49 file_url
= flashconfiguration
. find ( 'file' ). text
50 file_url
= file_url
. replace ( '/playlist.aspx' , '/mrssplaylist.aspx' )
51 # Replace some of the parameters in the query to get the best quality
52 # and http links (no m3u8 manifests)
53 file_url
= re
. sub ( r
'(?<=\?)(.+)$' ,
54 lambda m
: self
._ clean
_ query
( m
. group ()),
56 info
= self
._ download
_ xml
( file_url
, video_id
,
57 'Downloading video info' )
58 item
= info
. find ( 'channel/item' )
61 return xpath_with_ns ( p
,
62 { 'media' : 'http://search.yahoo.com/mrss/' ,
63 'jwplayer' : 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' })
65 for content
in item
. findall ( _bp ( 'media:group/media:content' )):
68 width
= int ( attr
[ 'width' ])
69 bitrate
= int ( attr
[ 'bitrate' ])
70 format_id
= ' %d-%d k' % ( width
, bitrate
)
72 'format_id' : format_id
,
78 self
._ sort
_ formats
( formats
)
82 'title' : item
. find ( 'title' ). text
,
84 'thumbnail' : item
. find ( _bp ( 'media:thumbnail' )). attrib
[ 'url' ],
85 'description' : item
. find ( 'description' ). text
,
86 'duration' : int ( attr
[ 'duration' ]),