]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py
be8e05f539d7f64c301f7a63a488aedbf9d129cd
2 import xml
. etree
. ElementTree
4 from . common
import InfoExtractor
13 class InternetVideoArchiveIE ( InfoExtractor
):
# Flash player URLs on video.internetvideoarchive.net whose query string
# mentions `publishedid`; _real_extract reads that parameter as the video id.
_VALID_URL = r'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'
17 u
'url' : u
'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247' ,
18 u
'file' : u
'452693.mp4' ,
21 u
'description' : u
'In SKYFALL, Bond \' s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.' ,
@staticmethod
def _build_url(query):
    """Return the flash-configuration URL for the given query string.

    ``query`` is appended verbatim after the ``?``; no escaping is applied,
    so it must already be a URL-encoded query string.

    Declared static because it takes no ``self`` yet is invoked as
    ``self._build_url(query)``.
    """
    return 'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?' + query
@staticmethod
def _clean_query(query):
    """Rebuild a playlist query string with only the parameters we want.

    Keeps the first value of ``publishedid`` and ``customerid`` from
    ``query`` (when present), drops every other parameter, and then forces
    ``playerid`` and ``videokbrate`` to fixed values.

    Declared static because it takes no ``self`` yet is invoked as
    ``self._clean_query(...)``.

    Returns the URL-encoded query string (without a leading ``?``).
    """
    NEEDED_ARGS = ['publishedid', 'customerid']
    query_dic = compat_urlparse.parse_qs(query)
    # parse_qs maps each key to a list of values; only the first is kept.
    cleaned_dic = dict(
        (k, v[0]) for (k, v) in query_dic.items() if k in NEEDED_ARGS)
    # Other player ids return m3u8 urls
    cleaned_dic['playerid'] = '247'
    # NOTE(review): presumably a deliberately huge bitrate so the server
    # offers its best quality -- confirm against the service's behaviour.
    cleaned_dic['videokbrate'] = '100000'
    return compat_urllib_parse.urlencode(cleaned_dic)
40 def _real_extract ( self
, url
):
41 query
= compat_urlparse
. urlparse ( url
). query
42 query_dic
= compat_urlparse
. parse_qs ( query
)
43 video_id
= query_dic
[ 'publishedid' ][ 0 ]
44 url
= self
._ build
_u rl
( query
)
46 flashconfiguration_xml
= self
._ download
_ webpage
( url
, video_id
,
47 u
'Downloading flash configuration' )
48 flashconfiguration
= xml
. etree
. ElementTree
. fromstring ( flashconfiguration_xml
. encode ( 'utf-8' ))
49 file_url
= flashconfiguration
. find ( 'file' ). text
50 file_url
= file_url
. replace ( '/playlist.aspx' , '/mrssplaylist.aspx' )
51 # Replace some of the parameters in the query to get the best quality
52 # and http links (no m3u8 manifests)
53 file_url
= re
. sub ( r
'(?<=\?)(.+)$' ,
54 lambda m
: self
._ clean
_ query
( m
. group ()),
56 info_xml
= self
._ download
_ webpage
( file_url
, video_id
,
57 u
'Downloading video info' )
58 info
= xml
. etree
. ElementTree
. fromstring ( info_xml
. encode ( 'utf-8' ))
59 item
= info
. find ( 'channel/item' )
62 return xpath_with_ns ( p
,
63 { 'media' : 'http://search.yahoo.com/mrss/' ,
64 'jwplayer' : 'http://developer.longtailvideo.com/trac/wiki/FlashFormats' })
66 for content
in item
. findall ( _bp ( 'media:group/media:content' )):
71 'ext' : determine_ext ( f_url
),
72 'width' : int ( attr
[ 'width' ]),
73 'bitrate' : int ( attr
[ 'bitrate' ]),
75 formats
= sorted ( formats
, key
= lambda f
: f
[ 'bitrate' ])
79 'title' : item
. find ( 'title' ). text
,
81 'thumbnail' : item
. find ( _bp ( 'media:thumbnail' )). attrib
[ 'url' ],
82 'description' : item
. find ( 'description' ). text
,
83 'duration' : int ( attr
[ 'duration' ]),