]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py
 
 
 
 
 
 
 
 
   1  from  __future__ 
import  unicode_literals
 
   5  from  . common 
import  InfoExtractor
 
  15  class  InternetVideoArchiveIE ( InfoExtractor
):  
  16      _VALID_URL 
=  r
'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'  
  19          'url' :  'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247' ,  
  24              'description' :  'In SKYFALL, Bond \' s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.' ,  
  30      def  _build_url ( query
):  
  31          return  'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?'  +  query
 
  34      def  _clean_query ( query
):  
  35          NEEDED_ARGS 
= [ 'publishedid' ,  'customerid' ]  
  36          query_dic 
=  compat_urlparse
. parse_qs ( query
)  
  37          cleaned_dic 
=  dict (( k
,  v
[ 0 ])  for  ( k
,  v
)  in  query_dic
. items ()  if  k 
in  NEEDED_ARGS
)  
  38          # Other player ids return m3u8 urls  
  39          cleaned_dic
[ 'playerid' ] =  '247'  
  40          cleaned_dic
[ 'videokbrate' ] =  '100000'  
  41          return  compat_urllib_parse
. urlencode ( cleaned_dic
)  
  43      def  _real_extract ( self
,  url
):  
  44          query 
=  compat_urlparse
. urlparse ( url
). query
 
  45          query_dic 
=  compat_urlparse
. parse_qs ( query
)  
  46          video_id 
=  query_dic
[ 'publishedid' ][ 0 ]  
  47          url 
=  self
._ build
_u rl
( query
)  
  49          flashconfiguration 
=  self
._ download
_ xml
( url
,  video_id
,  
  50                                                  'Downloading flash configuration' )  
  51          file_url 
=  flashconfiguration
. find ( 'file' ). text
 
  52          file_url 
=  file_url
. replace ( '/playlist.aspx' ,  '/mrssplaylist.aspx' )  
  53          # Replace some of the parameters in the query to get the best quality  
  54          # and http links (no m3u8 manifests)  
  55          file_url 
=  re
. sub ( r
'(?<=\?)(.+)$' ,  
  56                            lambda  m
:  self
._ clean
_ query
( m
. group ()),  
  58          info 
=  self
._ download
_ xml
( file_url
,  video_id
,  
  59                                    'Downloading video info' )  
  60          item 
=  info
. find ( 'channel/item' )  
  66                      'media' :  'http://search.yahoo.com/mrss/' ,  
  67                      'jwplayer' :  'http://developer.longtailvideo.com/trac/wiki/FlashFormats' ,  
  71          for  content 
in  item
. findall ( _bp ( 'media:group/media:content' )):  
  74              width 
=  int ( attr
[ 'width' ])  
  75              bitrate 
=  int ( attr
[ 'bitrate' ])  
  76              format_id 
=  ' %d-%d k'  % ( width
,  bitrate
)  
  78                  'format_id' :  format_id
,  
  84          self
._ sort
_ formats
( formats
)  
  88              'title' :  item
. find ( 'title' ). text
,  
  90              'thumbnail' :  item
. find ( _bp ( 'media:thumbnail' )). attrib
[ 'url' ],  
  91              'description' :  item
. find ( 'description' ). text
,  
  92              'duration' :  int ( attr
[ 'duration' ]),