]>
 
 
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/internetvideoarchive.py 
 
 
 
 
 
 
 
 
   3  from  . common 
import  InfoExtractor
 
  11  class  InternetVideoArchiveIE ( InfoExtractor
):  
  12      _VALID_URL 
=  r
'https?://video\.internetvideoarchive\.net/flash/players/.*?\?.*?publishedid.*?'  
  15          u
'url' :  u
'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?customerid=69249&publishedid=452693&playerid=247' ,  
  16          u
'file' :  u
'452693.mp4' ,  
  19              u
'description' :  u
'In SKYFALL, Bond \' s loyalty to M is tested as her past comes back to haunt her. As MI6 comes under attack, 007 must track down and destroy the threat, no matter how personal the cost.' ,  
  25      def  _build_url ( query
):  
  26          return  'http://video.internetvideoarchive.net/flash/players/flashconfiguration.aspx?'  +  query
 
  29      def  _clean_query ( query
):  
  30          NEEDED_ARGS 
= [ 'publishedid' ,  'customerid' ]  
  31          query_dic 
=  compat_urlparse
. parse_qs ( query
)  
  32          cleaned_dic 
=  dict (( k
, v
[ 0 ])  for  ( k
, v
)  in  query_dic
. items ()  if  k 
in  NEEDED_ARGS
)  
  33          # Other player ids return m3u8 urls  
  34          cleaned_dic
[ 'playerid' ] =  '247'  
  35          cleaned_dic
[ 'videokbrate' ] =  '100000'  
  36          return  compat_urllib_parse
. urlencode ( cleaned_dic
)  
  38      def  _real_extract ( self
,  url
):  
  39          query 
=  compat_urlparse
. urlparse ( url
). query
 
  40          query_dic 
=  compat_urlparse
. parse_qs ( query
)  
  41          video_id 
=  query_dic
[ 'publishedid' ][ 0 ]  
  42          url 
=  self
._ build
_u rl
( query
)  
  44          flashconfiguration 
=  self
._ download
_ xml
( url
,  video_id
,  
  45              u
'Downloading flash configuration' )  
  46          file_url 
=  flashconfiguration
. find ( 'file' ). text
 
  47          file_url 
=  file_url
. replace ( '/playlist.aspx' ,  '/mrssplaylist.aspx' )  
  48          # Replace some of the parameters in the query to get the best quality  
  49          # and http links (no m3u8 manifests)  
  50          file_url 
=  re
. sub ( r
'(?<=\?)(.+)$' ,  
  51              lambda  m
:  self
._ clean
_ query
( m
. group ()),  
  53          info 
=  self
._ download
_ xml
( file_url
,  video_id
,  
  54              u
'Downloading video info' )  
  55          item 
=  info
. find ( 'channel/item' )  
  58              return  xpath_with_ns ( p
,  
  59                  { 'media' :  'http://search.yahoo.com/mrss/' ,  
  60                  'jwplayer' :  'http://developer.longtailvideo.com/trac/wiki/FlashFormats' })  
  62          for  content 
in  item
. findall ( _bp ( 'media:group/media:content' )):  
  65              width 
=  int ( attr
[ 'width' ])  
  66              bitrate 
=  int ( attr
[ 'bitrate' ])  
  67              format_id 
=  ' %d-%d k'  % ( width
,  bitrate
)  
  69                  'format_id' :  format_id
,  
  75          self
._ sort
_ formats
( formats
)  
  79              'title' :  item
. find ( 'title' ). text
,  
  81              'thumbnail' :  item
. find ( _bp ( 'media:thumbnail' )). attrib
[ 'url' ],  
  82              'description' :  item
. find ( 'description' ). text
,  
  83              'duration' :  int ( attr
[ 'duration' ]),