]>
 
 
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yahoo.py 
 
 
 
 
 
 
 
 
   5  from  . common 
import  InfoExtractor
,  SearchInfoExtractor
 
  14  class  YahooIE ( InfoExtractor
):  
  15      IE_DESC 
=  u
'Yahoo screen'  
  16      _VALID_URL 
=  r
'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'  
  19              u
'url' :  u
'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html' ,  
  20              u
'file' :  u
'214727115.mp4' ,  
  22                  u
'title' :  u
'Julian Smith & Travis Legg Watch Julian Smith' ,  
  23                  u
'description' :  u
'Julian and Travis watch Julian Smith' ,  
  27              u
'url' :  u
'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html' ,  
  28              u
'file' :  u
'103000935.flv' ,  
  30                  u
'title' :  u
'The Cougar Lies with Spanish Moss' ,  
  31                  u
'description' :  u
'Agent Topple \' s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?' ,  
  35                  u
'skip_download' :  True ,  
  40      def  _real_extract ( self
,  url
):  
  41          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
  42          video_id 
=  mobj
. group ( 'id' )  
  43          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  
  45          items_json 
=  self
._ search
_ regex
( r
'YVIDEO_INIT_ITEMS = ({.*?});$' ,  
  46              webpage
,  u
'items' ,  flags
= re
. MULTILINE
)  
  47          items 
=  json
. loads ( items_json
)  
  48          info 
=  items
[ 'mediaItems' ][ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]  
  52          for  s 
in  info
[ 'streams' ]:  
  54                  'width' :  s
. get ( 'width' ),  
  55                  'height' :  s
. get ( 'height' ),  
  56                  'bitrate' :  s
. get ( 'bitrate' ),  
  61              if  host
. startswith ( 'rtmp' ):  
  68                  format_url 
=  compat_urlparse
. urljoin ( host
,  path
)  
  69                  format_info
[ 'url' ] =  format_url
 
  70                  format_info
[ 'ext' ] =  determine_ext ( format_url
)  
  72              formats
. append ( format_info
)  
  73          formats 
=  sorted ( formats
,  key
= lambda  f
:( f
[ 'height' ],  f
[ 'width' ]))  
  77              'title' :  meta
[ 'title' ],  
  79              'description' :  clean_html ( meta
[ 'description' ]),  
  80              'thumbnail' :  meta
[ 'thumbnail' ],  
  82          # TODO: Remove when #980 has been merged  
  83          info
. update ( formats
[- 1 ])  
  88  class  YahooSearchIE ( SearchInfoExtractor
):  
  89      IE_DESC 
=  u
'Yahoo screen search'  
  91      IE_NAME 
=  u
'screen.yahoo:search'  
  92      _SEARCH_KEY 
=  'yvsearch'  
  94      def  _get_n_results ( self
,  query
,  n
):  
  95          """Get a specified number of results for a query"""  
 102          for  pagenum 
in  itertools
. count ( 0 ):   
 103              result_url 
=  u
'http://video.search.yahoo.com/search/?p= %s &fr=screen&o=js&gs=0&b= %d '  % ( compat_urllib_parse
. quote_plus ( query
),  pagenum 
*  30 )  
 104              webpage 
=  self
._ download
_ webpage
( result_url
,  query
,  
 105                                               note
= 'Downloading results page ' + str ( pagenum
+ 1 ))  
 106              info 
=  json
. loads ( webpage
)  
 108              results 
=  info
[ u
'results' ]  
 110              for  ( i
,  r
)  in  enumerate ( results
):  
 111                  if  ( pagenum 
*  30 ) + i 
>=  n
:  
 113                  mobj 
=  re
. search ( r
'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"' ,  r
)  
 114                  e 
=  self
. url_result ( 'http://'  +  mobj
. group ( 'url' ),  'Yahoo' )  
 115                  res
[ 'entries' ]. append ( e
)  
 116              if  ( pagenum 
*  30  + i 
>=  n
)  or  ( m
[ u
'last' ] >= ( m
[ u
'total' ] - 1  )):