]>
 
 
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yahoo.py 
 
 
 
 
 
 
 
 
   5  from  . common 
import  InfoExtractor
,  SearchInfoExtractor
 
  14  class  YahooIE ( InfoExtractor
):  
  15      IE_DESC 
=  u
'Yahoo screen'  
  16      _VALID_URL 
=  r
'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'  
  19              u
'url' :  u
'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html' ,  
  20              u
'file' :  u
'214727115.flv' ,  
  22                  u
'title' :  u
'Julian Smith & Travis Legg Watch Julian Smith' ,  
  23                  u
'description' :  u
'Julian and Travis watch Julian Smith' ,  
  27                  u
'skip_download' :  True ,  
  31              u
'url' :  u
'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html' ,  
  32              u
'file' :  u
'103000935.flv' ,  
  34                  u
'title' :  u
'Codefellas - The Cougar Lies with Spanish Moss' ,  
  35                  u
'description' :  u
'Agent Topple \' s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?' ,  
  39                  u
'skip_download' :  True ,  
  44      def  _real_extract ( self
,  url
):  
  45          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
  46          video_id 
=  mobj
. group ( 'id' )  
  47          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  
  49          items_json 
=  self
._ search
_ regex
( r
'YVIDEO_INIT_ITEMS = ({.*?});$' ,  
  50              webpage
,  u
'items' ,  flags
= re
. MULTILINE
)  
  51          items 
=  json
. loads ( items_json
)  
  52          info 
=  items
[ 'mediaItems' ][ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]  
  53          # The 'meta' field is not always in the video webpage, we request it  
  56          query 
= ( 'SELECT * FROM yahoo.media.video.streams WHERE id=" %s "'  
  57                   ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"'  %  long_id
)  
  58          data 
=  compat_urllib_parse
. urlencode ({  
  63          query_result_json 
=  self
._ download
_ webpage
(  
  64              'http://video.query.yahoo.com/v1/public/yql?'  +  data
,  
  65              video_id
,  u
'Downloading video info' )  
  66          query_result 
=  json
. loads ( query_result_json
)  
  67          info 
=  query_result
[ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]  
  71          for  s 
in  info
[ 'streams' ]:  
  73                  'width' :  s
. get ( 'width' ),  
  74                  'height' :  s
. get ( 'height' ),  
  75                  'bitrate' :  s
. get ( 'bitrate' ),  
  80              if  host
. startswith ( 'rtmp' ):  
  87                  format_url 
=  compat_urlparse
. urljoin ( host
,  path
)  
  88                  format_info
[ 'url' ] =  format_url
 
  89                  format_info
[ 'ext' ] =  determine_ext ( format_url
)  
  91              formats
. append ( format_info
)  
  92          formats 
=  sorted ( formats
,  key
= lambda  f
:( f
[ 'height' ],  f
[ 'width' ]))  
  96              'title' :  meta
[ 'title' ],  
  98              'description' :  clean_html ( meta
[ 'description' ]),  
  99              'thumbnail' :  meta
[ 'thumbnail' ],  
 101          # TODO: Remove when #980 has been merged  
 102          info
. update ( formats
[- 1 ])  
 107  class  YahooSearchIE ( SearchInfoExtractor
):  
 108      IE_DESC 
=  u
'Yahoo screen search'  
 110      IE_NAME 
=  u
'screen.yahoo:search'  
 111      _SEARCH_KEY 
=  'yvsearch'  
 113      def  _get_n_results ( self
,  query
,  n
):  
 114          """Get a specified number of results for a query"""  
 121          for  pagenum 
in  itertools
. count ( 0 ):   
 122              result_url 
=  u
'http://video.search.yahoo.com/search/?p= %s &fr=screen&o=js&gs=0&b= %d '  % ( compat_urllib_parse
. quote_plus ( query
),  pagenum 
*  30 )  
 123              webpage 
=  self
._ download
_ webpage
( result_url
,  query
,  
 124                                               note
= 'Downloading results page ' + str ( pagenum
+ 1 ))  
 125              info 
=  json
. loads ( webpage
)  
 127              results 
=  info
[ u
'results' ]  
 129              for  ( i
,  r
)  in  enumerate ( results
):  
 130                  if  ( pagenum 
*  30 ) + i 
>=  n
:  
 132                  mobj 
=  re
. search ( r
'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"' ,  r
)  
 133                  e 
=  self
. url_result ( 'http://'  +  mobj
. group ( 'url' ),  'Yahoo' )  
 134                  res
[ 'entries' ]. append ( e
)  
 135              if  ( pagenum 
*  30  + i 
>=  n
)  or  ( m
[ u
'last' ] >= ( m
[ u
'total' ] - 1  )):