]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yahoo.py 
d92d14f718158f285b2696944afb155fdd664538
   1  from  __future__ 
import  unicode_literals
   7  from  . common 
import  InfoExtractor
,  SearchInfoExtractor
  16  class  YahooIE ( InfoExtractor
):   17      IE_DESC 
=  'Yahoo screen'   18      _VALID_URL 
=  r
'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'   21              'url' :  'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html' ,   22              'file' :  '214727115.mp4' ,   23              'md5' :  '4962b075c08be8690a922ee026d05e69' ,   25                  'title' :  'Julian Smith & Travis Legg Watch Julian Smith' ,   26                  'description' :  'Julian and Travis watch Julian Smith' ,   30              'url' :  'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html' ,   31              'file' :  '103000935.mp4' ,   32              'md5' :  'd6e6fc6e1313c608f316ddad7b82b306' ,   34                  'title' :  'Codefellas - The Cougar Lies with Spanish Moss' ,   35                  'description' :  'Agent Topple \' s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?' ,   40      def  _real_extract ( self
,  url
):   41          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)   42          video_id 
=  mobj
. group ( 'id' )   43          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)   45          items_json 
=  self
._ search
_ regex
( r
'mediaItems: ({.*?})$' ,   46              webpage
,  'items' ,  flags
= re
. MULTILINE
)   47          items 
=  json
. loads ( items_json
)   48          info 
=  items
[ 'mediaItems' ][ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]   49          # The 'meta' field is not always in the video webpage, we request it   52          return  self
._ get
_ info
( long_id
,  video_id
)   54      def  _get_info ( self
,  long_id
,  video_id
):   55          query 
= ( 'SELECT * FROM yahoo.media.video.streams WHERE id=" %s "'   56                   ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'   57                   ' AND protocol="http"'  %  long_id
)   58          data 
=  compat_urllib_parse
. urlencode ({   63          query_result_json 
=  self
._ download
_ webpage
(   64              'http://video.query.yahoo.com/v1/public/yql?'  +  data
,   65              video_id
,  'Downloading video info' )   66          query_result 
=  json
. loads ( query_result_json
)   67          info 
=  query_result
[ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]   71          for  s 
in  info
[ 'streams' ]:   73                  'width' :  int_or_none ( s
. get ( 'width' )),   74                  'height' :  int_or_none ( s
. get ( 'height' )),   75                  'tbr' :  int_or_none ( s
. get ( 'bitrate' )),   80              if  host
. startswith ( 'rtmp' ):   87                  format_url 
=  compat_urlparse
. urljoin ( host
,  path
)   88                  format_info
[ 'url' ] =  format_url
  90              formats
. append ( format_info
)   92          self
._ sort
_ formats
( formats
)   96              'title' :  meta
[ 'title' ],   98              'description' :  clean_html ( meta
[ 'description' ]),   99              'thumbnail' :  meta
[ 'thumbnail' ],  103  class  YahooNewsIE ( YahooIE
):  104      IE_NAME 
=  'yahoo:news'  105      _VALID_URL 
=  r
'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'  108          'url' :  'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html' ,  109          'md5' :  '67010fdf3a08d290e060a4dd96baa07b' ,  113              'title' :  'China Moses Is Crazy About the Blues' ,  114              'description' :  'md5:9900ab8cd5808175c7b3fe55b979bed0' ,  118      # Overwrite YahooIE properties we don't want  121      def  _real_extract ( self
,  url
):  122          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  123          video_id 
=  mobj
. group ( 'id' )  124          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  125          long_id 
=  self
._ search
_ regex
( r
'contentId: \' (.+ ?
) \' , ', webpage, ' long id ')  126          return self._get_info(long_id, video_id)  129  class YahooSearchIE(SearchInfoExtractor):  130      IE_DESC = ' Yahoo screen search
'  132      IE_NAME = ' screen
. yahoo
: search
'  133      _SEARCH_KEY = ' yvsearch
'  135      def _get_n_results(self, query, n):  136          """Get a specified number of results for a query"""  143          for pagenum in itertools.count(0):   144              result_url = ' http
:// video
. search
. yahoo
. com
/ search
/ ?p
= %s& fr
= screen
& o
= js
& gs
= 0 & b
= %d ' % (compat_urllib_parse.quote_plus(query), pagenum * 30)  145              webpage = self._download_webpage(result_url, query,  146                                               note=' Downloading results page 
'+str(pagenum+1))  147              info = json.loads(webpage)  149              results = info[' results
']  151              for (i, r) in enumerate(results):  152                  if (pagenum * 30) +i >= n:  154                  mobj = re.search(r' ( ?P
< url
> screen\
. yahoo\
. com
/.* ?
- \d
* ?\
. html
) "', r)  155                  e = self.url_result('http://' + mobj.group('url'), 'Yahoo')  156                  res['entries'].append(e)  157              if (pagenum * 30 +i >= n) or (m['last'] >= (m['total'] -1)):