]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yahoo.py 
   1  from  __future__ 
import  unicode_literals
   7  from  . common 
import  InfoExtractor
,  SearchInfoExtractor
  16  class  YahooIE ( InfoExtractor
):   17      IE_DESC 
=  'Yahoo screen and movies'   18      _VALID_URL 
=  r
'https?://(?:screen|movies)\.yahoo\.com/.*?-(?P<id>[0-9]+)(?:-[a-z]+)?\.html'   21              'url' :  'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html' ,   22              'md5' :  '4962b075c08be8690a922ee026d05e69' ,   24                  'id' :  '2d25e626-2378-391f-ada0-ddaf1417e588' ,   26                  'title' :  'Julian Smith & Travis Legg Watch Julian Smith' ,   27                  'description' :  'Julian and Travis watch Julian Smith' ,   31              'url' :  'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html' ,   32              'md5' :  'd6e6fc6e1313c608f316ddad7b82b306' ,   34                  'id' :  'd1dedf8c-d58c-38c3-8963-e899929ae0a9' ,   36                  'title' :  'Codefellas - The Cougar Lies with Spanish Moss' ,   37                  'description' :  'Agent Topple \' s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?' ,   41              'url' :  'https://movies.yahoo.com/video/world-loves-spider-man-190819223.html' ,   42              'md5' :  '410b7104aa9893b765bc22787a22f3d9' ,   44                  'id' :  '516ed8e2-2c4f-339f-a211-7a8b49d30845' ,   46                  'title' :  'The World Loves Spider-Man' ,   47                  'description' :  '''People all over the world are celebrating the release of  \" The Amazing Spider-Man 2. \"  We're taking a look at the enthusiastic response Spider-Man has received from viewers all over the world.''' ,   52      def  _real_extract ( self
,  url
):   53          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)   54          video_id 
=  mobj
. group ( 'id' )   55          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)   57          items_json 
=  self
._ search
_ regex
(   58              r
'mediaItems: ({.*?})$' ,  webpage
,  'items' ,  flags
= re
. MULTILINE
,   60          if  items_json 
is None :   61              CONTENT_ID_REGEXES 
= [   62                  r
'YUI\.namespace\("Media"\)\.CONTENT_ID\s*=\s*"([^"]+)"' ,   63                  r
'root\.App\.Cache\.context\.videoCache\.curVideo = \{"([^"]+)"'   65              long_id 
=  self
._ search
_ regex
( CONTENT_ID_REGEXES
,  webpage
,  'content ID' )   68              items 
=  json
. loads ( items_json
)   69              info 
=  items
[ 'mediaItems' ][ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]   70              # The 'meta' field is not always in the video webpage, we request it   73          return  self
._ get
_ info
( long_id
,  video_id
,  webpage
)   75      def  _get_info ( self
,  long_id
,  video_id
,  webpage
):   76          query 
= ( 'SELECT * FROM yahoo.media.video.streams WHERE id=" %s "'   77                   ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2" AND region="US"'   78                   ' AND protocol="http"'  %  long_id
)   79          data 
=  compat_urllib_parse
. urlencode ({   84          query_result 
=  self
._ download
_ json
(   85              'http://video.query.yahoo.com/v1/public/yql?'  +  data
,   86              video_id
,  'Downloading video info' )   87          info 
=  query_result
[ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]   91          for  s 
in  info
[ 'streams' ]:   93                  'width' :  int_or_none ( s
. get ( 'width' )),   94                  'height' :  int_or_none ( s
. get ( 'height' )),   95                  'tbr' :  int_or_none ( s
. get ( 'bitrate' )),  100              if  host
. startswith ( 'rtmp' ):  107                  format_url 
=  compat_urlparse
. urljoin ( host
,  path
)  108                  format_info
[ 'url' ] =  format_url
 109              formats
. append ( format_info
)  111          self
._ sort
_ formats
( formats
)  115              'title' :  meta
[ 'title' ],  117              'description' :  clean_html ( meta
[ 'description' ]),  118              'thumbnail' :  meta
[ 'thumbnail' ]  if  meta
. get ( 'thumbnail' )  else  self
._ og
_ search
_ thumbnail
( webpage
),  122  class  YahooNewsIE ( YahooIE
):  123      IE_NAME 
=  'yahoo:news'  124      _VALID_URL 
=  r
'http://news\.yahoo\.com/video/.*?-(?P<id>\d*?)\.html'  127          'url' :  'http://news.yahoo.com/video/china-moses-crazy-blues-104538833.html' ,  128          'md5' :  '67010fdf3a08d290e060a4dd96baa07b' ,  132              'title' :  'China Moses Is Crazy About the Blues' ,  133              'description' :  'md5:9900ab8cd5808175c7b3fe55b979bed0' ,  137      def  _real_extract ( self
,  url
):  138          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  139          video_id 
=  mobj
. group ( 'id' )  140          webpage 
=  self
._ download
_ webpage
( url
,  video_id
)  141          long_id 
=  self
._ search
_ regex
( r
'contentId: \' (.+ ?
) \' , ', webpage, ' long id ')  142          return self._get_info(long_id, video_id, webpage)  145  class YahooSearchIE(SearchInfoExtractor):  146      IE_DESC = ' Yahoo screen search
'  148      IE_NAME = ' screen
. yahoo
: search
'  149      _SEARCH_KEY = ' yvsearch
'  151      def _get_n_results(self, query, n):  152          """Get a specified number of results for a query"""  154          for pagenum in itertools.count(0):  155              result_url = ' http
:// video
. search
. yahoo
. com
/ search
/ ?p
= %s& fr
= screen
& o
= js
& gs
= 0 & b
= %d ' % (compat_urllib_parse.quote_plus(query), pagenum * 30)  156              info = self._download_json(result_url, query,  157                  note=' Downloading results page 
'+str(pagenum+1))  159              results = info[' results
']  161              for (i, r) in enumerate(results):  162                  if (pagenum * 30) + i >= n:  164                  mobj = re.search(r' ( ?P
< url
> screen\
. yahoo\
. com
/.* ?
- \d
* ?\
. html
) "', r)  165                  e = self.url_result('http://' + mobj.group('url'), 'Yahoo')  167              if (pagenum * 30 + i >= n) or (m['last'] >= (m['total'] - 1)):