]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yahoo.py
5 from . common
import InfoExtractor
, SearchInfoExtractor
14 class YahooIE ( InfoExtractor
):
15 IE_DESC
= u
'Yahoo screen'
16 _VALID_URL
= r
'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
19 u
'url' : u
'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html' ,
20 u
'file' : u
'214727115.mp4' ,
22 u
'title' : u
'Julian Smith & Travis Legg Watch Julian Smith' ,
23 u
'description' : u
'Julian and Travis watch Julian Smith' ,
27 u
'url' : u
'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html' ,
28 u
'file' : u
'103000935.flv' ,
30 u
'title' : u
'The Cougar Lies with Spanish Moss' ,
31 u
'description' : u
'Agent Topple \' s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?' ,
35 u
'skip_download' : True ,
40 def _real_extract ( self
, url
):
41 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
42 video_id
= mobj
. group ( 'id' )
43 webpage
= self
._ download
_ webpage
( url
, video_id
)
45 items_json
= self
._ search
_ regex
( r
'YVIDEO_INIT_ITEMS = ({.*?});$' ,
46 webpage
, u
'items' , flags
= re
. MULTILINE
)
47 items
= json
. loads ( items_json
)
48 info
= items
[ 'mediaItems' ][ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]
52 for s
in info
[ 'streams' ]:
54 'width' : s
. get ( 'width' ),
55 'height' : s
. get ( 'height' ),
56 'bitrate' : s
. get ( 'bitrate' ),
61 if host
. startswith ( 'rtmp' ):
68 format_url
= compat_urlparse
. urljoin ( host
, path
)
69 format_info
[ 'url' ] = format_url
70 format_info
[ 'ext' ] = determine_ext ( format_url
)
72 formats
. append ( format_info
)
73 formats
= sorted ( formats
, key
= lambda f
:( f
[ 'height' ], f
[ 'width' ]))
77 'title' : meta
[ 'title' ],
79 'description' : clean_html ( meta
[ 'description' ]),
80 'thumbnail' : meta
[ 'thumbnail' ],
82 # TODO: Remove when #980 has been merged
83 info
. update ( formats
[- 1 ])
88 class YahooSearchIE ( SearchInfoExtractor
):
89 IE_DESC
= u
'Yahoo screen search'
91 IE_NAME
= u
'screen.yahoo:search'
92 _SEARCH_KEY
= 'yvsearch'
94 def _get_n_results ( self
, query
, n
):
95 """Get a specified number of results for a query"""
102 for pagenum
in itertools
. count ( 0 ):
103 result_url
= u
'http://video.search.yahoo.com/search/?p= %s &fr=screen&o=js&gs=0&b= %d ' % ( compat_urllib_parse
. quote_plus ( query
), pagenum
* 30 )
104 webpage
= self
._ download
_ webpage
( result_url
, query
,
105 note
= 'Downloading results page ' + str ( pagenum
+ 1 ))
106 info
= json
. loads ( webpage
)
108 results
= info
[ u
'results' ]
110 for ( i
, r
) in enumerate ( results
):
111 if ( pagenum
* 30 ) + i
>= n
:
113 mobj
= re
. search ( r
'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"' , r
)
114 e
= self
. url_result ( 'http://' + mobj
. group ( 'url' ), 'Yahoo' )
115 res
[ 'entries' ]. append ( e
)
116 if ( pagenum
* 30 + i
>= n
) or ( m
[ u
'last' ] >= ( m
[ u
'total' ] - 1 )):