]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yahoo.py
34e6afb20fb6833ab21501785deb54cf5f0a0e24
5 from . common
import InfoExtractor
, SearchInfoExtractor
14 class YahooIE ( InfoExtractor
):
15 IE_DESC
= u
'Yahoo screen'
16 _VALID_URL
= r
'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
19 u
'url' : u
'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html' ,
20 u
'file' : u
'214727115.flv' ,
22 u
'title' : u
'Julian Smith & Travis Legg Watch Julian Smith' ,
23 u
'description' : u
'Julian and Travis watch Julian Smith' ,
27 u
'skip_download' : True ,
31 u
'url' : u
'http://screen.yahoo.com/wired/codefellas-s1-ep12-cougar-lies-103000935.html' ,
32 u
'file' : u
'103000935.flv' ,
34 u
'title' : u
'Codefellas - The Cougar Lies with Spanish Moss' ,
35 u
'description' : u
'Agent Topple \' s mustache does its dirty work, and Nicole brokers a deal for peace. But why is the NSA collecting millions of Instagram brunch photos? And if your waffles have nothing to hide, what are they so worried about?' ,
39 u
'skip_download' : True ,
44 def _real_extract ( self
, url
):
45 mobj
= re
. match ( self
._ VALID
_U RL
, url
)
46 video_id
= mobj
. group ( 'id' )
47 webpage
= self
._ download
_ webpage
( url
, video_id
)
49 items_json
= self
._ search
_ regex
( r
'YVIDEO_INIT_ITEMS = ({.*?});$' ,
50 webpage
, u
'items' , flags
= re
. MULTILINE
)
51 items
= json
. loads ( items_json
)
52 info
= items
[ 'mediaItems' ][ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]
53 # The 'meta' field is not always in the video webpage, we request it
56 query
= ( 'SELECT * FROM yahoo.media.video.streams WHERE id=" %s "'
57 ' AND plrs="86Gj0vCaSzV_Iuf6hNylf2"' % long_id
)
58 data
= compat_urllib_parse
. urlencode ({
63 query_result_json
= self
._ download
_ webpage
(
64 'http://video.query.yahoo.com/v1/public/yql?' + data
,
65 video_id
, u
'Downloading video info' )
66 query_result
= json
. loads ( query_result_json
)
67 info
= query_result
[ 'query' ][ 'results' ][ 'mediaObj' ][ 0 ]
71 for s
in info
[ 'streams' ]:
73 'width' : s
. get ( 'width' ),
74 'height' : s
. get ( 'height' ),
75 'bitrate' : s
. get ( 'bitrate' ),
80 if host
. startswith ( 'rtmp' ):
87 format_url
= compat_urlparse
. urljoin ( host
, path
)
88 format_info
[ 'url' ] = format_url
89 format_info
[ 'ext' ] = determine_ext ( format_url
)
91 formats
. append ( format_info
)
92 formats
= sorted ( formats
, key
= lambda f
:( f
[ 'height' ], f
[ 'width' ]))
96 'title' : meta
[ 'title' ],
98 'description' : clean_html ( meta
[ 'description' ]),
99 'thumbnail' : meta
[ 'thumbnail' ],
101 # TODO: Remove when #980 has been merged
102 info
. update ( formats
[- 1 ])
107 class YahooSearchIE ( SearchInfoExtractor
):
108 IE_DESC
= u
'Yahoo screen search'
110 IE_NAME
= u
'screen.yahoo:search'
111 _SEARCH_KEY
= 'yvsearch'
113 def _get_n_results ( self
, query
, n
):
114 """Get a specified number of results for a query"""
121 for pagenum
in itertools
. count ( 0 ):
122 result_url
= u
'http://video.search.yahoo.com/search/?p= %s &fr=screen&o=js&gs=0&b= %d ' % ( compat_urllib_parse
. quote_plus ( query
), pagenum
* 30 )
123 webpage
= self
._ download
_ webpage
( result_url
, query
,
124 note
= 'Downloading results page ' + str ( pagenum
+ 1 ))
125 info
= json
. loads ( webpage
)
127 results
= info
[ u
'results' ]
129 for ( i
, r
) in enumerate ( results
):
130 if ( pagenum
* 30 ) + i
>= n
:
132 mobj
= re
. search ( r
'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"' , r
)
133 e
= self
. url_result ( 'http://' + mobj
. group ( 'url' ), 'Yahoo' )
134 res
[ 'entries' ]. append ( e
)
135 if ( pagenum
* 30 + i
>= n
) or ( m
[ u
'last' ] >= ( m
[ u
'total' ] - 1 )):