]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yahoo.py
6 from .common
import InfoExtractor
, SearchInfoExtractor
13 class YahooIE(InfoExtractor
):
14 IE_DESC
= u
'Yahoo screen'
15 _VALID_URL
= r
'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html'
17 u
'url': u
'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html',
18 u
'file': u
'214727115.flv',
19 u
'md5': u
'2e717f169c1be93d84d3794a00d4a325',
21 u
"title": u
"Julian Smith & Travis Legg Watch Julian Smith"
23 u
'skip': u
'Requires rtmpdump'
26 def _real_extract(self
, url
):
27 mobj
= re
.match(self
._VALID
_URL
, url
)
29 raise ExtractorError(u
'Invalid URL: %s' % url
)
30 video_id
= mobj
.group('id')
31 webpage
= self
._download
_webpage
(url
, video_id
)
32 m_id
= re
.search(r
'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage
)
35 # TODO: Check which url parameters are required
36 info_url
= 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
37 webpage
= self
._download
_webpage
(info_url
, video_id
, u
'Downloading info webpage')
38 info_re
= r
'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.*
39 <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.*
40 <media:pubStart><!\[CDATA\[(?P<date>.*?)\ .*\]\]></media:pubStart>.*
41 <media:content\ medium="image"\ url="(?P<thumb>.*?)"\ name="LARGETHUMB"
43 self
.report_extraction(video_id
)
44 m_info
= re
.search(info_re
, webpage
, re
.VERBOSE|re
.DOTALL
)
46 raise ExtractorError(u
'Unable to extract video info')
47 video_title
= m_info
.group('title')
48 video_description
= m_info
.group('description')
49 video_thumb
= m_info
.group('thumb')
50 video_date
= m_info
.group('date')
51 video_date
= datetime
.datetime
.strptime(video_date
, '%m/%d/%Y').strftime('%Y%m%d')
53 # TODO: Find a way to get mp4 videos
54 rest_url
= 'http://cosmos.bcst.yahoo.com/rest/v2/pops;element=stream;outputformat=mrss;id=%s;lmsoverride=1;bw=375;dynamicstream=1;cb=83521105;tech=flv,mp4;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
55 webpage
= self
._download
_webpage
(rest_url
, video_id
, u
'Downloading video url webpage')
56 m_rest
= re
.search(r
'<media:content url="(?P<url>.*?)" path="(?P<path>.*?)"', webpage
)
57 video_url
= m_rest
.group('url')
58 video_path
= m_rest
.group('path')
60 raise ExtractorError(u
'Unable to extract video url')
62 else: # We have to use a different method if another id is defined
63 long_id
= m_id
.group('new_id')
64 info_url
= 'http://video.query.yahoo.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.media.video.streams%20WHERE%20id%3D%22' + long_id
+ '%22%20AND%20format%3D%22mp4%2Cflv%22%20AND%20protocol%3D%22rtmp%2Chttp%22%20AND%20plrs%3D%2286Gj0vCaSzV_Iuf6hNylf2%22%20AND%20acctid%3D%22389%22%20AND%20plidl%3D%22%22%20AND%20pspid%3D%22792700001%22%20AND%20offnetwork%3D%22false%22%20AND%20site%3D%22ivy%22%20AND%20lang%3D%22en-US%22%20AND%20region%3D%22US%22%20AND%20override%3D%22none%22%3B&env=prod&format=json&callback=YUI.Env.JSONP.yui_3_8_1_1_1368368376830_335'
65 webpage
= self
._download
_webpage
(info_url
, video_id
, u
'Downloading info json')
66 json_str
= re
.search(r
'YUI.Env.JSONP.yui.*?\((.*?)\);', webpage
).group(1)
67 info
= json
.loads(json_str
)
68 res
= info
[u
'query'][u
'results'][u
'mediaObj'][0]
69 stream
= res
[u
'streams'][0]
70 video_path
= stream
[u
'path']
71 video_url
= stream
[u
'host']
73 video_title
= meta
[u
'title']
74 video_description
= meta
[u
'description']
75 video_thumb
= meta
[u
'thumbnail']
76 video_date
= None # I can't find it
81 'play_path': video_path
,
83 'description': video_description
,
84 'thumbnail': video_thumb
,
85 'upload_date': video_date
,
90 class YahooSearchIE(SearchInfoExtractor
):
91 IE_DESC
= u
'Yahoo screen search'
93 IE_NAME
= u
'screen.yahoo:search'
94 _SEARCH_KEY
= 'yvsearch'
96 def _get_n_results(self
, query
, n
):
97 """Get a specified number of results for a query"""
104 for pagenum
in itertools
.count(0):
105 result_url
= u
'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse
.quote_plus(query
), pagenum
* 30)
106 webpage
= self
._download
_webpage
(result_url
, query
,
107 note
='Downloading results page '+str(pagenum
+1))
108 info
= json
.loads(webpage
)
110 results
= info
[u
'results']
112 for (i
, r
) in enumerate(results
):
113 if (pagenum
* 30) +i
>= n
:
115 mobj
= re
.search(r
'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r
)
116 e
= self
.url_result('http://' + mobj
.group('url'), 'Yahoo')
117 res
['entries'].append(e
)
118 if (pagenum
* 30 +i
>= n
) or (m
[u
'last'] >= (m
[u
'total'] -1 )):