]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/yahoo.py
   6 from .common 
import InfoExtractor
, SearchInfoExtractor
 
  13 class YahooIE(InfoExtractor
): 
  14     IE_DESC 
= u
'Yahoo screen' 
  15     _VALID_URL 
= r
'http://screen\.yahoo\.com/.*?-(?P<id>\d*?)\.html' 
  17         u
'url': u
'http://screen.yahoo.com/julian-smith-travis-legg-watch-214727115.html', 
  18         u
'file': u
'214727115.flv', 
  19         u
'md5': u
'2e717f169c1be93d84d3794a00d4a325', 
  21             u
"title": u
"Julian Smith & Travis Legg Watch Julian Smith" 
  23         u
'skip': u
'Requires rtmpdump' 
  26     def _real_extract(self
, url
): 
  27         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  29             raise ExtractorError(u
'Invalid URL: %s' % url
) 
  30         video_id 
= mobj
.group('id') 
  31         webpage 
= self
._download
_webpage
(url
, video_id
) 
  32         m_id 
= re
.search(r
'YUI\.namespace\("Media"\)\.CONTENT_ID = "(?P<new_id>.+?)";', webpage
) 
  35             # TODO: Check which url parameters are required 
  36             info_url 
= 'http://cosmos.bcst.yahoo.com/rest/v2/pops;lmsoverride=1;outputformat=mrss;cb=974419660;id=%s;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
 
  37             webpage 
= self
._download
_webpage
(info_url
, video_id
, u
'Downloading info webpage') 
  38             info_re 
= r
'''<title><!\[CDATA\[(?P<title>.*?)\]\]></title>.* 
  39                         <description><!\[CDATA\[(?P<description>.*?)\]\]></description>.* 
  40                         <media:pubStart><!\[CDATA\[(?P<date>.*?)\ .*\]\]></media:pubStart>.* 
  41                         <media:content\ medium="image"\ url="(?P<thumb>.*?)"\ name="LARGETHUMB" 
  43             self
.report_extraction(video_id
) 
  44             m_info 
= re
.search(info_re
, webpage
, re
.VERBOSE|re
.DOTALL
) 
  46                 raise ExtractorError(u
'Unable to extract video info') 
  47             video_title 
= m_info
.group('title') 
  48             video_description 
= m_info
.group('description') 
  49             video_thumb 
= m_info
.group('thumb') 
  50             video_date 
= m_info
.group('date') 
  51             video_date 
= datetime
.datetime
.strptime(video_date
, '%m/%d/%Y').strftime('%Y%m%d') 
  53             # TODO: Find a way to get mp4 videos 
  54             rest_url 
= 'http://cosmos.bcst.yahoo.com/rest/v2/pops;element=stream;outputformat=mrss;id=%s;lmsoverride=1;bw=375;dynamicstream=1;cb=83521105;tech=flv,mp4;rd=news.yahoo.com;datacontext=mdb;lg=KCa2IihxG3qE60vQ7HtyUy' % video_id
 
  55             webpage 
= self
._download
_webpage
(rest_url
, video_id
, u
'Downloading video url webpage') 
  56             m_rest 
= re
.search(r
'<media:content url="(?P<url>.*?)" path="(?P<path>.*?)"', webpage
) 
  57             video_url 
= m_rest
.group('url') 
  58             video_path 
= m_rest
.group('path') 
  60                 raise ExtractorError(u
'Unable to extract video url') 
  62         else: # We have to use a different method if another id is defined 
  63             long_id 
= m_id
.group('new_id') 
  64             info_url 
= 'http://video.query.yahoo.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.media.video.streams%20WHERE%20id%3D%22' + long_id 
+ '%22%20AND%20format%3D%22mp4%2Cflv%22%20AND%20protocol%3D%22rtmp%2Chttp%22%20AND%20plrs%3D%2286Gj0vCaSzV_Iuf6hNylf2%22%20AND%20acctid%3D%22389%22%20AND%20plidl%3D%22%22%20AND%20pspid%3D%22792700001%22%20AND%20offnetwork%3D%22false%22%20AND%20site%3D%22ivy%22%20AND%20lang%3D%22en-US%22%20AND%20region%3D%22US%22%20AND%20override%3D%22none%22%3B&env=prod&format=json&callback=YUI.Env.JSONP.yui_3_8_1_1_1368368376830_335' 
  65             webpage 
= self
._download
_webpage
(info_url
, video_id
, u
'Downloading info json') 
  66             json_str 
= re
.search(r
'YUI.Env.JSONP.yui.*?\((.*?)\);', webpage
).group(1) 
  67             info 
= json
.loads(json_str
) 
  68             res 
= info
[u
'query'][u
'results'][u
'mediaObj'][0] 
  69             stream 
= res
[u
'streams'][0] 
  70             video_path 
= stream
[u
'path'] 
  71             video_url 
= stream
[u
'host'] 
  73             video_title 
= meta
[u
'title'] 
  74             video_description 
= meta
[u
'description'] 
  75             video_thumb 
= meta
[u
'thumbnail'] 
  76             video_date 
= None # I can't find it 
  81                      'play_path': video_path
, 
  83                      'description': video_description
, 
  84                      'thumbnail': video_thumb
, 
  85                      'upload_date': video_date
, 
  90 class YahooSearchIE(SearchInfoExtractor
): 
  91     IE_DESC 
= u
'Yahoo screen search' 
  93     IE_NAME 
= u
'screen.yahoo:search' 
  94     _SEARCH_KEY 
= 'yvsearch' 
  96     def _get_n_results(self
, query
, n
): 
  97         """Get a specified number of results for a query""" 
 104         for pagenum 
in itertools
.count(0):  
 105             result_url 
= u
'http://video.search.yahoo.com/search/?p=%s&fr=screen&o=js&gs=0&b=%d' % (compat_urllib_parse
.quote_plus(query
), pagenum 
* 30) 
 106             webpage 
= self
._download
_webpage
(result_url
, query
, 
 107                                              note
='Downloading results page '+str(pagenum
+1)) 
 108             info 
= json
.loads(webpage
) 
 110             results 
= info
[u
'results'] 
 112             for (i
, r
) in enumerate(results
): 
 113                 if (pagenum 
* 30) +i 
>= n
: 
 115                 mobj 
= re
.search(r
'(?P<url>screen\.yahoo\.com/.*?-\d*?\.html)"', r
) 
 116                 e 
= self
.url_result('http://' + mobj
.group('url'), 'Yahoo') 
 117                 res
['entries'].append(e
) 
 118             if (pagenum 
* 30 +i 
>= n
) or (m
[u
'last'] >= (m
[u
'total'] -1 )):