]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/metacafe.py
   4 from .common 
import InfoExtractor
 
  10     compat_urllib_request
, 
  16 class MetacafeIE(InfoExtractor
): 
  17     """Information Extractor for metacafe.com.""" 
  19     _VALID_URL 
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' 
  20     _DISCLAIMER 
= 'http://www.metacafe.com/family_filter/' 
  21     _FILTER_POST 
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' 
  24         u
"add_ie": ["Youtube"], 
  25         u
"url":  u
"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", 
  26         u
"file":  u
"_aUehQsCQtM.flv", 
  28             u
"upload_date": u
"20090102", 
  29             u
"title": u
"The Electric Company | \"Short I\" | PBS KIDS GO!", 
  30             u
"description": u
"md5:2439a8ef6d5a70e380c22f5ad323e5a8", 
  32             u
"uploader_id": u
"PBS" 
  37     def report_disclaimer(self
): 
  38         """Report disclaimer retrieval.""" 
  39         self
.to_screen(u
'Retrieving disclaimer') 
  41     def _real_initialize(self
): 
  43         request 
= compat_urllib_request
.Request(self
._DISCLAIMER
) 
  45             self
.report_disclaimer() 
  46             compat_urllib_request
.urlopen(request
).read() 
  47         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  48             raise ExtractorError(u
'Unable to retrieve disclaimer: %s' % compat_str(err
)) 
  53             'submit': "Continue - I'm over 18", 
  55         request 
= compat_urllib_request
.Request(self
._FILTER
_POST
, compat_urllib_parse
.urlencode(disclaimer_form
)) 
  57             self
.report_age_confirmation() 
  58             compat_urllib_request
.urlopen(request
).read() 
  59         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  60             raise ExtractorError(u
'Unable to confirm age: %s' % compat_str(err
)) 
  62     def _real_extract(self
, url
): 
  63         # Extract id and simplified title from URL 
  64         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  66             raise ExtractorError(u
'Invalid URL: %s' % url
) 
  68         video_id 
= mobj
.group(1) 
  70         # Check if video comes from YouTube 
  71         mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
  73             return [self
.url_result('http://www.youtube.com/watch?v=%s' % mobj2
.group(1), 'Youtube')] 
  75         # Retrieve video webpage to extract further information 
  76         webpage 
= self
._download
_webpage
('http://www.metacafe.com/watch/%s/' % video_id
, video_id
) 
  78         # Extract URL, uploader and title from webpage 
  79         self
.report_extraction(video_id
) 
  80         mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
  82             mediaURL 
= compat_urllib_parse
.unquote(mobj
.group(1)) 
  83             video_extension 
= mediaURL
[-3:] 
  85             # Extract gdaKey if available 
  86             mobj 
= re
.search(r
'(?m)&gdaKey=(.*?)&', webpage
) 
  90                 gdaKey 
= mobj
.group(1) 
  91                 video_url 
= '%s?__gda__=%s' % (mediaURL
, gdaKey
) 
  93             mobj 
= re
.search(r
' name="flashvars" value="(.*?)"', webpage
) 
  95                 raise ExtractorError(u
'Unable to extract media URL') 
  96             vardict 
= compat_parse_qs(mobj
.group(1)) 
  97             if 'mediaData' not in vardict
: 
  98                 raise ExtractorError(u
'Unable to extract media URL') 
  99             mobj 
= re
.search(r
'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict
['mediaData'][0]) 
 101                 raise ExtractorError(u
'Unable to extract media URL') 
 102             mediaURL 
= mobj
.group('mediaURL').replace('\\/', '/') 
 103             video_extension 
= mediaURL
[-3:] 
 104             video_url 
= '%s?__gda__=%s' % (mediaURL
, mobj
.group('key')) 
 106         mobj 
= re
.search(r
'(?im)<title>(.*) - Video</title>', webpage
) 
 108             raise ExtractorError(u
'Unable to extract title') 
 109         video_title 
= mobj
.group(1).decode('utf-8') 
 111         mobj 
= re
.search(r
'submitter=(.*?);', webpage
) 
 113             raise ExtractorError(u
'Unable to extract uploader nickname') 
 114         video_uploader 
= mobj
.group(1) 
 117             'id':       video_id
.decode('utf-8'), 
 118             'url':      video_url
.decode('utf-8'), 
 119             'uploader': video_uploader
.decode('utf-8'), 
 121             'title':    video_title
, 
 122             'ext':      video_extension
.decode('utf-8'),