]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/metacafe.py
e38dc98b4c2702be6b488e2e516e1a6ea95c9d8d
   4 from .common 
import InfoExtractor
 
  10     compat_urllib_request
, 
  16 class MetacafeIE(InfoExtractor
): 
  17     """Information Extractor for metacafe.com.""" 
  19     _VALID_URL 
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' 
  20     _DISCLAIMER 
= 'http://www.metacafe.com/family_filter/' 
  21     _FILTER_POST 
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' 
  24         u
"add_ie": ["Youtube"], 
  25         u
"url":  u
"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", 
  26         u
"file":  u
"_aUehQsCQtM.flv", 
  28             u
"upload_date": u
"20090102", 
  29             u
"title": u
"The Electric Company | \"Short I\" | PBS KIDS GO!", 
  30             u
"description": u
"md5:2439a8ef6d5a70e380c22f5ad323e5a8", 
  32             u
"uploader_id": u
"PBS" 
  36         u
"url": u
"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/", 
  37         u
"file": u
"an-dVVXnuY7Jh77J.mp4", 
  39             u
"title": u
"The Andromeda Strain (1971): Stop the Bomb Part 3", 
  40             u
"uploader": u
"anyclip", 
  41             u
"description": u
"md5:38c711dd98f5bb87acf973d573442e67" 
  46     def report_disclaimer(self
): 
  47         """Report disclaimer retrieval.""" 
  48         self
.to_screen(u
'Retrieving disclaimer') 
  50     def _real_initialize(self
): 
  52         request 
= compat_urllib_request
.Request(self
._DISCLAIMER
) 
  54             self
.report_disclaimer() 
  55             compat_urllib_request
.urlopen(request
).read() 
  56         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  57             raise ExtractorError(u
'Unable to retrieve disclaimer: %s' % compat_str(err
)) 
  62             'submit': "Continue - I'm over 18", 
  64         request 
= compat_urllib_request
.Request(self
._FILTER
_POST
, compat_urllib_parse
.urlencode(disclaimer_form
)) 
  66             self
.report_age_confirmation() 
  67             compat_urllib_request
.urlopen(request
).read() 
  68         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  69             raise ExtractorError(u
'Unable to confirm age: %s' % compat_str(err
)) 
  71     def _real_extract(self
, url
): 
  72         # Extract id and simplified title from URL 
  73         mobj 
= re
.match(self
._VALID
_URL
, url
) 
  75             raise ExtractorError(u
'Invalid URL: %s' % url
) 
  77         video_id 
= mobj
.group(1) 
  79         # Check if video comes from YouTube 
  80         mobj2 
= re
.match(r
'^yt-(.*)$', video_id
) 
  82             return [self
.url_result('http://www.youtube.com/watch?v=%s' % mobj2
.group(1), 'Youtube')] 
  84         # Retrieve video webpage to extract further information 
  85         req 
= compat_urllib_request
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
  86         req
.headers
['Cookie'] = 'flashVersion=0;' 
  87         webpage 
= self
._download
_webpage
(req
, video_id
) 
  89         # Extract URL, uploader and title from webpage 
  90         self
.report_extraction(video_id
) 
  91         mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
  93             mediaURL 
= compat_urllib_parse
.unquote(mobj
.group(1)) 
  94             video_ext 
= mediaURL
[-3:] 
  96             # Extract gdaKey if available 
  97             mobj 
= re
.search(r
'(?m)&gdaKey=(.*?)&', webpage
) 
 101                 gdaKey 
= mobj
.group(1) 
 102                 video_url 
= '%s?__gda__=%s' % (mediaURL
, gdaKey
) 
 104             mobj 
= re
.search(r
'<video src="([^"]+)"', webpage
) 
 106                 video_url 
= mobj
.group(1) 
 109                 mobj 
= re
.search(r
' name="flashvars" value="(.*?)"', webpage
) 
 111                     raise ExtractorError(u
'Unable to extract media URL') 
 112                 vardict 
= compat_parse_qs(mobj
.group(1)) 
 113                 if 'mediaData' not in vardict
: 
 114                     raise ExtractorError(u
'Unable to extract media URL') 
 115                 mobj 
= re
.search(r
'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict
['mediaData'][0]) 
 117                     raise ExtractorError(u
'Unable to extract media URL') 
 118                 mediaURL 
= mobj
.group('mediaURL').replace('\\/', '/') 
 119                 video_url 
= '%s?__gda__=%s' % (mediaURL
, mobj
.group('key')) 
 120                 video_ext 
= determine_ext(video_url
) 
 122         video_title 
= self
._html
_search
_regex
(r
'(?im)<title>(.*) - Video</title>', webpage
, u
'title') 
 123         description 
= self
._og
_search
_description
(webpage
) 
 124         video_uploader 
= self
._html
_search
_regex
( 
 125                 r
'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("channel","([^"]+)"\);', 
 126                 webpage
, u
'uploader nickname', fatal
=False) 
 132             'description': description
, 
 133             'uploader': video_uploader
, 
 135             'title':    video_title
,