]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/metacafe.py
   3 from .common 
import InfoExtractor
 
  12 class MetacafeIE(InfoExtractor
): 
  13     """Information Extractor for metacafe.com.""" 
  15     _VALID_URL 
= r
'(?:http://)?(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*' 
  16     _DISCLAIMER 
= 'http://www.metacafe.com/family_filter/' 
  17     _FILTER_POST 
= 'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user' 
  22         u
"add_ie": ["Youtube"], 
  23         u
"url":  u
"http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/", 
  24         u
"file":  u
"_aUehQsCQtM.mp4", 
  26             u
"upload_date": u
"20090102", 
  27             u
"title": u
"The Electric Company | \"Short I\" | PBS KIDS GO!", 
  28             u
"description": u
"md5:2439a8ef6d5a70e380c22f5ad323e5a8", 
  30             u
"uploader_id": u
"PBS" 
  33     # Normal metacafe video 
  35         u
'url': u
'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/', 
  36         u
'md5': u
'6e0bca200eaad2552e6915ed6fd4d9ad', 
  40             u
'title': u
'News: Stuff You Won\'t Do with Your PlayStation 4', 
  42             u
'description': u
'Sony released a massive FAQ on the PlayStation Blog detailing the PS4\'s capabilities and limitations.', 
  47         u
"url": u
"http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/", 
  48         u
"file": u
"an-dVVXnuY7Jh77J.mp4", 
  50             u
"title": u
"The Andromeda Strain (1971): Stop the Bomb Part 3", 
  51             u
"uploader": u
"anyclip", 
  52             u
"description": u
"md5:38c711dd98f5bb87acf973d573442e67", 
  55     # age-restricted video 
  57         u
'url': u
'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/', 
  58         u
'md5': u
'98dde7c1a35d02178e8ab7560fe8bd09', 
  62             u
'title': u
'BBC INTERNAL Christmas Tape \'79 - UNCENSORED Outtakes, Etc.', 
  63             u
'uploader': u
'Dwayne Pipe', 
  64             u
'description': u
'md5:950bf4c581e2c059911fa3ffbe377e4b', 
  70         u
'url': u
'http://www.metacafe.com/watch/cb-0rOxMBabDXN6/samsung_galaxy_note_2_samsungs_next_generation_phablet/', 
  72             u
'id': u
'0rOxMBabDXN6', 
  74             u
'title': u
'Samsung Galaxy Note 2: Samsung\'s next-generation phablet', 
  75             u
'description': u
'md5:54d49fac53d26d5a0aaeccd061ada09d', 
  80             u
'skip_download': True, 
  86     def report_disclaimer(self
): 
  87         """Report disclaimer retrieval.""" 
  88         self
.to_screen(u
'Retrieving disclaimer') 
  90     def _real_initialize(self
): 
  92         self
.report_disclaimer() 
  93         self
._download
_webpage
(self
._DISCLAIMER
, None, False, u
'Unable to retrieve disclaimer') 
  98             'submit': "Continue - I'm over 18", 
 100         request 
= compat_urllib_request
.Request(self
._FILTER
_POST
, compat_urllib_parse
.urlencode(disclaimer_form
)) 
 101         request
.add_header('Content-Type', 'application/x-www-form-urlencoded') 
 102         self
.report_age_confirmation() 
 103         self
._download
_webpage
(request
, None, False, u
'Unable to confirm age') 
 105     def _real_extract(self
, url
): 
 106         # Extract id and simplified title from URL 
 107         mobj 
= re
.match(self
._VALID
_URL
, url
) 
 109             raise ExtractorError(u
'Invalid URL: %s' % url
) 
 111         video_id 
= mobj
.group(1) 
 113         # the video may come from an external site 
 114         m_external 
= re
.match('^(\w{2})-(.*)$', video_id
) 
 115         if m_external 
is not None: 
 116             prefix
, ext_id 
= m_external
.groups() 
 117             # Check if video comes from YouTube 
 119                 return self
.url_result('http://www.youtube.com/watch?v=%s' % ext_id
, 'Youtube') 
 120             # CBS videos use theplatform.com 
 122                 return self
.url_result('theplatform:%s' % ext_id
, 'ThePlatform') 
 124         # Retrieve video webpage to extract further information 
 125         req 
= compat_urllib_request
.Request('http://www.metacafe.com/watch/%s/' % video_id
) 
 127         # AnyClip videos require the flashversion cookie so that we get the link 
 129         mobj_an 
= re
.match(r
'^an-(.*?)$', video_id
) 
 131             req
.headers
['Cookie'] = 'flashVersion=0;' 
 132         webpage 
= self
._download
_webpage
(req
, video_id
) 
 134         # Extract URL, uploader and title from webpage 
 135         self
.report_extraction(video_id
) 
 136         mobj 
= re
.search(r
'(?m)&mediaURL=([^&]+)', webpage
) 
 138             mediaURL 
= compat_urllib_parse
.unquote(mobj
.group(1)) 
 139             video_ext 
= mediaURL
[-3:] 
 141             # Extract gdaKey if available 
 142             mobj 
= re
.search(r
'(?m)&gdaKey=(.*?)&', webpage
) 
 146                 gdaKey 
= mobj
.group(1) 
 147                 video_url 
= '%s?__gda__=%s' % (mediaURL
, gdaKey
) 
 149             mobj 
= re
.search(r
'<video src="([^"]+)"', webpage
) 
 151                 video_url 
= mobj
.group(1) 
 154                 mobj 
= re
.search(r
' name="flashvars" value="(.*?)"', webpage
) 
 156                     raise ExtractorError(u
'Unable to extract media URL') 
 157                 vardict 
= compat_parse_qs(mobj
.group(1)) 
 158                 if 'mediaData' not in vardict
: 
 159                     raise ExtractorError(u
'Unable to extract media URL') 
 160                 mobj 
= re
.search(r
'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"', vardict
['mediaData'][0]) 
 162                     raise ExtractorError(u
'Unable to extract media URL') 
 163                 mediaURL 
= mobj
.group('mediaURL').replace('\\/', '/') 
 164                 video_url 
= '%s?__gda__=%s' % (mediaURL
, mobj
.group('key')) 
 165                 video_ext 
= determine_ext(video_url
) 
 167         video_title 
= self
._html
_search
_regex
(r
'(?im)<title>(.*) - Video</title>', webpage
, u
'title') 
 168         description 
= self
._og
_search
_description
(webpage
) 
 169         video_uploader 
= self
._html
_search
_regex
( 
 170                 r
'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);', 
 171                 webpage
, u
'uploader nickname', fatal
=False) 
 173         if re
.search(r
'"contentRating":"restricted"', webpage
) is not None: 
 182             'description': description
, 
 183             'uploader': video_uploader
, 
 185             'title':    video_title
, 
 187             'age_limit': age_limit
,