]>
 
 
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/metacafe.py 
 
 
 
 
 
 
 
 
   1  from  __future__ 
import  unicode_literals
 
   5  from  . common 
import  InfoExtractor
 
  18  class  MetacafeIE ( InfoExtractor
):  
  19      _VALID_URL 
=  r
'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'  
  20      _DISCLAIMER 
=  'http://www.metacafe.com/family_filter/'  
  21      _FILTER_POST 
=  'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'  
  26              'add_ie' : [ 'Youtube' ],  
  27              'url' :  'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/' ,  
  31                  'upload_date' :  '20090102' ,  
  32                  'title' :  'The Electric Company | "Short I" | PBS KIDS GO!' ,  
  33                  'description' :  'md5:2439a8ef6d5a70e380c22f5ad323e5a8' ,  
  38          # Normal metacafe video  
  40              'url' :  'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/' ,  
  41              'md5' :  '6e0bca200eaad2552e6915ed6fd4d9ad' ,  
  45                  'title' :  'News: Stuff You Won \' t Do with Your PlayStation 4' ,  
  47                  'description' :  'Sony released a massive FAQ on the PlayStation Blog detailing the PS4 \' s capabilities and limitations.' ,  
  52              'url' :  'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/' ,  
  54                  'id' :  'an-dVVXnuY7Jh77J' ,  
  56                  'title' :  'The Andromeda Strain (1971): Stop the Bomb Part 3' ,  
  57                  'uploader' :  'anyclip' ,  
  58                  'description' :  'md5:38c711dd98f5bb87acf973d573442e67' ,  
  61          # age-restricted video  
  63              'url' :  'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/' ,  
  64              'md5' :  '98dde7c1a35d02178e8ab7560fe8bd09' ,  
  68                  'title' :  'BBC INTERNAL Christmas Tape  \' 79 - UNCENSORED Outtakes, Etc.' ,  
  69                  'uploader' :  'Dwayne Pipe' ,  
  70                  'description' :  'md5:950bf4c581e2c059911fa3ffbe377e4b' ,  
  76              'url' :  'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/' ,  
  80                  'title' :  'Open: This is Face the Nation, February 9' ,  
  81                  'description' :  'md5:8a9ceec26d1f7ed6eab610834cc1a476' ,  
  86                  'skip_download' :  True ,  
  89          # Movieclips.com video  
  91              'url' :  'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/' ,  
  95                  'title' :  'My Week with Marilyn - Do You Love Me?' ,  
  96                  'description' :  'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.' ,  
  97                  'uploader' :  'movie_trailers' ,  
 101                  'skip_download' :  'requires rtmpdump' ,  
 106      def  report_disclaimer ( self
):  
 107          self
. to_screen ( 'Retrieving disclaimer' )  
 109      def  _real_initialize ( self
):  
 110          # Retrieve disclaimer  
 111          self
. report_disclaimer ()  
 112          self
._ download
_ webpage
( self
._ DISCLAIMER
,  None ,  False ,  'Unable to retrieve disclaimer' )  
 117              'submit' :  "Continue - I'm over 18" ,  
 119          request 
=  compat_urllib_request
. Request ( self
._ FILTER
_ POST
,  compat_urllib_parse
. urlencode ( disclaimer_form
))  
 120          request
. add_header ( 'Content-Type' ,  'application/x-www-form-urlencoded' )  
 121          self
. report_age_confirmation ()  
 122          self
._ download
_ webpage
( request
,  None ,  False ,  'Unable to confirm age' )  
 124      def  _real_extract ( self
,  url
):  
 125          # Extract id and simplified title from URL  
 126          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  
 128              raise  ExtractorError ( 'Invalid URL:  %s '  %  url
)  
 130          video_id 
=  mobj
. group ( 1 )  
 132          # the video may come from an external site  
 133          m_external 
=  re
. match ( '^(\w {2} )-(.*)$' ,  video_id
)  
 134          if  m_external 
is not None :  
 135              prefix
,  ext_id 
=  m_external
. groups ()  
 136              # Check if video comes from YouTube  
 138                  return  self
. url_result ( 'http://www.youtube.com/watch?v= %s '  %  ext_id
,  'Youtube' )  
 139              # CBS videos use theplatform.com  
 141                  return  self
. url_result ( 'theplatform: %s '  %  ext_id
,  'ThePlatform' )  
 143          # Retrieve video webpage to extract further information  
 144          req 
=  compat_urllib_request
. Request ( 'http://www.metacafe.com/watch/ %s /'  %  video_id
)  
 146          # AnyClip videos require the flashversion cookie so that we get the link  
 148          mobj_an 
=  re
. match ( r
'^an-(.*?)$' ,  video_id
)  
 150              req
. headers
[ 'Cookie' ] =  'flashVersion=0;'  
 151          webpage 
=  self
._ download
_ webpage
( req
,  video_id
)  
 153          # Extract URL, uploader and title from webpage  
 154          self
. report_extraction ( video_id
)  
 156          mobj 
=  re
. search ( r
'(?m)&mediaURL=([^&]+)' ,  webpage
)  
 158              mediaURL 
=  compat_urllib_parse
. unquote ( mobj
. group ( 1 ))  
 159              video_ext 
=  mediaURL
[- 3 :]  
 161              # Extract gdaKey if available  
 162              mobj 
=  re
. search ( r
'(?m)&gdaKey=(.*?)&' ,  webpage
)  
 166                  gdaKey 
=  mobj
. group ( 1 )  
 167                  video_url 
=  ' %s ?__gda__= %s '  % ( mediaURL
,  gdaKey
)  
 168          if  video_url 
is None :  
 169              mobj 
=  re
. search ( r
'<video src="([^"]+)"' ,  webpage
)  
 171                  video_url 
=  mobj
. group ( 1 )  
 173          if  video_url 
is None :  
 174              flashvars 
=  self
._ search
_ regex
(  
 175                  r
' name="flashvars" value="(.*?)"' ,  webpage
,  'flashvars' ,  
 178                  vardict 
=  compat_parse_qs ( flashvars
)  
 179                  if  'mediaData'  not in  vardict
:  
 180                      raise  ExtractorError ( 'Unable to extract media URL' )  
 182                      r
'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"' ,  vardict
[ 'mediaData' ][ 0 ])  
 184                      raise  ExtractorError ( 'Unable to extract media URL' )  
 185                  mediaURL 
=  mobj
. group ( 'mediaURL' ). replace ( ' \\ /' ,  '/' )  
 186                  video_url 
=  ' %s ?__gda__= %s '  % ( mediaURL
,  mobj
. group ( 'key' ))  
 187                  video_ext 
=  determine_ext ( video_url
)  
 188          if  video_url 
is None :  
 189              player_url 
=  self
._ search
_ regex
(  
 190                  r
"swfobject\.embedSWF\('([^']+)'" ,  
 191                  webpage
,  'config URL' ,  default
= None )  
 193                  config_url 
=  self
._ search
_ regex
(  
 194                      r
'config=(.+)$' ,  player_url
,  'config URL' )  
 195                  config_doc 
=  self
._ download
_ xml
(  
 196                      config_url
,  video_id
,  
 197                      note
= 'Downloading video config' )  
 198                  smil_url 
=  config_doc
. find ( './/properties' ). attrib
[ 'smil_file' ]  
 199                  smil_doc 
=  self
._ download
_ xml
(  
 201                      note
= 'Downloading SMIL document' )  
 202                  base_url 
=  smil_doc
. find ( './head/meta' ). attrib
[ 'base' ]  
 204                  for  vn 
in  smil_doc
. findall ( './/video' ):  
 205                      br 
=  int ( vn
. attrib
[ 'system-bitrate' ])  
 206                      play_path 
=  vn
. attrib
[ 'src' ]  
 208                          'format_id' :  'smil- %d '  %  br
,  
 210                          'play_path' :  play_path
,  
 212                          'player_url' :  player_url
,  
 213                          'ext' :  play_path
. partition ( ':' )[ 0 ],  
 216          if  video_url 
is None :  
 217              raise  ExtractorError ( 'Unsupported video type' )  
 219          video_title 
=  self
._ html
_ search
_ regex
(  
 220              r
'(?im)<title>(.*) - Video</title>' ,  webpage
,  'title' )  
 221          description 
=  self
._ og
_ search
_ description
( webpage
)  
 222          thumbnail 
=  self
._ og
_ search
_ thumbnail
( webpage
)  
 223          video_uploader 
=  self
._ html
_ search
_ regex
(  
 224              r
'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);' ,  
 225              webpage
,  'uploader nickname' ,  fatal
= False )  
 226          duration 
=  int_or_none (  
 227              self
._ html
_ search
_ meta
( 'video:duration' ,  webpage
))  
 231              if  re
. search ( r
'"contentRating":"restricted"' ,  webpage
)  
 234          if  isinstance ( video_url
,  list ):  
 242          self
._ sort
_ formats
( formats
)  
 245              'description' :  description
,  
 246              'uploader' :  video_uploader
,  
 247              'title' :  video_title
,  
 248              'thumbnail' :  thumbnail
,  
 249              'age_limit' :  age_limit
,  
 251              'duration' :  duration
,