]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/metacafe.py 
858c1c0c31f4c08c3068a62983781129288dc3b8
   1  from  __future__ 
import  unicode_literals
   5  from  . common 
import  InfoExtractor
  16  class  MetacafeIE ( InfoExtractor
):   17      _VALID_URL 
=  r
'http://(?:www\.)?metacafe\.com/watch/([^/]+)/([^/]+)/.*'   18      _DISCLAIMER 
=  'http://www.metacafe.com/family_filter/'   19      _FILTER_POST 
=  'http://www.metacafe.com/f/index.php?inputType=filter&controllerGroup=user'   24              'add_ie' : [ 'Youtube' ],   25              'url' :  'http://metacafe.com/watch/yt-_aUehQsCQtM/the_electric_company_short_i_pbs_kids_go/' ,   29                  'upload_date' :  '20090102' ,   30                  'title' :  'The Electric Company | "Short I" | PBS KIDS GO!' ,   31                  'description' :  'md5:2439a8ef6d5a70e380c22f5ad323e5a8' ,   36          # Normal metacafe video   38              'url' :  'http://www.metacafe.com/watch/11121940/news_stuff_you_wont_do_with_your_playstation_4/' ,   39              'md5' :  '6e0bca200eaad2552e6915ed6fd4d9ad' ,   43                  'title' :  'News: Stuff You Won \' t Do with Your PlayStation 4' ,   45                  'description' :  'Sony released a massive FAQ on the PlayStation Blog detailing the PS4 \' s capabilities and limitations.' ,   50              'url' :  'http://www.metacafe.com/watch/an-dVVXnuY7Jh77J/the_andromeda_strain_1971_stop_the_bomb_part_3/' ,   52                  'id' :  'an-dVVXnuY7Jh77J' ,   54                  'title' :  'The Andromeda Strain (1971): Stop the Bomb Part 3' ,   55                  'uploader' :  'anyclip' ,   56                  'description' :  'md5:38c711dd98f5bb87acf973d573442e67' ,   59          # age-restricted video   61              'url' :  'http://www.metacafe.com/watch/5186653/bbc_internal_christmas_tape_79_uncensored_outtakes_etc/' ,   62              'md5' :  '98dde7c1a35d02178e8ab7560fe8bd09' ,   66                  'title' :  'BBC INTERNAL Christmas Tape  \' 79 - UNCENSORED Outtakes, Etc.' ,   67                  'uploader' :  'Dwayne Pipe' ,   68                  'description' :  'md5:950bf4c581e2c059911fa3ffbe377e4b' ,   74              'url' :  'http://www.metacafe.com/watch/cb-8VD4r_Zws8VP/open_this_is_face_the_nation_february_9/' ,   78                  'title' :  'Open: This is Face the Nation, February 9' ,   79                  'description' :  'md5:8a9ceec26d1f7ed6eab610834cc1a476' ,   84                  'skip_download' :  True ,   87          # Movieclips.com video   89              'url' :  'http://www.metacafe.com/watch/mv-Wy7ZU/my_week_with_marilyn_do_you_love_me/' ,   93                  'title' :  'My Week with Marilyn - Do You Love Me?' ,   94                  'description' :  'From the movie My Week with Marilyn - Colin (Eddie Redmayne) professes his love to Marilyn (Michelle Williams) and gets her to promise to return to set and finish the movie.' ,   95                  'uploader' :  'movie_trailers' ,   99                  'skip_download' :  'requires rtmpdump' ,  104      def  report_disclaimer ( self
):  105          self
. to_screen ( 'Retrieving disclaimer' )  107      def  _real_initialize ( self
):  108          # Retrieve disclaimer  109          self
. report_disclaimer ()  110          self
._ download
_ webpage
( self
._ DISCLAIMER
,  None ,  False ,  'Unable to retrieve disclaimer' )  115              'submit' :  "Continue - I'm over 18" ,  117          request 
=  compat_urllib_request
. Request ( self
._ FILTER
_ POST
,  compat_urllib_parse
. urlencode ( disclaimer_form
))  118          request
. add_header ( 'Content-Type' ,  'application/x-www-form-urlencoded' )  119          self
. report_age_confirmation ()  120          self
._ download
_ webpage
( request
,  None ,  False ,  'Unable to confirm age' )  122      def  _real_extract ( self
,  url
):  123          # Extract id and simplified title from URL  124          mobj 
=  re
. match ( self
._ VALID
_U RL
,  url
)  126              raise  ExtractorError ( 'Invalid URL:  %s '  %  url
)  128          video_id 
=  mobj
. group ( 1 )  130          # the video may come from an external site  131          m_external 
=  re
. match ( '^(\w {2} )-(.*)$' ,  video_id
)  132          if  m_external 
is not None :  133              prefix
,  ext_id 
=  m_external
. groups ()  134              # Check if video comes from YouTube  136                  return  self
. url_result ( 'http://www.youtube.com/watch?v= %s '  %  ext_id
,  'Youtube' )  137              # CBS videos use theplatform.com  139                  return  self
. url_result ( 'theplatform: %s '  %  ext_id
,  'ThePlatform' )  141          # Retrieve video webpage to extract further information  142          req 
=  compat_urllib_request
. Request ( 'http://www.metacafe.com/watch/ %s /'  %  video_id
)  144          # AnyClip videos require the flashversion cookie so that we get the link  146          mobj_an 
=  re
. match ( r
'^an-(.*?)$' ,  video_id
)  148              req
. headers
[ 'Cookie' ] =  'flashVersion=0;'  149          webpage 
=  self
._ download
_ webpage
( req
,  video_id
)  151          # Extract URL, uploader and title from webpage  152          self
. report_extraction ( video_id
)  154          mobj 
=  re
. search ( r
'(?m)&mediaURL=([^&]+)' ,  webpage
)  156              mediaURL 
=  compat_urllib_parse
. unquote ( mobj
. group ( 1 ))  157              video_ext 
=  mediaURL
[- 3 :]  159              # Extract gdaKey if available  160              mobj 
=  re
. search ( r
'(?m)&gdaKey=(.*?)&' ,  webpage
)  164                  gdaKey 
=  mobj
. group ( 1 )  165                  video_url 
=  ' %s ?__gda__= %s '  % ( mediaURL
,  gdaKey
)  166          if  video_url 
is None :  167              mobj 
=  re
. search ( r
'<video src="([^"]+)"' ,  webpage
)  169                  video_url 
=  mobj
. group ( 1 )  171          if  video_url 
is None :  172              flashvars 
=  self
._ search
_ regex
(  173                  r
' name="flashvars" value="(.*?)"' ,  webpage
,  'flashvars' ,  176                  vardict 
=  compat_parse_qs ( flashvars
)  177                  if  'mediaData'  not in  vardict
:  178                      raise  ExtractorError ( 'Unable to extract media URL' )  180                      r
'"mediaURL":"(?P<mediaURL>http.*?)",(.*?)"key":"(?P<key>.*?)"' ,  vardict
[ 'mediaData' ][ 0 ])  182                      raise  ExtractorError ( 'Unable to extract media URL' )  183                  mediaURL 
=  mobj
. group ( 'mediaURL' ). replace ( ' \\ /' ,  '/' )  184                  video_url 
=  ' %s ?__gda__= %s '  % ( mediaURL
,  mobj
. group ( 'key' ))  185                  video_ext 
=  determine_ext ( video_url
)  186          if  video_url 
is None :  187              player_url 
=  self
._ search
_ regex
(  188                  r
"swfobject\.embedSWF\('([^']+)'" ,  189                  webpage
,  'config URL' ,  default
= None )  191                  config_url 
=  self
._ search
_ regex
(  192                      r
'config=(.+)$' ,  player_url
,  'config URL' )  193                  config_doc 
=  self
._ download
_ xml
(  194                      config_url
,  video_id
,  195                      note
= 'Downloading video config' )  196                  smil_url 
=  config_doc
. find ( './/properties' ). attrib
[ 'smil_file' ]  197                  smil_doc 
=  self
._ download
_ xml
(  199                      note
= 'Downloading SMIL document' )  200                  base_url 
=  smil_doc
. find ( './head/meta' ). attrib
[ 'base' ]  202                  for  vn 
in  smil_doc
. findall ( './/video' ):  203                      br 
=  int ( vn
. attrib
[ 'system-bitrate' ])  204                      play_path 
=  vn
. attrib
[ 'src' ]  206                          'format_id' :  'smil- %d '  %  br
,  208                          'play_path' :  play_path
,  210                          'player_url' :  player_url
,  211                          'ext' :  play_path
. partition ( ':' )[ 0 ],  214          if  video_url 
is None :  215              raise  ExtractorError ( 'Unsupported video type' )  217          video_title 
=  self
._ html
_ search
_ regex
(  218              r
'(?im)<title>(.*) - Video</title>' ,  webpage
,  'title' )  219          description 
=  self
._ og
_ search
_ description
( webpage
)  220          thumbnail 
=  self
._ og
_ search
_ thumbnail
( webpage
)  221          video_uploader 
=  self
._ html
_ search
_ regex
(  222              r
'submitter=(.*?);|googletag\.pubads\(\)\.setTargeting\("(?:channel|submiter)","([^"]+)"\);' ,  223              webpage
,  'uploader nickname' ,  fatal
= False )  224          duration 
=  int_or_none (  225              self
._ html
_ search
_ meta
( 'video:duration' ,  webpage
))  229              if  re
. search ( r
'"contentRating":"restricted"' ,  webpage
)  232          if  isinstance ( video_url
,  list ):  240          self
._ sort
_ formats
( formats
)  243              'description' :  description
,  244              'uploader' :  video_uploader
,  245              'title' :  video_title
,  246              'thumbnail' :  thumbnail
,  247              'age_limit' :  age_limit
,  249              'duration' :  duration
,