]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/depositfiles.py
   5 from .common 
import InfoExtractor
 
  11     compat_urllib_request
, 
  17 class DepositFilesIE(InfoExtractor
): 
  18     """Information extractor for depositfiles.com""" 
  20     _VALID_URL 
= r
'(?:http://)?(?:\w+\.)?depositfiles\.com/(?:../(?#locale))?files/(.+)' 
  22     def _real_extract(self
, url
): 
  23         file_id 
= url
.split('/')[-1] 
  24         # Rebuild url in english locale 
  25         url 
= 'http://depositfiles.com/en/files/' + file_id
 
  27         # Retrieve file webpage with 'Free download' button pressed 
  28         free_download_indication 
= {'gateway_result' : '1'} 
  29         request 
= compat_urllib_request
.Request(url
, compat_urllib_parse
.urlencode(free_download_indication
)) 
  31             self
.report_download_webpage(file_id
) 
  32             webpage 
= compat_urllib_request
.urlopen(request
).read() 
  33         except (compat_urllib_error
.URLError
, compat_http_client
.HTTPException
, socket
.error
) as err
: 
  34             raise ExtractorError(u
'Unable to retrieve file webpage: %s' % compat_str(err
)) 
  36         # Search for the real file URL 
  37         mobj 
= re
.search(r
'<form action="(http://fileshare.+?)"', webpage
) 
  38         if (mobj 
is None) or (mobj
.group(1) is None): 
  39             # Try to figure out reason of the error. 
  40             mobj 
= re
.search(r
'<strong>(Attention.*?)</strong>', webpage
, re
.DOTALL
) 
  41             if (mobj 
is not None) and (mobj
.group(1) is not None): 
  42                 restriction_message 
= re
.sub('\s+', ' ', mobj
.group(1)).strip() 
  43                 raise ExtractorError(u
'%s' % restriction_message
) 
  45                 raise ExtractorError(u
'Unable to extract download URL from: %s' % url
) 
  47         file_url 
= mobj
.group(1) 
  48         file_extension 
= os
.path
.splitext(file_url
)[1][1:] 
  50         # Search for file title 
  51         file_title 
= self
._search
_regex
(r
'<b title="(.*?)">', webpage
, u
'title') 
  54             'id':       file_id
.decode('utf-8'), 
  55             'url':      file_url
.decode('utf-8'), 
  59             'ext':      file_extension
.decode('utf-8'),