]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/openload.py
   2 from __future__ 
import unicode_literals
 
  23 def cookie_to_dict(cookie
): 
  26         'value': cookie
.value
, 
  28     if cookie
.port_specified
: 
  29         cookie_dict
['port'] = cookie
.port
 
  30     if cookie
.domain_specified
: 
  31         cookie_dict
['domain'] = cookie
.domain
 
  32     if cookie
.path_specified
: 
  33         cookie_dict
['path'] = cookie
.path
 
  34     if cookie
.expires 
is not None: 
  35         cookie_dict
['expires'] = cookie
.expires
 
  36     if cookie
.secure 
is not None: 
  37         cookie_dict
['secure'] = cookie
.secure
 
  38     if cookie
.discard 
is not None: 
  39         cookie_dict
['discard'] = cookie
.discard
 
  41         if (cookie
.has_nonstandard_attr('httpOnly') 
  42                 or cookie
.has_nonstandard_attr('httponly') 
  43                 or cookie
.has_nonstandard_attr('HttpOnly')): 
  44             cookie_dict
['httponly'] = True 
  50 def cookie_jar_to_list(cookie_jar
): 
  51     return [cookie_to_dict(cookie
) for cookie 
in cookie_jar
] 
  54 class PhantomJSwrapper(object): 
  55     """PhantomJS wrapper class 
  57     This class is experimental. 
  61         phantom.onError = function(msg, trace) {{ 
  62           var msgStack = ['PHANTOM ERROR: ' + msg]; 
  63           if(trace && trace.length) {{ 
  64             msgStack.push('TRACE:'); 
  65             trace.forEach(function(t) {{ 
  66               msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line 
  67                 + (t.function ? ' (in function ' + t.function +')' : '')); 
  70           console.error(msgStack.join('\n')); 
  73         var page = require('webpage').create(); 
  74         var fs = require('fs'); 
  75         var read = {{ mode: 'r', charset: 'utf-8' }}; 
  76         var write = {{ mode: 'w', charset: 'utf-8' }}; 
  77         JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{ 
  80         page.settings.resourceTimeout = {timeout}; 
  81         page.settings.userAgent = "{ua}"; 
  82         page.onLoadStarted = function() {{ 
  83           page.evaluate(function() {{ 
  84             delete window._phantom; 
  85             delete window.callPhantom; 
  88         var saveAndExit = function() {{ 
  89           fs.write("{html}", page.content, write); 
  90           fs.write("{cookies}", JSON.stringify(phantom.cookies), write); 
  93         page.onLoadFinished = function(status) {{ 
  94           if(page.url === "") {{ 
  95             page.setContent(fs.read("{html}", read), "{url}"); 
 104     _TMP_FILE_NAMES 
= ['script', 'html', 'cookies'] 
 108         return get_exe_version('phantomjs', version_re
=r
'([0-9.]+)') 
 110     def __init__(self
, extractor
, required_version
=None, timeout
=10000): 
 113         self
.exe 
= check_executable('phantomjs', ['-v']) 
 115             raise ExtractorError('PhantomJS executable not found in PATH, ' 
 116                                  'download it from http://phantomjs.org', 
 119         self
.extractor 
= extractor
 
 122             version 
= self
._version
() 
 123             if is_outdated_version(version
, required_version
): 
 124                 self
.extractor
._downloader
.report_warning( 
 125                     'Your copy of PhantomJS is outdated, update it to version ' 
 126                     '%s or newer if you encounter any errors.' % required_version
) 
 131         for name 
in self
._TMP
_FILE
_NAMES
: 
 132             tmp 
= tempfile
.NamedTemporaryFile(delete
=False) 
 134             self
._TMP
_FILES
[name
] = tmp
 
 137         for name 
in self
._TMP
_FILE
_NAMES
: 
 139                 os
.remove(self
._TMP
_FILES
[name
].name
) 
 140             except (IOError, OSError, KeyError): 
 143     def _save_cookies(self
, url
): 
 144         cookies 
= cookie_jar_to_list(self
.extractor
._downloader
.cookiejar
) 
 145         for cookie 
in cookies
: 
 146             if 'path' not in cookie
: 
 148             if 'domain' not in cookie
: 
 149                 cookie
['domain'] = compat_urlparse
.urlparse(url
).netloc
 
 150         with open(self
._TMP
_FILES
['cookies'].name
, 'wb') as f
: 
 151             f
.write(json
.dumps(cookies
).encode('utf-8')) 
 153     def _load_cookies(self
): 
 154         with open(self
._TMP
_FILES
['cookies'].name
, 'rb') as f
: 
 155             cookies 
= json
.loads(f
.read().decode('utf-8')) 
 156         for cookie 
in cookies
: 
 157             if cookie
['httponly'] is True: 
 158                 cookie
['rest'] = {'httpOnly': None} 
 159             if 'expiry' in cookie
: 
 160                 cookie
['expire_time'] = cookie
['expiry'] 
 161             self
.extractor
._set
_cookie
(**compat_kwargs(cookie
)) 
 163     def get(self
, url
, html
=None, video_id
=None, note
=None, note2
='Executing JS on webpage', headers
={}, jscode
='saveAndExit();'): 
 165         Downloads webpage (if needed) and executes JS 
 169             html: optional, html code of website 
 171             note: optional, displayed when downloading webpage 
 172             note2: optional, displayed when executing JS 
 173             headers: custom http headers 
 174             jscode: code to be executed when page is loaded 
 177             * downloaded website (after JS execution) 
 178             * anything you print with `console.log` (but not inside `page.execute`!) 
 180         In most cases you don't need to add any `jscode`. 
 181         It is executed in `page.onLoadFinished`. 
 182         `saveAndExit();` is mandatory, use it instead of `phantom.exit()` 
 183         It is possible to wait for some element on the webpage, for example: 
 184             var check = function() { 
 185               var elementFound = page.evaluate(function() { 
 186                 return document.querySelector('#b.done') !== null; 
 191                 window.setTimeout(check, 500); 
 194             page.evaluate(function(){ 
 195               document.querySelector('#a').click(); 
 199         if 'saveAndExit();' not in jscode
: 
 200             raise ExtractorError('`saveAndExit();` not found in `jscode`') 
 202             html 
= self
.extractor
._download
_webpage
(url
, video_id
, note
=note
, headers
=headers
) 
 203         with open(self
._TMP
_FILES
['html'].name
, 'wb') as f
: 
 204             f
.write(html
.encode('utf-8')) 
 206         self
._save
_cookies
(url
) 
 208         replaces 
= self
.options
 
 209         replaces
['url'] = url
 
 210         user_agent 
= headers
.get('User-Agent') or std_headers
['User-Agent'] 
 211         replaces
['ua'] = user_agent
.replace('"', '\\"') 
 212         replaces
['jscode'] = jscode
 
 214         for x 
in self
._TMP
_FILE
_NAMES
: 
 215             replaces
[x
] = self
._TMP
_FILES
[x
].name
.replace('\\', '\\\\').replace('"', '\\"') 
 217         with open(self
._TMP
_FILES
['script'].name
, 'wb') as f
: 
 218             f
.write(self
._TEMPLATE
.format(**replaces
).encode('utf-8')) 
 221             self
.extractor
.to_screen('%s' % (note2
,)) 
 223             self
.extractor
.to_screen('%s: %s' % (video_id
, note2
)) 
 225         p 
= subprocess
.Popen([ 
 226             self
.exe
, '--ssl-protocol=any', 
 227             self
._TMP
_FILES
['script'].name
 
 228         ], stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
) 
 229         out
, err 
= p
.communicate() 
 230         if p
.returncode 
!= 0: 
 231             raise ExtractorError( 
 232                 'Executing JS failed\n:' + encodeArgument(err
)) 
 233         with open(self
._TMP
_FILES
['html'].name
, 'rb') as f
: 
 234             html 
= f
.read().decode('utf-8') 
 238         return (html
, encodeArgument(out
))