]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/openload.py
   2 from __future__ 
import unicode_literals
 
  10 from .common 
import InfoExtractor
 
  11 from ..compat 
import ( 
  27 def cookie_to_dict(cookie
): 
  30         'value': cookie
.value
, 
  32     if cookie
.port_specified
: 
  33         cookie_dict
['port'] = cookie
.port
 
  34     if cookie
.domain_specified
: 
  35         cookie_dict
['domain'] = cookie
.domain
 
  36     if cookie
.path_specified
: 
  37         cookie_dict
['path'] = cookie
.path
 
  38     if cookie
.expires 
is not None: 
  39         cookie_dict
['expires'] = cookie
.expires
 
  40     if cookie
.secure 
is not None: 
  41         cookie_dict
['secure'] = cookie
.secure
 
  42     if cookie
.discard 
is not None: 
  43         cookie_dict
['discard'] = cookie
.discard
 
  45         if (cookie
.has_nonstandard_attr('httpOnly') or 
  46                 cookie
.has_nonstandard_attr('httponly') or 
  47                 cookie
.has_nonstandard_attr('HttpOnly')): 
  48             cookie_dict
['httponly'] = True 
  54 def cookie_jar_to_list(cookie_jar
): 
  55     return [cookie_to_dict(cookie
) for cookie 
in cookie_jar
] 
  58 class PhantomJSwrapper(object): 
  59     """PhantomJS wrapper class 
  61     This class is experimental. 
  65         phantom.onError = function(msg, trace) {{ 
  66           var msgStack = ['PHANTOM ERROR: ' + msg]; 
  67           if(trace && trace.length) {{ 
  68             msgStack.push('TRACE:'); 
  69             trace.forEach(function(t) {{ 
  70               msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line 
  71                 + (t.function ? ' (in function ' + t.function +')' : '')); 
  74           console.error(msgStack.join('\n')); 
  77         var page = require('webpage').create(); 
  78         var fs = require('fs'); 
  79         var read = {{ mode: 'r', charset: 'utf-8' }}; 
  80         var write = {{ mode: 'w', charset: 'utf-8' }}; 
  81         JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{ 
  84         page.settings.resourceTimeout = {timeout}; 
  85         page.settings.userAgent = "{ua}"; 
  86         page.onLoadStarted = function() {{ 
  87           page.evaluate(function() {{ 
  88             delete window._phantom; 
  89             delete window.callPhantom; 
  92         var saveAndExit = function() {{ 
  93           fs.write("{html}", page.content, write); 
  94           fs.write("{cookies}", JSON.stringify(phantom.cookies), write); 
  97         page.onLoadFinished = function(status) {{ 
  98           if(page.url === "") {{ 
  99             page.setContent(fs.read("{html}", read), "{url}"); 
 108     _TMP_FILE_NAMES 
= ['script', 'html', 'cookies'] 
 112         return get_exe_version('phantomjs', version_re
=r
'([0-9.]+)') 
 114     def __init__(self
, extractor
, required_version
=None, timeout
=10000): 
 117         self
.exe 
= check_executable('phantomjs', ['-v']) 
 119             raise ExtractorError('PhantomJS executable not found in PATH, ' 
 120                                  'download it from http://phantomjs.org', 
 123         self
.extractor 
= extractor
 
 126             version 
= self
._version
() 
 127             if is_outdated_version(version
, required_version
): 
 128                 self
.extractor
._downloader
.report_warning( 
 129                     'Your copy of PhantomJS is outdated, update it to version ' 
 130                     '%s or newer if you encounter any errors.' % required_version
) 
 135         for name 
in self
._TMP
_FILE
_NAMES
: 
 136             tmp 
= tempfile
.NamedTemporaryFile(delete
=False) 
 138             self
._TMP
_FILES
[name
] = tmp
 
 141         for name 
in self
._TMP
_FILE
_NAMES
: 
 143                 os
.remove(self
._TMP
_FILES
[name
].name
) 
 144             except (IOError, OSError, KeyError): 
 147     def _save_cookies(self
, url
): 
 148         cookies 
= cookie_jar_to_list(self
.extractor
._downloader
.cookiejar
) 
 149         for cookie 
in cookies
: 
 150             if 'path' not in cookie
: 
 152             if 'domain' not in cookie
: 
 153                 cookie
['domain'] = compat_urlparse
.urlparse(url
).netloc
 
 154         with open(self
._TMP
_FILES
['cookies'].name
, 'wb') as f
: 
 155             f
.write(json
.dumps(cookies
).encode('utf-8')) 
 157     def _load_cookies(self
): 
 158         with open(self
._TMP
_FILES
['cookies'].name
, 'rb') as f
: 
 159             cookies 
= json
.loads(f
.read().decode('utf-8')) 
 160         for cookie 
in cookies
: 
 161             if cookie
['httponly'] is True: 
 162                 cookie
['rest'] = {'httpOnly': None} 
 163             if 'expiry' in cookie
: 
 164                 cookie
['expire_time'] = cookie
['expiry'] 
 165             self
.extractor
._set
_cookie
(**compat_kwargs(cookie
)) 
 167     def get(self
, url
, html
=None, video_id
=None, note
=None, note2
='Executing JS on webpage', headers
={}, jscode
='saveAndExit();'): 
 169         Downloads webpage (if needed) and executes JS 
 173             html: optional, html code of website 
 175             note: optional, displayed when downloading webpage 
 176             note2: optional, displayed when executing JS 
 177             headers: custom http headers 
 178             jscode: code to be executed when page is loaded 
 181             * downloaded website (after JS execution) 
 182             * anything you print with `console.log` (but not inside `page.execute`!) 
 184         In most cases you don't need to add any `jscode`. 
 185         It is executed in `page.onLoadFinished`. 
 186         `saveAndExit();` is mandatory, use it instead of `phantom.exit()` 
 187         It is possible to wait for some element on the webpage, for example: 
 188             var check = function() { 
 189               var elementFound = page.evaluate(function() { 
 190                 return document.querySelector('#b.done') !== null; 
 195                 window.setTimeout(check, 500); 
 198             page.evaluate(function(){ 
 199               document.querySelector('#a').click(); 
 203         if 'saveAndExit();' not in jscode
: 
 204             raise ExtractorError('`saveAndExit();` not found in `jscode`') 
 206             html 
= self
.extractor
._download
_webpage
(url
, video_id
, note
=note
, headers
=headers
) 
 207         with open(self
._TMP
_FILES
['html'].name
, 'wb') as f
: 
 208             f
.write(html
.encode('utf-8')) 
 210         self
._save
_cookies
(url
) 
 212         replaces 
= self
.options
 
 213         replaces
['url'] = url
 
 214         user_agent 
= headers
.get('User-Agent') or std_headers
['User-Agent'] 
 215         replaces
['ua'] = user_agent
.replace('"', '\\"') 
 216         replaces
['jscode'] = jscode
 
 218         for x 
in self
._TMP
_FILE
_NAMES
: 
 219             replaces
[x
] = self
._TMP
_FILES
[x
].name
.replace('\\', '\\\\').replace('"', '\\"') 
 221         with open(self
._TMP
_FILES
['script'].name
, 'wb') as f
: 
 222             f
.write(self
._TEMPLATE
.format(**replaces
).encode('utf-8')) 
 225             self
.extractor
.to_screen('%s' % (note2
,)) 
 227             self
.extractor
.to_screen('%s: %s' % (video_id
, note2
)) 
 229         p 
= subprocess
.Popen([ 
 230             self
.exe
, '--ssl-protocol=any', 
 231             self
._TMP
_FILES
['script'].name
 
 232         ], stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
) 
 233         out
, err 
= p
.communicate() 
 234         if p
.returncode 
!= 0: 
 235             raise ExtractorError( 
 236                 'Executing JS failed\n:' + encodeArgument(err
)) 
 237         with open(self
._TMP
_FILES
['html'].name
, 'rb') as f
: 
 238             html 
= f
.read().decode('utf-8') 
 242         return (html
, encodeArgument(out
)) 
 245 class OpenloadIE(InfoExtractor
): 
 246     _VALID_URL 
= r
'https?://(?:www\.)?(?:openload\.(?:co|io|link)|oload\.(?:tv|stream|site|xyz|win|download))/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' 
 249         'url': 'https://openload.co/f/kUEfGclsU9o', 
 250         'md5': 'bf1c059b004ebc7a256f89408e65c36e', 
 254             'title': 'skyrim_no-audio_1080.mp4', 
 255             'thumbnail': r
're:^https?://.*\.jpg$', 
 258         'url': 'https://openload.co/embed/rjC09fkPLYs', 
 262             'title': 'movie.mp4', 
 263             'thumbnail': r
're:^https?://.*\.jpg$', 
 271             'skip_download': True,  # test subtitles only 
 274         'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', 
 275         'only_matching': True, 
 277         'url': 'https://openload.io/f/ZAn6oz-VZGE/', 
 278         'only_matching': True, 
 280         'url': 'https://openload.co/f/_-ztPaZtMhM/', 
 281         'only_matching': True, 
 283         # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout 
 285         'url': 'https://openload.co/embed/Sxz5sADo82g/', 
 286         'only_matching': True, 
 288         # unavailable via https://openload.co/embed/e-Ixz9ZR5L0/ but available 
 289         # via https://openload.co/f/e-Ixz9ZR5L0/ 
 290         'url': 'https://openload.co/f/e-Ixz9ZR5L0/', 
 291         'only_matching': True, 
 293         'url': 'https://oload.tv/embed/KnG-kKZdcfY/', 
 294         'only_matching': True, 
 296         'url': 'http://www.openload.link/f/KnG-kKZdcfY', 
 297         'only_matching': True, 
 299         'url': 'https://oload.stream/f/KnG-kKZdcfY', 
 300         'only_matching': True, 
 302         'url': 'https://oload.xyz/f/WwRBpzW8Wtk', 
 303         'only_matching': True, 
 305         'url': 'https://oload.win/f/kUEfGclsU9o', 
 306         'only_matching': True, 
 308         'url': 'https://oload.download/f/kUEfGclsU9o', 
 309         'only_matching': True, 
 311         # Its title has not got its extension but url has it 
 312         'url': 'https://oload.download/f/N4Otkw39VCw/Tomb.Raider.2018.HDRip.XviD.AC3-EVO.avi.mp4', 
 313         'only_matching': True, 
 316     _USER_AGENT 
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' 
 319     def _extract_urls(webpage
): 
 321             r
'<iframe[^>]+src=["\']((?
:https?
://)?
(?
:openload\
.(?
:co|io
)|oload\
.tv
)/embed
/[a
-zA
-Z0
-9-_
]+)', 
 324     def _real_extract(self, url): 
 325         video_id = self._match_id(url) 
 326         url_pattern = 'https
://openload
.co
/%%s/%s/' % video_id 
 328             'User
-Agent
': self._USER_AGENT, 
 331         for path in ('embed
', 'f
'): 
 332             page_url = url_pattern % path 
 334             webpage = self._download_webpage( 
 335                 page_url, video_id, 'Downloading 
%s webpage
' % path, 
 336                 headers=headers, fatal=last) 
 339             if 'File 
not found
' in webpage or 'deleted by the owner
' in webpage: 
 342                 raise ExtractorError('File 
not found
', expected=True, video_id=video_id) 
 345         phantom = PhantomJSwrapper(self, required_version='2.0') 
 346         webpage, _ = phantom.get(page_url, html=webpage, video_id=video_id, headers=headers) 
 348         decoded_id = (get_element_by_id('streamurl
', webpage) or 
 349                       get_element_by_id('streamuri
', webpage) or 
 350                       get_element_by_id('streamurj
', webpage) or 
 352                           (r'>\s
*([\w
-]+~\d
{10,}~\d
+\
.\d
+\
.0\
.0~
[\w
-]+)\s
*<', 
 353                            r'>\s
*([\w~
-]+~\d
+\
.\d
+\
.\d
+\
.\d
+~
[\w~
-]+)', 
 354                            r'>\s
*([\w
-]+~\d
{10,}~
(?
:[a
-f\d
]+:){2}
:~
[\w
-]+)\s
*<', 
 355                            r'>\s
*([\w~
-]+~
[a
-f0
-9:]+~
[\w~
-]+)\s
*<', 
 356                            r'>\s
*([\w~
-]+~
[a
-f0
-9:]+~
[\w~
-]+)'), webpage, 
 359         video_url = 'https
://openload
.co
/stream
/%s?mime
=true
' % decoded_id 
 361         title = self._og_search_title(webpage, default=None) or self._search_regex( 
 362             r'<span
[^
>]+class=["\']title["\'][^
>]*>([^
<]+)', webpage, 
 363             'title
', default=None) or self._html_search_meta( 
 364             'description
', webpage, 'title
', fatal=True) 
 366         entries = self._parse_html5_media_entries(page_url, webpage, video_id) 
 367         entry = entries[0] if entries else {} 
 368         subtitles = entry.get('subtitles
') 
 373             'thumbnail
': entry.get('thumbnail
') or self._og_search_thumbnail(webpage, default=None), 
 375             'ext
': determine_ext(title, None) or determine_ext(url, 'mp4
'), 
 376             'subtitles
': subtitles, 
 377             'http_headers
': headers,