]>
Raphaƫl G. Git Repositories - youtubedl/blob - youtube_dl/extractor/openload.py
   2 from __future__ 
import unicode_literals
 
  10 from .common 
import InfoExtractor
 
  11 from ..compat 
import ( 
  27 def cookie_to_dict(cookie
): 
  30         'value': cookie
.value
, 
  32     if cookie
.port_specified
: 
  33         cookie_dict
['port'] = cookie
.port
 
  34     if cookie
.domain_specified
: 
  35         cookie_dict
['domain'] = cookie
.domain
 
  36     if cookie
.path_specified
: 
  37         cookie_dict
['path'] = cookie
.path
 
  38     if cookie
.expires 
is not None: 
  39         cookie_dict
['expires'] = cookie
.expires
 
  40     if cookie
.secure 
is not None: 
  41         cookie_dict
['secure'] = cookie
.secure
 
  42     if cookie
.discard 
is not None: 
  43         cookie_dict
['discard'] = cookie
.discard
 
  45         if (cookie
.has_nonstandard_attr('httpOnly') or 
  46                 cookie
.has_nonstandard_attr('httponly') or 
  47                 cookie
.has_nonstandard_attr('HttpOnly')): 
  48             cookie_dict
['httponly'] = True 
  54 def cookie_jar_to_list(cookie_jar
): 
  55     return [cookie_to_dict(cookie
) for cookie 
in cookie_jar
] 
  58 class PhantomJSwrapper(object): 
  59     """PhantomJS wrapper class 
  61     This class is experimental. 
  65         phantom.onError = function(msg, trace) {{ 
  66           var msgStack = ['PHANTOM ERROR: ' + msg]; 
  67           if(trace && trace.length) {{ 
  68             msgStack.push('TRACE:'); 
  69             trace.forEach(function(t) {{ 
  70               msgStack.push(' -> ' + (t.file || t.sourceURL) + ': ' + t.line 
  71                 + (t.function ? ' (in function ' + t.function +')' : '')); 
  74           console.error(msgStack.join('\n')); 
  77         var page = require('webpage').create(); 
  78         var fs = require('fs'); 
  79         var read = {{ mode: 'r', charset: 'utf-8' }}; 
  80         var write = {{ mode: 'w', charset: 'utf-8' }}; 
  81         JSON.parse(fs.read("{cookies}", read)).forEach(function(x) {{ 
  84         page.settings.resourceTimeout = {timeout}; 
  85         page.settings.userAgent = "{ua}"; 
  86         page.onLoadStarted = function() {{ 
  87           page.evaluate(function() {{ 
  88             delete window._phantom; 
  89             delete window.callPhantom; 
  92         var saveAndExit = function() {{ 
  93           fs.write("{html}", page.content, write); 
  94           fs.write("{cookies}", JSON.stringify(phantom.cookies), write); 
  97         page.onLoadFinished = function(status) {{ 
  98           if(page.url === "") {{ 
  99             page.setContent(fs.read("{html}", read), "{url}"); 
 108     _TMP_FILE_NAMES 
= ['script', 'html', 'cookies'] 
 112         return get_exe_version('phantomjs', version_re
=r
'([0-9.]+)') 
 114     def __init__(self
, extractor
, required_version
=None, timeout
=10000): 
 115         self
.exe 
= check_executable('phantomjs', ['-v']) 
 117             raise ExtractorError('PhantomJS executable not found in PATH, ' 
 118                                  'download it from http://phantomjs.org', 
 121         self
.extractor 
= extractor
 
 124             version 
= self
._version
() 
 125             if is_outdated_version(version
, required_version
): 
 126                 self
.extractor
._downloader
.report_warning( 
 127                     'Your copy of PhantomJS is outdated, update it to version ' 
 128                     '%s or newer if you encounter any errors.' % required_version
) 
 134         for name 
in self
._TMP
_FILE
_NAMES
: 
 135             tmp 
= tempfile
.NamedTemporaryFile(delete
=False) 
 137             self
._TMP
_FILES
[name
] = tmp
 
 140         for name 
in self
._TMP
_FILE
_NAMES
: 
 142                 os
.remove(self
._TMP
_FILES
[name
].name
) 
 146     def _save_cookies(self
, url
): 
 147         cookies 
= cookie_jar_to_list(self
.extractor
._downloader
.cookiejar
) 
 148         for cookie 
in cookies
: 
 149             if 'path' not in cookie
: 
 151             if 'domain' not in cookie
: 
 152                 cookie
['domain'] = compat_urlparse
.urlparse(url
).netloc
 
 153         with open(self
._TMP
_FILES
['cookies'].name
, 'wb') as f
: 
 154             f
.write(json
.dumps(cookies
).encode('utf-8')) 
 156     def _load_cookies(self
): 
 157         with open(self
._TMP
_FILES
['cookies'].name
, 'rb') as f
: 
 158             cookies 
= json
.loads(f
.read().decode('utf-8')) 
 159         for cookie 
in cookies
: 
 160             if cookie
['httponly'] is True: 
 161                 cookie
['rest'] = {'httpOnly': None} 
 162             if 'expiry' in cookie
: 
 163                 cookie
['expire_time'] = cookie
['expiry'] 
 164             self
.extractor
._set
_cookie
(**compat_kwargs(cookie
)) 
 166     def get(self
, url
, html
=None, video_id
=None, note
=None, note2
='Executing JS on webpage', headers
={}, jscode
='saveAndExit();'): 
 168         Downloads webpage (if needed) and executes JS 
 172             html: optional, html code of website 
 174             note: optional, displayed when downloading webpage 
 175             note2: optional, displayed when executing JS 
 176             headers: custom http headers 
 177             jscode: code to be executed when page is loaded 
 180             * downloaded website (after JS execution) 
 181             * anything you print with `console.log` (but not inside `page.execute`!) 
 183         In most cases you don't need to add any `jscode`. 
 184         It is executed in `page.onLoadFinished`. 
 185         `saveAndExit();` is mandatory, use it instead of `phantom.exit()` 
 186         It is possible to wait for some element on the webpage, for example: 
 187             var check = function() { 
 188               var elementFound = page.evaluate(function() { 
 189                 return document.querySelector('#b.done') !== null; 
 194                 window.setTimeout(check, 500); 
 197             page.evaluate(function(){ 
 198               document.querySelector('#a').click(); 
 202         if 'saveAndExit();' not in jscode
: 
 203             raise ExtractorError('`saveAndExit();` not found in `jscode`') 
 205             html 
= self
.extractor
._download
_webpage
(url
, video_id
, note
=note
, headers
=headers
) 
 206         with open(self
._TMP
_FILES
['html'].name
, 'wb') as f
: 
 207             f
.write(html
.encode('utf-8')) 
 209         self
._save
_cookies
(url
) 
 211         replaces 
= self
.options
 
 212         replaces
['url'] = url
 
 213         user_agent 
= headers
.get('User-Agent') or std_headers
['User-Agent'] 
 214         replaces
['ua'] = user_agent
.replace('"', '\\"') 
 215         replaces
['jscode'] = jscode
 
 217         for x 
in self
._TMP
_FILE
_NAMES
: 
 218             replaces
[x
] = self
._TMP
_FILES
[x
].name
.replace('\\', '\\\\').replace('"', '\\"') 
 220         with open(self
._TMP
_FILES
['script'].name
, 'wb') as f
: 
 221             f
.write(self
._TEMPLATE
.format(**replaces
).encode('utf-8')) 
 224             self
.extractor
.to_screen('%s' % (note2
,)) 
 226             self
.extractor
.to_screen('%s: %s' % (video_id
, note2
)) 
 228         p 
= subprocess
.Popen([ 
 229             self
.exe
, '--ssl-protocol=any', 
 230             self
._TMP
_FILES
['script'].name
 
 231         ], stdout
=subprocess
.PIPE
, stderr
=subprocess
.PIPE
) 
 232         out
, err 
= p
.communicate() 
 233         if p
.returncode 
!= 0: 
 234             raise ExtractorError( 
 235                 'Executing JS failed\n:' + encodeArgument(err
)) 
 236         with open(self
._TMP
_FILES
['html'].name
, 'rb') as f
: 
 237             html 
= f
.read().decode('utf-8') 
 241         return (html
, encodeArgument(out
)) 
 244 class OpenloadIE(InfoExtractor
): 
 245     _VALID_URL 
= r
'https?://(?:openload\.(?:co|io)|oload\.tv)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' 
 248         'url': 'https://openload.co/f/kUEfGclsU9o', 
 249         'md5': 'bf1c059b004ebc7a256f89408e65c36e', 
 253             'title': 'skyrim_no-audio_1080.mp4', 
 254             'thumbnail': r
're:^https?://.*\.jpg$', 
 257         'url': 'https://openload.co/embed/rjC09fkPLYs', 
 261             'title': 'movie.mp4', 
 262             'thumbnail': r
're:^https?://.*\.jpg$', 
 270             'skip_download': True,  # test subtitles only 
 273         'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', 
 274         'only_matching': True, 
 276         'url': 'https://openload.io/f/ZAn6oz-VZGE/', 
 277         'only_matching': True, 
 279         'url': 'https://openload.co/f/_-ztPaZtMhM/', 
 280         'only_matching': True, 
 282         # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout 
 284         'url': 'https://openload.co/embed/Sxz5sADo82g/', 
 285         'only_matching': True, 
 287         'url': 'https://oload.tv/embed/KnG-kKZdcfY/', 
 288         'only_matching': True, 
 291     _USER_AGENT 
= 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' 
 294     def _extract_urls(webpage
): 
 296             r
'<iframe[^>]+src=["\']((?
:https?
://)?
(?
:openload\
.(?
:co|io
)|oload\
.tv
)/embed
/[a
-zA
-Z0
-9-_
]+)', 
 299     def _real_extract(self, url): 
 300         video_id = self._match_id(url) 
 301         url = 'https
://openload
.co
/embed
/%s/' % video_id 
 303             'User
-Agent
': self._USER_AGENT, 
 306         webpage = self._download_webpage(url, video_id, headers=headers) 
 308         if 'File 
not found
' in webpage or 'deleted by the owner
' in webpage: 
 309             raise ExtractorError('File 
not found
', expected=True, video_id=video_id) 
 311         phantom = PhantomJSwrapper(self, required_version='2.0') 
 312         webpage, _ = phantom.get(url, html=webpage, video_id=video_id, headers=headers) 
 314         decoded_id = get_element_by_id('streamurl
', webpage) 
 316         video_url = 'https
://openload
.co
/stream
/%s?mime
=true
' % decoded_id 
 318         title = self._og_search_title(webpage, default=None) or self._search_regex( 
 319             r'<span
[^
>]+class=["\']title["\'][^
>]*>([^
<]+)', webpage, 
 320             'title
', default=None) or self._html_search_meta( 
 321             'description
', webpage, 'title
', fatal=True) 
 323         entries = self._parse_html5_media_entries(url, webpage, video_id) 
 324         entry = entries[0] if entries else {} 
 325         subtitles = entry.get('subtitles
') 
 330             'thumbnail
': entry.get('thumbnail
') or self._og_search_thumbnail(webpage, default=None), 
 332             # Seems all videos have extensions in their titles 
 333             'ext
': determine_ext(title, 'mp4
'), 
 334             'subtitles
': subtitles, 
 335             'http_headers
': headers,