]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/openload.py
7f19b1ba5c3c355977c694334694b51f71a9840c
   2 from __future__ 
import unicode_literals
, division
 
   6 from .common 
import InfoExtractor
 
  15 from ..jsinterp 
import ( 
  21 class OpenloadIE(InfoExtractor
): 
  22     _VALID_URL 
= r
'https?://openload\.(?:co|io)/(?:f|embed)/(?P<id>[a-zA-Z0-9-_]+)' 
  25         'url': 'https://openload.co/f/kUEfGclsU9o', 
  26         'md5': 'bf1c059b004ebc7a256f89408e65c36e', 
  30             'title': 'skyrim_no-audio_1080.mp4', 
  31             'thumbnail': 're:^https?://.*\.jpg$', 
  34         'url': 'https://openload.co/embed/rjC09fkPLYs', 
  39             'thumbnail': 're:^https?://.*\.jpg$', 
  47             'skip_download': True,  # test subtitles only 
  50         'url': 'https://openload.co/embed/kUEfGclsU9o/skyrim_no-audio_1080.mp4', 
  51         'only_matching': True, 
  53         'url': 'https://openload.io/f/ZAn6oz-VZGE/', 
  54         'only_matching': True, 
  56         'url': 'https://openload.co/f/_-ztPaZtMhM/', 
  57         'only_matching': True, 
  59         # unavailable via https://openload.co/f/Sxz5sADo82g/, different layout 
  61         'url': 'https://openload.co/embed/Sxz5sADo82g/', 
  62         'only_matching': True, 
  65     def openload_decode(self
, txt
): 
  77             '((゚ー゚) + (o^_^o))': '7', 
  78             '((o^_^o) +(o^_^o) +(c^_^o))': '6', 
  79             '((゚ー゚) + (゚Θ゚))': '5', 
  84             '((c^_^o)-(c^_^o))': '0', 
  87         end_token 
= '(゚Д゚)[゚o゚]' 
  88         symbols 
= '|'.join(map(re
.escape
, symbol_dict
.keys())) 
  89         txt 
= re
.sub('(%s)\+\s?' % symbols
, lambda m
: symbol_dict
[m
.group(1)], txt
) 
  91         for aacode 
in re
.findall(r
'{0}\+\s?{1}(.*?){0}'.format(re
.escape(end_token
), re
.escape(delim
)), txt
): 
  92             for aachar 
in aacode
.split(delim
): 
  94                     ret 
+= compat_chr(int(aachar
, 8)) 
  96                     m 
= re
.match(r
'^u([\da-f]{4})$', aachar
) 
  98                         ret 
+= compat_chr(int(m
.group(1), 16)) 
 100                         self
.report_warning("Cannot decode: %s" % aachar
) 
 103     def _real_extract(self
, url
): 
 104         video_id 
= self
._match
_id
(url
) 
 105         webpage 
= self
._download
_webpage
('https://openload.co/embed/%s/' % video_id
, video_id
) 
 107         if 'File not found' in webpage 
or 'deleted by the owner' in webpage
: 
 108             raise ExtractorError('File not found', expected
=True) 
 110         # The following decryption algorithm is written by @yokrysty and 
 111         # declared to be freely used in youtube-dl 
 112         # See https://github.com/rg3/youtube-dl/issues/10408 
 113         enc_data 
= self
._html
_search
_regex
( 
 114             r
'<span[^>]*>([^<]+)</span>\s*<span[^>]*>[^<]+</span>\s*<span[^>]+id="streamurl"', 
 115             webpage
, 'encrypted data') 
 117         enc_code 
= self
._html
_search
_regex
(r
'<script[^>]+>(゚ω゚[^<]+)</script>', 
 118                                            webpage
, 'encrypted code') 
 120         js_code 
= self
.openload_decode(enc_code
) 
 121         jsi 
= JSInterpreter(js_code
) 
 123         m_offset_fun 
= self
._search
_regex
(r
'slice\(0\s*-\s*(%s)\(\)' % _NAME_RE
, js_code
, 'javascript offset function') 
 124         m_diff_fun 
= self
._search
_regex
(r
'charCodeAt\(0\)\s*\+\s*(%s)\(\)' % _NAME_RE
, js_code
, 'javascript diff function') 
 126         offset 
= jsi
.call_function(m_offset_fun
) 
 127         diff 
= jsi
.call_function(m_diff_fun
) 
 131         for idx
, c 
in enumerate(enc_data
): 
 133             if j 
>= 33 and j 
<= 126: 
 134                 j 
= ((j 
+ 14) % 94) + 33 
 135             if idx 
== len(enc_data
) - offset
: 
 137             video_url_chars 
+= compat_chr(j
) 
 139         video_url 
= 'https://openload.co/stream/%s?mime=true' % ''.join(video_url_chars
) 
 141         title 
= self
._og
_search
_title
(webpage
, default
=None) or self
._search
_regex
( 
 142             r
'<span[^>]+class=["\']title
["\'][^>]*>([^<]+)', webpage, 
 143             'title', default=None) or self._html_search_meta( 
 144             'description', webpage, 'title', fatal=True) 
 146         entries = self._parse_html5_media_entries(url, webpage, video_id) 
 147         subtitles = entries[0]['subtitles'] if entries else None 
 152             'thumbnail': self._og_search_thumbnail(webpage, default=None), 
 154             # Seems all videos have extensions in their titles 
 155             'ext': determine_ext(title), 
 156             'subtitles': subtitles,