]>
Raphaël G. Git Repositories - youtubedl/blob - youtube_dl/extractor/xfileshare.py
   2 from __future__ 
import unicode_literals
 
   6 from .common 
import InfoExtractor
 
   7 from ..compat 
import compat_chr
 
  18 # based on openload_decode from 2bfeee69b976fe049761dd3012e30b637ee05a58 
  19 def aa_decode(aa_code
): 
  21         ('7', '((゚ー゚) + (o^_^o))'), 
  22         ('6', '((o^_^o) +(o^_^o))'), 
  23         ('5', '((゚ー゚) + (゚Θ゚))'), 
  24         ('2', '((o^_^o) - (゚Θ゚))'), 
  32     for aa_char 
in aa_code
.split(delim
): 
  33         for val
, pat 
in symbol_table
: 
  34             aa_char 
= aa_char
.replace(pat
, val
) 
  35         aa_char 
= aa_char
.replace('+ ', '') 
  36         m 
= re
.match(r
'^\d+', aa_char
) 
  38             ret 
+= compat_chr(int(m
.group(0), 8)) 
  40             m 
= re
.match(r
'^u([\da-f]+)', aa_char
) 
  42                 ret 
+= compat_chr(int(m
.group(1), 16)) 
  46 class XFileShareIE(InfoExtractor
): 
  48         (r
'clipwatching\.com', 'ClipWatching'), 
  49         (r
'gounlimited\.to', 'GoUnlimited'), 
  50         (r
'govid\.me', 'GoVid'), 
  51         (r
'holavid\.com', 'HolaVid'), 
  52         (r
'streamty\.com', 'Streamty'), 
  53         (r
'thevideobee\.to', 'TheVideoBee'), 
  54         (r
'uqload\.com', 'Uqload'), 
  55         (r
'vidbom\.com', 'VidBom'), 
  56         (r
'vidlo\.us', 'vidlo'), 
  57         (r
'vidlocker\.xyz', 'VidLocker'), 
  58         (r
'vidshare\.tv', 'VidShare'), 
  60         (r
'xvideosharing\.com', 'XVideoSharing'), 
  63     IE_DESC 
= 'XFileShare based sites: %s' % ', '.join(list(zip(*_SITES
))[1]) 
  64     _VALID_URL 
= (r
'https?://(?:www\.)?(?P<host>%s)/(?:embed-)?(?P<id>[0-9a-zA-Z]+)' 
  65                   % '|'.join(site 
for site 
in list(zip(*_SITES
))[0])) 
  67     _FILE_NOT_FOUND_REGEXES 
= ( 
  68         r
'>(?:404 - )?File Not Found<', 
  69         r
'>The file was removed by administrator<', 
  73         'url': 'http://xvideosharing.com/fq65f94nd2ve', 
  74         'md5': '4181f63957e8fe90ac836fa58dc3c8a6', 
  79             'thumbnail': r
're:http://.*\.jpg', 
  84     def _extract_urls(webpage
): 
  87             for mobj 
in re
.finditer( 
  88                 r
'<iframe\b[^>]+\bsrc=(["\'])(?P
<url
>(?
:https?
:)?
//(?
:%s)/embed
-[0-9a
-zA
-Z
]+.*?
)\
1' 
  89                 % '|
'.join(site for site in list(zip(*XFileShareIE._SITES))[0]), 
  92     def _real_extract(self, url): 
  93         host, video_id = re.match(self._VALID_URL, url).groups() 
  95         url = 'https
://%s/' % host + ('embed
-%s.html
' % video_id if host in ('govid
.me
', 'vidlo
.us
') else video_id) 
  96         webpage = self._download_webpage(url, video_id) 
  98         if any(re.search(p, webpage) for p in self._FILE_NOT_FOUND_REGEXES): 
  99             raise ExtractorError('Video 
%s does 
not exist
' % video_id, expected=True) 
 101         fields = self._hidden_inputs(webpage) 
 103         if fields.get('op
') == 'download1
': 
 104             countdown = int_or_none(self._search_regex( 
 105                 r'<span 
id="countdown_str">(?
:[Ww
]ait
)?\s
*<span 
id="cxc">(\d
+)</span
>\s
*(?
:seconds?
)?
</span
>', 
 106                 webpage, 'countdown
', default=None)) 
 108                 self._sleep(countdown, video_id) 
 110             webpage = self._download_webpage( 
 111                 url, video_id, 'Downloading video page
', 
 112                 data=urlencode_postdata(fields), headers={ 
 114                     'Content
-type': 'application
/x
-www
-form
-urlencoded
', 
 117         title = (self._search_regex( 
 118             (r'style
="z-index: [0-9]+;">([^
<]+)</span
>', 
 119              r'<td nowrap
>([^
<]+)</td
>', 
 120              r'h4
-fine
[^
>]*>([^
<]+)<', 
 122              r'<h2 
class="video-page-head">([^
<]+)</h2
>', 
 123              r'<h2 style
="[^"]*color
:#403f3d[^"]*"[^>]*>([^<]+)<',  # streamin.to 
 124              r
'title\s*:\s*"([^"]+)"'),  # govid.me 
 125             webpage
, 'title', default
=None) or self
._og
_search
_title
( 
 126             webpage
, default
=None) or video_id
).strip() 
 129                 (r
'(eval\(function\(p,a,c,k,e,d\){.+)', decode_packed_codes
), 
 130                 (r
'(゚.+)', aa_decode
)): 
 131             obf_code 
= self
._search
_regex
(regex
, webpage
, 'obfuscated code', default
=None) 
 133                 webpage 
= webpage
.replace(obf_code
, func(obf_code
)) 
 137         jwplayer_data 
= self
._search
_regex
( 
 139                 r
'jwplayer\("[^"]+"\)\.load\(\[({.+?})\]\);', 
 140                 r
'jwplayer\("[^"]+"\)\.setup\(({.+?})\);', 
 142             'jwplayer data', default
=None) 
 144             jwplayer_data 
= self
._parse
_json
( 
 145                 jwplayer_data
.replace(r
"\'", "'"), video_id
, js_to_json
) 
 147                 formats 
= self
._parse
_jwplayer
_data
( 
 148                     jwplayer_data
, video_id
, False, 
 149                     m3u8_id
='hls', mpd_id
='dash')['formats'] 
 154                     r
'(?:file|src)\s*:\s*(["\'])(?P
<url
>http(?
:(?
!\
1).)+\
.(?
:m3u8|mp4|flv
)(?
:(?
!\
1).)*)\
1', 
 155                     r'file_link\s
*=\s
*(["\'])(?P<url>http(?:(?!\1).)+)\1', 
 156                     r'addVariable\((\\?["\'])file\
1\s
*,\s
*(\\?
["\'])(?P<url>http(?:(?!\2).)+)\2\)', 
 157                     r'<embed[^>]+src=(["\'])(?P
<url
>http(?
:(?
!\
1).)+\
.(?
:m3u8|mp4|flv
)(?
:(?
!\
1).)*)\
1'): 
 158                 for mobj in re.finditer(regex, webpage): 
 159                     video_url = mobj.group('url
') 
 160                     if video_url not in urls: 
 161                         urls.append(video_url) 
 163             sources = self._search_regex( 
 164                 r'sources\s
*:\s
*(\
[(?
!{)[^\
]]+\
])', webpage, 'sources
', default=None) 
 166                 urls.extend(self._parse_json(sources, video_id)) 
 169             for video_url in urls: 
 170                 if determine_ext(video_url) == 'm3u8
': 
 171                     formats.extend(self._extract_m3u8_formats( 
 172                         video_url, video_id, 'mp4
', 
 173                         entry_protocol='m3u8_native
', m3u8_id='hls
', 
 180         self._sort_formats(formats) 
 182         thumbnail = self._search_regex( 
 184                 r'<video
[^
>]+poster
="([^"]+)"', 
 185                 r'(?:image|poster)\s*:\s*["\'](http
[^
"\']+)["\'],', 
 186             ], webpage, 'thumbnail
', default=None) 
 191             'thumbnail
': thumbnail,