2 from __future__ 
import unicode_literals
 
# This script employs a VERY basic heuristic ('porn' in webpage.lower()) to
# check whether a test case for a porn site is missing its 'age_limit' tag.
#
# A second approach relies on a list of porn domains; to activate it,
# pass the list filename as the only argument.
  12 # Allow direct execution 
  15 sys
.path
.insert(0, os
.path
.dirname(os
.path
.dirname(os
.path
.abspath(__file__
)))) 
  17 from test
.helper 
import get_testcases
 
  18 from youtube_dl
.utils 
import compat_urllib_parse_urlparse
 
  19 from youtube_dl
.utils 
import compat_urllib_request
 
  23     LIST 
= open(sys
.argv
[1]).read().decode('utf8').strip() 
  27 for test 
in get_testcases(): 
  28     if METHOD 
== 'EURISTIC': 
  30             webpage 
= compat_urllib_request
.urlopen(test
['url'], timeout
=10).read() 
  32             print('\nFail: {0}'.format(test
['name'])) 
  35         webpage 
= webpage
.decode('utf8', 'replace') 
  37         RESULT 
= 'porn' in webpage
.lower() 
  39     elif METHOD 
== 'LIST': 
  40         domain 
= compat_urllib_parse_urlparse(test
['url']).netloc
 
  42             print('\nFail: {0}'.format(test
['name'])) 
  44         domain 
= '.'.join(domain
.split('.')[-2:]) 
  46         RESULT 
= ('.' + domain 
+ '\n' in LIST 
or '\n' + domain 
+ '\n' in LIST
) 
  48     if RESULT 
and ('info_dict' not in test 
or 'age_limit' not in test
['info_dict'] 
  49                    or test
['info_dict']['age_limit'] != 18): 
  50         print('\nPotential missing age_limit check: {0}'.format(test
['name'])) 
  52     elif not RESULT 
and ('info_dict' in test 
and 'age_limit' in test
['info_dict'] 
  53                          and test
['info_dict']['age_limit'] == 18): 
  54         print('\nPotential false negative: {0}'.format(test
['name']))