This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
whether a test case for a porn site is missing its 'age_limit' tag.
A second approach relies on a list of porn domains; to activate it,
pass the list's filename as the only argument.
  11 # Allow direct execution 
  14 sys
.path
.insert(0, os
.path
.dirname(os
.path
.dirname(os
.path
.abspath(__file__
)))) 
  16 from test
.helper 
import get_testcases
 
  17 from youtube_dl
.utils 
import compat_urllib_parse_urlparse
 
  18 from youtube_dl
.utils 
import compat_urllib_request
 
  22     LIST 
= open(sys
.argv
[1]).read().decode('utf8').strip() 
  26 for test 
in get_testcases(): 
  27     if METHOD 
== 'EURISTIC': 
  29             webpage 
= compat_urllib_request
.urlopen(test
['url'], timeout
=10).read() 
  31             print('\nFail: {0}'.format(test
['name'])) 
  34         webpage 
= webpage
.decode('utf8', 'replace') 
  36         RESULT 
= 'porn' in webpage
.lower() 
  38     elif METHOD 
== 'LIST': 
  39         domain 
= compat_urllib_parse_urlparse(test
['url']).netloc
 
  41             print('\nFail: {0}'.format(test
['name'])) 
  43         domain 
= '.'.join(domain
.split('.')[-2:]) 
  45         RESULT 
= ('.' + domain 
+ '\n' in LIST 
or '\n' + domain 
+ '\n' in LIST
) 
  47     if RESULT 
and ('info_dict' not in test 
or 'age_limit' not in test
['info_dict'] 
  48                    or test
['info_dict']['age_limit'] != 18): 
  49         print('\nPotential missing age_limit check: {0}'.format(test
['name'])) 
  51     elif not RESULT 
and ('info_dict' in test 
and 'age_limit' in test
['info_dict'] 
  52                          and test
['info_dict']['age_limit'] == 18): 
  53         print('\nPotential false negative: {0}'.format(test
['name']))