Add findRedundantDomain.py script
commit
aaf73868db
|
@ -0,0 +1,70 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
print(sys.argv[1], sys.argv[2])
|
||||||
|
|
||||||
|
''' Find redundant items in domain lists.
|
||||||
|
e.g. 'bar.foo.com' is redundant for 'foo.com'.
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
def load(list):
|
||||||
|
''' Parse conf file & Prepare data structure
|
||||||
|
Returns: [ ['abc', 'com'],
|
||||||
|
['bar', 'foo', 'com'],
|
||||||
|
... ]
|
||||||
|
'''
|
||||||
|
|
||||||
|
results = []
|
||||||
|
with open(list, 'r') as f:
|
||||||
|
for line in f.readlines():
|
||||||
|
line = line.strip()
|
||||||
|
if line == '' or line.startswith('#'):
|
||||||
|
continue
|
||||||
|
# A domain name is case-insensitive and
|
||||||
|
# consists of several labels, separated by a full stop
|
||||||
|
domain_labels = line.lower().split('.')
|
||||||
|
results.append(domain_labels)
|
||||||
|
|
||||||
|
# Sort results by domain labels' length
|
||||||
|
results.sort(key=len)
|
||||||
|
return results
|
||||||
|
|
||||||
|
|
||||||
|
def find(labelses, removedDomainFile):
|
||||||
|
''' Find redundant items by a tree of top-level domain label to sub-level.
|
||||||
|
`tree` is like { 'com': { 'foo: { 'bar': LEAF },
|
||||||
|
'abc': LEAF },
|
||||||
|
'org': ... }
|
||||||
|
'''
|
||||||
|
|
||||||
|
tree = {}
|
||||||
|
LEAF = 1
|
||||||
|
for labels in labelses:
|
||||||
|
domain = '.'.join(labels)
|
||||||
|
# Init root node as current node
|
||||||
|
node = tree
|
||||||
|
while len(labels) > 0:
|
||||||
|
label = labels.pop()
|
||||||
|
if label in node:
|
||||||
|
# If child node is a LEAF node,
|
||||||
|
# current domain must be an existed domain or a subdomain of an existed.
|
||||||
|
if node[label] == LEAF:
|
||||||
|
print(f"Redundant found: {domain} at {'.'.join(labels)}")
|
||||||
|
with open(removedDomainFile, "a") as f:
|
||||||
|
f.write(domain)
|
||||||
|
f.write("\n")
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Create a leaf node if current label is last one
|
||||||
|
if len(labels) == 0:
|
||||||
|
node[label] = LEAF
|
||||||
|
# Create a branch node
|
||||||
|
else:
|
||||||
|
node[label] = {}
|
||||||
|
# Iterate to child node
|
||||||
|
node = node[label]
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
find(load(sys.argv[1]), sys.argv[2])
|
Loading…
Reference in New Issue