Last active
August 3, 2018 16:02
-
-
Save itissid/d2ba2f996bb388c1795be861efd07813 to your computer and use it in GitHub Desktop.
A trie from scratch.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import string | |
class TrieNode(object): | |
""" | |
Context: Implementation of the trie using a dictionary is trivial: | |
https://gist.github.com/itissid/6a5bec95424b5630908e877c02d957d5 | |
But if one was to implement the Trie node as a DS then one needs to think about implementation a bit more. | |
So what I did was keep the API code for search, insert, search_prefix the same as the gist; i.e. | |
to pretend we are still using a dictionary, but I implemented some magic methods for the trie in the TrieNode ( | |
__getitem__ and __setitem__) to support the dictionary(ish) usage. So to the API its as if we are using the dictionary | |
for tries which keeps things simple, but offoad all the complexity to the internal magic methods. | |
ace | |
acid | |
acne | |
book | |
{ | |
a: { | |
c: { | |
i: { | |
d: {"$": True} | |
}, | |
n: { | |
e: {$:True} | |
} | |
}, | |
e: { | |
"$": True | |
} | |
b:{...} | |
} | |
1. The implmentation is done with a Node class. | |
It has an array the size of alphabet that needs to be indexed. | |
Assume that the index is coming from atoi() function and that the alphabet is known before hand. | |
A new node has a children array allocated. | |
Nodes don't have values themselves | |
2. When inserting a word, each alphabet is checked in the children array and if there is None in the index | |
then insert a new Node there. | |
""" | |
alpha_map = {s:i for i, s in enumerate(string.ascii_letters + string.digits + ".")} | |
@staticmethod | |
def atoi(a): | |
if a not in TrieNode.alpha_map: | |
raise ValueError | |
return TrieNode.alpha_map[a] | |
def __init__(self): | |
# TODO: If its the terminal then don't initialize this | |
self.children = [None for i in range(len(TrieNode.alpha_map))] | |
self.is_terminal = False | |
""" | |
Test by hand | |
fog | |
foggy | |
bond | |
root | |
s root children | |
f root [...5:TrieNode...] | |
o 5 [.........12:TrieNode...] | |
g 12 [....6:TrieNode...] | |
/ 6 $ | |
foggy | |
f root [.... 5:TrieNOde...] Already present | |
o 5 | |
g 12 | |
g 6 | |
y 6 [.... 24:TrieNode...] | |
/ 24 make terminal | |
""" | |
def __getitem__(self, item): | |
if item == '$': | |
return self.is_terminal | |
if item not in TrieNode.alpha_map: | |
raise ValueError("Cannot insert {} valid chars are {}".format(item, TrieNode.alpha_map.keys())) | |
idx = TrieNode.atoi(item) | |
if self.children[idx] is None: | |
self.children[idx] = TrieNode() | |
#print("Getting item {} at idx {}".format(self.children[idx], idx)) | |
return self.children[idx] | |
def __setitem__(self, key, value): | |
if key == '$': | |
self.is_terminal = value | |
return | |
raise ValueError("Cannot set key {} value {} pair".format(key, value)) | |
def __contains__(self, item): | |
if item == '$': | |
return self.is_terminal | |
return self.children[TrieNode.atoi(item)] is not None | |
def __delitem__(self, key): | |
if key == '$': | |
self.is_terminal = False | |
@staticmethod | |
def insert(trie, word): | |
root = trie | |
for s in word: | |
root = root[s] | |
root["$"] = True | |
@staticmethod | |
def search(trie, word): | |
root = trie | |
for s in word: | |
if s not in root: | |
return False | |
root = root[s] | |
return '$' in root | |
@staticmethod | |
def search_prefix(trie, word): | |
root = trie | |
for s in word: | |
if s not in root: | |
return False | |
root = root[s] | |
return True | |
@staticmethod | |
def _delete_helper(root, suffix, i): | |
# TODO: If Recursion may bottom out for long suffixes, use iteration. | |
# Optimization: Remove Dead paths. Consider a trie a-b-c-d when abcd is deleted and there is no other words that have prefixes | |
# common with all or part of abcd we can delete all the TrieNodes and save some space. To do this we can add an | |
# integer variable to TrieNode that counts the number of words that have the prefix common upto that TrieNode. | |
if root is None: | |
return False | |
if i == len(suffix): | |
if '$' in root: | |
del(root["$"]) | |
return True | |
return False | |
if suffix[i] in root: | |
if TrieNode._delete_helper(root[suffix[i]], suffix, i + 1): | |
# As per the above optimization NOTE I can check here if root.num_common_prefix == 1 then I can | |
# delete the node here. | |
if '$' not in root: | |
return True | |
return False | |
@staticmethod | |
def delete(trie, word): | |
TrieNode._delete_helper(trie, word, 0) | |
def test_insert_search(): | |
w = "frog" | |
root = TrieNode() | |
TrieNode.insert(root, w) | |
# TODO: Better testing by monkey patching and verifying calls | |
assert TrieNode.search(root, w) is True | |
print(".....") | |
def test_insert_search_multiple(): | |
words = ["fog", "foggy", "brag"] | |
root = TrieNode() | |
for i, w in enumerate(words): | |
TrieNode.insert(root, w) | |
# TODO: Better testing by monkey patching and verifying calls | |
assert TrieNode.search(root, w) is True | |
print('.'*(i+1)) | |
def test_partial_non_match(): | |
words = ["fog", "foggy"] | |
root = TrieNode() | |
for i, w in enumerate(words): | |
TrieNode.insert(root, w) | |
assert TrieNode.search(root, "fo") is False | |
assert TrieNode.search(root, "fogg") is False | |
assert TrieNode.search(root, "foggys") is False | |
print('....') | |
def test_delete(): | |
w = "frog" | |
root = TrieNode() | |
TrieNode.insert(root, w) | |
TrieNode.delete(root, w) | |
assert TrieNode.search(root, w) is False | |
print(".*") | |
def test_delete_prefix(): | |
words = ["fog", "foggy"] | |
root = TrieNode() | |
for i, w in enumerate(words): | |
TrieNode.insert(root, w) | |
TrieNode.delete(root, words[0]) | |
assert TrieNode.search(root, "fog") is False | |
assert TrieNode.search(root, "foggy") is True | |
print("..*") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment