Skip to content

Commit 11481ce

Browse files
fix(data_structures): handle exact prefix match in RadixNode.insert to fix IndexError
Fixes #11316. The insert() method's Case 3 (remaining_prefix == "") would call self.nodes[matching_string[0]].insert(remaining_word) even when remaining_word is an empty string. This causes an IndexError in Case 2, where word[0] is accessed on an empty string. Fix: check that remaining_word is non-empty before recursing. If remaining_word is empty, the inserted word exactly matches the prefix of the incoming node, so we just mark that node as a leaf. Also adds a doctest for the reported scenario (insert('fooaaa'), insert('foobbb'), insert('foo')) and adds assertions to test_trie() to cover this case.
1 parent 48e35c8 commit 11481ce

1 file changed

Lines changed: 146 additions & 133 deletions

File tree

data_structures/trie/radix_tree.py

Lines changed: 146 additions & 133 deletions
Original file line numberDiff line numberDiff line change
@@ -6,193 +6,197 @@
66

77

88
class RadixNode:
    """Node of a radix tree (compressed trie) storing a set of words.

    Every node carries the label (``prefix``) of the edge leading to it from
    its parent, a flag telling whether a stored word ends at the node, and a
    mapping from the first character of each child's prefix to that child.
    All word arguments of ``insert``, ``find`` and ``delete`` are interpreted
    relative to the node they are called on: the node's own ``prefix`` has
    already been consumed by the caller and is never matched again.
    """

    def __init__(self, prefix: str = "", is_leaf: bool = False) -> None:
        """Create a node.

        Args:
            prefix (str): label of the edge leading to this node
            is_leaf (bool): True if a stored word ends at this node
        """
        # Mapping from the first character of a child's prefix to that child
        self.nodes: dict[str, RadixNode] = {}
        # A node will be a leaf if the tree contains its word
        self.is_leaf = is_leaf
        self.prefix = prefix

    def match(self, word: str) -> tuple[str, str, str]:
        """Compute the common substring of the prefix of the node and a word

        Args:
            word (str): word to compare

        Returns:
            (str, str, str): common substring, remaining prefix, remaining word

        >>> RadixNode("myprefix").match("mystring")
        ('my', 'prefix', 'string')
        """
        x = 0
        for q, s in zip(self.prefix, word):
            if q != s:
                break
            x += 1

        return self.prefix[:x], self.prefix[x:], word[x:]

    def insert_many(self, words: list[str]) -> None:
        """Insert many words in the tree

        Args:
            words (list[str]): list of words

        >>> RadixNode("myprefix").insert_many(["mystring", "hello"])
        """
        for word in words:
            self.insert(word)

    def insert(self, word: str) -> None:
        """Insert a word into the subtree rooted at this node

        Args:
            word (str): word to insert, relative to this node

        >>> RadixNode("myprefix").insert("mystring")
        >>> root = RadixNode()
        >>> root.insert_many(['myprefix', 'myprefixA', 'myprefixAA'])
        >>> root.print_tree()
        - myprefix (leaf)
        -- A (leaf)
        --- A (leaf)
        >>> root2 = RadixNode()
        >>> root2.insert_many(['fooaaa', 'foobbb', 'foo'])
        >>> root2.print_tree()
        - foo (leaf)
        -- aaa (leaf)
        -- bbb (leaf)
        >>> root3 = RadixNode()
        >>> root3.insert_many(['fooa', 'foob', 'foofoo'])
        >>> root3.find('foofoo'), root3.find('foo')
        (True, False)
        """
        # Case 1: Nothing is left to consume, so the word ends at this node
        # Solution: We set the current node as leaf
        # NOTE: the word is deliberately not compared with self.prefix; the
        # prefix was already consumed by the parent, and comparing it again
        # would mark this node as a word whenever the remaining word happens
        # to equal the prefix (e.g. "foofoo" below a "foo" node).
        if not word:
            self.is_leaf = True
            return

        incoming_node = self.nodes.get(word[0])

        # Case 2: The node has no edges that have a prefix to the word
        # Solution: We create an edge from the current node to a new one
        # containing the word
        if incoming_node is None:
            self.nodes[word[0]] = RadixNode(prefix=word, is_leaf=True)
            return

        matching_string, remaining_prefix, remaining_word = incoming_node.match(
            word
        )

        # Case 3: The whole prefix of the child matches the word
        # Solution: We insert the remaining word on the child; an empty
        # remaining word simply marks the child as a leaf (see Case 1)
        if not remaining_prefix:
            incoming_node.insert(remaining_word)
            return

        # Case 4: Only part of the child's prefix matches the word
        # Solution: Create a node in between holding the common part, hang the
        # shortened child below it and add the remaining word (if any) there
        incoming_node.prefix = remaining_prefix
        split_node = RadixNode(matching_string, is_leaf=not remaining_word)
        split_node.nodes[remaining_prefix[0]] = incoming_node
        self.nodes[matching_string[0]] = split_node
        if remaining_word:
            split_node.insert(remaining_word)

    def find(self, word: str) -> bool:
        """Returns if the word is on the tree

        Args:
            word (str): word to check, relative to this node

        Returns:
            bool: True if the word appears on the tree

        >>> RadixNode("myprefix").find("mystring")
        False
        >>> RadixNode().find("")
        False
        """
        # Nothing left to consume: the word is stored iff this node is a leaf
        if not word:
            return self.is_leaf

        incoming_node = self.nodes.get(word[0])
        if incoming_node is None:
            return False

        _matching_string, remaining_prefix, remaining_word = incoming_node.match(
            word
        )
        # If there is remaining prefix, the word can't be on the tree
        if remaining_prefix:
            return False
        # Continue below the child (an empty remainder checks its leaf flag)
        return incoming_node.find(remaining_word)

    def delete(self, word: str) -> bool:
        """Deletes a word from the tree if it exists

        Args:
            word (str): word to be deleted, relative to this node

        Returns:
            bool: True if the word was found and deleted. False if word is not found

        >>> RadixNode("myprefix").delete("mystring")
        False
        >>> root = RadixNode()
        >>> root.insert_many(['a', 'b'])
        >>> root.delete('a'), root.find('a'), root.find('b')
        (True, False, True)
        """
        # The empty word is stored on this node itself
        if not word:
            if not self.is_leaf:
                return False
            self.is_leaf = False
            return True

        incoming_node = self.nodes.get(word[0])
        if incoming_node is None:
            return False

        _matching_string, remaining_prefix, remaining_word = incoming_node.match(
            word
        )
        # If there is remaining prefix, the word can't be on the tree
        if remaining_prefix:
            return False

        # We have word remaining so we check the next node
        if remaining_word:
            deleted = incoming_node.delete(remaining_word)
            # The child may have lost one of its edges. Compacting it here, in
            # the parent, guarantees that the node delete() was first called
            # on (the root) is never merged into its only child, which would
            # make the surviving word unreachable.
            if (
                deleted
                and len(incoming_node.nodes) == 1
                and not incoming_node.is_leaf
            ):
                incoming_node._merge_with_only_child()
            return deleted

        # If it is not a leaf, we don't have to delete
        if not incoming_node.is_leaf:
            return False

        if not incoming_node.nodes:
            # We delete the node if no edges go from it
            del self.nodes[word[0]]
        elif len(incoming_node.nodes) > 1:
            # If there is more than 1 edge, we just mark it as non-leaf
            incoming_node.is_leaf = False
        else:
            # If there is 1 edge, we merge it with its child
            incoming_node._merge_with_only_child()
        return True

    def _merge_with_only_child(self) -> None:
        """Absorb the single child of this node into the node itself."""
        (only_child,) = self.nodes.values()
        self.is_leaf = only_child.is_leaf
        self.prefix += only_child.prefix
        self.nodes = only_child.nodes

    def print_tree(self, height: int = 0) -> None:
        """Print the tree

        Args:
            height (int, optional): Height of the printed node
        """
        # A node with an empty prefix (normally the root) prints nothing
        if self.prefix:
            print("-" * height, self.prefix, "(leaf)" if self.is_leaf else "")
        for value in self.nodes.values():
            value.print_tree(height + 1)
192196

193197

194198
def test_trie() -> bool:
195-
words = "banana bananas bandana band apple all beast".split()
199+
words = "banana bananas bandana band apple all beast".split()
196200
root = RadixNode()
197201
root.insert_many(words)
198202

@@ -205,25 +209,34 @@ def test_trie() -> bool:
205209
assert not root.find("banana")
206210
assert root.find("bananas")
207211

212+
# Test fix for issue #11316: inserting a word that is a prefix of existing words
213+
root2 = RadixNode()
214+
root2.insert("fooaaa")
215+
root2.insert("foobbb")
216+
root2.insert("foo")
217+
assert root2.find("foo"), "foo should be found after insert"
218+
assert root2.find("fooaaa"), "fooaaa should still be found"
219+
assert root2.find("foobbb"), "foobbb should still be found"
220+
208221
return True
209222

210223

211224
def pytests() -> None:
    """Run the radix tree self-test and fail if it does not report success."""
    outcome = test_trie()
    assert outcome
213226

214227

215228
def main() -> None:
    """Build a radix tree from a fixed list of sample words and print it.

    >>> pytests()
    """
    sample_words = "banana bananas bandanas bandana band apple all beast".split()
    tree = RadixNode()
    tree.insert_many(sample_words)

    # Show the input first, then the resulting tree structure
    print("Words:", sample_words)
    print("Tree:")
    tree.print_tree()
226239

227240

228241
# Run the demonstration only when the file is executed as a script
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)