trie.hh (12332:2adbc3f0f65a) trie.hh (12334:e0ab29a34764)
1/*
2 * Copyright (c) 2012 Google
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Gabe Black
29 */
30
31#ifndef __BASE_TRIE_HH__
32#define __BASE_TRIE_HH__
33
34#include <cassert>
35
36#include "base/cprintf.hh"
1/*
2 * Copyright (c) 2012 Google
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Gabe Black
29 */
30
31#ifndef __BASE_TRIE_HH__
32#define __BASE_TRIE_HH__
33
34#include <cassert>
35
36#include "base/cprintf.hh"
37#include "base/misc.hh"
37#include "base/logging.hh"
38#include "base/types.hh"
39
40// Key has to be an integral type.
41template <class Key, class Value>
42class Trie
43{
44 protected:
45 struct Node
46 {
47 Key key;
48 Key mask;
49
50 bool
51 matches(Key test)
52 {
53 return (test & mask) == key;
54 }
55
56 Value *value;
57
58 Node *parent;
59 Node *kids[2];
60
61 Node(Key _key, Key _mask, Value *_val) :
62 key(_key & _mask), mask(_mask), value(_val),
63 parent(NULL)
64 {
65 kids[0] = NULL;
66 kids[1] = NULL;
67 }
68
69 void
70 clear()
71 {
72 if (kids[1]) {
73 kids[1]->clear();
74 delete kids[1];
75 kids[1] = NULL;
76 }
77 if (kids[0]) {
78 kids[0]->clear();
79 delete kids[0];
80 kids[0] = NULL;
81 }
82 }
83
84 void
85 dump(int level)
86 {
87 for (int i = 1; i < level; i++) {
88 cprintf("|");
89 }
90 if (level == 0)
91 cprintf("Root ");
92 else
93 cprintf("+ ");
94 cprintf("(%p, %p, %#X, %#X, %p)\n", parent, this, key, mask, value);
95 if (kids[0])
96 kids[0]->dump(level + 1);
97 if (kids[1])
98 kids[1]->dump(level + 1);
99 }
100 };
101
102 protected:
103 Node head;
104
105 public:
106 typedef Node *Handle;
107
108 Trie() : head(0, 0, NULL)
109 {}
110
111 static const unsigned MaxBits = sizeof(Key) * 8;
112
113 private:
114 /**
115 * A utility method which checks whether the key being looked up lies
116 * beyond the Node being examined. If so, it returns true and advances the
117 * node being examined.
118 * @param parent The node we're currently "at", which can be updated.
119 * @param kid The node we may want to move to.
120 * @param key The key we're looking for.
121 * @param new_mask The mask to use when matching against the key.
122 * @return Whether the current Node was advanced.
123 */
124 bool
125 goesAfter(Node **parent, Node *kid, Key key, Key new_mask)
126 {
127 if (kid && kid->matches(key) && (kid->mask & new_mask) == kid->mask) {
128 *parent = kid;
129 return true;
130 } else {
131 return false;
132 }
133 }
134
135 /**
136 * A utility method which extends a mask value one more bit towards the
137 * lsb. This is almost just a signed right shift, except that the shifted
138 * in bits are technically undefined. This is also slightly complicated by
139 * the zero case.
140 * @param orig The original mask to extend.
141 * @return The extended mask.
142 */
143 Key
144 extendMask(Key orig)
145 {
146 // Just in case orig was 0.
147 const Key msb = ULL(1) << (MaxBits - 1);
148 return orig | (orig >> 1) | msb;
149 }
150
151 /**
152 * Method which looks up the Handle corresponding to a particular key. This
153 * is useful if you want to delete the Handle corresponding to a key since
154 * the "remove" function takes a Handle as its argument.
155 * @param key The key to look up.
156 * @return The first Handle matching this key, or NULL if none was found.
157 */
158 Handle
159 lookupHandle(Key key)
160 {
161 Node *node = &head;
162 while (node) {
163 if (node->value)
164 return node;
165
166 if (node->kids[0] && node->kids[0]->matches(key))
167 node = node->kids[0];
168 else if (node->kids[1] && node->kids[1]->matches(key))
169 node = node->kids[1];
170 else
171 node = NULL;
172 }
173
174 return NULL;
175 }
176
177 public:
178 /**
179 * Method which inserts a key/value pair into the trie.
180 * @param key The key which can later be used to look up this value.
181 * @param width How many bits of the key (from msb) should be used.
182 * @param val A pointer to the value to store in the trie.
183 * @return A Handle corresponding to this value.
184 */
185 Handle
186 insert(Key key, unsigned width, Value *val)
187 {
188 // We use NULL value pointers to mark internal nodes of the trie, so
189 // we don't allow inserting them as real values.
190 assert(val);
191
192 // Build a mask which masks off all the bits we don't care about.
193 Key new_mask = ~(Key)0;
194 if (width < MaxBits)
195 new_mask <<= (MaxBits - width);
196 // Use it to tidy up the key.
197 key &= new_mask;
198
199 // Walk past all the nodes this new node will be inserted after. They
200 // can be ignored for the purposes of this function.
201 Node *node = &head;
202 while (goesAfter(&node, node->kids[0], key, new_mask) ||
203 goesAfter(&node, node->kids[1], key, new_mask))
204 {}
205 assert(node);
206
207 Key cur_mask = node->mask;
208 // If we're already where the value needs to be...
209 if (cur_mask == new_mask) {
210 assert(!node->value);
211 node->value = val;
212 return node;
213 }
214
215 for (unsigned int i = 0; i < 2; i++) {
216 Node *&kid = node->kids[i];
217 Node *new_node;
218 if (!kid) {
219 // No kid. Add a new one.
220 new_node = new Node(key, new_mask, val);
221 new_node->parent = node;
222 kid = new_node;
223 return new_node;
224 }
225
226 // Walk down the leg until something doesn't match or we run out
227 // of bits.
228 Key last_mask;
229 bool done;
230 do {
231 last_mask = cur_mask;
232 cur_mask = extendMask(cur_mask);
233 done = ((key & cur_mask) != (kid->key & cur_mask)) ||
234 last_mask == new_mask;
235 } while (!done);
236 cur_mask = last_mask;
237
238 // If this isn't the right leg to go down at all, skip it.
239 if (cur_mask == node->mask)
240 continue;
241
242 // At the point we walked to above, add a new node.
243 new_node = new Node(key, cur_mask, NULL);
244 new_node->parent = node;
245 kid->parent = new_node;
246 new_node->kids[0] = kid;
247 kid = new_node;
248
249 // If we ran out of bits, the value goes right here.
250 if (cur_mask == new_mask) {
251 new_node->value = val;
252 return new_node;
253 }
254
255 // Still more bits to deal with, so add a new node for that path.
256 new_node = new Node(key, new_mask, val);
257 new_node->parent = kid;
258 kid->kids[1] = new_node;
259 return new_node;
260 }
261
262 panic("Reached the end of the Trie insert function!\n");
263 return NULL;
264 }
265
266 /**
267 * Method which looks up the Value corresponding to a particular key.
268 * @param key The key to look up.
269 * @return The first Value matching this key, or NULL if none was found.
270 */
271 Value *
272 lookup(Key key)
273 {
274 Node *node = lookupHandle(key);
275 if (node)
276 return node->value;
277 else
278 return NULL;
279 }
280
281 /**
282 * Method to delete a value from the trie.
283 * @param node A Handle to remove.
284 * @return The Value pointer from the removed entry.
285 */
286 Value *
287 remove(Handle handle)
288 {
289 Node *node = handle;
290 Value *val = node->value;
291 if (node->kids[1]) {
292 assert(node->value);
293 node->value = NULL;
294 return val;
295 }
296 if (!node->parent)
297 panic("Trie: Can't remove root node.\n");
298
299 Node *parent = node->parent;
300
301 // If there's a kid, fix up it's parent pointer.
302 if (node->kids[0])
303 node->kids[0]->parent = parent;
304 // Figure out which kid we are, and update our parent's pointers.
305 if (parent->kids[0] == node)
306 parent->kids[0] = node->kids[0];
307 else if (parent->kids[1] == node)
308 parent->kids[1] = node->kids[0];
309 else
310 panic("Trie: Inconsistent parent/kid relationship.\n");
311 // Make sure if the parent only has one kid, it's kid[0].
312 if (parent->kids[1] && !parent->kids[0]) {
313 parent->kids[0] = parent->kids[1];
314 parent->kids[1] = NULL;
315 }
316
317 // If the parent has less than two kids and no cargo and isn't the
318 // root, delete it too.
319 if (!parent->kids[1] && !parent->value && parent->parent)
320 remove(parent);
321 delete node;
322 return val;
323 }
324
325 /**
326 * Method to lookup a value from the trie and then delete it.
327 * @param key The key to look up and then remove.
328 * @return The Value pointer from the removed entry, if any.
329 */
330 Value *
331 remove(Key key)
332 {
333 Handle handle = lookupHandle(key);
334 if (!handle)
335 return NULL;
336 return remove(handle);
337 }
338
339 /**
340 * A method which removes all key/value pairs from the trie. This is more
341 * efficient than trying to remove elements individually.
342 */
343 void
344 clear()
345 {
346 head.clear();
347 }
348
349 /**
350 * A debugging method which prints the contents of this trie.
351 * @param title An identifying title to put in the dump header.
352 */
353 void
354 dump(const char *title)
355 {
356 cprintf("**************************************************\n");
357 cprintf("*** Start of Trie: %s\n", title);
358 cprintf("*** (parent, me, key, mask, value pointer)\n");
359 cprintf("**************************************************\n");
360 head.dump(0);
361 }
362};
363
364#endif
38#include "base/types.hh"
39
40// Key has to be an integral type.
41template <class Key, class Value>
42class Trie
43{
44 protected:
45 struct Node
46 {
47 Key key;
48 Key mask;
49
50 bool
51 matches(Key test)
52 {
53 return (test & mask) == key;
54 }
55
56 Value *value;
57
58 Node *parent;
59 Node *kids[2];
60
61 Node(Key _key, Key _mask, Value *_val) :
62 key(_key & _mask), mask(_mask), value(_val),
63 parent(NULL)
64 {
65 kids[0] = NULL;
66 kids[1] = NULL;
67 }
68
69 void
70 clear()
71 {
72 if (kids[1]) {
73 kids[1]->clear();
74 delete kids[1];
75 kids[1] = NULL;
76 }
77 if (kids[0]) {
78 kids[0]->clear();
79 delete kids[0];
80 kids[0] = NULL;
81 }
82 }
83
84 void
85 dump(int level)
86 {
87 for (int i = 1; i < level; i++) {
88 cprintf("|");
89 }
90 if (level == 0)
91 cprintf("Root ");
92 else
93 cprintf("+ ");
94 cprintf("(%p, %p, %#X, %#X, %p)\n", parent, this, key, mask, value);
95 if (kids[0])
96 kids[0]->dump(level + 1);
97 if (kids[1])
98 kids[1]->dump(level + 1);
99 }
100 };
101
102 protected:
103 Node head;
104
105 public:
106 typedef Node *Handle;
107
108 Trie() : head(0, 0, NULL)
109 {}
110
111 static const unsigned MaxBits = sizeof(Key) * 8;
112
113 private:
114 /**
115 * A utility method which checks whether the key being looked up lies
116 * beyond the Node being examined. If so, it returns true and advances the
117 * node being examined.
118 * @param parent The node we're currently "at", which can be updated.
119 * @param kid The node we may want to move to.
120 * @param key The key we're looking for.
121 * @param new_mask The mask to use when matching against the key.
122 * @return Whether the current Node was advanced.
123 */
124 bool
125 goesAfter(Node **parent, Node *kid, Key key, Key new_mask)
126 {
127 if (kid && kid->matches(key) && (kid->mask & new_mask) == kid->mask) {
128 *parent = kid;
129 return true;
130 } else {
131 return false;
132 }
133 }
134
135 /**
136 * A utility method which extends a mask value one more bit towards the
137 * lsb. This is almost just a signed right shift, except that the shifted
138 * in bits are technically undefined. This is also slightly complicated by
139 * the zero case.
140 * @param orig The original mask to extend.
141 * @return The extended mask.
142 */
143 Key
144 extendMask(Key orig)
145 {
146 // Just in case orig was 0.
147 const Key msb = ULL(1) << (MaxBits - 1);
148 return orig | (orig >> 1) | msb;
149 }
150
151 /**
152 * Method which looks up the Handle corresponding to a particular key. This
153 * is useful if you want to delete the Handle corresponding to a key since
154 * the "remove" function takes a Handle as its argument.
155 * @param key The key to look up.
156 * @return The first Handle matching this key, or NULL if none was found.
157 */
158 Handle
159 lookupHandle(Key key)
160 {
161 Node *node = &head;
162 while (node) {
163 if (node->value)
164 return node;
165
166 if (node->kids[0] && node->kids[0]->matches(key))
167 node = node->kids[0];
168 else if (node->kids[1] && node->kids[1]->matches(key))
169 node = node->kids[1];
170 else
171 node = NULL;
172 }
173
174 return NULL;
175 }
176
177 public:
178 /**
179 * Method which inserts a key/value pair into the trie.
180 * @param key The key which can later be used to look up this value.
181 * @param width How many bits of the key (from msb) should be used.
182 * @param val A pointer to the value to store in the trie.
183 * @return A Handle corresponding to this value.
184 */
185 Handle
186 insert(Key key, unsigned width, Value *val)
187 {
188 // We use NULL value pointers to mark internal nodes of the trie, so
189 // we don't allow inserting them as real values.
190 assert(val);
191
192 // Build a mask which masks off all the bits we don't care about.
193 Key new_mask = ~(Key)0;
194 if (width < MaxBits)
195 new_mask <<= (MaxBits - width);
196 // Use it to tidy up the key.
197 key &= new_mask;
198
199 // Walk past all the nodes this new node will be inserted after. They
200 // can be ignored for the purposes of this function.
201 Node *node = &head;
202 while (goesAfter(&node, node->kids[0], key, new_mask) ||
203 goesAfter(&node, node->kids[1], key, new_mask))
204 {}
205 assert(node);
206
207 Key cur_mask = node->mask;
208 // If we're already where the value needs to be...
209 if (cur_mask == new_mask) {
210 assert(!node->value);
211 node->value = val;
212 return node;
213 }
214
215 for (unsigned int i = 0; i < 2; i++) {
216 Node *&kid = node->kids[i];
217 Node *new_node;
218 if (!kid) {
219 // No kid. Add a new one.
220 new_node = new Node(key, new_mask, val);
221 new_node->parent = node;
222 kid = new_node;
223 return new_node;
224 }
225
226 // Walk down the leg until something doesn't match or we run out
227 // of bits.
228 Key last_mask;
229 bool done;
230 do {
231 last_mask = cur_mask;
232 cur_mask = extendMask(cur_mask);
233 done = ((key & cur_mask) != (kid->key & cur_mask)) ||
234 last_mask == new_mask;
235 } while (!done);
236 cur_mask = last_mask;
237
238 // If this isn't the right leg to go down at all, skip it.
239 if (cur_mask == node->mask)
240 continue;
241
242 // At the point we walked to above, add a new node.
243 new_node = new Node(key, cur_mask, NULL);
244 new_node->parent = node;
245 kid->parent = new_node;
246 new_node->kids[0] = kid;
247 kid = new_node;
248
249 // If we ran out of bits, the value goes right here.
250 if (cur_mask == new_mask) {
251 new_node->value = val;
252 return new_node;
253 }
254
255 // Still more bits to deal with, so add a new node for that path.
256 new_node = new Node(key, new_mask, val);
257 new_node->parent = kid;
258 kid->kids[1] = new_node;
259 return new_node;
260 }
261
262 panic("Reached the end of the Trie insert function!\n");
263 return NULL;
264 }
265
266 /**
267 * Method which looks up the Value corresponding to a particular key.
268 * @param key The key to look up.
269 * @return The first Value matching this key, or NULL if none was found.
270 */
271 Value *
272 lookup(Key key)
273 {
274 Node *node = lookupHandle(key);
275 if (node)
276 return node->value;
277 else
278 return NULL;
279 }
280
281 /**
282 * Method to delete a value from the trie.
283 * @param node A Handle to remove.
284 * @return The Value pointer from the removed entry.
285 */
286 Value *
287 remove(Handle handle)
288 {
289 Node *node = handle;
290 Value *val = node->value;
291 if (node->kids[1]) {
292 assert(node->value);
293 node->value = NULL;
294 return val;
295 }
296 if (!node->parent)
297 panic("Trie: Can't remove root node.\n");
298
299 Node *parent = node->parent;
300
301 // If there's a kid, fix up it's parent pointer.
302 if (node->kids[0])
303 node->kids[0]->parent = parent;
304 // Figure out which kid we are, and update our parent's pointers.
305 if (parent->kids[0] == node)
306 parent->kids[0] = node->kids[0];
307 else if (parent->kids[1] == node)
308 parent->kids[1] = node->kids[0];
309 else
310 panic("Trie: Inconsistent parent/kid relationship.\n");
311 // Make sure if the parent only has one kid, it's kid[0].
312 if (parent->kids[1] && !parent->kids[0]) {
313 parent->kids[0] = parent->kids[1];
314 parent->kids[1] = NULL;
315 }
316
317 // If the parent has less than two kids and no cargo and isn't the
318 // root, delete it too.
319 if (!parent->kids[1] && !parent->value && parent->parent)
320 remove(parent);
321 delete node;
322 return val;
323 }
324
325 /**
326 * Method to lookup a value from the trie and then delete it.
327 * @param key The key to look up and then remove.
328 * @return The Value pointer from the removed entry, if any.
329 */
330 Value *
331 remove(Key key)
332 {
333 Handle handle = lookupHandle(key);
334 if (!handle)
335 return NULL;
336 return remove(handle);
337 }
338
339 /**
340 * A method which removes all key/value pairs from the trie. This is more
341 * efficient than trying to remove elements individually.
342 */
343 void
344 clear()
345 {
346 head.clear();
347 }
348
349 /**
350 * A debugging method which prints the contents of this trie.
351 * @param title An identifying title to put in the dump header.
352 */
353 void
354 dump(const char *title)
355 {
356 cprintf("**************************************************\n");
357 cprintf("*** Start of Trie: %s\n", title);
358 cprintf("*** (parent, me, key, mask, value pointer)\n");
359 cprintf("**************************************************\n");
360 head.dump(0);
361 }
362};
363
364#endif