trie.hh revision 12332:2adbc3f0f65a
1/* 2 * Copyright (c) 2012 Google 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are 7 * met: redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer; 9 * redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution; 12 * neither the name of the copyright holders nor the names of its 13 * contributors may be used to endorse or promote products derived from 14 * this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * Authors: Gabe Black 29 */ 30 31#ifndef __BASE_TRIE_HH__ 32#define __BASE_TRIE_HH__ 33 34#include <cassert> 35 36#include "base/cprintf.hh" 37#include "base/misc.hh" 38#include "base/types.hh" 39 40// Key has to be an integral type. 41template <class Key, class Value> 42class Trie 43{ 44 protected: 45 struct Node 46 { 47 Key key; 48 Key mask; 49 50 bool 51 matches(Key test) 52 { 53 return (test & mask) == key; 54 } 55 56 Value *value; 57 58 Node *parent; 59 Node *kids[2]; 60 61 Node(Key _key, Key _mask, Value *_val) : 62 key(_key & _mask), mask(_mask), value(_val), 63 parent(NULL) 64 { 65 kids[0] = NULL; 66 kids[1] = NULL; 67 } 68 69 void 70 clear() 71 { 72 if (kids[1]) { 73 kids[1]->clear(); 74 delete kids[1]; 75 kids[1] = NULL; 76 } 77 if (kids[0]) { 78 kids[0]->clear(); 79 delete kids[0]; 80 kids[0] = NULL; 81 } 82 } 83 84 void 85 dump(int level) 86 { 87 for (int i = 1; i < level; i++) { 88 cprintf("|"); 89 } 90 if (level == 0) 91 cprintf("Root "); 92 else 93 cprintf("+ "); 94 cprintf("(%p, %p, %#X, %#X, %p)\n", parent, this, key, mask, value); 95 if (kids[0]) 96 kids[0]->dump(level + 1); 97 if (kids[1]) 98 kids[1]->dump(level + 1); 99 } 100 }; 101 102 protected: 103 Node head; 104 105 public: 106 typedef Node *Handle; 107 108 Trie() : head(0, 0, NULL) 109 {} 110 111 static const unsigned MaxBits = sizeof(Key) * 8; 112 113 private: 114 /** 115 * A utility method which checks whether the key being looked up lies 116 * beyond the Node being examined. If so, it returns true and advances the 117 * node being examined. 118 * @param parent The node we're currently "at", which can be updated. 119 * @param kid The node we may want to move to. 120 * @param key The key we're looking for. 121 * @param new_mask The mask to use when matching against the key. 122 * @return Whether the current Node was advanced. 123 */ 124 bool 125 goesAfter(Node **parent, Node *kid, Key key, Key new_mask) 126 { 127 if (kid && kid->matches(key) && (kid->mask & new_mask) == kid->mask) { 128 *parent = kid; 129 return true; 130 } else { 131 return false; 132 } 133 } 134 135 /** 136 * A utility method which extends a mask value one more bit towards the 137 * lsb. This is almost just a signed right shift, except that the shifted 138 * in bits are technically undefined. This is also slightly complicated by 139 * the zero case. 140 * @param orig The original mask to extend. 141 * @return The extended mask. 142 */ 143 Key 144 extendMask(Key orig) 145 { 146 // Just in case orig was 0. 147 const Key msb = ULL(1) << (MaxBits - 1); 148 return orig | (orig >> 1) | msb; 149 } 150 151 /** 152 * Method which looks up the Handle corresponding to a particular key. This 153 * is useful if you want to delete the Handle corresponding to a key since 154 * the "remove" function takes a Handle as its argument. 155 * @param key The key to look up. 156 * @return The first Handle matching this key, or NULL if none was found. 157 */ 158 Handle 159 lookupHandle(Key key) 160 { 161 Node *node = &head; 162 while (node) { 163 if (node->value) 164 return node; 165 166 if (node->kids[0] && node->kids[0]->matches(key)) 167 node = node->kids[0]; 168 else if (node->kids[1] && node->kids[1]->matches(key)) 169 node = node->kids[1]; 170 else 171 node = NULL; 172 } 173 174 return NULL; 175 } 176 177 public: 178 /** 179 * Method which inserts a key/value pair into the trie. 180 * @param key The key which can later be used to look up this value. 181 * @param width How many bits of the key (from msb) should be used. 182 * @param val A pointer to the value to store in the trie. 183 * @return A Handle corresponding to this value. 184 */ 185 Handle 186 insert(Key key, unsigned width, Value *val) 187 { 188 // We use NULL value pointers to mark internal nodes of the trie, so 189 // we don't allow inserting them as real values. 190 assert(val); 191 192 // Build a mask which masks off all the bits we don't care about. 193 Key new_mask = ~(Key)0; 194 if (width < MaxBits) 195 new_mask <<= (MaxBits - width); 196 // Use it to tidy up the key. 197 key &= new_mask; 198 199 // Walk past all the nodes this new node will be inserted after. They 200 // can be ignored for the purposes of this function. 201 Node *node = &head; 202 while (goesAfter(&node, node->kids[0], key, new_mask) || 203 goesAfter(&node, node->kids[1], key, new_mask)) 204 {} 205 assert(node); 206 207 Key cur_mask = node->mask; 208 // If we're already where the value needs to be... 209 if (cur_mask == new_mask) { 210 assert(!node->value); 211 node->value = val; 212 return node; 213 } 214 215 for (unsigned int i = 0; i < 2; i++) { 216 Node *&kid = node->kids[i]; 217 Node *new_node; 218 if (!kid) { 219 // No kid. Add a new one. 220 new_node = new Node(key, new_mask, val); 221 new_node->parent = node; 222 kid = new_node; 223 return new_node; 224 } 225 226 // Walk down the leg until something doesn't match or we run out 227 // of bits. 228 Key last_mask; 229 bool done; 230 do { 231 last_mask = cur_mask; 232 cur_mask = extendMask(cur_mask); 233 done = ((key & cur_mask) != (kid->key & cur_mask)) || 234 last_mask == new_mask; 235 } while (!done); 236 cur_mask = last_mask; 237 238 // If this isn't the right leg to go down at all, skip it. 239 if (cur_mask == node->mask) 240 continue; 241 242 // At the point we walked to above, add a new node. 243 new_node = new Node(key, cur_mask, NULL); 244 new_node->parent = node; 245 kid->parent = new_node; 246 new_node->kids[0] = kid; 247 kid = new_node; 248 249 // If we ran out of bits, the value goes right here. 250 if (cur_mask == new_mask) { 251 new_node->value = val; 252 return new_node; 253 } 254 255 // Still more bits to deal with, so add a new node for that path. 256 new_node = new Node(key, new_mask, val); 257 new_node->parent = kid; 258 kid->kids[1] = new_node; 259 return new_node; 260 } 261 262 panic("Reached the end of the Trie insert function!\n"); 263 return NULL; 264 } 265 266 /** 267 * Method which looks up the Value corresponding to a particular key. 268 * @param key The key to look up. 269 * @return The first Value matching this key, or NULL if none was found. 270 */ 271 Value * 272 lookup(Key key) 273 { 274 Node *node = lookupHandle(key); 275 if (node) 276 return node->value; 277 else 278 return NULL; 279 } 280 281 /** 282 * Method to delete a value from the trie. 283 * @param node A Handle to remove. 284 * @return The Value pointer from the removed entry. 285 */ 286 Value * 287 remove(Handle handle) 288 { 289 Node *node = handle; 290 Value *val = node->value; 291 if (node->kids[1]) { 292 assert(node->value); 293 node->value = NULL; 294 return val; 295 } 296 if (!node->parent) 297 panic("Trie: Can't remove root node.\n"); 298 299 Node *parent = node->parent; 300 301 // If there's a kid, fix up it's parent pointer. 302 if (node->kids[0]) 303 node->kids[0]->parent = parent; 304 // Figure out which kid we are, and update our parent's pointers. 305 if (parent->kids[0] == node) 306 parent->kids[0] = node->kids[0]; 307 else if (parent->kids[1] == node) 308 parent->kids[1] = node->kids[0]; 309 else 310 panic("Trie: Inconsistent parent/kid relationship.\n"); 311 // Make sure if the parent only has one kid, it's kid[0]. 312 if (parent->kids[1] && !parent->kids[0]) { 313 parent->kids[0] = parent->kids[1]; 314 parent->kids[1] = NULL; 315 } 316 317 // If the parent has less than two kids and no cargo and isn't the 318 // root, delete it too. 319 if (!parent->kids[1] && !parent->value && parent->parent) 320 remove(parent); 321 delete node; 322 return val; 323 } 324 325 /** 326 * Method to lookup a value from the trie and then delete it. 327 * @param key The key to look up and then remove. 328 * @return The Value pointer from the removed entry, if any. 329 */ 330 Value * 331 remove(Key key) 332 { 333 Handle handle = lookupHandle(key); 334 if (!handle) 335 return NULL; 336 return remove(handle); 337 } 338 339 /** 340 * A method which removes all key/value pairs from the trie. This is more 341 * efficient than trying to remove elements individually. 342 */ 343 void 344 clear() 345 { 346 head.clear(); 347 } 348 349 /** 350 * A debugging method which prints the contents of this trie. 351 * @param title An identifying title to put in the dump header. 352 */ 353 void 354 dump(const char *title) 355 { 356 cprintf("**************************************************\n"); 357 cprintf("*** Start of Trie: %s\n", title); 358 cprintf("*** (parent, me, key, mask, value pointer)\n"); 359 cprintf("**************************************************\n"); 360 head.dump(0); 361 } 362}; 363 364#endif 365