trie.hh revision 8952
18951Sgblack@eecs.umich.edu/* 28951Sgblack@eecs.umich.edu * Copyright (c) 2012 Google 38951Sgblack@eecs.umich.edu * All rights reserved. 48951Sgblack@eecs.umich.edu * 58951Sgblack@eecs.umich.edu * Redistribution and use in source and binary forms, with or without 68951Sgblack@eecs.umich.edu * modification, are permitted provided that the following conditions are 78951Sgblack@eecs.umich.edu * met: redistributions of source code must retain the above copyright 88951Sgblack@eecs.umich.edu * notice, this list of conditions and the following disclaimer; 98951Sgblack@eecs.umich.edu * redistributions in binary form must reproduce the above copyright 108951Sgblack@eecs.umich.edu * notice, this list of conditions and the following disclaimer in the 118951Sgblack@eecs.umich.edu * documentation and/or other materials provided with the distribution; 128951Sgblack@eecs.umich.edu * neither the name of the copyright holders nor the names of its 138951Sgblack@eecs.umich.edu * contributors may be used to endorse or promote products derived from 148951Sgblack@eecs.umich.edu * this software without specific prior written permission. 158951Sgblack@eecs.umich.edu * 168951Sgblack@eecs.umich.edu * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 178951Sgblack@eecs.umich.edu * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 188951Sgblack@eecs.umich.edu * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 198951Sgblack@eecs.umich.edu * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 208951Sgblack@eecs.umich.edu * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 218951Sgblack@eecs.umich.edu * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 228951Sgblack@eecs.umich.edu * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 238951Sgblack@eecs.umich.edu * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 248951Sgblack@eecs.umich.edu * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 258951Sgblack@eecs.umich.edu * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 268951Sgblack@eecs.umich.edu * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 278951Sgblack@eecs.umich.edu * 288951Sgblack@eecs.umich.edu * Authors: Gabe Black 298951Sgblack@eecs.umich.edu */ 308951Sgblack@eecs.umich.edu 318951Sgblack@eecs.umich.edu#ifndef __BASE_TRIE_HH__ 328951Sgblack@eecs.umich.edu#define __BASE_TRIE_HH__ 338951Sgblack@eecs.umich.edu 348951Sgblack@eecs.umich.edu#include "base/cprintf.hh" 358951Sgblack@eecs.umich.edu#include "base/misc.hh" 368951Sgblack@eecs.umich.edu#include "base/types.hh" 378951Sgblack@eecs.umich.edu 388951Sgblack@eecs.umich.edu// Key has to be an integral type. 398951Sgblack@eecs.umich.edutemplate <class Key, class Value> 408951Sgblack@eecs.umich.educlass Trie 418951Sgblack@eecs.umich.edu{ 428951Sgblack@eecs.umich.edu protected: 438951Sgblack@eecs.umich.edu struct Node 448951Sgblack@eecs.umich.edu { 458951Sgblack@eecs.umich.edu Key key; 468951Sgblack@eecs.umich.edu Key mask; 478951Sgblack@eecs.umich.edu 488951Sgblack@eecs.umich.edu bool 498951Sgblack@eecs.umich.edu matches(Key test) 508951Sgblack@eecs.umich.edu { 518951Sgblack@eecs.umich.edu return (test & mask) == key; 528951Sgblack@eecs.umich.edu } 538951Sgblack@eecs.umich.edu 548951Sgblack@eecs.umich.edu Value *value; 558951Sgblack@eecs.umich.edu 568951Sgblack@eecs.umich.edu Node *parent; 578951Sgblack@eecs.umich.edu Node *kids[2]; 588951Sgblack@eecs.umich.edu 598951Sgblack@eecs.umich.edu Node(Key _key, Key _mask, Value *_val) : 608951Sgblack@eecs.umich.edu key(_key & _mask), mask(_mask), value(_val), 618951Sgblack@eecs.umich.edu parent(NULL) 628951Sgblack@eecs.umich.edu { 638951Sgblack@eecs.umich.edu kids[0] = NULL; 648951Sgblack@eecs.umich.edu kids[1] = NULL; 658951Sgblack@eecs.umich.edu } 668951Sgblack@eecs.umich.edu 678951Sgblack@eecs.umich.edu void 688951Sgblack@eecs.umich.edu clear() 698951Sgblack@eecs.umich.edu { 708951Sgblack@eecs.umich.edu if (kids[1]) { 718951Sgblack@eecs.umich.edu kids[1]->clear(); 728951Sgblack@eecs.umich.edu delete kids[1]; 738951Sgblack@eecs.umich.edu kids[1] = NULL; 748951Sgblack@eecs.umich.edu } 758951Sgblack@eecs.umich.edu if (kids[0]) { 768951Sgblack@eecs.umich.edu kids[0]->clear(); 778951Sgblack@eecs.umich.edu delete kids[0]; 788951Sgblack@eecs.umich.edu kids[0] = NULL; 798951Sgblack@eecs.umich.edu } 808951Sgblack@eecs.umich.edu } 818951Sgblack@eecs.umich.edu 828951Sgblack@eecs.umich.edu void 838951Sgblack@eecs.umich.edu dump(int level) 848951Sgblack@eecs.umich.edu { 858951Sgblack@eecs.umich.edu for (int i = 1; i < level; i++) { 868951Sgblack@eecs.umich.edu cprintf("|"); 878951Sgblack@eecs.umich.edu } 888951Sgblack@eecs.umich.edu if (level == 0) 898951Sgblack@eecs.umich.edu cprintf("Root "); 908951Sgblack@eecs.umich.edu else 918951Sgblack@eecs.umich.edu cprintf("+ "); 928951Sgblack@eecs.umich.edu cprintf("(%p, %p, %#X, %#X, %p)\n", parent, this, key, mask, value); 938951Sgblack@eecs.umich.edu if (kids[0]) 948951Sgblack@eecs.umich.edu kids[0]->dump(level + 1); 958951Sgblack@eecs.umich.edu if (kids[1]) 968951Sgblack@eecs.umich.edu kids[1]->dump(level + 1); 978951Sgblack@eecs.umich.edu } 988951Sgblack@eecs.umich.edu }; 998951Sgblack@eecs.umich.edu 1008951Sgblack@eecs.umich.edu protected: 1018951Sgblack@eecs.umich.edu Node head; 1028951Sgblack@eecs.umich.edu 1038951Sgblack@eecs.umich.edu public: 1048951Sgblack@eecs.umich.edu typedef Node *Handle; 1058951Sgblack@eecs.umich.edu 1068951Sgblack@eecs.umich.edu Trie() : head(0, 0, NULL) 1078951Sgblack@eecs.umich.edu {} 1088951Sgblack@eecs.umich.edu 1098951Sgblack@eecs.umich.edu static const unsigned MaxBits = sizeof(Key) * 8; 1108951Sgblack@eecs.umich.edu 1118951Sgblack@eecs.umich.edu private: 1128951Sgblack@eecs.umich.edu /** 1138951Sgblack@eecs.umich.edu * A utility method which checks whether the key being looked up lies 1148951Sgblack@eecs.umich.edu * beyond the Node being examined. If so, it returns true and advances the 1158951Sgblack@eecs.umich.edu * node being examined. 1168951Sgblack@eecs.umich.edu * @param parent The node we're currently "at", which can be updated. 1178951Sgblack@eecs.umich.edu * @param kid The node we may want to move to. 1188951Sgblack@eecs.umich.edu * @param key The key we're looking for. 1198951Sgblack@eecs.umich.edu * @param new_mask The mask to use when matching against the key. 1208951Sgblack@eecs.umich.edu * @return Whether the current Node was advanced. 1218951Sgblack@eecs.umich.edu */ 1228951Sgblack@eecs.umich.edu bool 1238951Sgblack@eecs.umich.edu goesAfter(Node **parent, Node *kid, Key key, Key new_mask) 1248951Sgblack@eecs.umich.edu { 1258951Sgblack@eecs.umich.edu if (kid && kid->matches(key) && (kid->mask & new_mask) == kid->mask) { 1268951Sgblack@eecs.umich.edu *parent = kid; 1278951Sgblack@eecs.umich.edu return true; 1288951Sgblack@eecs.umich.edu } else { 1298951Sgblack@eecs.umich.edu return false; 1308951Sgblack@eecs.umich.edu } 1318951Sgblack@eecs.umich.edu } 1328951Sgblack@eecs.umich.edu 1338951Sgblack@eecs.umich.edu /** 1348951Sgblack@eecs.umich.edu * A utility method which extends a mask value one more bit towards the 1358951Sgblack@eecs.umich.edu * lsb. This is almost just a signed right shift, except that the shifted 1368951Sgblack@eecs.umich.edu * in bits are technically undefined. This is also slightly complicated by 1378951Sgblack@eecs.umich.edu * the zero case. 1388951Sgblack@eecs.umich.edu * @param orig The original mask to extend. 1398951Sgblack@eecs.umich.edu * @return The extended mask. 1408951Sgblack@eecs.umich.edu */ 1418951Sgblack@eecs.umich.edu Key 1428951Sgblack@eecs.umich.edu extendMask(Key orig) 1438951Sgblack@eecs.umich.edu { 1448951Sgblack@eecs.umich.edu // Just in case orig was 0. 1458951Sgblack@eecs.umich.edu const Key msb = ULL(1) << (MaxBits - 1); 1468951Sgblack@eecs.umich.edu return orig | (orig >> 1) | msb; 1478951Sgblack@eecs.umich.edu } 1488951Sgblack@eecs.umich.edu 1498951Sgblack@eecs.umich.edu /** 1508951Sgblack@eecs.umich.edu * Method which looks up the Handle corresponding to a particular key. This 1518952Sgblack@eecs.umich.edu * is useful if you want to delete the Handle corresponding to a key since 1528952Sgblack@eecs.umich.edu * the "remove" function takes a Handle as its argument. 1538951Sgblack@eecs.umich.edu * @param key The key to look up. 1548952Sgblack@eecs.umich.edu * @return The first Handle matching this key, or NULL if none was found. 1558951Sgblack@eecs.umich.edu */ 1568951Sgblack@eecs.umich.edu Handle 1578951Sgblack@eecs.umich.edu lookupHandle(Key key) 1588951Sgblack@eecs.umich.edu { 1598951Sgblack@eecs.umich.edu Node *node = &head; 1608951Sgblack@eecs.umich.edu while (node) { 1618951Sgblack@eecs.umich.edu if (node->value) 1628951Sgblack@eecs.umich.edu return node; 1638951Sgblack@eecs.umich.edu 1648951Sgblack@eecs.umich.edu if (node->kids[0] && node->kids[0]->matches(key)) 1658951Sgblack@eecs.umich.edu node = node->kids[0]; 1668951Sgblack@eecs.umich.edu else if (node->kids[1] && node->kids[1]->matches(key)) 1678951Sgblack@eecs.umich.edu node = node->kids[1]; 1688951Sgblack@eecs.umich.edu else 1698951Sgblack@eecs.umich.edu node = NULL; 1708951Sgblack@eecs.umich.edu } 1718951Sgblack@eecs.umich.edu 1728951Sgblack@eecs.umich.edu return NULL; 1738951Sgblack@eecs.umich.edu } 1748951Sgblack@eecs.umich.edu 1758951Sgblack@eecs.umich.edu public: 1768951Sgblack@eecs.umich.edu /** 1778951Sgblack@eecs.umich.edu * Method which inserts a key/value pair into the trie. 1788951Sgblack@eecs.umich.edu * @param key The key which can later be used to look up this value. 1798951Sgblack@eecs.umich.edu * @param width How many bits of the key (from msb) should be used. 1808951Sgblack@eecs.umich.edu * @param val A pointer to the value to store in the trie. 1818952Sgblack@eecs.umich.edu * @return A Handle corresponding to this value. 1828951Sgblack@eecs.umich.edu */ 1838951Sgblack@eecs.umich.edu Handle 1848951Sgblack@eecs.umich.edu insert(Key key, unsigned width, Value *val) 1858951Sgblack@eecs.umich.edu { 1868951Sgblack@eecs.umich.edu // Build a mask which masks off all the bits we don't care about. 1878951Sgblack@eecs.umich.edu Key new_mask = ~(Key)0; 1888951Sgblack@eecs.umich.edu if (width < MaxBits) 1898951Sgblack@eecs.umich.edu new_mask <<= (MaxBits - width); 1908951Sgblack@eecs.umich.edu // Use it to tidy up the key. 1918951Sgblack@eecs.umich.edu key &= new_mask; 1928951Sgblack@eecs.umich.edu 1938951Sgblack@eecs.umich.edu // Walk past all the nodes this new node will be inserted after. They 1948951Sgblack@eecs.umich.edu // can be ignored for the purposes of this function. 1958951Sgblack@eecs.umich.edu Node *node = &head; 1968951Sgblack@eecs.umich.edu while (goesAfter(&node, node->kids[0], key, new_mask) || 1978951Sgblack@eecs.umich.edu goesAfter(&node, node->kids[1], key, new_mask)) 1988951Sgblack@eecs.umich.edu {} 1998951Sgblack@eecs.umich.edu assert(node); 2008951Sgblack@eecs.umich.edu 2018951Sgblack@eecs.umich.edu Key cur_mask = node->mask; 2028951Sgblack@eecs.umich.edu // If we're already where the value needs to be... 2038951Sgblack@eecs.umich.edu if (cur_mask == new_mask) { 2048951Sgblack@eecs.umich.edu assert(!node->value); 2058951Sgblack@eecs.umich.edu node->value = val; 2068951Sgblack@eecs.umich.edu return node; 2078951Sgblack@eecs.umich.edu } 2088951Sgblack@eecs.umich.edu 2098951Sgblack@eecs.umich.edu for (unsigned int i = 0; i < 2; i++) { 2108951Sgblack@eecs.umich.edu Node *&kid = node->kids[i]; 2118951Sgblack@eecs.umich.edu Node *new_node; 2128951Sgblack@eecs.umich.edu if (!kid) { 2138951Sgblack@eecs.umich.edu // No kid. Add a new one. 2148951Sgblack@eecs.umich.edu new_node = new Node(key, new_mask, val); 2158951Sgblack@eecs.umich.edu new_node->parent = node; 2168951Sgblack@eecs.umich.edu kid = new_node; 2178951Sgblack@eecs.umich.edu return new_node; 2188951Sgblack@eecs.umich.edu } 2198951Sgblack@eecs.umich.edu 2208951Sgblack@eecs.umich.edu // Walk down the leg until something doesn't match or we run out 2218951Sgblack@eecs.umich.edu // of bits. 2228951Sgblack@eecs.umich.edu Key last_mask; 2238951Sgblack@eecs.umich.edu bool done; 2248951Sgblack@eecs.umich.edu do { 2258951Sgblack@eecs.umich.edu last_mask = cur_mask; 2268951Sgblack@eecs.umich.edu cur_mask = extendMask(cur_mask); 2278951Sgblack@eecs.umich.edu done = ((key & cur_mask) != (kid->key & cur_mask)) || 2288951Sgblack@eecs.umich.edu last_mask == new_mask; 2298951Sgblack@eecs.umich.edu } while (!done); 2308951Sgblack@eecs.umich.edu cur_mask = last_mask; 2318951Sgblack@eecs.umich.edu 2328951Sgblack@eecs.umich.edu // If this isn't the right leg to go down at all, skip it. 2338951Sgblack@eecs.umich.edu if (cur_mask == node->mask) 2348951Sgblack@eecs.umich.edu continue; 2358951Sgblack@eecs.umich.edu 2368951Sgblack@eecs.umich.edu // At the point we walked to above, add a new node. 2378951Sgblack@eecs.umich.edu new_node = new Node(key, cur_mask, NULL); 2388951Sgblack@eecs.umich.edu new_node->parent = node; 2398951Sgblack@eecs.umich.edu kid->parent = new_node; 2408951Sgblack@eecs.umich.edu new_node->kids[0] = kid; 2418951Sgblack@eecs.umich.edu kid = new_node; 2428951Sgblack@eecs.umich.edu 2438951Sgblack@eecs.umich.edu // If we ran out of bits, the value goes right here. 2448951Sgblack@eecs.umich.edu if (cur_mask == new_mask) { 2458951Sgblack@eecs.umich.edu new_node->value = val; 2468951Sgblack@eecs.umich.edu return new_node; 2478951Sgblack@eecs.umich.edu } 2488951Sgblack@eecs.umich.edu 2498951Sgblack@eecs.umich.edu // Still more bits to deal with, so add a new node for that path. 2508951Sgblack@eecs.umich.edu new_node = new Node(key, new_mask, val); 2518951Sgblack@eecs.umich.edu new_node->parent = kid; 2528951Sgblack@eecs.umich.edu kid->kids[1] = new_node; 2538951Sgblack@eecs.umich.edu return new_node; 2548951Sgblack@eecs.umich.edu } 2558951Sgblack@eecs.umich.edu 2568951Sgblack@eecs.umich.edu panic("Reached the end of the Trie insert function!\n"); 2578951Sgblack@eecs.umich.edu return NULL; 2588951Sgblack@eecs.umich.edu } 2598951Sgblack@eecs.umich.edu 2608951Sgblack@eecs.umich.edu /** 2618951Sgblack@eecs.umich.edu * Method which looks up the Value corresponding to a particular key. 2628951Sgblack@eecs.umich.edu * @param key The key to look up. 2638951Sgblack@eecs.umich.edu * @return The first Value matching this key, or NULL if none was found. 2648951Sgblack@eecs.umich.edu */ 2658951Sgblack@eecs.umich.edu Value * 2668951Sgblack@eecs.umich.edu lookup(Key key) 2678951Sgblack@eecs.umich.edu { 2688951Sgblack@eecs.umich.edu Node *node = lookupHandle(key); 2698951Sgblack@eecs.umich.edu if (node) 2708951Sgblack@eecs.umich.edu return node->value; 2718951Sgblack@eecs.umich.edu else 2728951Sgblack@eecs.umich.edu return NULL; 2738951Sgblack@eecs.umich.edu } 2748951Sgblack@eecs.umich.edu 2758951Sgblack@eecs.umich.edu /** 2768951Sgblack@eecs.umich.edu * Method to delete a value from the trie. 2778952Sgblack@eecs.umich.edu * @param node A Handle to remove. 2788951Sgblack@eecs.umich.edu * @return The Value pointer from the removed entry. 2798951Sgblack@eecs.umich.edu */ 2808951Sgblack@eecs.umich.edu Value * 2818951Sgblack@eecs.umich.edu remove(Handle handle) 2828951Sgblack@eecs.umich.edu { 2838951Sgblack@eecs.umich.edu Node *node = handle; 2848951Sgblack@eecs.umich.edu Value *val = node->value; 2858951Sgblack@eecs.umich.edu if (node->kids[1]) { 2868951Sgblack@eecs.umich.edu assert(node->value); 2878951Sgblack@eecs.umich.edu node->value = NULL; 2888951Sgblack@eecs.umich.edu return val; 2898951Sgblack@eecs.umich.edu } 2908951Sgblack@eecs.umich.edu if (!node->parent) 2918951Sgblack@eecs.umich.edu panic("Trie: Can't remove root node.\n"); 2928951Sgblack@eecs.umich.edu 2938951Sgblack@eecs.umich.edu Node *parent = node->parent; 2948951Sgblack@eecs.umich.edu 2958951Sgblack@eecs.umich.edu // If there's a kid, fix up it's parent pointer. 2968951Sgblack@eecs.umich.edu if (node->kids[0]) 2978951Sgblack@eecs.umich.edu node->kids[0]->parent = parent; 2988951Sgblack@eecs.umich.edu // Figure out which kid we are, and update our parent's pointers. 2998951Sgblack@eecs.umich.edu if (parent->kids[0] == node) 3008951Sgblack@eecs.umich.edu parent->kids[0] = node->kids[0]; 3018951Sgblack@eecs.umich.edu else if (parent->kids[1] == node) 3028951Sgblack@eecs.umich.edu parent->kids[1] = node->kids[0]; 3038951Sgblack@eecs.umich.edu else 3048951Sgblack@eecs.umich.edu panic("Trie: Inconsistent parent/kid relationship.\n"); 3058951Sgblack@eecs.umich.edu // Make sure if the parent only has one kid, it's kid[0]. 3068951Sgblack@eecs.umich.edu if (parent->kids[1] && !parent->kids[0]) { 3078951Sgblack@eecs.umich.edu parent->kids[0] = parent->kids[1]; 3088951Sgblack@eecs.umich.edu parent->kids[1] = NULL; 3098951Sgblack@eecs.umich.edu } 3108951Sgblack@eecs.umich.edu 3118951Sgblack@eecs.umich.edu // If the parent has less than two kids and no cargo and isn't the 3128951Sgblack@eecs.umich.edu // root, delete it too. 3138951Sgblack@eecs.umich.edu if (!parent->kids[1] && !parent->value && parent->parent) 3148951Sgblack@eecs.umich.edu remove(parent); 3158951Sgblack@eecs.umich.edu delete node; 3168951Sgblack@eecs.umich.edu return val; 3178951Sgblack@eecs.umich.edu } 3188951Sgblack@eecs.umich.edu 3198951Sgblack@eecs.umich.edu /** 3208951Sgblack@eecs.umich.edu * Method to lookup a value from the trie and then delete it. 3218951Sgblack@eecs.umich.edu * @param key The key to look up and then remove. 3228951Sgblack@eecs.umich.edu * @return The Value pointer from the removed entry, if any. 3238951Sgblack@eecs.umich.edu */ 3248951Sgblack@eecs.umich.edu Value * 3258951Sgblack@eecs.umich.edu remove(Key key) 3268951Sgblack@eecs.umich.edu { 3278951Sgblack@eecs.umich.edu Handle handle = lookupHandle(key); 3288951Sgblack@eecs.umich.edu if (!handle) 3298951Sgblack@eecs.umich.edu return NULL; 3308951Sgblack@eecs.umich.edu return remove(handle); 3318951Sgblack@eecs.umich.edu } 3328951Sgblack@eecs.umich.edu 3338951Sgblack@eecs.umich.edu /** 3348951Sgblack@eecs.umich.edu * A method which removes all key/value pairs from the trie. This is more 3358951Sgblack@eecs.umich.edu * efficient than trying to remove elements individually. 3368951Sgblack@eecs.umich.edu */ 3378951Sgblack@eecs.umich.edu void 3388951Sgblack@eecs.umich.edu clear() 3398951Sgblack@eecs.umich.edu { 3408951Sgblack@eecs.umich.edu head.clear(); 3418951Sgblack@eecs.umich.edu } 3428951Sgblack@eecs.umich.edu 3438951Sgblack@eecs.umich.edu /** 3448951Sgblack@eecs.umich.edu * A debugging method which prints the contents of this trie. 3458951Sgblack@eecs.umich.edu * @param title An identifying title to put in the dump header. 3468951Sgblack@eecs.umich.edu */ 3478951Sgblack@eecs.umich.edu void 3488951Sgblack@eecs.umich.edu dump(const char *title) 3498951Sgblack@eecs.umich.edu { 3508951Sgblack@eecs.umich.edu cprintf("**************************************************\n"); 3518951Sgblack@eecs.umich.edu cprintf("*** Start of Trie: %s\n", title); 3528951Sgblack@eecs.umich.edu cprintf("*** (parent, me, key, mask, value pointer)\n"); 3538951Sgblack@eecs.umich.edu cprintf("**************************************************\n"); 3548951Sgblack@eecs.umich.edu head.dump(0); 3558951Sgblack@eecs.umich.edu } 3568951Sgblack@eecs.umich.edu}; 3578951Sgblack@eecs.umich.edu 3588951Sgblack@eecs.umich.edu#endif 359