trie.hh revision 12334:e0ab29a34764
1/*
2 * Copyright (c) 2012 Google
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met: redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer;
9 * redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution;
12 * neither the name of the copyright holders nor the names of its
13 * contributors may be used to endorse or promote products derived from
14 * this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * Authors: Gabe Black
29 */
30
31#ifndef __BASE_TRIE_HH__
32#define __BASE_TRIE_HH__
33
34#include <cassert>
35
36#include "base/cprintf.hh"
37#include "base/logging.hh"
38#include "base/types.hh"
39
40// Key has to be an integral type.
41template <class Key, class Value>
42class Trie
43{
44  protected:
45    struct Node
46    {
47        Key key;
48        Key mask;
49
50        bool
51        matches(Key test)
52        {
53            return (test & mask) == key;
54        }
55
56        Value *value;
57
58        Node *parent;
59        Node *kids[2];
60
61        Node(Key _key, Key _mask, Value *_val) :
62            key(_key & _mask), mask(_mask), value(_val),
63            parent(NULL)
64        {
65            kids[0] = NULL;
66            kids[1] = NULL;
67        }
68
69        void
70        clear()
71        {
72            if (kids[1]) {
73                kids[1]->clear();
74                delete kids[1];
75                kids[1] = NULL;
76            }
77            if (kids[0]) {
78                kids[0]->clear();
79                delete kids[0];
80                kids[0] = NULL;
81            }
82        }
83
84        void
85        dump(int level)
86        {
87            for (int i = 1; i < level; i++) {
88                cprintf("|");
89            }
90            if (level == 0)
91                cprintf("Root ");
92            else
93                cprintf("+ ");
94            cprintf("(%p, %p, %#X, %#X, %p)\n", parent, this, key, mask, value);
95            if (kids[0])
96                kids[0]->dump(level + 1);
97            if (kids[1])
98                kids[1]->dump(level + 1);
99        }
100    };
101
102  protected:
103    Node head;
104
105  public:
106    typedef Node *Handle;
107
108    Trie() : head(0, 0, NULL)
109    {}
110
111    static const unsigned MaxBits = sizeof(Key) * 8;
112
113  private:
114    /**
115     * A utility method which checks whether the key being looked up lies
116     * beyond the Node being examined. If so, it returns true and advances the
117     * node being examined.
118     * @param parent The node we're currently "at", which can be updated.
119     * @param kid The node we may want to move to.
120     * @param key The key we're looking for.
121     * @param new_mask The mask to use when matching against the key.
122     * @return Whether the current Node was advanced.
123     */
124    bool
125    goesAfter(Node **parent, Node *kid, Key key, Key new_mask)
126    {
127        if (kid && kid->matches(key) && (kid->mask & new_mask) == kid->mask) {
128            *parent = kid;
129            return true;
130        } else {
131            return false;
132        }
133    }
134
135    /**
136     * A utility method which extends a mask value one more bit towards the
137     * lsb. This is almost just a signed right shift, except that the shifted
138     * in bits are technically undefined. This is also slightly complicated by
139     * the zero case.
140     * @param orig The original mask to extend.
141     * @return The extended mask.
142     */
143    Key
144    extendMask(Key orig)
145    {
146        // Just in case orig was 0.
147        const Key msb = ULL(1) << (MaxBits - 1);
148        return orig | (orig >> 1) | msb;
149    }
150
151    /**
152     * Method which looks up the Handle corresponding to a particular key. This
153     * is useful if you want to delete the Handle corresponding to a key since
154     * the "remove" function takes a Handle as its argument.
155     * @param key The key to look up.
156     * @return The first Handle matching this key, or NULL if none was found.
157     */
158    Handle
159    lookupHandle(Key key)
160    {
161        Node *node = &head;
162        while (node) {
163            if (node->value)
164                return node;
165
166            if (node->kids[0] && node->kids[0]->matches(key))
167                node = node->kids[0];
168            else if (node->kids[1] && node->kids[1]->matches(key))
169                node = node->kids[1];
170            else
171                node = NULL;
172        }
173
174        return NULL;
175    }
176
177  public:
178    /**
179     * Method which inserts a key/value pair into the trie.
180     * @param key The key which can later be used to look up this value.
181     * @param width How many bits of the key (from msb) should be used.
182     * @param val A pointer to the value to store in the trie.
183     * @return A Handle corresponding to this value.
184     */
185    Handle
186    insert(Key key, unsigned width, Value *val)
187    {
188        // We use NULL value pointers to mark internal nodes of the trie, so
189        // we don't allow inserting them as real values.
190        assert(val);
191
192        // Build a mask which masks off all the bits we don't care about.
193        Key new_mask = ~(Key)0;
194        if (width < MaxBits)
195            new_mask <<= (MaxBits - width);
196        // Use it to tidy up the key.
197        key &= new_mask;
198
199        // Walk past all the nodes this new node will be inserted after. They
200        // can be ignored for the purposes of this function.
201        Node *node = &head;
202        while (goesAfter(&node, node->kids[0], key, new_mask) ||
203               goesAfter(&node, node->kids[1], key, new_mask))
204        {}
205        assert(node);
206
207        Key cur_mask = node->mask;
208        // If we're already where the value needs to be...
209        if (cur_mask == new_mask) {
210            assert(!node->value);
211            node->value = val;
212            return node;
213        }
214
215        for (unsigned int i = 0; i < 2; i++) {
216            Node *&kid = node->kids[i];
217            Node *new_node;
218            if (!kid) {
219                // No kid. Add a new one.
220                new_node = new Node(key, new_mask, val);
221                new_node->parent = node;
222                kid = new_node;
223                return new_node;
224            }
225
226            // Walk down the leg until something doesn't match or we run out
227            // of bits.
228            Key last_mask;
229            bool done;
230            do {
231                last_mask = cur_mask;
232                cur_mask = extendMask(cur_mask);
233                done = ((key & cur_mask) != (kid->key & cur_mask)) ||
234                    last_mask == new_mask;
235            } while (!done);
236            cur_mask = last_mask;
237
238            // If this isn't the right leg to go down at all, skip it.
239            if (cur_mask == node->mask)
240                continue;
241
242            // At the point we walked to above, add a new node.
243            new_node = new Node(key, cur_mask, NULL);
244            new_node->parent = node;
245            kid->parent = new_node;
246            new_node->kids[0] = kid;
247            kid = new_node;
248
249            // If we ran out of bits, the value goes right here.
250            if (cur_mask == new_mask) {
251                new_node->value = val;
252                return new_node;
253            }
254
255            // Still more bits to deal with, so add a new node for that path.
256            new_node = new Node(key, new_mask, val);
257            new_node->parent = kid;
258            kid->kids[1] = new_node;
259            return new_node;
260        }
261
262        panic("Reached the end of the Trie insert function!\n");
263        return NULL;
264    }
265
266    /**
267     * Method which looks up the Value corresponding to a particular key.
268     * @param key The key to look up.
269     * @return The first Value matching this key, or NULL if none was found.
270     */
271    Value *
272    lookup(Key key)
273    {
274        Node *node = lookupHandle(key);
275        if (node)
276            return node->value;
277        else
278            return NULL;
279    }
280
281    /**
282     * Method to delete a value from the trie.
283     * @param node A Handle to remove.
284     * @return The Value pointer from the removed entry.
285     */
286    Value *
287    remove(Handle handle)
288    {
289        Node *node = handle;
290        Value *val = node->value;
291        if (node->kids[1]) {
292            assert(node->value);
293            node->value = NULL;
294            return val;
295        }
296        if (!node->parent)
297            panic("Trie: Can't remove root node.\n");
298
299        Node *parent = node->parent;
300
301        // If there's a kid, fix up it's parent pointer.
302        if (node->kids[0])
303            node->kids[0]->parent = parent;
304        // Figure out which kid we are, and update our parent's pointers.
305        if (parent->kids[0] == node)
306            parent->kids[0] = node->kids[0];
307        else if (parent->kids[1] == node)
308            parent->kids[1] = node->kids[0];
309        else
310            panic("Trie: Inconsistent parent/kid relationship.\n");
311        // Make sure if the parent only has one kid, it's kid[0].
312        if (parent->kids[1] && !parent->kids[0]) {
313            parent->kids[0] = parent->kids[1];
314            parent->kids[1] = NULL;
315        }
316
317        // If the parent has less than two kids and no cargo and isn't the
318        // root, delete it too.
319        if (!parent->kids[1] && !parent->value && parent->parent)
320            remove(parent);
321        delete node;
322        return val;
323    }
324
325    /**
326     * Method to lookup a value from the trie and then delete it.
327     * @param key The key to look up and then remove.
328     * @return The Value pointer from the removed entry, if any.
329     */
330    Value *
331    remove(Key key)
332    {
333        Handle handle = lookupHandle(key);
334        if (!handle)
335            return NULL;
336        return remove(handle);
337    }
338
339    /**
340     * A method which removes all key/value pairs from the trie. This is more
341     * efficient than trying to remove elements individually.
342     */
343    void
344    clear()
345    {
346        head.clear();
347    }
348
349    /**
350     * A debugging method which prints the contents of this trie.
351     * @param title An identifying title to put in the dump header.
352     */
353    void
354    dump(const char *title)
355    {
356        cprintf("**************************************************\n");
357        cprintf("*** Start of Trie: %s\n", title);
358        cprintf("*** (parent, me, key, mask, value pointer)\n");
359        cprintf("**************************************************\n");
360        head.dump(0);
361    }
362};
363
364#endif
365