sim: A trie data structure specifically to speed up paging lookups.

This change adds a trie data structure which stores an arbitrary pointer type,
keyed by an address and the number of that address's most significant bits
which are relevant. A lookup traverses the trie from the root and returns the
first entry along the way which holds a value and matches the address.
This commit is contained in:
Gabe Black 2012-04-14 23:19:34 -07:00
parent 14edc6013d
commit c4c27ded42
3 changed files with 486 additions and 0 deletions

358
src/base/trie.hh Normal file
View file

@ -0,0 +1,358 @@
/*
* Copyright (c) 2012 Google
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Gabe Black
*/
#ifndef __BASE_TRIE_HH__
#define __BASE_TRIE_HH__
#include "base/cprintf.hh"
#include "base/misc.hh"
#include "base/types.hh"
/**
 * A trie which maps the most significant bits of a key to a Value pointer.
 *
 * Every node carries a key and a mask; a node matches a lookup key when the
 * lookup key agrees with the node's key on all bits set in the mask. Nodes
 * with a NULL value pointer are internal branch points only. The trie owns
 * its nodes, but it never dereferences or frees the Value pointers handed to
 * it.
 *
 * Key has to be an integral type.
 */
template <class Key, class Value>
class Trie
{
  protected:
    struct Node
    {
        // The key bits this node stands for, already masked by "mask".
        Key key;
        // Which bits of a key are significant for this node.
        Key mask;

        /**
         * Check whether a key falls under this node.
         * @param test The key to test.
         * @return True if test agrees with this node on all masked bits.
         */
        bool
        matches(Key test) const
        {
            return (test & mask) == key;
        }

        // The cargo of this node, or NULL for a purely internal node.
        Value *value;

        Node *parent;
        // If a node has only one kid, it is kept in kids[0].
        Node *kids[2];

        Node(Key _key, Key _mask, Value *_val) :
            key(_key & _mask), mask(_mask), value(_val),
            parent(NULL)
        {
            kids[0] = NULL;
            kids[1] = NULL;
        }

        /**
         * Recursively delete everything below this node. The node itself
         * and its value pointer are left alone.
         */
        void
        clear()
        {
            if (kids[1]) {
                kids[1]->clear();
                delete kids[1];
                kids[1] = NULL;
            }
            if (kids[0]) {
                kids[0]->clear();
                delete kids[0];
                kids[0] = NULL;
            }
        }

        /**
         * Print this node and, recursively, its kids.
         * @param level The depth of this node; 0 means the root.
         */
        void
        dump(int level)
        {
            for (int i = 1; i < level; i++) {
                cprintf("|");
            }
            if (level == 0)
                cprintf("Root ");
            else
                cprintf("+ ");
            cprintf("(%p, %p, %#X, %#X, %p)\n", parent, this, key, mask, value);
            if (kids[0])
                kids[0]->dump(level + 1);
            if (kids[1])
                kids[1]->dump(level + 1);
        }
    };

  protected:
    // The root. Its mask is empty, so it matches every key.
    Node head;

  public:
    typedef Node *Handle;

    Trie() : head(0, 0, NULL)
    {}

    /**
     * Release all the nodes which were allocated by insert. The Value
     * pointers stored in them are not touched.
     */
    ~Trie()
    {
        clear();
    }

    static const unsigned MaxBits = sizeof(Key) * 8;

  private:
    // The nodes are owned through raw pointers, and kids point back at their
    // parent, so a member-wise copy would share (and later double delete)
    // them. Copying is disabled by declaring these and not defining them.
    Trie(const Trie &);
    Trie &operator=(const Trie &);

    /**
     * A utility method which checks whether the key being looked up lies
     * beyond the Node being examined. If so, it returns true and advances the
     * node being examined.
     * @param parent The node we're currently "at", which can be updated.
     * @param kid The node we may want to move to.
     * @param key The key we're looking for.
     * @param new_mask The mask to use when matching against the key.
     * @return Whether the current Node was advanced.
     */
    bool
    goesAfter(Node **parent, Node *kid, Key key, Key new_mask)
    {
        // The kid has to match, and its mask must not use any bit which
        // new_mask doesn't use as well.
        if (kid && kid->matches(key) && (kid->mask & new_mask) == kid->mask) {
            *parent = kid;
            return true;
        } else {
            return false;
        }
    }

    /**
     * A utility method which extends a mask value one more bit towards the
     * lsb. This is almost just a signed right shift, except that the shifted
     * in bits are technically undefined. This is also slightly complicated by
     * the zero case.
     * @param orig The original mask to extend.
     * @return The extended mask.
     */
    Key
    extendMask(Key orig)
    {
        // Just in case orig was 0.
        const Key msb = ULL(1) << (MaxBits - 1);
        return orig | (orig >> 1) | msb;
    }

    /**
     * Method which looks up the Handle corresponding to a particular key. This
     * is useful if you want to delete the Node corresponding to a key since
     * the "remove" function takes a Node as its argument.
     * @param key The key to look up.
     * @return The first Node matching this key, or NULL if none was found.
     */
    Handle
    lookupHandle(Key key)
    {
        Node *node = &head;
        while (node) {
            // The first node on the way down which has cargo wins.
            if (node->value)
                return node;

            if (node->kids[0] && node->kids[0]->matches(key))
                node = node->kids[0];
            else if (node->kids[1] && node->kids[1]->matches(key))
                node = node->kids[1];
            else
                node = NULL;
        }

        return NULL;
    }

  public:
    /**
     * Method which inserts a key/value pair into the trie.
     * @param key The key which can later be used to look up this value.
     * @param width How many bits of the key (from msb) should be used. A
     *              width of 0 uses no bits at all, so the value is stored on
     *              the root and matches every key. Widths of MaxBits or more
     *              use the whole key.
     * @param val A pointer to the value to store in the trie. Must not be
     *            NULL.
     * @return A pointer to the Node which holds this value.
     */
    Handle
    insert(Key key, unsigned width, Value *val)
    {
        // NULL value pointers mark internal nodes, so they can't be stored
        // as real values.
        assert(val);

        // Build a mask which masks off all the bits we don't care about.
        // Shifting by the full width of the type is undefined, so the zero
        // width case is handled separately.
        Key new_mask = ~(Key)0;
        if (width == 0)
            new_mask = 0;
        else if (width < MaxBits)
            new_mask <<= (MaxBits - width);
        // Use it to tidy up the key.
        key &= new_mask;

        // Walk past all the nodes this new node will be inserted after. They
        // can be ignored for the purposes of this function.
        Node *node = &head;
        while (goesAfter(&node, node->kids[0], key, new_mask) ||
               goesAfter(&node, node->kids[1], key, new_mask))
        {}
        assert(node);

        Key cur_mask = node->mask;
        // If we're already where the value needs to be...
        if (cur_mask == new_mask) {
            assert(!node->value);
            node->value = val;
            return node;
        }

        for (unsigned int i = 0; i < 2; i++) {
            Node *&kid = node->kids[i];
            Node *new_node;
            if (!kid) {
                // No kid. Add a new one.
                new_node = new Node(key, new_mask, val);
                new_node->parent = node;
                kid = new_node;
                return new_node;
            }

            // Walk down the leg until something doesn't match or we run out
            // of bits.
            Key last_mask;
            bool done;
            do {
                last_mask = cur_mask;
                cur_mask = extendMask(cur_mask);
                done = ((key & cur_mask) != (kid->key & cur_mask)) ||
                    last_mask == new_mask;
            } while (!done);
            // Back up to the last mask under which the keys still agreed.
            cur_mask = last_mask;

            // If this isn't the right leg to go down at all, skip it.
            if (cur_mask == node->mask)
                continue;

            // At the point we walked to above, add a new node.
            new_node = new Node(key, cur_mask, NULL);
            new_node->parent = node;
            kid->parent = new_node;
            new_node->kids[0] = kid;
            // kid is a reference to node->kids[i], so this splices the new
            // node in between node and the old kid.
            kid = new_node;

            // If we ran out of bits, the value goes right here.
            if (cur_mask == new_mask) {
                new_node->value = val;
                return new_node;
            }

            // Still more bits to deal with, so add a new node for that path.
            new_node = new Node(key, new_mask, val);
            new_node->parent = kid;
            kid->kids[1] = new_node;
            return new_node;
        }

        panic("Reached the end of the Trie insert function!\n");
        return NULL;
    }

    /**
     * Method which looks up the Value corresponding to a particular key.
     * @param key The key to look up.
     * @return The first Value matching this key, or NULL if none was found.
     */
    Value *
    lookup(Key key)
    {
        Node *node = lookupHandle(key);
        if (node)
            return node->value;
        else
            return NULL;
    }

    /**
     * Method to delete a value from the trie.
     * @param handle A pointer to the Node to remove.
     * @return The Value pointer from the removed entry.
     */
    Value *
    remove(Handle handle)
    {
        Node *node = handle;
        Value *val = node->value;

        // A node with two kids has to stay as a branch point, so only its
        // cargo is dropped.
        if (node->kids[1]) {
            assert(node->value);
            node->value = NULL;
            return val;
        }

        // The root is part of the Trie object and is never unlinked. If it
        // holds a value (inserted with a width of 0), just drop that.
        if (!node->parent) {
            if (!node->value)
                panic("Trie: Can't remove root node.\n");
            node->value = NULL;
            return val;
        }

        Node *parent = node->parent;

        // If there's a kid, fix up its parent pointer.
        if (node->kids[0])
            node->kids[0]->parent = parent;

        // Figure out which kid we are, and update our parent's pointers.
        if (parent->kids[0] == node)
            parent->kids[0] = node->kids[0];
        else if (parent->kids[1] == node)
            parent->kids[1] = node->kids[0];
        else
            panic("Trie: Inconsistent parent/kid relationship.\n");

        // Make sure if the parent only has one kid, it's kids[0].
        if (parent->kids[1] && !parent->kids[0]) {
            parent->kids[0] = parent->kids[1];
            parent->kids[1] = NULL;
        }

        // If the parent has less than two kids and no cargo and isn't the
        // root, delete it too.
        if (!parent->kids[1] && !parent->value && parent->parent)
            remove(parent);

        delete node;
        return val;
    }

    /**
     * Method to lookup a value from the trie and then delete it.
     * @param key The key to look up and then remove.
     * @return The Value pointer from the removed entry, if any.
     */
    Value *
    remove(Key key)
    {
        Handle handle = lookupHandle(key);
        if (!handle)
            return NULL;
        return remove(handle);
    }

    /**
     * A method which removes all key/value pairs from the trie. This is more
     * efficient than trying to remove elements individually.
     */
    void
    clear()
    {
        head.clear();
    }

    /**
     * A debugging method which prints the contents of this trie.
     * @param title An identifying title to put in the dump header.
     */
    void
    dump(const char *title)
    {
        cprintf("**************************************************\n");
        cprintf("*** Start of Trie: %s\n", title);
        cprintf("*** (parent, me, key, mask, value pointer)\n");
        cprintf("**************************************************\n");
        head.dump(0);
    }
};
#endif

View file

@ -48,6 +48,7 @@ UnitTest('rangemaptest', 'rangemaptest.cc')
UnitTest('rangemultimaptest', 'rangemultimaptest.cc')
UnitTest('refcnttest', 'refcnttest.cc')
UnitTest('strnumtest', 'strnumtest.cc')
UnitTest('trietest', 'trietest.cc')
stattest_py = PySource('m5', 'stattestmain.py', skip_lib=True)
stattest_swig = SwigSource('m5.internal', 'stattest.i', skip_lib=True)

127
src/unittest/trietest.cc Normal file
View file

@ -0,0 +1,127 @@
/*
* Copyright (c) 2012 Google
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Gabe Black
*/
#include <cassert>
#include <iostream>
#include "base/cprintf.hh"
#include "base/trie.hh"
#include "base/types.hh"
#include "unittest/unittest.hh"
using UnitTest::setCase;
typedef Trie<Addr, uint32_t> TestTrie;
int
main()
{
// Create an empty Ptr and verify it's data pointer is NULL.
setCase("An empty trie.");
TestTrie trie1;
trie1.dump("Empty");
cprintf("\n\n");
setCase("A single entry.");
trie1.insert(0x0123456789abcdef, 40, (uint32_t *)(uintptr_t)(1));
trie1.dump("One entry");
cprintf("\n\n");
setCase("Two entries, one on the way to the other.");
TestTrie trie2;
trie2.insert(0x0123456789abcdef, 40, (uint32_t *)(uintptr_t)(1));
trie2.insert(0x0123456789abcdef, 36, (uint32_t *)(uintptr_t)(2));
trie2.dump("Two entries inline v1");
cprintf("\n\n");
TestTrie trie3;
trie3.insert(0x0123456789abcdef, 36, (uint32_t *)(uintptr_t)(2));
trie3.insert(0x0123456789abcdef, 40, (uint32_t *)(uintptr_t)(1));
trie3.dump("Two entries inline v2");
cprintf("\n\n");
setCase("Two entries on different paths.");
TestTrie trie4;
trie4.insert(0x0123456789abcdef, 40, (uint32_t *)(uintptr_t)(2));
trie4.insert(0x0123456776543210, 40, (uint32_t *)(uintptr_t)(1));
trie4.dump("Two split entries");
cprintf("\n\n");
setCase("Skipping past an entry but not two.");
TestTrie trie5;
trie5.insert(0x0123456789000000, 40, (uint32_t *)(uintptr_t)(4));
trie5.insert(0x0123000000000000, 40, (uint32_t *)(uintptr_t)(1));
trie5.insert(0x0123456780000000, 40, (uint32_t *)(uintptr_t)(3));
trie5.insert(0x0123456700000000, 40, (uint32_t *)(uintptr_t)(2));
trie5.dump("Complex insertion");
cprintf("\n\n");
setCase("Looking things up.");
EXPECT_EQ((uintptr_t)trie5.lookup(0x0123000000000000), 1);
EXPECT_EQ((uintptr_t)trie5.lookup(0x0123456700000000), 2);
EXPECT_EQ((uintptr_t)trie5.lookup(0x0123456780000000), 3);
EXPECT_EQ((uintptr_t)trie5.lookup(0x0123456789000000), 4);
setCase("Removing entries.");
TestTrie trie6;
TestTrie::Handle node1, node2;
trie6.insert(0x0123456789000000, 40, (uint32_t *)(uintptr_t)(4));
trie6.insert(0x0123000000000000, 40, (uint32_t *)(uintptr_t)(1));
trie6.insert(0x0123456780000000, 40, (uint32_t *)(uintptr_t)(3));
node1 = trie6.insert(0x0123456700000000, 40, (uint32_t *)(uintptr_t)(2));
node2 = trie6.insert(0x0123456700000000, 32, (uint32_t *)(uintptr_t)(10));
trie6.dump("Fill before removal");
cprintf("\n\n");
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123000000000000), 1);
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123456700000000), 10);
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123456780000000), 10);
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123456789000000), 10);
trie6.remove(node2);
trie6.dump("One node removed");
cprintf("\n\n");
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123000000000000), 1);
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123456700000000), 2);
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123456780000000), 3);
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123456789000000), 4);
trie6.remove(node1);
trie6.dump("Two nodes removed");
cprintf("\n\n");
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123000000000000), 1);
EXPECT_EQ(trie6.lookup(0x0123456700000000), NULL);
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123456780000000), 3);
EXPECT_EQ((uintptr_t)trie6.lookup(0x0123456789000000), 4);
return UnitTest::printResults();
}