#include <ros/errno.h>
#include <radix.h>
#include <slab.h>
+#include <kmalloc.h>
#include <string.h>
#include <stdio.h>
+#include <rcu.h>
struct kmem_cache *radix_kcache;
static struct radix_node *__radix_lookup_node(struct radix_tree *tree,
/* Initializes the radix tree system, mostly just builds the kcache */
void radix_init(void)
{
	/* Trailing NULL/0 args: no slab source, ctor, dtor, or private data.
	 * Nodes are zeroed explicitly (memset) at each allocation site. */
	radix_kcache = kmem_cache_create("radix_nodes",
					 sizeof(struct radix_node),
					 __alignof__(struct radix_node), 0,
					 NULL, 0, 0, NULL);
}
/* Initializes a tree dynamically.  Callers must serialize writers (qlock);
 * lockless readers use the seq ctr to snapshot root/depth/upper_bound. */
void radix_tree_init(struct radix_tree *tree)
{
	/* Writers bump this around changes to root/depth/upper_bound, so
	 * readers (see __radix_lookup_node()) get a consistent view. */
	tree->seq = SEQCTR_INITIALIZER;
	tree->root = 0;
	tree->depth = 0;
	tree->upper_bound = 0;
}
+static bool __should_not_run_cb(void **slot, unsigned long tree_idx, void *a)
+{
+ panic("Tried destroying a non-empty tree! (slot %p, idx %lu)",
+ *slot, tree_idx);
+}
+
/* Will clean up all the memory associated with a tree. Shouldn't be necessary
* if you delete all of the items, which you should do anyways since they are
* usually void*. Might expand this to have a function to call on every leaf
* slot. */
void radix_tree_destroy(struct radix_tree *tree)
{
- panic("Not implemented");
+ /* Currently, we may have a root node, even if all the elements were removed
+ */
+ radix_for_each_slot(tree, __should_not_run_cb, NULL);
+ if (tree->root) {
+ kmem_cache_free(radix_kcache, tree->root);
+ tree->root = NULL;
+ }
}
/* Attempts to insert an item in the tree at the given key. ENOMEM if we ran
- * out of memory, EEXIST if an item is already in the tree. */
-int radix_insert(struct radix_tree *tree, unsigned long key, void *item)
+ * out of memory, EEXIST if an item is already in the tree. On success, will
+ * also return the slot pointer, if requested.
+ *
+ * Caller must maintain mutual exclusion (qlock) */
+int radix_insert(struct radix_tree *tree, unsigned long key, void *item,
+ void ***slot_p)
{
struct radix_node *r_node;
void **slot;
+
/* Is the tree tall enough? if not, it needs to grow a level. This will
 * also create the initial node (upper bound starts at 0). */
- if (key >= tree->upper_bound) {
- r_node = kmem_cache_alloc(radix_kcache, 0);
- if (!r_node)
- return -ENOMEM;
+ while (key >= tree->upper_bound) {
+ r_node = kmem_cache_alloc(radix_kcache, MEM_WAIT);
memset(r_node, 0, sizeof(struct radix_node));
if (tree->root) {
/* tree->root is the old root, now a child of the future root */
/* NOTE(review): no code here actually reparents the old root under
 * r_node (e.g. r_node->items[0] = tree->root, fixing up parent /
 * my_slot / num_items).  Lines appear to have been dropped from
 * this chunk -- confirm against the full file before relying on
 * this branch. */
} else {
/* if there was no root before, we're both the root and a leaf */
r_node->leaf = TRUE;
+ r_node->parent = 0;
}
+ /* Need to atomically change root, depth, and upper_bound for our
+ * readers, who will check the seq ctr. */
+ __seq_start_write(&tree->seq);
tree->root = r_node;
r_node->my_slot = &tree->root;
tree->depth++;
- tree->upper_bound = 1 << (LOG_RNODE_SLOTS * tree->depth);
+ tree->upper_bound = 1ULL << (LOG_RNODE_SLOTS * tree->depth);
+ __seq_end_write(&tree->seq);
}
+ assert(tree->root);
/* the tree now thinks it is tall enough, so find the last node, insert in
 * it, etc */
+ /* This gives us an rcu-protected pointer, though we hold the lock. */
r_node = __radix_lookup_node(tree, key, TRUE);
assert(r_node); /* we want an ENOMEM actually, but i want to see this */
slot = &r_node->items[key & (NR_RNODE_SLOTS - 1)];
if (*slot)
return -EEXIST;
- *slot = item;
+ rcu_assign_pointer(*slot, item);
r_node->num_items++;
+ if (slot_p)
+ *slot_p = slot; /* passing back an rcu-protected pointer */
return 0;
}
+static void __rnode_free_rcu(struct rcu_head *head)
+{
+ struct radix_node *r_node = container_of(head, struct radix_node, rcu);
+
+ kmem_cache_free(radix_kcache, r_node);
+}
+
/* Removes an item from it's parent's structure, freeing the parent if there is
* nothing left, potentially recursively. */
-static void __radix_remove_slot(struct radix_node *r_node, struct radix_node **slot)
+static void __radix_remove_slot(struct radix_node *r_node,
+ struct radix_node **slot)
{
assert(*slot); /* make sure there is something there */
- *slot = 0;
+ rcu_assign_pointer(*slot, NULL);
r_node->num_items--;
- if (!r_node->num_items) {
+ /* this check excludes the root, but the if else handles it. For now, once
+ * we have a root, we'll always keep it (will need some changing in
+ * radix_insert() */
+ if (!r_node->num_items && r_node->parent) {
if (r_node->parent)
__radix_remove_slot(r_node->parent, r_node->my_slot);
else /* we're the last node, attached to the actual tree */
*(r_node->my_slot) = 0;
- kmem_cache_free(radix_kcache, r_node);
+ call_rcu(&r_node->rcu, __rnode_free_rcu);
}
}
/* Removes a key/item from the tree, returning that item (the void*). If it
 * detects a radix_node is now unused, it will dealloc that node. Though the
 * tree will still think it is tall enough to handle its old upper_bound. It
- * won't "shrink". */
+ * won't "shrink".
+ *
+ * Caller must maintain mutual exclusion (qlock) */
void *radix_delete(struct radix_tree *tree, unsigned long key)
{
void **slot;
void *retval;
- struct radix_node *r_node = __radix_lookup_node(tree, key, 0);
+ struct radix_node *r_node;
+
+ /* This is an rcu-protected pointer, though the caller holds a lock. */
+ r_node = __radix_lookup_node(tree, key, false);
if (!r_node)
return 0;
slot = &r_node->items[key & (NR_RNODE_SLOTS - 1)];
- retval = *slot;
+ retval = rcu_dereference(*slot);
if (retval) {
- __radix_remove_slot(r_node, (struct radix_node**)slot);
+ __radix_remove_slot(r_node, (struct radix_node**)slot);
} else {
/* it's okay to delete an empty, but i want to know about it for now */
warn("Tried to remove a non-existant item from a radix tree!");
/* NOTE(review): this chunk is truncated here -- the else-arm close,
 * the "return retval;", and the function's closing brace are not
 * visible in this view.  Confirm against the full file. */
void *radix_lookup(struct radix_tree *tree, unsigned long key)
{
void **slot = radix_lookup_slot(tree, key);
+
if (!slot)
return 0;
- return *slot;
+ /* slot was rcu-protected, pointing into the memory of an r_node. we also
+ * want *slot, which is "void *item" to be an rcu-protected pointer. */
+ return rcu_dereference(*slot);
}
/* Returns a pointer to the radix_node holding a given key. 0 if there is no
* ......444444333333222222111111
*
* If an interior node of the tree is missing, this will add one if it was
- * directed to extend the tree. */
+ * directed to extend the tree.
+ *
+ * If we might extend, the caller must maintain mutual exclusion (qlock) */
static struct radix_node *__radix_lookup_node(struct radix_tree *tree,
unsigned long key, bool extend)
{
- unsigned long idx;
- struct radix_node *child_node, *r_node = tree->root;
- if (key >= tree->upper_bound)
+ unsigned long idx, upper_bound;
+ unsigned int depth;
+ seq_ctr_t seq;
+ struct radix_node *child_node, *r_node;
+
+ do {
+ seq = ACCESS_ONCE(tree->seq);
+ rmb();
+ r_node = rcu_dereference(tree->root);
+ depth = tree->depth;
+ upper_bound = tree->upper_bound;
+ } while (seqctr_retry(tree->seq, seq));
+
+ if (key >= upper_bound) {
+ if (extend)
+ warn("Bound (%d) not set for key %d!\n", upper_bound, key);
return 0;
- for (int i = tree->depth; i > 1; i--) { /* i = ..., 4, 3, 2 */
+ }
+ for (int i = depth; i > 1; i--) { /* i = ..., 4, 3, 2 */
idx = (key >> (LOG_RNODE_SLOTS * (i - 1))) & (NR_RNODE_SLOTS - 1);
/* There might not be a node at this part of the tree */
if (!r_node->items[idx]) {
- if (!extend) {
+ if (!extend)
return 0;
- } else {
- /* so build one, possibly returning 0 if we couldn't */
- child_node = kmem_cache_alloc(radix_kcache, 0);
- if (!child_node)
- return 0;
- r_node->items[idx] = child_node;
- memset(child_node, 0, sizeof(struct radix_node));
- /* when we are on the last iteration (i == 2), the child will be
- * a leaf. */
- child_node->leaf = (i == 2) ? TRUE : FALSE;
- child_node->parent = r_node;
- child_node->my_slot = (struct radix_node**)&r_node->items[idx];
- r_node->num_items++;
- r_node = (struct radix_node*)r_node->items[idx];
- }
- } else {
- r_node = (struct radix_node*)r_node->items[idx];
+ child_node = kmem_cache_alloc(radix_kcache, MEM_WAIT);
+ memset(child_node, 0, sizeof(struct radix_node));
+ /* when we are on the last iteration (i == 2), the child will be
+ * a leaf. */
+ child_node->leaf = (i == 2) ? TRUE : FALSE;
+ child_node->parent = r_node;
+ child_node->my_slot = (struct radix_node**)&r_node->items[idx];
+ r_node->num_items++;
+ rcu_assign_pointer(r_node->items[idx], child_node);
}
+ r_node = (struct radix_node*)rcu_dereference(r_node->items[idx]);
}
return r_node;
}
void **radix_lookup_slot(struct radix_tree *tree, unsigned long key)
{
struct radix_node *r_node = __radix_lookup_node(tree, key, FALSE);
+
if (!r_node)
return 0;
key = key & (NR_RNODE_SLOTS - 1);
+ /* r_node is rcu-protected. Our retval is too, since it's a pointer into
+ * the same object as r_node. */
return &r_node->items[key];
}
/* True if the half-open intervals [x_left, x_right) and [y_left, y_right)
 * share at least one index. */
static bool ranges_overlap(unsigned long x_left, unsigned long x_right,
                           unsigned long y_left, unsigned long y_right)
{
	/* Overlap iff one range's left endpoint falls inside the other */
	return ((x_left <= y_left) && (y_left < x_right)) ||
	       ((y_left <= x_left) && (x_left < y_right));
}

+/* Given an index at a depth for a child, returns whether part of it is in the
+ * global range.
+ *
+ * Recall the key is partitioned like so: ....444444333333222222111111. The
+ * depth is 1 when we're in the last rnode and our children are items. When
+ * we're an intermediate node, our depth is higher, and our start/end is the
+ * entire reach of us + our children. */
+static bool child_overlaps_range(unsigned long idx, int depth,
+ unsigned long glb_start_idx,
+ unsigned long glb_end_idx)
+{
+ unsigned long start = idx << (LOG_RNODE_SLOTS * (depth - 1));
+ unsigned long end = (idx + 1) << (LOG_RNODE_SLOTS * (depth - 1));
+
+ return ranges_overlap(start, end, glb_start_idx, glb_end_idx);
+}
+
+/* Helper for walking down a tree.
+ * - depth == 1 means we're on the last radix_node, whose items are all the
+ * actual items.
+ * - tree_idx is the index for our item/node. It encodes the path we took
+ * through the radix tree to get to where we are. For leaves (items), it is
+ * their index in the address space. For internal rnodes, it is a temporary
+ * number, but combined with the depth, you can determine the range of the
+ * rnode's descendents.
+ * - glb_start_idx and glb_end_idx is the global start and end for the entire
+ * for_each operation.
+ *
+ * Returns true if our r_node *was already deleted*. When we call
+ * __radix_remove_slot(), if we removed the last item for r_node, the removal
+ * code will recurse *up* the tree, such that r_node might already be freed.
+ * Same goes for our parent! Hence, we're careful to only access r_node when we
+ * know we have children (once we enter the loop and might remove a slot). */
+static bool rnode_for_each(struct radix_node *r_node, int depth,
+ unsigned long tree_idx, unsigned long glb_start_idx,
+ unsigned long glb_end_idx,
+ radix_cb_t cb, void *arg)
+{
+ unsigned int num_children = ACCESS_ONCE(r_node->num_items);
+
+ /* The tree_idx we were passed was from our parent's perspective. We need
+ * shift it over each time we walk down to put it in terms of our
+ * level/depth. Or think of it as making room for our bits (the values of
+ * i). */
+ tree_idx <<= LOG_RNODE_SLOTS;
+ for (int i = 0; num_children && (i < NR_RNODE_SLOTS); i++) {
+ if (r_node->items[i]) {
+ /* If we really care, we can try to abort the rest of the loop. Not
+ * a big deal */
+ if (!child_overlaps_range(tree_idx + i, depth, glb_start_idx,
+ glb_end_idx))
+ continue;
+ if (depth > 1) {
+ if (rnode_for_each(r_node->items[i], depth - 1, tree_idx + i,
+ glb_start_idx, glb_end_idx, cb, arg))
+ num_children--;
+ } else {
+ if (cb(&r_node->items[i], tree_idx + i, arg)) {
+ __radix_remove_slot(r_node,
+ (struct radix_node**)&r_node->items[i]);
+ num_children--;
+ }
+ }
+ }
+ }
+ return num_children ? false : true;
+}
+
+/* [start_idx, end_idx).
+ *
+ * Caller must maintain mutual exclusion (qlock) */
+void radix_for_each_slot_in_range(struct radix_tree *tree,
+ unsigned long start_idx,
+ unsigned long end_idx,
+ radix_cb_t cb, void *arg)
+{
+ if (!tree->root)
+ return;
+ rnode_for_each(tree->root, tree->depth, 0, start_idx, end_idx, cb, arg);
+}
+
+/* Caller must maintain mutual exclusion (qlock) */
+void radix_for_each_slot(struct radix_tree *tree, radix_cb_t cb, void *arg)
+{
+ radix_for_each_slot_in_range(tree, 0, ULONG_MAX, cb, arg);
+}
+
int radix_gang_lookup(struct radix_tree *tree, void **results,
unsigned long first, unsigned int max_items)
{
void print_radix_tree(struct radix_tree *tree)
{
- printk("Tree %08p, Depth: %d, Bound: %d\n", tree, tree->depth,
+ printk("Tree %p, Depth: %d, Bound: %d\n", tree, tree->depth,
tree->upper_bound);
void print_rnode(struct radix_node *r_node, int depth)
char buf[32] = {0};
for (int i = 0; i < depth; i++)
buf[i] = '\t';
- printk("%sRnode %08p, parent %08p, myslot %08p, %d items, leaf? %d\n",
+ printk("%sRnode %p, parent %p, myslot %p, %d items, leaf? %d\n",
buf, r_node, r_node->parent, r_node->my_slot, r_node->num_items,
r_node->leaf);
for (int i = 0; i < NR_RNODE_SLOTS; i++) {
if (!r_node->items[i])
continue;
if (r_node->leaf)
- printk("\t%sRnode Item %d: %08p\n", buf, i, r_node->items[i]);
+ printk("\t%sRnode Item %d: %p\n", buf, i, r_node->items[i]);
else
print_rnode(r_node->items[i], depth + 1);
}