/* Copyright (c) 2009 The Regents of the University of California
 * Barret Rhoden <brho@cs.berkeley.edu>
 * Kevin Klues <klueska@cs.berkeley.edu>
 * See LICENSE for details.
 *
 * Slab allocator, based on the SunOS 5.4 allocator paper.
 *
 * Note that we don't have a hash table for buf to bufctl for the large buffer
 * objects, so we use the same style for small objects: store the pointer to the
 * controlling bufctl at the top of the slab object.  Fix this with TODO (BUF).
 *
 * Ported directly from the kernel's slab allocator. */
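/* Layout sketch (drawn from the code below, not from the paper): every slot in
 * a slab is obj_size bytes of client data followed by one pointer-sized word
 * (plus any alignment padding).  For small objects that word threads the
 * slab's free list; for large objects it holds the back-pointer to the slot's
 * kmem_bufctl (the TODO (BUF) workaround).  Small slabs additionally keep the
 * struct kmem_slab itself at the end of their single page:
 *
 *	[obj 0][next*][obj 1][next*] ... [obj N-1][next*][struct kmem_slab]
 */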
/* Includes are assumed here: slab.h is expected to pull in the cache/slab
 * structs, the mcs_pdr lock API, and the PGSIZE/ROUNDUP-style helpers. */
#include <slab.h>
#include <sys/mman.h>
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct kmem_cache_list kmem_caches;
struct mcs_pdr_lock kmem_caches_lock;

/* Backend/internal functions, defined later.  Grab the lock before calling
 * these. */
static void kmem_cache_grow(struct kmem_cache *cp);

/* Cache of the kmem_cache objects, needed for bootstrapping */
struct kmem_cache kmem_cache_cache;
struct kmem_cache *kmem_slab_cache, *kmem_bufctl_cache;

static void __kmem_cache_create(struct kmem_cache *kc, const char *name,
                                size_t obj_size, int align, int flags,
                                void (*ctor)(void *, size_t),
                                void (*dtor)(void *, size_t))
{
	assert(kc);
	assert(align);
	mcs_pdr_init(&kc->cache_lock);
	kc->name = name;
	kc->obj_size = obj_size;
	kc->align = align;
	kc->flags = flags;
	TAILQ_INIT(&kc->full_slab_list);
	TAILQ_INIT(&kc->partial_slab_list);
	TAILQ_INIT(&kc->empty_slab_list);
	kc->ctor = ctor;
	kc->dtor = dtor;
	kc->nr_cur_alloc = 0;

	/* put in cache list based on its size */
	struct kmem_cache *i, *prev = NULL;
	mcs_pdr_lock(&kmem_caches_lock);
	/* find the kmem_cache before us in the list.  yes, this is O(n). */
	SLIST_FOREACH(i, &kmem_caches, link) {
		if (i->obj_size < kc->obj_size)
			prev = i;
		else
			break;
	}
	if (prev)
		SLIST_INSERT_AFTER(prev, kc, link);
	else
		SLIST_INSERT_HEAD(&kmem_caches, kc, link);
	mcs_pdr_unlock(&kmem_caches_lock);
}

void kmem_cache_init(void)
{
	mcs_pdr_init(&kmem_caches_lock);
	SLIST_INIT(&kmem_caches);
	/* We need to call the __ version directly to bootstrap the global
	 * kmem_cache_cache. */
	__kmem_cache_create(&kmem_cache_cache, "kmem_cache",
	                    sizeof(struct kmem_cache),
	                    __alignof__(struct kmem_cache), 0, NULL, NULL);
	/* Build the slab and bufctl caches */
	kmem_slab_cache = kmem_cache_create("kmem_slab", sizeof(struct kmem_slab),
	                    __alignof__(struct kmem_slab), 0, NULL, NULL);
	kmem_bufctl_cache = kmem_cache_create("kmem_bufctl",
	                    sizeof(struct kmem_bufctl),
	                    __alignof__(struct kmem_bufctl), 0, NULL, NULL);
}

/* Cache management */
struct kmem_cache *kmem_cache_create(const char *name, size_t obj_size,
                                     int align, int flags,
                                     void (*ctor)(void *, size_t),
                                     void (*dtor)(void *, size_t))
{
	static atomic_t initialized = FALSE;
	/* Init the slab system.  We'll set it to TRUE, and whoever wins the race
	 * (among multiple initializers) does the real init. */
	if (!atomic_swap(&initialized, TRUE))
		kmem_cache_init();

	struct kmem_cache *kc = kmem_cache_alloc(&kmem_cache_cache, 0);
	__kmem_cache_create(kc, name, obj_size, align, flags, ctor, dtor);
	return kc;
}

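/* Typical client usage (illustrative; 'struct foo' and 'foo_cache' are made-up
 * names, not part of this file):
 *
 *	struct kmem_cache *foo_cache =
 *		kmem_cache_create("foo", sizeof(struct foo),
 *		                  __alignof__(struct foo), 0, NULL, NULL);
 *	struct foo *f = kmem_cache_alloc(foo_cache, 0);
 *	...
 *	kmem_cache_free(foo_cache, f);
 *	kmem_cache_destroy(foo_cache);
 */
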
static void kmem_slab_destroy(struct kmem_cache *cp, struct kmem_slab *a_slab)
{
	if (cp->obj_size <= SLAB_LARGE_CUTOFF) {
		/* Deconstruct all the objects, if necessary */
		if (cp->dtor) {
			void *buf = a_slab->free_small_obj;
			for (int i = 0; i < a_slab->num_total_obj; i++) {
				cp->dtor(buf, cp->obj_size);
				buf += a_slab->obj_size;
			}
		}
		munmap(ROUNDDOWN(a_slab, PGSIZE), PGSIZE);
	} else {
		struct kmem_bufctl *i;
		void *page_start = (void*)-1;
		// compute how many pages are allocated, given a power of two allocator
		size_t num_pages = ROUNDUPPWR2(a_slab->num_total_obj * a_slab->obj_size)
		                   / PGSIZE;
		TAILQ_FOREACH(i, &a_slab->bufctl_freelist, link) {
			// Track the lowest buffer address, which is the start of the buffer
			page_start = MIN(page_start, i->buf_addr);
			/* Deconstruct all the objects, if necessary */
			if (cp->dtor) // TODO: (BUF)
				cp->dtor(i->buf_addr, cp->obj_size);
			kmem_cache_free(kmem_bufctl_cache, i);
		}
		// free the pages for the slab's buffer
		munmap(page_start, num_pages * PGSIZE);
		// free the slab object
		kmem_cache_free(kmem_slab_cache, a_slab);
	}
}

/* Once you call destroy, never use this cache again... o/w there may be weird
 * races, and other serious issues. */
void kmem_cache_destroy(struct kmem_cache *cp)
{
	struct kmem_slab *a_slab, *next;

	mcs_pdr_lock(&cp->cache_lock);
	assert(TAILQ_EMPTY(&cp->full_slab_list));
	assert(TAILQ_EMPTY(&cp->partial_slab_list));
	/* Clean out the empty list.  We can't use a regular FOREACH here, since the
	 * link element is stored in the slab struct, which is stored on the page
	 * that we are freeing. */
	a_slab = TAILQ_FIRST(&cp->empty_slab_list);
	while (a_slab) {
		next = TAILQ_NEXT(a_slab, link);
		kmem_slab_destroy(cp, a_slab);
		a_slab = next;
	}
	mcs_pdr_lock(&kmem_caches_lock);
	SLIST_REMOVE(&kmem_caches, cp, kmem_cache, link);
	mcs_pdr_unlock(&kmem_caches_lock);
	/* Unlock before handing cp's memory back to kmem_cache_cache, so we never
	 * touch cp once another thread could have reallocated it. */
	mcs_pdr_unlock(&cp->cache_lock);
	kmem_cache_free(&kmem_cache_cache, cp);
}

/* Front end: clients of caches use these */
void *kmem_cache_alloc(struct kmem_cache *cp, int flags)
{
	void *retval = NULL;
	mcs_pdr_lock(&cp->cache_lock);
	// look at partial list
	struct kmem_slab *a_slab = TAILQ_FIRST(&cp->partial_slab_list);
	// if none, go to empty list and get an empty and make it partial
	if (!a_slab) {
		if (TAILQ_EMPTY(&cp->empty_slab_list))
			// TODO: think about non-sleeping flags
			kmem_cache_grow(cp);
		// move to partial list
		a_slab = TAILQ_FIRST(&cp->empty_slab_list);
		TAILQ_REMOVE(&cp->empty_slab_list, a_slab, link);
		TAILQ_INSERT_HEAD(&cp->partial_slab_list, a_slab, link);
	}
	// have a partial now (a_slab), get an item, return item
	if (cp->obj_size <= SLAB_LARGE_CUTOFF) {
		retval = a_slab->free_small_obj;
		/* adding the size of the cache_obj to get to the pointer at end of the
		 * buffer pointing to the next free_small_obj */
		a_slab->free_small_obj = *(uintptr_t**)(a_slab->free_small_obj +
		                                        cp->obj_size);
	} else {
		// rip the first bufctl out of the partial slab's buf list
		struct kmem_bufctl *a_bufctl = TAILQ_FIRST(&a_slab->bufctl_freelist);
		TAILQ_REMOVE(&a_slab->bufctl_freelist, a_bufctl, link);
		retval = a_bufctl->buf_addr;
	}
	a_slab->num_busy_obj++;
	// Check if we are full, if so, move to the full list
	if (a_slab->num_busy_obj == a_slab->num_total_obj) {
		TAILQ_REMOVE(&cp->partial_slab_list, a_slab, link);
		TAILQ_INSERT_HEAD(&cp->full_slab_list, a_slab, link);
	}
	cp->nr_cur_alloc++;
	mcs_pdr_unlock(&cp->cache_lock);
	return retval;
}

static inline struct kmem_bufctl *buf2bufctl(void *buf, size_t offset)
{
	// TODO: hash table for back reference (BUF)
	return *((struct kmem_bufctl**)(buf + offset));
}

void kmem_cache_free(struct kmem_cache *cp, void *buf)
{
	struct kmem_slab *a_slab;
	struct kmem_bufctl *a_bufctl;

	mcs_pdr_lock(&cp->cache_lock);
	if (cp->obj_size <= SLAB_LARGE_CUTOFF) {
		// find its slab
		a_slab = (struct kmem_slab*)(ROUNDDOWN(buf, PGSIZE) + PGSIZE -
		                             sizeof(struct kmem_slab));
		/* write location of next free small obj to the space at the end of the
		 * buffer, then list buf as the next free small obj */
		*(uintptr_t**)(buf + cp->obj_size) = a_slab->free_small_obj;
		a_slab->free_small_obj = buf;
	} else {
		/* Give the bufctl back to the parent slab */
		// TODO: (BUF) change the interface to not take an offset
		a_bufctl = buf2bufctl(buf, cp->obj_size);
		a_slab = a_bufctl->my_slab;
		TAILQ_INSERT_HEAD(&a_slab->bufctl_freelist, a_bufctl, link);
	}
	a_slab->num_busy_obj--;
	cp->nr_cur_alloc--;
	// if it was full, move it to partial
	if (a_slab->num_busy_obj + 1 == a_slab->num_total_obj) {
		TAILQ_REMOVE(&cp->full_slab_list, a_slab, link);
		TAILQ_INSERT_HEAD(&cp->partial_slab_list, a_slab, link);
	} else if (!a_slab->num_busy_obj) {
		// if there are none, move from partial to empty
		TAILQ_REMOVE(&cp->partial_slab_list, a_slab, link);
		TAILQ_INSERT_HEAD(&cp->empty_slab_list, a_slab, link);
	}
	mcs_pdr_unlock(&cp->cache_lock);
}

/* Back end: internal functions */
/* When this returns, the cache has at least one slab in the empty list.  If the
 * underlying mmap fails, there are some serious issues.  This only grows by one
 * slab at a time.
 *
 * Grab the cache lock before calling this.
 *
 * TODO: think about page colouring issues with kernel memory allocation. */
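/* Rough sizing, for intuition (numbers are illustrative, not from this tree):
 * with 4096-byte pages, a 56-byte obj_size, and 8-byte alignment, each slot is
 * ROUNDUP(56 + 8, 8) = 64 bytes, so a small slab yields
 * (PGSIZE - sizeof(struct kmem_slab)) / 64, i.e. a bit under 64 objects per
 * page. */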
static void kmem_cache_grow(struct kmem_cache *cp)
{
	struct kmem_slab *a_slab;
	struct kmem_bufctl *a_bufctl;
	void *a_page;
	if (cp->obj_size <= SLAB_LARGE_CUTOFF) {
		// Just get a single page for small slabs
		a_page = mmap(0, PGSIZE, PROT_READ | PROT_WRITE,
		              MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
		assert(a_page != MAP_FAILED);
		// the slab struct is stored at the end of the page
		a_slab = (struct kmem_slab*)(a_page + PGSIZE -
		                             sizeof(struct kmem_slab));
		// Need to add room for the next free item pointer in the object buffer.
		a_slab->obj_size = ROUNDUP(cp->obj_size + sizeof(uintptr_t), cp->align);
		a_slab->num_busy_obj = 0;
		a_slab->num_total_obj = (PGSIZE - sizeof(struct kmem_slab)) /
		                        a_slab->obj_size;
		// TODO: consider staggering this IAW section 4.3
		a_slab->free_small_obj = a_page;
		/* Walk and create the free list.  Each item stores the location of the
		 * next one at the end of the block; the last one stores NULL. */
		void *buf = a_slab->free_small_obj;
		for (int i = 0; i < a_slab->num_total_obj - 1; i++) {
			// Initialize the object, if necessary
			if (cp->ctor)
				cp->ctor(buf, cp->obj_size);
			*(uintptr_t**)(buf + cp->obj_size) = buf + a_slab->obj_size;
			buf += a_slab->obj_size;
		}
		*((uintptr_t**)(buf + cp->obj_size)) = NULL;
	} else {
		a_slab = kmem_cache_alloc(kmem_slab_cache, 0);
		// TODO: hash table for back reference (BUF)
		a_slab->obj_size = ROUNDUP(cp->obj_size + sizeof(uintptr_t), cp->align);
		// alloc n pages, such that it can hold at least 8 items
		size_t num_pgs = ROUNDUP(NUM_BUF_PER_SLAB * a_slab->obj_size, PGSIZE) /
		                 PGSIZE;
		// round up to a power-of-two number of pages, to match num_total_obj
		void *buf = mmap(0, ROUNDUPPWR2(num_pgs) * PGSIZE,
		                 PROT_READ | PROT_WRITE,
		                 MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
		assert(buf != MAP_FAILED);
		a_slab->num_busy_obj = 0;
		a_slab->num_total_obj = ROUNDUPPWR2(num_pgs)*PGSIZE / a_slab->obj_size;
		TAILQ_INIT(&a_slab->bufctl_freelist);
		/* for each buffer, set up a bufctl and point to the buffer */
		for (int i = 0; i < a_slab->num_total_obj; i++) {
			// Initialize the object, if necessary
			if (cp->ctor)
				cp->ctor(buf, cp->obj_size);
			a_bufctl = kmem_cache_alloc(kmem_bufctl_cache, 0);
			TAILQ_INSERT_HEAD(&a_slab->bufctl_freelist, a_bufctl, link);
			a_bufctl->buf_addr = buf;
			a_bufctl->my_slab = a_slab;
			// TODO: (BUF) write the bufctl reference at the bottom of the buffer.
			*(struct kmem_bufctl**)(buf + cp->obj_size) = a_bufctl;
			buf += a_slab->obj_size;
		}
	}
	// add a_slab to the empty_list
	TAILQ_INSERT_HEAD(&cp->empty_slab_list, a_slab, link);
}

/* This deallocs every slab from the empty list.  TODO: think a bit more about
 * this.  We can do things like not free all of the empty lists to prevent
 * thrashing.  See 3.4 in the paper. */
void kmem_cache_reap(struct kmem_cache *cp)
{
	struct kmem_slab *a_slab, *next;

	// Destroy all empty slabs.  Refer to the notes about the while loop
	mcs_pdr_lock(&cp->cache_lock);
	a_slab = TAILQ_FIRST(&cp->empty_slab_list);
	while (a_slab) {
		next = TAILQ_NEXT(a_slab, link);
		kmem_slab_destroy(cp, a_slab);
		a_slab = next;
	}
	mcs_pdr_unlock(&cp->cache_lock);
}

void print_kmem_cache(struct kmem_cache *cp)
{
	mcs_pdr_lock(&cp->cache_lock);
	printf("\nPrinting kmem_cache:\n---------------------\n");
	printf("Name: %s\n", cp->name);
	printf("Objsize: %d\n", cp->obj_size);
	printf("Align: %d\n", cp->align);
	printf("Flags: 0x%08x\n", cp->flags);
	printf("Constructor: %p\n", cp->ctor);
	printf("Destructor: %p\n", cp->dtor);
	printf("Slab Full: %p\n", TAILQ_FIRST(&cp->full_slab_list));
	printf("Slab Partial: %p\n", TAILQ_FIRST(&cp->partial_slab_list));
	printf("Slab Empty: %p\n", TAILQ_FIRST(&cp->empty_slab_list));
	printf("Current Allocations: %d\n", cp->nr_cur_alloc);
	mcs_pdr_unlock(&cp->cache_lock);
}

void print_kmem_slab(struct kmem_slab *slab)
{
	printf("\nPrinting kmem_slab:\n---------------------\n");
	printf("Objsize: %d (%p)\n", slab->obj_size, slab->obj_size);
	printf("NumBusy: %d\n", slab->num_busy_obj);
	printf("Num_total: %d\n", slab->num_total_obj);
	if (slab->obj_size + sizeof(uintptr_t) < SLAB_LARGE_CUTOFF) {
		printf("Free Small obj: %p\n", slab->free_small_obj);
		void *buf = slab->free_small_obj;
		for (int i = 0; i < slab->num_total_obj; i++) {
			printf("Addr of buf: %p, Addr of next: %p\n", buf,
			       *((uintptr_t**)buf));
			buf += slab->obj_size;
		}
	} else {
		printf("This is a big slab!\n");
	}
}