/* Copyright (c) 2009 The Regents of the University of California
 * Barret Rhoden <brho@cs.berkeley.edu>
 * Kevin Klues <klueska@cs.berkeley.edu>
 * See LICENSE for details.
 *
 * Slab allocator, based on the SunOS 5.4 allocator paper.
 *
 * Note that we don't have a hash table mapping buf to bufctl for the large
 * buffer objects, so we use the same style as for small objects: the pointer
 * to the controlling bufctl is stored just past the end of the object (at
 * buf + obj_size).  This is what the TODO (BUF) markers refer to. */

#include <slab.h>
#include <stdio.h>
#include <assert.h>
#include <pmap.h>
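/* Buffer layout sketch (added for clarity; not in the original sources).  Each
 * buffer is padded out to a_slab->obj_size = ROUNDUP(cp->obj_size +
 * sizeof(uintptr_t), cp->align), so one pointer-sized word lives just past the
 * object itself:
 *
 *     buf                 buf + cp->obj_size
 *      |   object data     | next free obj (small) or bufctl ptr (large) | pad
 *
 * For small-object slabs the struct kmem_slab sits at the end of the page, so
 * ROUNDDOWN(buf, PGSIZE) + PGSIZE - sizeof(struct kmem_slab) recovers it; for
 * large-object slabs the kmem_slab and kmem_bufctls come from their own
 * caches. */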
struct kmem_cache_list kmem_caches;
spinlock_t kmem_caches_lock;

/* Cache of the kmem_cache objects, needed for bootstrapping */
struct kmem_cache kmem_cache_cache;
struct kmem_cache *kmem_slab_cache, *kmem_bufctl_cache;
static void __kmem_cache_create(struct kmem_cache *kc, const char *name,
                                size_t obj_size, int align, int flags,
                                void (*ctor)(void *, size_t),
                                void (*dtor)(void *, size_t))
{
	spinlock_init(&kc->cache_lock);
	kc->name = name;
	kc->obj_size = obj_size;
	kc->align = align;
	kc->flags = flags;
	TAILQ_INIT(&kc->full_slab_list);
	TAILQ_INIT(&kc->partial_slab_list);
	TAILQ_INIT(&kc->empty_slab_list);
	kc->ctor = ctor;
	kc->dtor = dtor;

	/* put in the cache list based on its size */
	struct kmem_cache *i, *prev = NULL;
	spin_lock(&kmem_caches_lock);
	/* find the kmem_cache before us in the list.  yes, this is O(n). */
	SLIST_FOREACH(i, &kmem_caches, link) {
		if (i->obj_size < kc->obj_size)
			prev = i;
		else
			break;
	}
	if (prev)
		SLIST_INSERT_AFTER(prev, kc, link);
	else
		SLIST_INSERT_HEAD(&kmem_caches, kc, link);
	spin_unlock(&kmem_caches_lock);
}
void kmem_cache_init(void)
{
	spinlock_init(&kmem_caches_lock);
	SLIST_INIT(&kmem_caches);
	/* We need to call the __ version directly to bootstrap the global
	 * kmem_cache_cache. */
	__kmem_cache_create(&kmem_cache_cache, "kmem_cache",
	                    sizeof(struct kmem_cache),
	                    __alignof__(struct kmem_cache), 0, NULL, NULL);
	/* Build the slab and bufctl caches */
	kmem_slab_cache = kmem_cache_create("kmem_slab", sizeof(struct kmem_slab),
	                                    __alignof__(struct kmem_slab), 0,
	                                    NULL, NULL);
	kmem_bufctl_cache = kmem_cache_create("kmem_bufctl",
	                                      sizeof(struct kmem_bufctl),
	                                      __alignof__(struct kmem_bufctl), 0,
	                                      NULL, NULL);
}
/* Cache management */
struct kmem_cache *kmem_cache_create(const char *name, size_t obj_size,
                                     int align, int flags,
                                     void (*ctor)(void *, size_t),
                                     void (*dtor)(void *, size_t))
{
	struct kmem_cache *kc = kmem_cache_alloc(&kmem_cache_cache, 0);
	__kmem_cache_create(kc, name, obj_size, align, flags, ctor, dtor);
	return kc;
}
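/* Usage sketch (illustrative only, not part of the original file): a client
 * subsystem would typically create one cache per object type and then pull
 * objects from it.  'struct foo', 'foo_ctor', 'foo_cache', and 'foo_demo' are
 * hypothetical names.  Note that in this allocator the constructor runs when a
 * slab is grown, not on every allocation. */
#if 0
struct foo {
	int refcnt;
	void *data;
};

static struct kmem_cache *foo_cache;

/* Runs once per buffer when its slab is created */
static void foo_ctor(void *obj, size_t size)
{
	struct foo *f = (struct foo*)obj;
	f->refcnt = 0;
	f->data = NULL;
}

static void foo_demo(void)
{
	foo_cache = kmem_cache_create("foo", sizeof(struct foo),
	                              __alignof__(struct foo), 0, foo_ctor, NULL);
	struct foo *f = kmem_cache_alloc(foo_cache, 0);
	f->refcnt = 1;
	/* ... use f ... */
	kmem_cache_free(foo_cache, f);
	kmem_cache_destroy(foo_cache);
}
#endif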
static void kmem_slab_destroy(struct kmem_cache *cp, struct kmem_slab *a_slab)
{
	if (cp->obj_size <= SLAB_LARGE_CUTOFF) {
		/* Deconstruct all the objects, if necessary */
		if (cp->dtor) {
			void *buf = a_slab->free_small_obj;
			for (int i = 0; i < a_slab->num_total_obj; i++) {
				cp->dtor(buf, cp->obj_size);
				buf += a_slab->obj_size;
			}
		}
		page_decref(kva2page(ROUNDDOWN(a_slab, PGSIZE)));
	} else {
		struct kmem_bufctl *i;
		void *page_start = (void*)-1;
		// compute how many pages are allocated, given a power of two allocator
		size_t num_pages = ROUNDUPPWR2(a_slab->num_total_obj * a_slab->obj_size)
		                   / PGSIZE;
		TAILQ_FOREACH(i, &a_slab->bufctl_freelist, link) {
			// Track the lowest buffer address, which is the start of the buffer
			page_start = MIN(page_start, i->buf_addr);
			/* Deconstruct all the objects, if necessary */
			if (cp->dtor) // TODO: (BUF)
				cp->dtor(i->buf_addr, cp->obj_size);
			kmem_cache_free(kmem_bufctl_cache, i);
		}
		// free the pages for the slab's buffer
		free_cont_pages(page_start, LOG2_UP(num_pages));
		// free the slab object
		kmem_cache_free(kmem_slab_cache, a_slab);
	}
}
/* Once you call destroy, never use this cache again... o/w there may be weird
 * races, and other serious issues. */
void kmem_cache_destroy(struct kmem_cache *cp)
{
	struct kmem_slab *a_slab, *next;

	spin_lock(&cp->cache_lock);
	assert(TAILQ_EMPTY(&cp->full_slab_list));
	assert(TAILQ_EMPTY(&cp->partial_slab_list));
	/* Clean out the empty list.  We can't use a regular FOREACH here, since the
	 * link element is stored in the slab struct, which is stored on the page
	 * that we are freeing. */
	a_slab = TAILQ_FIRST(&cp->empty_slab_list);
	while (a_slab) {
		next = TAILQ_NEXT(a_slab, link);
		kmem_slab_destroy(cp, a_slab);
		a_slab = next;
	}
	spin_lock(&kmem_caches_lock);
	SLIST_REMOVE(&kmem_caches, cp, kmem_cache, link);
	spin_unlock(&kmem_caches_lock);
	/* Unlock before giving cp back to the cache_cache; once it is freed, its
	 * lock is no longer ours to touch. */
	spin_unlock(&cp->cache_lock);
	kmem_cache_free(&kmem_cache_cache, cp);
}
/* Front end: clients of caches use these */
void *kmem_cache_alloc(struct kmem_cache *cp, int flags)
{
	void *retval = NULL;

	spin_lock(&cp->cache_lock);
	// look at partial list
	struct kmem_slab *a_slab = TAILQ_FIRST(&cp->partial_slab_list);
	// if none, go to the empty list, get an empty slab, and make it partial
	if (!a_slab) {
		if (TAILQ_EMPTY(&cp->empty_slab_list))
			// TODO: think about non-sleeping flags
			kmem_cache_grow(cp);
		// move to partial list
		a_slab = TAILQ_FIRST(&cp->empty_slab_list);
		TAILQ_REMOVE(&cp->empty_slab_list, a_slab, link);
		TAILQ_INSERT_HEAD(&cp->partial_slab_list, a_slab, link);
	}
	// have a partial now (a_slab), get an item, return item
	if (cp->obj_size <= SLAB_LARGE_CUTOFF) {
		retval = a_slab->free_small_obj;
		/* Add obj_size to reach the pointer at the end of the buffer, which
		 * holds the next free_small_obj */
		a_slab->free_small_obj = *(uintptr_t**)(a_slab->free_small_obj +
		                                        cp->obj_size);
	} else {
		// rip the first bufctl out of the partial slab's buf list
		struct kmem_bufctl *a_bufctl = TAILQ_FIRST(&a_slab->bufctl_freelist);
		TAILQ_REMOVE(&a_slab->bufctl_freelist, a_bufctl, link);
		retval = a_bufctl->buf_addr;
	}
	a_slab->num_busy_obj++;
	// Check if we are full, if so, move to the full list
	if (a_slab->num_busy_obj == a_slab->num_total_obj) {
		TAILQ_REMOVE(&cp->partial_slab_list, a_slab, link);
		TAILQ_INSERT_HEAD(&cp->full_slab_list, a_slab, link);
	}
	spin_unlock(&cp->cache_lock);
	return retval;
}
static inline struct kmem_bufctl *buf2bufctl(void *buf, size_t offset)
{
	// TODO: hash table for back reference (BUF)
	return *((struct kmem_bufctl**)(buf + offset));
}
void kmem_cache_free(struct kmem_cache *cp, void *buf)
{
	struct kmem_slab *a_slab;
	struct kmem_bufctl *a_bufctl;

	spin_lock(&cp->cache_lock);
	if (cp->obj_size <= SLAB_LARGE_CUTOFF) {
		// find its slab, which lives at the end of the page
		a_slab = (struct kmem_slab*)(ROUNDDOWN(buf, PGSIZE) + PGSIZE -
		                             sizeof(struct kmem_slab));
		/* write the location of the next free small obj to the space at the end
		 * of the buffer, then list buf as the next free small obj */
		*(uintptr_t**)(buf + cp->obj_size) = a_slab->free_small_obj;
		a_slab->free_small_obj = buf;
	} else {
		/* Give the bufctl back to the parent slab */
		// TODO: (BUF) change the interface to not take an offset
		a_bufctl = buf2bufctl(buf, cp->obj_size);
		a_slab = a_bufctl->my_slab;
		TAILQ_INSERT_HEAD(&a_slab->bufctl_freelist, a_bufctl, link);
	}
	a_slab->num_busy_obj--;
	// if it was full, move it to partial
	if (a_slab->num_busy_obj + 1 == a_slab->num_total_obj) {
		TAILQ_REMOVE(&cp->full_slab_list, a_slab, link);
		TAILQ_INSERT_HEAD(&cp->partial_slab_list, a_slab, link);
	} else if (!a_slab->num_busy_obj) {
		// if none are busy, move from partial to empty
		TAILQ_REMOVE(&cp->partial_slab_list, a_slab, link);
		TAILQ_INSERT_HEAD(&cp->empty_slab_list, a_slab, link);
	}
	spin_unlock(&cp->cache_lock);
}
/* Back end: internal functions */
/* When this returns, the cache has at least one slab in the empty list.  If
 * page_alloc fails, there are some serious issues.  This only grows by one slab
 * at a time.
 *
 * Grab the cache lock before calling this.
 *
 * TODO: think about page colouring issues with kernel memory allocation. */
void kmem_cache_grow(struct kmem_cache *cp)
{
	struct kmem_slab *a_slab;
	struct kmem_bufctl *a_bufctl;

	/* Drop the lock while we allocate and build the slab; it is re-taken below
	 * before the slab goes on the cache's empty list. */
	spin_unlock(&cp->cache_lock);
	if (cp->obj_size <= SLAB_LARGE_CUTOFF) {
		// Just get a single page for small slabs
		page_t *a_page;
		if (kpage_alloc(&a_page))
			panic("[German Accent]: OOM!!!");
		// the slab struct is stored at the end of the page
		a_slab = (struct kmem_slab*)(page2kva(a_page) + PGSIZE -
		                             sizeof(struct kmem_slab));
		// Need to add room for the next free item pointer in the object buffer.
		a_slab->obj_size = ROUNDUP(cp->obj_size + sizeof(uintptr_t), cp->align);
		a_slab->num_busy_obj = 0;
		a_slab->num_total_obj = (PGSIZE - sizeof(struct kmem_slab)) /
		                        a_slab->obj_size;
		// TODO: consider staggering this IAW section 4.3
		a_slab->free_small_obj = page2kva(a_page);
		/* Walk and create the free list.  Each item stores the location of the
		 * next one at the end of its block; the last one stores NULL. */
		void *buf = a_slab->free_small_obj;
		for (int i = 0; i < a_slab->num_total_obj - 1; i++) {
			// Initialize the object, if necessary
			if (cp->ctor)
				cp->ctor(buf, cp->obj_size);
			*(uintptr_t**)(buf + cp->obj_size) = buf + a_slab->obj_size;
			buf += a_slab->obj_size;
		}
		// construct the last object too; the loop above stops one short
		if (cp->ctor)
			cp->ctor(buf, cp->obj_size);
		*((uintptr_t**)(buf + cp->obj_size)) = NULL;
	} else {
		a_slab = kmem_cache_alloc(kmem_slab_cache, 0);
		// TODO: hash table for back reference (BUF)
		a_slab->obj_size = ROUNDUP(cp->obj_size + sizeof(uintptr_t), cp->align);
		// alloc n pages, such that it can hold at least 8 items
		size_t num_pgs = ROUNDUP(NUM_BUF_PER_SLAB * a_slab->obj_size, PGSIZE) /
		                 PGSIZE;
		// round up for the contiguous page allocator
		void *buf = get_cont_pages(LOG2_UP(num_pgs), 0);
		a_slab->num_busy_obj = 0;
		a_slab->num_total_obj = ROUNDUPPWR2(num_pgs)*PGSIZE / a_slab->obj_size;
		TAILQ_INIT(&a_slab->bufctl_freelist);
		/* for each buffer, set up a bufctl and point to the buffer */
		for (int i = 0; i < a_slab->num_total_obj; i++) {
			// Initialize the object, if necessary
			if (cp->ctor)
				cp->ctor(buf, cp->obj_size);
			a_bufctl = kmem_cache_alloc(kmem_bufctl_cache, 0);
			TAILQ_INSERT_HEAD(&a_slab->bufctl_freelist, a_bufctl, link);
			a_bufctl->buf_addr = buf;
			a_bufctl->my_slab = a_slab;
			// TODO: (BUF) write the bufctl reference at the end of the buffer.
			*(struct kmem_bufctl**)(buf + cp->obj_size) = a_bufctl;
			buf += a_slab->obj_size;
		}
	}
	/* Re-take the cache lock before touching the cache's lists; the caller is
	 * still responsible for the final unlock. */
	spin_lock(&cp->cache_lock);
	// add a_slab to the empty_list
	TAILQ_INSERT_HEAD(&cp->empty_slab_list, a_slab, link);
}
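/* Worked sizing example for the large-object path (illustrative numbers only;
 * the real values of PGSIZE, NUM_BUF_PER_SLAB, etc. come from the headers):
 * assume cp->obj_size = 1024, cp->align = 16, sizeof(uintptr_t) = 8,
 * PGSIZE = 4096, and NUM_BUF_PER_SLAB = 8.  Then:
 *   a_slab->obj_size = ROUNDUP(1024 + 8, 16)          = 1040
 *   num_pgs          = ROUNDUP(8 * 1040, 4096) / 4096 = 3
 *   get_cont_pages(LOG2_UP(3) = 2, 0) hands back a 4-page contiguous run
 *   num_total_obj    = ROUNDUPPWR2(3) * 4096 / 1040   = 15
 * so the slab carves 15 buffers out of the 4-page run, each with a bufctl. */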
/* This deallocs every slab from the empty list.  TODO: think a bit more about
 * this.  We can do things like not free all of the empty lists to prevent
 * thrashing.  See 3.4 in the paper. */
void kmem_cache_reap(struct kmem_cache *cp)
{
	struct kmem_slab *a_slab, *next;

	// Destroy all empty slabs.  Refer to the notes about the while loop in
	// kmem_cache_destroy().
	spin_lock(&cp->cache_lock);
	a_slab = TAILQ_FIRST(&cp->empty_slab_list);
	while (a_slab) {
		next = TAILQ_NEXT(a_slab, link);
		// pull it off the empty list before its memory is handed back
		TAILQ_REMOVE(&cp->empty_slab_list, a_slab, link);
		kmem_slab_destroy(cp, a_slab);
		a_slab = next;
	}
	spin_unlock(&cp->cache_lock);
}
void print_kmem_cache(struct kmem_cache *cp)
{
	spin_lock(&cp->cache_lock);
	printk("\nPrinting kmem_cache:\n---------------------\n");
	printk("Name: %s\n", cp->name);
	printk("Objsize: %d\n", cp->obj_size);
	printk("Align: %d\n", cp->align);
	printk("Flags: 0x%08x\n", cp->flags);
	printk("Constructor: 0x%08x\n", cp->ctor);
	printk("Destructor: 0x%08x\n", cp->dtor);
	/* Print the first slab on each list; the list heads are structs, so they
	 * can't be passed to printk directly. */
	printk("Slab Full: 0x%08x\n", TAILQ_FIRST(&cp->full_slab_list));
	printk("Slab Partial: 0x%08x\n", TAILQ_FIRST(&cp->partial_slab_list));
	printk("Slab Empty: 0x%08x\n", TAILQ_FIRST(&cp->empty_slab_list));
	spin_unlock(&cp->cache_lock);
}
void print_kmem_slab(struct kmem_slab *slab)
{
	printk("\nPrinting kmem_slab:\n---------------------\n");
	printk("Objsize: %d (%p)\n", slab->obj_size, slab->obj_size);
	printk("NumBusy: %d\n", slab->num_busy_obj);
	printk("Num_total: %d\n", slab->num_total_obj);
	if (slab->obj_size + sizeof(uintptr_t) < SLAB_LARGE_CUTOFF) {
		printk("Free Small obj: 0x%08x\n", slab->free_small_obj);
		void *buf = slab->free_small_obj;
		for (int i = 0; i < slab->num_total_obj; i++) {
			printk("Addr of buf: 0x%08x, Addr of next: 0x%08x\n", buf,
			       *((uintptr_t**)buf));
			buf += slab->obj_size;
		}
	} else {
		printk("This is a big slab!\n");
	}
}