akaros/kern/src/ns/allocb.c
<<
>>
Prefs
   1/* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
   2 * Portions Copyright © 1997-1999 Vita Nuova Limited
   3 * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
   4 *                                (www.vitanuova.com)
   5 * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
   6 *
   7 * Modified for the Akaros operating system:
   8 * Copyright (c) 2013-2014 The Regents of the University of California
   9 * Copyright (c) 2013-2015 Google Inc.
  10 *
  11 * Permission is hereby granted, free of charge, to any person obtaining a copy
  12 * of this software and associated documentation files (the "Software"), to deal
  13 * in the Software without restriction, including without limitation the rights
  14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15 * copies of the Software, and to permit persons to whom the Software is
  16 * furnished to do so, subject to the following conditions:
  17 *
  18 * The above copyright notice and this permission notice shall be included in
  19 * all copies or substantial portions of the Software.
  20 *
  21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  27 * SOFTWARE. */
  28
  29#include <slab.h>
  30#include <kmalloc.h>
  31#include <kref.h>
  32#include <string.h>
  33#include <stdio.h>
  34#include <assert.h>
  35#include <error.h>
  36#include <cpio.h>
  37#include <pmap.h>
  38#include <smp.h>
  39#include <net/ip.h>
  40#include <process.h>
  41
  42/* Note that Hdrspc is only available via padblock (to the 'left' of the rp). */
  43enum {
  44        Hdrspc = 128,           /* leave room for high-level headers */
  45        Bdead = 0x51494F42,     /* "QIOB" */
  46        BLOCKALIGN = 32,        /* was the old BY2V in inferno, which was 8 */
  47};
  48
  49/*
  50 *  allocate blocks (round data base address to 64 bit boundary).
  51 *  if mallocz gives us more than we asked for, leave room at the front
  52 *  for header.
  53 */
  54struct block *block_alloc(size_t size, int mem_flags)
  55{
  56        struct block *b;
  57        uintptr_t addr;
  58        int n;
  59
  60        /* If Hdrspc is not block aligned it will cause issues. */
  61        static_assert(Hdrspc % BLOCKALIGN == 0);
  62
  63        b = kmalloc(sizeof(struct block) + size + Hdrspc + (BLOCKALIGN - 1),
  64                                mem_flags);
  65        if (b == NULL)
  66                return NULL;
  67
  68        b->next = NULL;
  69        b->list = NULL;
  70        b->free = NULL;
  71        b->flag = 0;
  72        b->extra_len = 0;
  73        b->nr_extra_bufs = 0;
  74        b->extra_data = 0;
  75        b->mss = 0;
  76        b->network_offset = 0;
  77        b->transport_offset = 0;
  78
  79        addr = (uintptr_t) b;
  80        addr = ROUNDUP(addr + sizeof(struct block), BLOCKALIGN);
  81        b->base = (uint8_t *) addr;
  82        /* TODO: support this */
  83        /* interesting. We can ask the allocator, after allocating,
  84         * the *real* size of the block we got. Very nice.
  85         * Not on akaros yet.
  86         b->lim = ((uint8_t*)b) + msize(b);
  87         * See use of n in commented code below
  88         */
  89        b->lim = ((uint8_t *) b) + sizeof(struct block) + size + Hdrspc +
  90                (BLOCKALIGN - 1);
  91        b->rp = b->base;
  92        /* TODO: support this */
  93        /* n is supposed to be Hdrspc + rear padding + extra reserved memory,
  94         * but since we don't currently support checking how much memory was
  95         * actually reserved, this is always Hdrspc + rear padding. After
  96         * rounding that down to BLOCKALIGN, it's always Hdrpsc since the
  97         * padding is < BLOCKALIGN.
  98         n = b->lim - b->base - size;
  99         b->rp += n & ~(BLOCKALIGN - 1);
 100         */
 101        b->rp += Hdrspc;
 102        b->wp = b->rp;
 103        /* b->base is aligned, rounded up from b
 104         * b->lim is the upper bound on our malloc
 105         * b->rp is advanced by some aligned amount, based on how much extra we
 106         * received from kmalloc and the Hdrspc. */
 107        return b;
 108}
 109
 110/* Makes sure b has nr_bufs extra_data.  Will grow, but not shrink, an existing
 111 * extra_data array.  When growing, it'll copy over the old entries.  All new
 112 * entries will be zeroed.  mem_flags determines if we'll block on kmallocs.
 113 *
 114 * Return 0 on success or -1 on error.
 115 * Caller is responsible for concurrent access to the block's metadata. */
 116int block_add_extd(struct block *b, unsigned int nr_bufs, int mem_flags)
 117{
 118        unsigned int old_nr_bufs = b->nr_extra_bufs;
 119        size_t old_amt = sizeof(struct extra_bdata) * old_nr_bufs;
 120        size_t new_amt = sizeof(struct extra_bdata) * nr_bufs;
 121        void *new_bdata;
 122
 123        if (old_nr_bufs >= nr_bufs)
 124                return 0;
 125        if (b->extra_data) {
 126                new_bdata = krealloc(b->extra_data, new_amt, mem_flags);
 127                if (!new_bdata)
 128                        return -1;
 129                memset(new_bdata + old_amt, 0, new_amt - old_amt);
 130        } else {
 131                new_bdata = kzmalloc(new_amt, mem_flags);
 132                if (!new_bdata)
 133                        return - 1;
 134        }
 135        b->extra_data = new_bdata;
 136        b->nr_extra_bufs = nr_bufs;
 137        return 0;
 138}
 139
 140/* Go backwards from the end of the list, remember the last unused slot, and
 141 * stop when a used slot is encountered. */
 142static struct extra_bdata *next_unused_slot(struct block *b)
 143{
 144        struct extra_bdata *ebd = NULL;
 145
 146        for (int i = b->nr_extra_bufs - 1; i >= 0; i--) {
 147                if (b->extra_data[i].base)
 148                        break;
 149                ebd = &b->extra_data[i];
 150        }
 151        return ebd;
 152}
 153
 154/* Append an extra data buffer @base with offset @off of length @len to block
 155 * @b.  Reuse an unused extra data slot if there's any.
 156 * Return 0 on success or -1 on error. */
 157int block_append_extra(struct block *b, uintptr_t base, uint32_t off,
 158                       uint32_t len, int mem_flags)
 159{
 160        unsigned int nr_bufs = b->nr_extra_bufs + 1;
 161        struct extra_bdata *ebd;
 162
 163        ebd = next_unused_slot(b);
 164        if (!ebd) {
 165                if (block_add_extd(b, nr_bufs, mem_flags) != 0)
 166                        return -1;
 167                ebd = next_unused_slot(b);
 168                assert(ebd);
 169        }
 170        ebd->base = base;
 171        ebd->off = off;
 172        ebd->len = len;
 173        b->extra_len += ebd->len;
 174        return 0;
 175}
 176
 177/* There's metadata in each block related to the data payload.  For instance,
 178 * the TSO mss, the offsets to various headers, whether csums are needed, etc.
 179 * When you create a new block, like in copyblock, this will copy those bits
 180 * over. */
 181void block_copy_metadata(struct block *new_b, struct block *old_b)
 182{
 183        new_b->flag |= (old_b->flag & BLOCK_META_FLAGS);
 184        new_b->tx_csum_offset = old_b->tx_csum_offset;
 185        new_b->mss = old_b->mss;
 186        new_b->network_offset = old_b->network_offset;
 187        new_b->transport_offset = old_b->transport_offset;
 188        new_b->free = old_b->free;
 189
 190        /* This is probably OK.  Right now, no one calls us with a blocklist.
 191         * Any callers that do would need to manage 'next', either to avoid
 192         * leaking memory (of old_b is freed) or to have multiple pointers to
 193         * the same block (if new_b is a copy for e.g. snoop). */
 194        warn_on(old_b->next);
 195}
 196
 197void block_reset_metadata(struct block *b)
 198{
 199        b->flag &= ~BLOCK_META_FLAGS;
 200        b->tx_csum_offset = 0;
 201        b->mss = 0;
 202        b->network_offset = 0;
 203        b->transport_offset = 0;
 204        b->free = NULL;
 205}
 206
 207/* Adds delta (which may be negative) to the block metadata offsets that are
 208 * relative to b->rp. */
 209void block_add_to_offsets(struct block *b, int delta)
 210{
 211        /* Note we do not add to tx_csum_offset.  That is relative to
 212         * transport_offset */
 213        b->network_offset += delta;
 214        b->transport_offset += delta;
 215}
 216
 217/* Transfers extra data from old to new.  This is not a copy nor a
 218 * qclone/refcount increase on the extra data blobs.  The old block loses the
 219 * data.  This changes BLEN for both, but not BHLEN.  'new' may have preexisting
 220 * ebds. */
 221void block_transfer_extras(struct block *new, struct block *old)
 222{
 223        struct extra_bdata *ebd;
 224
 225        for (int i = 0; i < old->nr_extra_bufs; i++) {
 226                ebd = &old->extra_data[i];
 227                if (!ebd->base || !ebd->len)
 228                        continue;
 229                block_append_extra(new, ebd->base, ebd->off, ebd->len,
 230                                   MEM_WAIT);
 231        }
 232
 233        old->extra_len = 0;
 234        old->nr_extra_bufs = 0;
 235        kfree(old->extra_data);
 236        old->extra_data = NULL;
 237}
 238
 239/* Like block_transfer_extras(), but new may not have preexisting ebds. */
 240void block_replace_extras(struct block *new, struct block *old)
 241{
 242        assert(!new->extra_data);
 243        new->extra_len = old->extra_len;
 244        new->nr_extra_bufs = old->nr_extra_bufs;
 245        new->extra_data = old->extra_data;
 246        old->extra_len = 0;
 247        old->nr_extra_bufs = 0;
 248        old->extra_data = NULL;
 249}
 250
 251/* Given a block, return a block with identical content but as if you allocated
 252 * it freshly with 'size', meaning with size bytes in the header/main body, some
 253 * of which contain the block's main body data in the new block.  Note all
 254 * blocks have an extra Hdrspc bytes to the left that is not counted.
 255 *
 256 * One thing to consider is a block that has 'moved to the right' in its main
 257 * body.  i.e. it used to have data, such as TCP/IP headers, but we've since
 258 * incremented b->rp.  We're near the end of the buffer and lim - wp is small.
 259 * This will give us a new block with the existing contents at the new 'default'
 260 * rp.  The old data to the left of rp will be gone.
 261 *
 262 * b may be in a blist.  We'll deal with its next pointer.  If b is in the
 263 * middle of a blist or a qio bfirst or blast, then the caller needs to deal
 264 * with pointers to it. */
 265struct block *block_realloc(struct block *b, size_t size)
 266{
 267        struct block *new;
 268        size_t amt;
 269
 270        /* This means there is enough space for the old block data and the rest
 271         * of 'size'. */
 272        if (b->lim - b->wp + BHLEN(b) >= size)
 273                return b;
 274        size = MAX(size, BHLEN(b));
 275        new = block_alloc(size, MEM_WAIT);
 276        amt = block_copy_to_body(new, b->rp, BHLEN(b));
 277        assert(amt == BHLEN(b));
 278        new->next = b->next;
 279        b->next = NULL;
 280        block_copy_metadata(new, b);
 281        block_replace_extras(new, b);
 282        freeb(b);
 283        return new;
 284}
 285
 286size_t block_copy_to_body(struct block *to, void *from, size_t copy_amt)
 287{
 288        copy_amt = MIN(to->lim - to->wp, copy_amt);
 289        memcpy(to->wp, from, copy_amt);
 290        to->wp += copy_amt;
 291        return copy_amt;
 292}
 293
 294void free_block_extra(struct block *b)
 295{
 296        struct extra_bdata *ebd;
 297
 298        /* assuming our release method is kfree, which will change when we
 299         * support user buffers */
 300        for (int i = 0; i < b->nr_extra_bufs; i++) {
 301                ebd = &b->extra_data[i];
 302                if (ebd->base)
 303                        kfree((void*)ebd->base);
 304        }
 305        b->extra_len = 0;
 306        b->nr_extra_bufs = 0;
 307        kfree(b->extra_data);   /* harmless if it is 0 */
 308        b->extra_data = 0; /* in case the block is reused by a free override */
 309}
 310
 311/* Frees a block, returning its size (len, not alloc) */
 312size_t freeb(struct block *b)
 313{
 314        void *dead = (void *)Bdead;
 315        size_t ret;
 316
 317        if (b == NULL)
 318                return 0;
 319        ret = BLEN(b);
 320        free_block_extra(b);
 321        /*
 322         * drivers which perform non cache coherent DMA manage their own buffer
 323         * pool of uncached buffers and provide their own free routine.
 324         */
 325        if (b->free) {
 326                b->free(b);
 327                return ret;
 328        }
 329        warn_on(b->next);
 330        /* poison the block in case someone is still holding onto it */
 331        b->next = dead;
 332        b->rp = dead;
 333        b->wp = dead;
 334        b->lim = dead;
 335        b->base = dead;
 336        kfree(b);
 337        return ret;
 338}
 339
 340/* Free a list of blocks, returning their total size. */
 341size_t freeblist(struct block *b)
 342{
 343        struct block *next;
 344        size_t ret = 0;
 345
 346        for (; b != 0; b = next) {
 347                next = b->next;
 348                b->next = 0;
 349                ret += freeb(b);
 350        }
 351        return ret;
 352}
 353
 354void checkb(struct block *b, char *msg)
 355{
 356        void *dead = (void *)Bdead;
 357        struct extra_bdata *ebd;
 358        size_t extra_len = 0;
 359
 360        if (b == dead)
 361                panic("checkb b %s 0x%lx", msg, b);
 362        if (b->base == dead || b->lim == dead || b->next == dead
 363                || b->rp == dead || b->wp == dead) {
 364                printd("checkb: base 0x%8.8lx lim 0x%8.8lx next 0x%8.8lx\n",
 365                           b->base, b->lim, b->next);
 366                printd("checkb: rp 0x%8.8lx wp 0x%8.8lx\n", b->rp, b->wp);
 367                panic("checkb dead: %s\n", msg);
 368        }
 369
 370        if (b->base > b->lim)
 371                panic("checkb 0 %s 0x%lx 0x%lx", msg, b->base, b->lim);
 372        if (b->rp < b->base)
 373                panic("checkb 1 %s 0x%lx 0x%lx", msg, b->base, b->rp);
 374        if (b->wp < b->base)
 375                panic("checkb 2 %s 0x%lx 0x%lx", msg, b->base, b->wp);
 376        if (b->rp > b->lim)
 377                panic("checkb 3 %s 0x%lx 0x%lx", msg, b->rp, b->lim);
 378        if (b->wp > b->lim)
 379                panic("checkb 4 %s 0x%lx 0x%lx", msg, b->wp, b->lim);
 380        if (b->nr_extra_bufs && !b->extra_data)
 381                panic("checkb 5 %s missing extra_data", msg);
 382
 383        for (int i = 0; i < b->nr_extra_bufs; i++) {
 384                ebd = &b->extra_data[i];
 385                if (!ebd->base && (ebd->off || ebd->len))
 386                        panic("checkb %s: ebd %d has no base, but has off %d and len %d",
 387                              msg, i, ebd->off, ebd->len);
 388                if (ebd->base) {
 389                        if (!kmalloc_refcnt((void*)ebd->base))
 390                                panic("checkb %s: buf %d, base %p has no refcnt!\n",
 391                                      msg, i, ebd->base);
 392                        extra_len += ebd->len;
 393                }
 394        }
 395        if (extra_len != b->extra_len)
 396                panic("checkb %s: block extra_len %d differs from sum of ebd len %d",
 397                      msg, b->extra_len, extra_len);
 398}
 399
 400void printblock(struct block *b)
 401{
 402        unsigned char *c;
 403        unsigned int off, elen;
 404        struct extra_bdata *e;
 405
 406        if (b == NULL) {
 407                printk("block is null\n");
 408                return;
 409        }
 410
 411        print_lock();
 412        printk("block of BLEN = %d, with %d header and %d data in %d extras\n",
 413               BLEN(b), BHLEN(b), b->extra_len, b->nr_extra_bufs);
 414
 415        printk("header:\n");
 416        printk("%2x:\t", 0);
 417        off = 0;
 418        for (c = b->rp; c < b->wp; c++) {
 419                printk("  %02x", *c & 0xff);
 420                off++;
 421                if (off % 8 == 0) {
 422                        printk("\n");
 423                        printk("%2x:\t", off);
 424                }
 425        }
 426        printk("\n");
 427        elen = b->extra_len;
 428        for (int i = 0; (i < b->nr_extra_bufs) && elen; i++) {
 429                e = &b->extra_data[i];
 430                if (e->len == 0)
 431                        continue;
 432                elen -= e->len;
 433                printk("data %d:\n", i);
 434                printk("%2x:\t", 0);
 435                for (off = 0; off < e->len; off++) {
 436                        c = (unsigned char *)e->base + e->off + off;
 437                        printk("  %02x", *c & 0xff);
 438                        if ((off + 1) % 8 == 0 && off +1 < e->len) {
 439                                printk("\n");
 440                                printk("%2x:\t", off + 1);
 441                        }
 442                }
 443        }
 444        printk("\n");
 445        print_unlock();
 446}
 447