akaros/kern/drivers/dev/kfs.c
<<
>>
Prefs
   1/* Copyright (c) 2018 Google Inc
   2 * Barret Rhoden <brho@cs.berkeley.edu>
   3 * See LICENSE for details.
   4 *
   5 * #kfs, in-memory ram filesystem, pulling from the kernel's embedded CPIO
   6 */
   7
   8#include <ns.h>
   9#include <kmalloc.h>
  10#include <string.h>
  11#include <stdio.h>
  12#include <assert.h>
  13#include <error.h>
  14#include <tree_file.h>
  15#include <pmap.h>
  16#include <cpio.h>
  17
  18struct dev kfs_devtab;
  19
  20struct kfs {
  21        struct tree_filesystem          tfs;
  22        atomic_t                        qid;
  23} kfs;
  24
  25static uint64_t kfs_get_qid_path(void)
  26{
  27        return atomic_fetch_and_add(&kfs.qid, 1);
  28}
  29
  30static char *devname(void)
  31{
  32        return kfs_devtab.name;
  33}
  34
  35static void kfs_tf_free(struct tree_file *tf)
  36{
  37        /* We have nothing special hanging off the TF */
  38}
  39
  40static void kfs_tf_unlink(struct tree_file *parent, struct tree_file *child)
  41{
  42        /* This is the "+1 for existing" ref. */
  43        tf_kref_put(child);
  44}
  45
  46static void __kfs_tf_init(struct tree_file *tf, int dir_type, int dir_dev,
  47                          struct username *user, int perm)
  48{
  49        struct dir *dir = &tf->file.dir;
  50
  51        fs_file_init_dir(&tf->file, dir_type, dir_dev, user, perm);
  52        dir->qid.path = kfs_get_qid_path();
  53        dir->qid.vers = 0;
  54        /* This is the "+1 for existing" ref.  There is no backing store for the
  55         * FS, such as a disk or 9p, so we can't get rid of a file until it is
  56         * unlinked and decreffed.  Note that KFS doesn't use pruners or
  57         * anything else. */
  58        __kref_get(&tf->kref, 1);
  59}
  60
  61/* Note: If your TFS doesn't support symlinks, you need to error out */
  62static void kfs_tf_create(struct tree_file *parent, struct tree_file *child,
  63                          int perm)
  64{
  65        __kfs_tf_init(child, parent->file.dir.type, parent->file.dir.dev, &eve,
  66                      perm);
  67}
  68
  69static void kfs_tf_rename(struct tree_file *tf, struct tree_file *old_parent,
  70                          struct tree_file *new_parent, const char *name,
  71                          int flags)
  72{
  73        /* We don't have a backend, so we don't need to do anything additional
  74         * for rename. */
  75}
  76
  77static bool kfs_tf_has_children(struct tree_file *parent)
  78{
  79        /* The tree_file parent list is complete and not merely a cache for a
  80         * real backend. */
  81        return !list_empty(&parent->children);
  82}
  83
  84struct tree_file_ops kfs_tf_ops = {
  85        .free = kfs_tf_free,
  86        .unlink = kfs_tf_unlink,
  87        .lookup = NULL,
  88        .create = kfs_tf_create,
  89        .rename = kfs_tf_rename,
  90        .has_children = kfs_tf_has_children,
  91};
  92
  93/* Fills page with its contents from its backing store file.  For KFS, that
  94 * means we're creating or extending a file, and the contents are 0.  Note the
  95 * page/offset might be beyond the current file length, based on the current
  96 * pagemap code. */
  97static int kfs_pm_readpage(struct page_map *pm, struct page *pg)
  98{
  99        memset(page2kva(pg), 0, PGSIZE);
 100        atomic_or(&pg->pg_flags, PG_UPTODATE);
 101        /* Pretend that we blocked while filing this page.  This catches a lot
 102         * of bugs.  It does slightly slow down the kernel, but it's only when
 103         * filling the page cache, and considering we are using a RAMFS, you
 104         * shouldn't measure things that actually rely on KFS's performance. */
 105        kthread_usleep(1);
 106        return 0;
 107}
 108
 109/* Meant to take the page from PM and flush to backing store.  There is no
 110 * backing store. */
 111static int kfs_pm_writepage(struct page_map *pm, struct page *pg)
 112{
 113        return 0;
 114}
 115
 116static void kfs_fs_punch_hole(struct fs_file *f, off64_t begin, off64_t end)
 117{
 118}
 119
 120static bool kfs_fs_can_grow_to(struct fs_file *f, size_t len)
 121{
 122        /* TODO: implement some sort of memory limit */
 123        return true;
 124}
 125
 126struct fs_file_ops kfs_fs_ops = {
 127        .readpage = kfs_pm_readpage,
 128        .writepage = kfs_pm_writepage,
 129        .punch_hole = kfs_fs_punch_hole,
 130        .can_grow_to = kfs_fs_can_grow_to,
 131};
 132
 133/* Consumes root's chan, even on error. */
 134static struct chan *__add_kfs_dir(struct chan *root, char *path,
 135                                  struct cpio_bin_hdr *c_bhdr)
 136{
 137        ERRSTACK(1);
 138        struct chan *c;
 139
 140        if (waserror()) {
 141                warn("failed to add %s", path);
 142                cclose(root);
 143                poperror();
 144                return NULL;
 145        }
 146        c = namec_from(root, path, Acreate, O_EXCL, DMDIR | c_bhdr->c_mode,
 147                       NULL);
 148        poperror();
 149        return c;
 150}
 151
 152static struct chan *__add_kfs_symlink(struct chan *root, char *path,
 153                                      struct cpio_bin_hdr *c_bhdr)
 154{
 155        ERRSTACK(1);
 156        struct chan *c;
 157        char target[c_bhdr->c_filesize + 1];
 158
 159        if (waserror()) {
 160                warn("failed to add %s", path);
 161                cclose(root);
 162                poperror();
 163                return NULL;
 164        }
 165        strncpy(target, c_bhdr->c_filestart, c_bhdr->c_filesize);
 166        target[c_bhdr->c_filesize] = 0;
 167        c = namec_from(root, path, Acreate, O_EXCL,
 168                       DMSYMLINK | S_IRWXU | S_IRWXG | S_IRWXO, target);
 169        poperror();
 170        return c;
 171}
 172
 173static struct chan *__add_kfs_file(struct chan *root, char *path,
 174                                   struct cpio_bin_hdr *c_bhdr)
 175{
 176        ERRSTACK(1);
 177        struct chan *c;
 178        off64_t offset = 0;
 179        size_t ret, amt = c_bhdr->c_filesize;
 180        void *buf = c_bhdr->c_filestart;
 181
 182        if (waserror()) {
 183                warn("failed to add %s", path);
 184                cclose(root);
 185                poperror();
 186                return NULL;
 187        }
 188        c = namec_from(root, path, Acreate, O_EXCL | O_RDWR, c_bhdr->c_mode,
 189                       NULL);
 190        poperror();
 191        if (waserror()) {
 192                warn("failed to modify %s", path);
 193                cclose(c);
 194                poperror();
 195                return NULL;
 196        }
 197        while (amt) {
 198                ret = devtab[c->type].write(c, buf + offset, amt, offset);
 199                amt -= ret;
 200                offset += ret;
 201        }
 202        poperror();
 203        return c;
 204}
 205
 206static int add_kfs_entry(struct cpio_bin_hdr *c_bhdr, void *cb_arg)
 207{
 208        struct tree_file *root = cb_arg;
 209        char *path = c_bhdr->c_filename;
 210        struct chan *c;
 211        struct tree_file *tf;
 212        struct timespec ts;
 213
 214        /* Root of the FS, already part of KFS */
 215        if (!strcmp(path, "."))
 216                return 0;
 217        c = tree_file_alloc_chan(root, &kfs_devtab, "#kfs");
 218        switch (c_bhdr->c_mode & CPIO_FILE_MASK) {
 219        case (CPIO_DIRECTORY):
 220                c = __add_kfs_dir(c, path, c_bhdr);
 221                break;
 222        case (CPIO_SYMLINK):
 223                c = __add_kfs_symlink(c, path, c_bhdr);
 224                break;
 225        case (CPIO_REG_FILE):
 226                c = __add_kfs_file(c, path, c_bhdr);
 227                break;
 228        default:
 229                cclose(c);
 230                warn("Unknown file type %d in the CPIO!",
 231                     c_bhdr->c_mode & CPIO_FILE_MASK);
 232                return -1;
 233        }
 234        if (!c)
 235                return -1;
 236        tf = chan_to_tree_file(c);
 237        ts.tv_sec = c_bhdr->c_mtime;
 238        ts.tv_nsec = 0;
 239        /* Lockless */
 240        __set_acmtime_to(&tf->file, FSF_ATIME | FSF_BTIME | FSF_CTIME |
 241                         FSF_MTIME, &ts);
 242        /* TODO: consider UID/GID.  Right now, everything is owned by eve. */
 243        cclose(c);
 244        return 0;
 245}
 246
 247struct cpio_info {
 248        void *base;
 249        size_t sz;
 250};
 251
 252static void kfs_get_cpio_info(struct cpio_info *ci)
 253{
 254        extern uint8_t _binary_obj_kern_initramfs_cpio_size[];
 255        extern uint8_t _binary_obj_kern_initramfs_cpio_start[];
 256
 257        ci->base = (void*)_binary_obj_kern_initramfs_cpio_start;
 258        ci->sz = (size_t)_binary_obj_kern_initramfs_cpio_size;
 259}
 260
 261static void kfs_extract_cpio(struct cpio_info *ci)
 262{
 263        parse_cpio_entries(ci->base, ci->sz, add_kfs_entry, kfs.tfs.root);
 264}
 265
 266static void kfs_free_cpio(struct cpio_info *ci)
 267{
 268        void *base = ci->base;
 269        size_t sz = ci->sz;
 270
 271        /* The base arena requires page aligned, page sized segments. */
 272        sz -= ROUNDUP(base, PGSIZE) - base;
 273        sz = ROUNDDOWN(sz, PGSIZE);
 274        base = ROUNDUP(base, PGSIZE);
 275        /* Careful - the CPIO is part of the kernel blob and a code address. */
 276        base = KBASEADDR(base);
 277        printk("Freeing %d MB of CPIO RAM\n", sz >> 20);
 278        arena_add(base_arena, base, sz, MEM_WAIT);
 279}
 280
 281static void kfs_init(void)
 282{
 283        struct tree_filesystem *tfs = &kfs.tfs;
 284        struct cpio_info ci[1];
 285
 286        /* This gives us one ref on tfs->root. */
 287        tfs_init(tfs);
 288        tfs->tf_ops = kfs_tf_ops;
 289        tfs->fs_ops = kfs_fs_ops;
 290        /* Note this gives us the "+1 for existing" ref on tfs->root. */
 291        __kfs_tf_init(tfs->root, &kfs_devtab - devtab, 0, &eve, DMDIR | 0777);
 292        /* Other devices might want to create things like kthreads that run the
 293         * LRU pruner or PM sweeper. */
 294        kfs_get_cpio_info(ci);
 295        kfs_extract_cpio(ci);
 296        kfs_free_cpio(ci);
 297        /* This has another kref.  Note that each attach gets a ref and each new
 298         * process gets a ref. */
 299        kern_slash = tree_file_alloc_chan(kfs.tfs.root, &kfs_devtab, "/");
 300}
 301
 302static struct chan *kfs_attach(char *spec)
 303{
 304        /* The root TF has a new kref for the attach chan */
 305        return tree_file_alloc_chan(kfs.tfs.root, &kfs_devtab, "#kfs");
 306}
 307
 308static unsigned long kfs_chan_ctl(struct chan *c, int op, unsigned long a1,
 309                                  unsigned long a2, unsigned long a3,
 310                                  unsigned long a4)
 311{
 312        switch (op) {
 313        case CCTL_SYNC:
 314                return 0;
 315        default:
 316                return tree_chan_ctl(c, op, a1, a2, a3, a4);
 317        }
 318}
 319
 320struct dev kfs_devtab __devtab = {
 321        .name = "kfs",
 322        .reset = devreset,
 323        .init = kfs_init,
 324        .shutdown = devshutdown,
 325        .attach = kfs_attach,
 326        .walk = tree_chan_walk,
 327        .stat = tree_chan_stat,
 328        .open = tree_chan_open,
 329        .create = tree_chan_create,
 330        .close = tree_chan_close,
 331        .read = tree_chan_read,
 332        .bread = devbread,
 333        .write = tree_chan_write,
 334        .bwrite = devbwrite,
 335        .remove = tree_chan_remove,
 336        .rename = tree_chan_rename,
 337        .wstat = tree_chan_wstat,
 338        .power = devpower,
 339        .chaninfo = devchaninfo,
 340        .mmap = tree_chan_mmap,
 341        .chan_ctl = kfs_chan_ctl,
 342};
 343