Block extra_data
[akaros.git] / kern / src / vfs.c
index b707fd2..d5f45a7 100644 (file)
@@ -715,7 +715,7 @@ void dentry_release(struct kref *kref)
 void __dentry_free(struct dentry *dentry)
 {
        if (dentry->d_inode)
-               printk("Freeing dentry %p: %s\n", dentry, dentry->d_name.name);
+               printd("Freeing dentry %p: %s\n", dentry, dentry->d_name.name);
        assert(dentry->d_op);   /* catch bugs.  a while back, some lacked d_op */
        dentry->d_op->d_release(dentry);
        /* TODO: check/test the boundaries on this. */
@@ -1084,6 +1084,24 @@ void stat_inode(struct inode *inode, struct kstat *kstat)
        kstat->st_ctime = inode->i_ctime;
 }
 
+/* Debugging helper: prints the address of kstat followed by the st_* fields
+ * listed below, one printk per field.
+ * NOTE(review): every field is printed with %p, including ones that are
+ * presumably plain integers (st_mode, st_nlink, st_size, ...).  Confirm this
+ * matches the field types and this kernel's printk, or switch to matching
+ * format specifiers. */
+void print_kstat(struct kstat *kstat)
+{
+       printk("kstat info for %p:\n", kstat);
+       printk("\tst_dev    : %p\n", kstat->st_dev);
+       printk("\tst_ino    : %p\n", kstat->st_ino);
+       printk("\tst_mode   : %p\n", kstat->st_mode);
+       printk("\tst_nlink  : %p\n", kstat->st_nlink);
+       printk("\tst_uid    : %p\n", kstat->st_uid);
+       printk("\tst_gid    : %p\n", kstat->st_gid);
+       printk("\tst_rdev   : %p\n", kstat->st_rdev);
+       printk("\tst_size   : %p\n", kstat->st_size);
+       printk("\tst_blksize: %p\n", kstat->st_blksize);
+       printk("\tst_blocks : %p\n", kstat->st_blocks);
+       printk("\tst_atime  : %p\n", kstat->st_atime);
+       printk("\tst_mtime  : %p\n", kstat->st_mtime);
+       printk("\tst_ctime  : %p\n", kstat->st_ctime);
+}
+
 /* Inode Cache management.  In general, search on the ino, get a refcnt'd value
  * back.  Remove does not give you a reference back - it should only be called
  * in inode_release(). */
@@ -1168,7 +1186,7 @@ ssize_t generic_file_read(struct file *file, char *buf, size_t count,
                }
                buf += copy_amt;
                page_off = 0;
-               page_decref(page);      /* it's still in the cache, we just don't need it */
+               pm_put_page(page);      /* it's still in the cache, we just don't need it */
        }
        assert(buf == buf_end);
        *offset += count;
@@ -1218,7 +1236,8 @@ ssize_t generic_file_write(struct file *file, const char *buf, size_t count,
                }
                buf += copy_amt;
                page_off = 0;
-               page_decref(page);      /* it's still in the cache, we just don't need it */
+               atomic_or(&page->pg_flags, PG_DIRTY);
+               pm_put_page(page);      /* it's still in the cache, we just don't need it */
        }
        assert(buf == buf_end);
        *offset += count;
@@ -1348,8 +1367,10 @@ struct file *do_file_open(char *path, int flags, int mode)
 open_the_file:
        /* now open the file (freshly created or if it already existed).  At this
         * point, file_d is a refcnt'd dentry, regardless of which branch we took.*/
-       if (flags & O_TRUNC)
-               warn("File truncation not supported yet.");
+       if (flags & O_TRUNC) {
+               file_d->d_inode->i_size = 0;
+               /* TODO: probably should remove the garbage pages from the page map */
+       }
        file = dentry_open(file_d, flags);                              /* sets errno */
        /* Note the fall through to the exit paths.  File is 0 by default and if
         * dentry_open fails. */
@@ -1810,6 +1831,8 @@ int pipe_release(struct inode *inode, struct file *file)
        } else {
                warn("Bad pipe file flags 0x%x\n", file->f_flags);
        }
+       /* need to wake up any sleeping readers/writers, since we might be done */
+       __cv_broadcast(&pii->p_cv);
        cv_unlock(&pii->p_cv);
        return 0;
 }
@@ -1822,6 +1845,19 @@ struct file_operations pipe_f_op = {
        0
 };
 
+/* Debugging helper: prints the pipe info hanging off of file f's inode - the
+ * read and write offsets, the reader and writer counts, and the number of
+ * waiters on the pipe's CV.  Asserts that the inode has pipe info at all. */
+void pipe_debug(struct file *f)
+{
+       struct pipe_inode_info *pipe_info;
+
+       pipe_info = f->f_dentry->d_inode->i_pipe;
+       assert(pipe_info);
+       printk("PIPE %p\n", pipe_info);
+       printk("\trdoff %p\n", pipe_info->p_rd_off);
+       printk("\twroff %p\n", pipe_info->p_wr_off);
+       printk("\tnr_rds %d\n", pipe_info->p_nr_readers);
+       printk("\tnr_wrs %d\n", pipe_info->p_nr_writers);
+       printk("\tcv waiters %d\n", pipe_info->p_cv.nr_waiters);
+}
+
 /* General plan: get a dentry/inode to represent the pipe.  We'll alloc it from
  * the default_ns SB, but won't actually link it anywhere.  It'll only be held
  * alive by the krefs, til all the FDs are closed. */
@@ -1956,8 +1992,8 @@ struct file *dentry_open(struct dentry *dentry, int flags)
        kref_get(&inode->i_sb->s_mount->mnt_kref, 1);
        file->f_vfsmnt = inode->i_sb->s_mount;          /* saving a ref to the vmnt...*/
        file->f_op = inode->i_fop;
-       /* Don't store open mode or creation flags */
-       file->f_flags = flags & ~(O_ACCMODE | O_CREAT_FLAGS);
+       /* Don't store creation flags */
+       file->f_flags = flags & ~O_CREAT_FLAGS;
        file->f_pos = 0;
        file->f_uid = inode->i_uid;
        file->f_gid = inode->i_gid;
@@ -2003,20 +2039,91 @@ struct file *get_file_from_fd(struct files_struct *open_files, int file_desc)
        if (file_desc < 0)
                return 0;
        spin_lock(&open_files->lock);
+       if (open_files->closed) {
+               spin_unlock(&open_files->lock);
+               return 0;
+       }
        if (file_desc < open_files->max_fdset) {
                if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, file_desc)) {
                        /* while max_files and max_fdset might not line up, we should never
                         * have a valid fdset higher than files */
                        assert(file_desc < open_files->max_files);
                        retval = open_files->fd[file_desc].fd_file;
-                       assert(retval);
-                       kref_get(&retval->f_kref, 1);
+                       /* 9ns might be using this one, in which case file == 0 */
+                       if (retval)
+                               kref_get(&retval->f_kref, 1);
                }
        }
        spin_unlock(&open_files->lock);
        return retval;
 }
 
+/* Grow the vfs fd set: extends the open_files->fd array by up to
+ * NR_OPEN_FILES_DEFAULT entries (capped at NR_FILE_DESC_MAX) and, the first
+ * time through, moves the fd bitmap off of the embedded open_fds_init and onto
+ * a heap-allocated struct fd_set.
+ *
+ * Returns 0 on success, or -1 if nothing could be grown: either the fd array
+ * is already at NR_FILE_DESC_MAX, or an allocation failed.  On failure,
+ * open_files is left completely untouched.  Callers in this file invoke it
+ * with open_files->lock held. */
+static int grow_fd_set(struct files_struct *open_files) {
+       int n;
+       struct file_desc *nfd, *ofd;
+
+       /* Can't grow past the hard cap.  Report failure instead of "succeeding"
+        * without adding any slots, since callers retry until a slot shows up. */
+       if (open_files->max_files >= NR_FILE_DESC_MAX)
+               return -1;
+
+       /* Grow the open_files->fd array in increments of NR_OPEN_FILES_DEFAULT */
+       n = open_files->max_files + NR_OPEN_FILES_DEFAULT;
+       if (n > NR_FILE_DESC_MAX)
+               n = NR_FILE_DESC_MAX;
+
+       /* Do all of the allocating before modifying open_files, so a failure
+        * can't leave the bitmap on the heap while fd still points at fd_array
+        * (free_fd_set() asserts that those two move together). */
+       nfd = kzmalloc(n * sizeof(struct file_desc), 0);
+       if (nfd == NULL)
+               return -1;
+
+       /* Only update open_fds once. If currently pointing to open_fds_init, then
+        * update it to point to a newly allocated fd_set with space for
+        * NR_FILE_DESC_MAX */
+       if (open_files->open_fds == (struct fd_set*)&open_files->open_fds_init) {
+               struct fd_set *nfds = kzmalloc(sizeof(struct fd_set), 0);
+
+               if (nfds == NULL) {
+                       kfree(nfd);
+                       return -1;
+               }
+               memmove(nfds, &open_files->open_fds_init,
+                       sizeof(struct small_fd_set));
+               open_files->open_fds = nfds;
+       }
+
+       /* Move the old array on top of the new one */
+       ofd = open_files->fd;
+       memmove(nfd, ofd, open_files->max_files * sizeof(struct file_desc));
+
+       /* Update the array and the maxes for both max_files and max_fdset */
+       open_files->fd = nfd;
+       open_files->max_files = n;
+       open_files->max_fdset = n;
+
+       /* Only free the old one if it wasn't pointing to open_files->fd_array */
+       if (ofd != open_files->fd_array)
+               kfree(ofd);
+       return 0;
+}
+
+/* Free the vfs fd set if necessary: releases the heap-allocated fd bitmap and
+ * fd array.  A files_struct whose bitmap is still the embedded open_fds_init
+ * is left alone. */
+static void free_fd_set(struct files_struct *open_files) {
+       /* Bitmap is still the embedded one: there is nothing to free */
+       if (open_files->open_fds == (struct fd_set*)&open_files->open_fds_init)
+               return;
+       kfree(open_files->open_fds);
+       /* the bitmap and the fd array are expected to leave their embedded
+        * storage together */
+       assert(open_files->fd != open_files->fd_array);
+       kfree(open_files->fd);
+}
+
+/* 9ns: puts back an FD from the VFS-FD-space.  Clears file_desc's bit in the
+ * open fd bitmap if it was set; the fd[] entry itself is not touched.  A
+ * negative FD only triggers a warning, and an FD that is out of range or not
+ * set is silently ignored.  Always returns 0. */
+int put_fd(struct files_struct *open_files, int file_desc)
+{
+       if (file_desc < 0) {
+               warn("Negative FD!\n");
+               return 0;
+       }
+       spin_lock(&open_files->lock);
+       /* Once the files are closed, close_all_files() may have already freed
+        * the bitmap (free_fd_set()), so don't touch it. */
+       if (open_files->closed) {
+               spin_unlock(&open_files->lock);
+               return 0;
+       }
+       if (file_desc < open_files->max_fdset) {
+               if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, file_desc)) {
+                       /* while max_files and max_fdset might not line up, we should never
+                        * have a valid fdset higher than files */
+                       assert(file_desc < open_files->max_files);
+                       CLR_BITMASK_BIT(open_files->open_fds->fds_bits, file_desc);
+               }
+       }
+       spin_unlock(&open_files->lock);
+       return 0;
+}
+
 /* Remove FD from the open files, if it was there, and return f.  Currently,
  * this decref's f, so the return value is not consumable or even usable.  This
  * hasn't been thought through yet. */
@@ -2033,7 +2140,7 @@ struct file *put_file_from_fd(struct files_struct *open_files, int file_desc)
                        assert(file_desc < open_files->max_files);
                        file = open_files->fd[file_desc].fd_file;
                        open_files->fd[file_desc].fd_file = 0;
-                       assert(file);
+                       assert(file);   /* 9ns shouldn't call this put */
                        kref_put(&file->f_kref);
                        CLR_BITMASK_BIT(open_files->open_fds->fds_bits, file_desc);
                }
@@ -2041,30 +2148,99 @@ struct file *put_file_from_fd(struct files_struct *open_files, int file_desc)
        spin_unlock(&open_files->lock);
        return file;
 }
-/* Inserts the file in the files_struct, returning the corresponding new file
- * descriptor, or an error code.  We start looking for open fds from low_fd. */
-int insert_file(struct files_struct *open_files, struct file *file, int low_fd)
+
+/* Finds the lowest free FD that is >= low_fd, marks it in the open fd bitmap,
+ * and bumps next_fd if needed.  The fd[] entry is left empty for the caller to
+ * fill in.  Grows the fd array / fd_set when the current one has no free slot.
+ * Callers in this file hold open_files->lock.
+ *
+ * Returns the claimed FD, or a negative error: -EINVAL for a bad low_fd or a
+ * closed files_struct, -ENFILE when the table is already at
+ * NR_FILE_DESC_MAX, -ENOMEM when growing failed. */
+static int __get_fd(struct files_struct *open_files, int low_fd)
 {
        int slot = -1;
        if ((low_fd < 0) || (low_fd > NR_FILE_DESC_MAX))
                return -EINVAL;
+       if (open_files->closed)
+               return -EINVAL; /* won't matter, they are dying */
+
+       /* Loop until we have a valid slot (we grow the fd_array at the bottom of
+        * the loop if we haven't found a slot in the current array */
+       while (slot == -1) {
+               for (; low_fd < open_files->max_fdset; low_fd++) {
+                       if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, low_fd))
+                               continue;
+                       slot = low_fd;
+                       SET_BITMASK_BIT(open_files->open_fds->fds_bits, slot);
+                       assert(slot < open_files->max_files &&
+                              open_files->fd[slot].fd_file == 0);
+                       if (slot >= open_files->next_fd)
+                               open_files->next_fd = slot + 1;
+                       break;
+               }
+               if (slot == -1) {
+                       /* Already at the cap: growing can't add slots, and retrying
+                        * would spin forever with the lock held. */
+                       if (open_files->max_files >= NR_FILE_DESC_MAX)
+                               return -ENFILE;
+                       /* Expand the FD array and fd_set */
+                       if (grow_fd_set(open_files) == -1)
+                               return -ENOMEM;
+                       /* loop after growing */
+               }
+       }
+       return slot;
+}
+
+/* Gets and claims a free FD, used by 9ns.  < 0 == error.  This is the locked
+ * wrapper around __get_fd(): it takes open_files->lock, searches upward from
+ * low_fd, and returns whatever __get_fd() returned.  Only the FD's bit in the
+ * bitmap is claimed; no file is installed in the slot. */
+int get_fd(struct files_struct *open_files, int low_fd)
+{
+       int slot;
        spin_lock(&open_files->lock);
-       for (int i = low_fd; i < open_files->max_fdset; i++) {
-               if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, i))
-                       continue;
-               slot = i;
-               SET_BITMASK_BIT(open_files->open_fds->fds_bits, slot);
-               assert(slot < open_files->max_files &&
-                      open_files->fd[slot].fd_file == 0);
-               kref_get(&file->f_kref, 1);
-               open_files->fd[slot].fd_file = file;
-               open_files->fd[slot].fd_flags = 0;
-               if (slot >= open_files->next_fd)
-                       open_files->next_fd = slot + 1;
-               break;
-       }
-       if (slot == -1) /* should expand the FD array and fd_set */
-               warn("Ran out of file descriptors, deal with me!");
+       slot = __get_fd(open_files, low_fd);
+       spin_unlock(&open_files->lock);
+       return slot;
+}
+
+/* Claims the specific FD file_desc in the open fd bitmap, growing the fd
+ * array / fd_set as needed so that file_desc fits.  The fd[] entry is left
+ * empty for the caller to fill in.  Callers in this file hold
+ * open_files->lock.
+ *
+ * Returns 0 on success, or a negative error: -EINVAL for an out-of-range FD
+ * or a closed files_struct, -ENOMEM when growing failed, -ENFILE when the FD
+ * was already claimed. */
+static int __claim_fd(struct files_struct *open_files, int file_desc)
+{
+       /* grow_fd_set() caps the table at NR_FILE_DESC_MAX entries, so the
+        * largest FD that can ever fit is NR_FILE_DESC_MAX - 1 */
+       if ((file_desc < 0) || (file_desc >= NR_FILE_DESC_MAX))
+               return -EINVAL;
+       if (open_files->closed)
+               return -EINVAL; /* won't matter, they are dying */
+
+       /* Grow the open_files->fd_set until the file_desc can fit inside it.  Bail
+        * out if we can't grow, instead of retrying forever with the lock held. */
+       while (file_desc >= open_files->max_files) {
+               if (grow_fd_set(open_files) == -1)
+                       return -ENOMEM;
+       }
+
+       /* If we haven't grown, this could be a problem, so check for it */
+       if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, file_desc))
+               return -ENFILE; /* Should never really happen. Here to catch bugs. */
+
+       SET_BITMASK_BIT(open_files->open_fds->fds_bits, file_desc);
+       /* the slot we just claimed (not slot 0) must not have a file yet */
+       assert(file_desc < open_files->max_files &&
+              open_files->fd[file_desc].fd_file == 0);
+       if (file_desc >= open_files->next_fd)
+               open_files->next_fd = file_desc + 1;
+       return 0;
+}
+
+/* Claims a specific FD when duping FDs. used by 9ns.  < 0 == error.  Locked
+ * wrapper: runs __claim_fd() with open_files->lock held and passes its result
+ * straight back. */
+int claim_fd(struct files_struct *open_files, int file_desc)
+{
+       int claim_err;
+
+       spin_lock(&open_files->lock);
+       claim_err = __claim_fd(open_files, file_desc);
+       spin_unlock(&open_files->lock);
+
+       return claim_err;
+}
+
+/* Inserts the file in the files_struct, returning the corresponding new file
+ * descriptor, or an error code.  We start looking for open fds from low_fd.
+ * On success the slot holds its own reference on file (kref_get) and its
+ * fd_flags are cleared.  On error nothing is installed and the negative value
+ * from __get_fd() is returned. */
+int insert_file(struct files_struct *open_files, struct file *file, int low_fd)
+{
+       int slot;
+       spin_lock(&open_files->lock);
+       slot = __get_fd(open_files, low_fd);
+       /* only fill in the slot if we actually got one; either way we fall
+        * through to the single unlock-and-return below */
+       if (slot >= 0) {
+               assert(slot < open_files->max_files &&
+                      open_files->fd[slot].fd_file == 0);
+               kref_get(&file->f_kref, 1);
+               open_files->fd[slot].fd_file = file;
+               open_files->fd[slot].fd_flags = 0;
+       }
        spin_unlock(&open_files->lock);
        return slot;
 }
@@ -2075,12 +2251,19 @@ void close_all_files(struct files_struct *open_files, bool cloexec)
 {
        struct file *file;
        spin_lock(&open_files->lock);
+       if (open_files->closed) {
+               spin_unlock(&open_files->lock);
+               return;
+       }
        for (int i = 0; i < open_files->max_fdset; i++) {
                if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, i)) {
                        /* while max_files and max_fdset might not line up, we should never
                         * have a valid fdset higher than files */
                        assert(i < open_files->max_files);
                        file = open_files->fd[i].fd_file;
+                       /* no file == 9ns uses the FD.  they will deal with it */
+                       if (!file)
+                               continue;
                        if (cloexec && !(open_files->fd[i].fd_flags & O_CLOEXEC))
                                continue;
                        /* Actually close the file */
@@ -2090,6 +2273,10 @@ void close_all_files(struct files_struct *open_files, bool cloexec)
                        CLR_BITMASK_BIT(open_files->open_fds->fds_bits, i);
                }
        }
+       if (!cloexec) {
+               free_fd_set(open_files);
+               open_files->closed = TRUE;
+       }
        spin_unlock(&open_files->lock);
 }
 
@@ -2098,7 +2285,17 @@ void clone_files(struct files_struct *src, struct files_struct *dst)
 {
        struct file *file;
        spin_lock(&src->lock);
+       if (src->closed) {
+               spin_unlock(&src->lock);
+               return;
+       }
        spin_lock(&dst->lock);
+       if (dst->closed) {
+               warn("Destination closed before it opened");
+               spin_unlock(&dst->lock);
+               spin_unlock(&src->lock);
+               return;
+       }
        for (int i = 0; i < src->max_fdset; i++) {
                if (GET_BITMASK_BIT(src->open_fds->fds_bits, i)) {
                        /* while max_files and max_fdset might not line up, we should never
@@ -2108,8 +2305,9 @@ void clone_files(struct files_struct *src, struct files_struct *dst)
                        assert(i < dst->max_files && dst->fd[i].fd_file == 0);
                        SET_BITMASK_BIT(dst->open_fds->fds_bits, i);
                        dst->fd[i].fd_file = file;
-                       assert(file);
-                       kref_get(&file->f_kref, 1);
+                       /* no file means 9ns is using it, they clone separately */
+                       if (file)
+                               kref_get(&file->f_kref, 1);
                        if (i >= dst->next_fd)
                                dst->next_fd = i + 1;
                }