X-Git-Url: http://akaros.cs.berkeley.edu/gitweb/?p=akaros.git;a=blobdiff_plain;f=kern%2Fsrc%2Fvfs.c;h=2fa4e4761b9ce5e940979da541ab280a9c5c2718;hp=39ae86cf77686f3893ff04b44840ebbc1116d054;hb=f49d0da3692b09f01acc68299b80d7ec88f77e5d;hpb=5d46e6d61c3ad439632a582b46fd4cc6baa3d70e diff --git a/kern/src/vfs.c b/kern/src/vfs.c index 39ae86c..2fa4e47 100644 --- a/kern/src/vfs.c +++ b/kern/src/vfs.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include struct sb_tailq super_blocks = TAILQ_HEAD_INITIALIZER(super_blocks); spinlock_t super_blocks_lock = SPINLOCK_INITIALIZER; @@ -38,6 +40,8 @@ struct vfsmount *mount_fs(struct fs_type *fs, char *dev_name, struct super_block *sb; struct vfsmount *vmnt = kmalloc(sizeof(struct vfsmount), 0); + /* this first ref is stored in the NS tailq below */ + kref_init(&vmnt->mnt_kref, fake_release, 1); /* Build the vfsmount, if there is no mnt_pt, mnt is the root vfsmount (for now). * fields related to the actual FS, like the sb and the mnt_root are set in * the fs-specific get_sb() call. 
*/ @@ -47,7 +51,7 @@ struct vfsmount *mount_fs(struct fs_type *fs, char *dev_name, } else { /* common case, but won't be tested til we try to mount another FS */ mnt_pt->d_mount_point = TRUE; mnt_pt->d_mounted_fs = vmnt; - atomic_inc(&vmnt->mnt_refcnt); /* held by mnt_pt */ + kref_get(&vmnt->mnt_kref, 1); /* held by mnt_pt */ vmnt->mnt_parent = mnt_pt->d_sb->s_mount; vmnt->mnt_mountpoint = mnt_pt; } @@ -55,8 +59,7 @@ struct vfsmount *mount_fs(struct fs_type *fs, char *dev_name, vmnt->mnt_flags = flags; vmnt->mnt_devname = dev_name; vmnt->mnt_namespace = ns; - atomic_inc(&ns->refcnt); /* held by vmnt */ - atomic_set(&vmnt->mnt_refcnt, 1); /* for the ref in the NS tailq below */ + kref_get(&ns->kref, 1); /* held by vmnt */ /* Read in / create the SB */ sb = fs->get_sb(fs, flags, dev_name, vmnt); @@ -89,8 +92,8 @@ void vfs_init(void) __alignof__(struct inode), 0, 0, 0); file_kcache = kmem_cache_create("file", sizeof(struct file), __alignof__(struct file), 0, 0, 0); - - atomic_set(&default_ns.refcnt, 1); // default NS never dies, +1 to exist + /* default NS never dies, +1 to exist */ + kref_init(&default_ns.kref, fake_release, 1); spinlock_init(&default_ns.lock); default_ns.root = NULL; TAILQ_INIT(&default_ns.vfsmounts); @@ -106,11 +109,6 @@ void vfs_init(void) default_ns.root = mount_fs(&kfs_fs_type, "RAM", NULL, 0, &default_ns); printk("vfs_init() completed\n"); - /* - put structs and friends in struct proc, and init in proc init - */ - // LOOKUP: follow_mount, follow_link, etc - // pains in the ass for having .. or . in the middle of the path } /* Builds / populates the qstr of a dentry based on its d_iname. 
If there is an @@ -125,6 +123,12 @@ void qstr_builder(struct dentry *dentry, char *l_name) dentry->d_name.len = strnlen(dentry->d_name.name, MAX_FILENAME_SZ); } +/* Useful little helper - return the string ptr for a given file */ +char *file_name(struct file *file) +{ + return file->f_dentry->d_name.name; +} + /* Some issues with this, coupled closely to fs_lookup. This assumes that * negative dentries are not returned (might differ from linux) */ static struct dentry *do_lookup(struct dentry *parent, char *name) @@ -143,25 +147,48 @@ static struct dentry *do_lookup(struct dentry *parent, char *name) return dentry; } -/* Walk up one directory, being careful of mountpoints, namespaces, and the top - * of the FS */ -static int climb_up(struct nameidata *nd) -{ - // TODO - warn("Climbing up (../) in path lookup not supported yet!"); - return 0; -} - /* Update ND such that it represents having followed dentry. IAW the nd * refcnting rules, we need to decref any references that were in there before * they get clobbered. */ static int next_link(struct dentry *dentry, struct nameidata *nd) { assert(nd->dentry && nd->mnt); - atomic_dec(&nd->dentry->d_refcnt); - atomic_dec(&nd->mnt->mnt_refcnt); + /* update the dentry */ + kref_get(&dentry->d_kref, 1); + kref_put(&nd->dentry->d_kref); nd->dentry = dentry; - nd->mnt = dentry->d_sb->s_mount; + /* update the mount, if we need to */ + if (dentry->d_sb->s_mount != nd->mnt) { + kref_get(&dentry->d_sb->s_mount->mnt_kref, 1); + kref_put(&nd->mnt->mnt_kref); + nd->mnt = dentry->d_sb->s_mount; + } + return 0; +} + +/* Walk up one directory, being careful of mountpoints, namespaces, and the top + * of the FS */ +static int climb_up(struct nameidata *nd) +{ + printd("CLIMB_UP, from %s\n", nd->dentry->d_name.name); + /* Top of the world, just return. 
Should also check for being at the top of + * the current process's namespace (TODO) */ + if (!nd->dentry->d_parent) + return -1; + /* Check if we are at the top of a mount, if so, we need to follow + * backwards, and then climb_up from that one. We might need to climb + * multiple times if we mount multiple FSs at the same spot (highly + * unlikely). This is completely untested. Might recurse instead. */ + while (nd->mnt->mnt_root == nd->dentry) { + if (!nd->mnt->mnt_parent) { + warn("Might have expected a parent vfsmount (dentry had a parent)"); + return -1; + } + next_link(nd->mnt->mnt_mountpoint, nd); + } + /* Backwards walk (no mounts or any other issues now). */ + next_link(nd->dentry->d_parent, nd); + printd("CLIMB_UP, to %s\n", nd->dentry->d_name.name); return 0; } @@ -171,10 +198,73 @@ static int follow_mount(struct nameidata *nd) return 0; } +static int link_path_walk(char *path, struct nameidata *nd); + +/* When nd->dentry is for a symlink, this will recurse and follow that symlink, + * so that nd contains the results of following the symlink (dentry and mnt). + * Returns when it isn't a symlink, 1 on following a link, and < 0 on error. */ static int follow_symlink(struct nameidata *nd) { - /* Detect symlink, LOOKUP_FOLLOW, follow it, etc... (TODO!) */ - return 0; + int retval; + char *symname; + if (nd->dentry->d_inode->i_type != FS_I_SYMLINK) + return 0; + if (nd->depth > MAX_SYMLINK_DEPTH) + return -ELOOP; + printd("Following symlink for dentry %08p %s\n", nd->dentry, + nd->dentry->d_name.name); + nd->depth++; + symname = nd->dentry->d_inode->i_op->readlink(nd->dentry); + /* We need to pin in nd->dentry (the dentry of the symlink), since we need + * it's symname's storage to stay in memory throughout the upcoming + * link_path_walk(). The last_sym gets decreffed when we path_release() or + * follow another symlink. 
*/ + if (nd->last_sym) + kref_put(&nd->last_sym->d_kref); + kref_get(&nd->dentry->d_kref, 1); + nd->last_sym = nd->dentry; + /* If this an absolute path in the symlink, we need to free the old path and + * start over, otherwise, we continue from the PARENT of nd (the symlink) */ + if (symname[0] == '/') { + path_release(nd); + if (!current) + nd->dentry = default_ns.root->mnt_root; + else + nd->dentry = current->fs_env.root; + nd->mnt = nd->dentry->d_sb->s_mount; + kref_get(&nd->mnt->mnt_kref, 1); + kref_get(&nd->dentry->d_kref, 1); + } else { + climb_up(nd); + } + /* either way, keep on walking in the free world! */ + retval = link_path_walk(symname, nd); + return (retval == 0 ? 1 : retval); +} + +/* Little helper, to make it easier to break out of the nested loops. Will also + * '\0' out the first slash if it's slashes all the way down. Or turtles. */ +static bool packed_trailing_slashes(char *first_slash) +{ + for (char *i = first_slash; *i == '/'; i++) { + if (*(i + 1) == '\0') { + *first_slash = '\0'; + return TRUE; + } + } + return FALSE; +} + +/* Simple helper to set nd to track it's last name to be Name. Also be careful + * with the storage of name. Don't use and nd's name past the lifetime of the + * string used in the path_lookup()/link_path_walk/whatever. Consider replacing + * parts of this with a qstr builder. Note this uses the dentry's d_op, which + * might not be the dentry we care about. */ +static void stash_nd_name(struct nameidata *nd, char *name) +{ + nd->last.name = name; + nd->last.len = strlen(name); + nd->last.hash = nd->dentry->d_op->d_hash(nd->dentry, &nd->last); } /* Resolves the links in a basic path walk. 
0 for success, -EWHATEVER @@ -187,14 +277,15 @@ static int link_path_walk(char *path, struct nameidata *nd) char *link = path; int error; + /* Prevent crazy recursion */ + if (nd->depth > MAX_SYMLINK_DEPTH) + return -ELOOP; /* skip all leading /'s */ while (*link == '/') link++; /* if there's nothing left (null terminated), we're done */ if (*link == '\0') return 0; - /* TODO: deal with depth and LOOKUP_FOLLOW, important for symlinks */ - /* iterate through each intermediate link of the path. in general, nd * tracks where we are in the path, as far as dentries go. once we have the * next dentry, we try to update nd based on that dentry. link is the part @@ -205,25 +296,21 @@ static int link_path_walk(char *path, struct nameidata *nd) return error; /* find the next link, break out if it is the end */ next_slash = strchr(link, '/'); - if (!next_slash) + if (!next_slash) { break; - else - if (*(next_slash + 1) == '\0') { - /* trailing slash on the path meant the target is a dir */ + } else { + if (packed_trailing_slashes(next_slash)) { nd->flags |= LOOKUP_DIRECTORY; - *next_slash = '\0'; break; } - /* skip over any interim ./ */ - if (!strncmp("./", link, 2)) { - link = next_slash + 1; - continue; } + /* skip over any interim ./ */ + if (!strncmp("./", link, 2)) + goto next_loop; /* Check for "../", walk up */ if (!strncmp("../", link, 3)) { climb_up(nd); - link = next_slash + 2; - continue; + goto next_loop; } *next_slash = '\0'; link_dentry = do_lookup(nd->dentry, link); @@ -232,31 +319,78 @@ static int link_path_walk(char *path, struct nameidata *nd) return -ENOENT; /* make link_dentry the current step/answer */ next_link(link_dentry, nd); + kref_put(&link_dentry->d_kref); /* do_lookup gave us a refcnt dentry */ /* we could be on a mountpoint or a symlink - need to follow them */ follow_mount(nd); - follow_symlink(nd); + if ((error = follow_symlink(nd)) < 0) + return error; + /* Turn off a possible DIRECTORY lookup, which could have been set + * during the 
follow_symlink (a symlink could have had a directory at + * the end), though it was in the middle of the real path. */ + nd->flags &= ~LOOKUP_DIRECTORY; if (!(nd->dentry->d_inode->i_type & FS_I_DIR)) return -ENOTDIR; +next_loop: /* move through the path string to the next entry */ link = next_slash + 1; + /* advance past any other interim slashes. we know we won't hit the end + * due to the for loop check above */ + while (*link == '/') + link++; } - /* now, we're on the last link of the path */ - /* if we just want the parent, leave now. linux does some stuff with saving - * the name of the link (last) and the type (last_type), which we'll do once - * i see the need for it. */ - if (nd->flags & LOOKUP_PARENT) + /* Now, we're on the last link of the path. We need to deal with with . and + * .. . This might be weird with PARENT lookups - not sure what semantics + * we want exactly. This will give the parent of whatever the PATH was + * supposed to look like. Note that ND currently points to the parent of + * the last item (link). */ + if (!strcmp(".", link)) { + if (nd->flags & LOOKUP_PARENT) { + stash_nd_name(nd, nd->dentry->d_name.name); + climb_up(nd); + } return 0; - /* deal with some weird cases with . and .. 
(completely untested) */ - if (!strcmp(".", link)) + } + if (!strcmp("..", link)) { + climb_up(nd); + if (nd->flags & LOOKUP_PARENT) { + stash_nd_name(nd, nd->dentry->d_name.name); + climb_up(nd); + } return 0; - if (!strcmp("..", link)) - return climb_up(nd); + } + /* need to attempt to look it up, in case it's a symlink */ link_dentry = do_lookup(nd->dentry, link); - if (!link_dentry) - return -ENOENT; + if (!link_dentry) { + /* if there's no dentry, we are okay if we are looking for the parent */ + if (nd->flags & LOOKUP_PARENT) { + stash_nd_name(nd, link); + return 0; + } else { + return -ENOENT; + } + } next_link(link_dentry, nd); + kref_put(&link_dentry->d_kref); /* do_lookup gave us a refcnt'd dentry */ + /* at this point, nd is on the final link, but it might be a symlink */ + if (nd->flags & LOOKUP_FOLLOW) { + error = follow_symlink(nd); + if (error < 0) + return error; + /* if we actually followed a symlink, then nd is set and we're done */ + if (error > 0) + return 0; + } + /* One way or another, nd is on the last element of the path, symlinks and + * all. Now we need to climb up to set nd back on the parent, if that's + * what we wanted */ + if (nd->flags & LOOKUP_PARENT) { + stash_nd_name(nd, link_dentry->d_name.name); + climb_up(nd); + return 0; + } + /* now, we have the dentry set, and don't want the parent, but might be on a + * mountpoint still. FYI: this hasn't been thought through completely. */ follow_mount(nd); - follow_symlink(nd); /* If we wanted a directory, but didn't get one, error out */ if ((nd->flags & LOOKUP_DIRECTORY) && !(nd->dentry->d_inode->i_type & FS_I_DIR)) @@ -265,13 +399,14 @@ static int link_path_walk(char *path, struct nameidata *nd) } /* Given path, return the inode for the final dentry. The ND should be - * initialized for the first call - specifically, we need the intent and - * potentially a LOOKUP_PARENT. + * initialized for the first call - specifically, we need the intent. 
+ * LOOKUP_PARENT and friends go in the flags var, which is not the intent. * * Need to be careful too. While the path has been copied-in to the kernel, * it's still user input. */ int path_lookup(char *path, int flags, struct nameidata *nd) { + printd("Path lookup for %s\n", path); /* we allow absolute lookups with no process context */ if (path[0] == '/') { /* absolute lookup */ if (!current) @@ -286,8 +421,8 @@ int path_lookup(char *path, int flags, struct nameidata *nd) nd->mnt = nd->dentry->d_sb->s_mount; /* Whenever references get put in the nd, incref them. Whenever they are * removed, decref them. */ - atomic_inc(&nd->mnt->mnt_refcnt); - atomic_inc(&nd->dentry->d_refcnt); + kref_get(&nd->mnt->mnt_kref, 1); + kref_get(&nd->dentry->d_kref, 1); nd->flags = flags; nd->depth = 0; /* used in symlink following */ return link_path_walk(path, nd); @@ -297,9 +432,13 @@ int path_lookup(char *path, int flags, struct nameidata *nd) * regardless of whether it succeeded or not. It will free any references */ void path_release(struct nameidata *nd) { - /* TODO: (REF), do something when we hit 0, etc... */ - atomic_dec(&nd->dentry->d_refcnt); - atomic_dec(&nd->mnt->mnt_refcnt); + kref_put(&nd->dentry->d_kref); + kref_put(&nd->mnt->mnt_kref); + /* Free the last symlink dentry used, if there was one */ + if (nd->last_sym) { + kref_put(&nd->last_sym->d_kref); + nd->last_sym = 0; /* catch reuse bugs */ + } } /* Superblock functions */ @@ -312,7 +451,7 @@ struct super_block *get_sb(void) struct super_block *sb = kmalloc(sizeof(struct super_block), 0); sb->s_dirty = FALSE; spinlock_init(&sb->s_lock); - atomic_set(&sb->s_refcnt, 1); // for the ref passed out + kref_init(&sb->s_kref, fake_release, 1); /* for the ref passed out */ TAILQ_INIT(&sb->s_inodes); TAILQ_INIT(&sb->s_dirty_i); TAILQ_INIT(&sb->s_io_wb); @@ -337,38 +476,44 @@ void init_sb(struct super_block *sb, struct vfsmount *vmnt, * by vfsmount's mnt_root. The parent is dealt with later. 
*/ struct dentry *d_root = get_dentry(sb, 0, "/"); /* probably right */ - /* a lot of here on down is normally done in lookup() */ + /* a lot of here on down is normally done in lookup() or create, since + * get_dentry isn't a fully usable dentry. The two FS-specific settings are + * normally inherited from a parent within the same FS in get_dentry, but we + * have none here. */ d_root->d_op = d_op; d_root->d_fs_info = d_fs_info; - struct inode *inode = sb->s_op->alloc_inode(sb); + struct inode *inode = get_inode(d_root); if (!inode) panic("This FS sucks!"); - d_root->d_inode = inode; - TAILQ_INSERT_TAIL(&inode->i_dentry, d_root, d_alias); - atomic_inc(&d_root->d_refcnt); /* held by the inode */ inode->i_ino = root_ino; /* TODO: add the inode to the appropriate list (off i_list) */ /* TODO: do we need to read in the inode? can we do this on demand? */ /* if this FS is already mounted, we'll need to do something different. */ sb->s_op->read_inode(inode); /* Link the dentry and SB to the VFS mount */ - vmnt->mnt_root = d_root; /* refcnt'd above */ + vmnt->mnt_root = d_root; /* ref comes from get_dentry */ vmnt->mnt_sb = sb; /* If there is no mount point, there is no parent. This is true only for * the rootfs. */ if (vmnt->mnt_mountpoint) { + kref_get(&vmnt->mnt_mountpoint->d_kref, 1); /* held by d_root */ d_root->d_parent = vmnt->mnt_mountpoint; /* dentry of the root */ - atomic_inc(&vmnt->mnt_mountpoint->d_refcnt);/* held by d_root */ } /* insert the dentry into the dentry cache. when's the earliest we can? * when's the earliest we should? what about concurrent accesses to the * same dentry? should be locking the dentry... */ dcache_put(d_root); // TODO: should set a d_flag too + kref_put(&inode->i_kref); /* give up the ref from get_inode() */ } /* Dentry Functions */ -/* Helper to alloc and initialize a generic dentry. +/* Helper to alloc and initialize a generic dentry. 
The following needs to be + * set still: d_op (if no parent), d_fs_info (opt), d_inode, connect the inode + * to the dentry (and up the d_kref again), maybe dcache_put(). The inode + * stitching is done in get_inode() or lookup (depending on the FS). + * The setting of the d_op might be problematic when dealing with mounts. Just + * overwrite it. * * If the name is longer than the inline name, it will kmalloc a buffer, so * don't worry about the storage for *name after calling this. */ @@ -380,17 +525,22 @@ struct dentry *get_dentry(struct super_block *sb, struct dentry *parent, struct dentry *dentry = kmem_cache_alloc(dentry_kcache, 0); char *l_name = 0; + if (!dentry) + return 0; //memset(dentry, 0, sizeof(struct dentry)); - atomic_set(&dentry->d_refcnt, 1); /* this ref is returned */ + kref_init(&dentry->d_kref, dentry_release, 1); /* this ref is returned */ spinlock_init(&dentry->d_lock); TAILQ_INIT(&dentry->d_subdirs); dentry->d_time = 0; + kref_get(&sb->s_kref, 1); dentry->d_sb = sb; /* storing a ref here... */ dentry->d_mount_point = FALSE; dentry->d_mounted_fs = 0; + if (parent) { /* no parent for rootfs mount */ + kref_get(&parent->d_kref, 1); + dentry->d_op = parent->d_op; /* d_op set in init_sb for parentless */ + } dentry->d_parent = parent; - if (parent) /* no parent for rootfs mount */ - atomic_inc(&parent->d_refcnt); dentry->d_flags = 0; /* related to its dcache state */ dentry->d_fs_info = 0; SLIST_INIT(&dentry->d_bucket); @@ -405,20 +555,195 @@ struct dentry *get_dentry(struct super_block *sb, struct dentry *parent, l_name[name_len] = '\0'; qstr_builder(dentry, l_name); } + /* Catch bugs by aggressively zeroing this (o/w we use old stuff) */ + dentry->d_inode = 0; return dentry; } /* Adds a dentry to the dcache. 
*/ void dcache_put(struct dentry *dentry) { - // TODO: prob should do something with the dentry flags +#if 0 /* pending a more thorough review of the dcache */ + /* TODO: should set a d_flag too */ spin_lock(&dcache_lock); SLIST_INSERT_HEAD(&dcache, dentry, d_hash); spin_unlock(&dcache_lock); +#endif +} + +/* Cleans up the dentry (after ref == 0). We still may want it, and this is + * where we should add it to the dentry cache. (TODO). For now, we do nothing, + * since we don't have a dcache. Also, if i_nlink == 0, never cache it. + * + * This has to handle two types of dentries: full ones (ones that had been used) + * and ones that had been just for lookups - hence the check for d_inode. + * + * Note that dentries pin and kref their inodes. When all the dentries are + * gone, we want the inode to be released via kref. The inode has internal / + * weak references to the dentry, which are not refcounted. */ +void dentry_release(struct kref *kref) +{ + struct dentry *dentry = container_of(kref, struct dentry, d_kref); + printd("Freeing dentry %08p: %s\n", dentry, dentry->d_name.name); + assert(dentry->d_op); /* catch bugs. a while back, some lacked d_op */ + dentry->d_op->d_release(dentry); + /* TODO: check/test the boundaries on this. */ + if (dentry->d_name.len > DNAME_INLINE_LEN) + kfree((void*)dentry->d_name.name); + kref_put(&dentry->d_sb->s_kref); + if (dentry->d_parent) + kref_put(&dentry->d_parent->d_kref); + if (dentry->d_mounted_fs) + kref_put(&dentry->d_mounted_fs->mnt_kref); + if (dentry->d_inode) { + TAILQ_REMOVE(&dentry->d_inode->i_dentry, dentry, d_alias); + kref_put(&dentry->d_inode->i_kref); /* dentries kref inodes */ + } + kmem_cache_free(dentry_kcache, dentry); +} + +/* Looks up the dentry for the given path, returning a refcnt'd dentry (or 0). + * Permissions are applied for the current user, which is quite a broken system + * at the moment. Flags are lookup flags. 
*/ +struct dentry *lookup_dentry(char *path, int flags) +{ + struct dentry *dentry; + struct nameidata nd_r = {0}, *nd = &nd_r; + int error; + + error = path_lookup(path, flags, nd); + if (error) { + path_release(nd); + set_errno(-error); + return 0; + } + dentry = nd->dentry; + kref_get(&dentry->d_kref, 1); + path_release(nd); + return dentry; } /* Inode Functions */ +/* Creates and initializes a new inode. Generic fields are filled in. + * FS-specific fields are filled in by the callout. Specific fields are filled + * in in read_inode() based on what's on the disk for a given i_no, or when the + * inode is created (for new objects). + * + * i_no is set by the caller. Note that this means this inode can be for an + * inode that is already on disk, or it can be used when creating. */ +struct inode *get_inode(struct dentry *dentry) +{ + struct super_block *sb = dentry->d_sb; + /* FS allocs and sets the following: i_op, i_fop, i_pm.pm_op, and any FS + * specific stuff. */ + struct inode *inode = sb->s_op->alloc_inode(sb); + if (!inode) { + set_errno(ENOMEM); + return 0; + } + TAILQ_INSERT_HEAD(&sb->s_inodes, inode, i_sb_list); /* weak inode ref */ + TAILQ_INIT(&inode->i_dentry); + TAILQ_INSERT_TAIL(&inode->i_dentry, dentry, d_alias); /* weak dentry ref*/ + /* one for the dentry->d_inode, one passed out */ + kref_init(&inode->i_kref, inode_release, 2); + dentry->d_inode = inode; + inode->i_ino = 0; /* set by caller later */ + inode->i_blksize = sb->s_blocksize; + spinlock_init(&inode->i_lock); + inode->i_sb = sb; + inode->i_state = 0; /* need real states, like I_NEW */ + inode->dirtied_when = 0; + inode->i_flags = 0; + atomic_set(&inode->i_writecount, 0); + /* Set up the page_map structures. Default is to use the embedded one. + * Might push some of this back into specific FSs. For now, the FS tells us + * what pm_op they want via i_pm.pm_op, which we use when we point i_mapping + * to i_pm. 
*/ + inode->i_mapping = &inode->i_pm; + inode->i_mapping->pm_host = inode; + radix_tree_init(&inode->i_mapping->pm_tree); + spinlock_init(&inode->i_mapping->pm_tree_lock); + inode->i_mapping->pm_flags = 0; + return inode; +} + +/* Helper op, used when creating regular files, directories, symlinks, etc. + * Note we make a distinction between the mode and the file type (for now). + * After calling this, call the FS specific version (create or mkdir), which + * will set the i_ino, the filetype, and do any other FS-specific stuff. Also + * note that a lot of inode stuff was initialized in get_inode/alloc_inode. The + * stuff here is pertinent to the specific creator (user), mode, and time. Also + * note we don't pass this an nd, like Linux does... */ +static struct inode *create_inode(struct dentry *dentry, int mode) +{ + /* note it is the i_ino that uniquely identifies a file in the system. + * there's a diff between creating an inode (even for an in-use ino) and + * then filling it in, and vs creating a brand new one */ + struct inode *inode = get_inode(dentry); + if (!inode) + return 0; + inode->i_mode = mode; + inode->i_nlink = 1; + inode->i_size = 0; + inode->i_blocks = 0; + inode->i_atime.tv_sec = 0; /* TODO: now! */ + inode->i_ctime.tv_sec = 0; + inode->i_mtime.tv_sec = 0; + inode->i_atime.tv_nsec = 0; /* are these supposed to be the extra ns? */ + inode->i_ctime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; + inode->i_bdev = inode->i_sb->s_bdev; + /* when we have notions of users, do something here: */ + inode->i_uid = 0; + inode->i_gid = 0; + return inode; +} + +/* Create a new disk inode in dir associated with dentry, with the given mode. + * called when creating a regular file. dir is the directory/parent. dentry is + * the dentry of the inode we are creating. Note the lack of the nd... 
*/ +int create_file(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *new_file = create_inode(dentry, mode); + if (!new_file) + return -1; + dir->i_op->create(dir, dentry, mode, 0); + kref_put(&new_file->i_kref); + return 0; +} + +/* Creates a new inode for a directory associated with dentry in dir with the + * given mode. */ +int create_dir(struct inode *dir, struct dentry *dentry, int mode) +{ + struct inode *new_dir = create_inode(dentry, mode); + if (!new_dir) + return -1; + dir->i_op->mkdir(dir, dentry, mode); + /* Make sure my parent tracks me. This is okay, since no directory (dir) + * can have more than one dentry */ + struct dentry *parent = TAILQ_FIRST(&dir->i_dentry); + assert(parent && parent == TAILQ_LAST(&dir->i_dentry, dentry_tailq)); + /* parent dentry tracks dentry as a subdir, weak reference */ + TAILQ_INSERT_TAIL(&parent->d_subdirs, dentry, d_subdirs_link); + kref_put(&new_dir->i_kref); + return 0; +} + +/* Creates a new inode for a symlink associated with dentry in dir, containing + * the symlink symname */ +int create_symlink(struct inode *dir, struct dentry *dentry, + const char *symname, int mode) +{ + struct inode *new_sym = create_inode(dentry, mode); + if (!new_sym) + return -1; + dir->i_op->symlink(dir, dentry, symname); + kref_put(&new_sym->i_kref); + return 0; +} + /* Returns 0 if the given mode is acceptable for the inode, and an appropriate * error code if not. Needs to be writen, based on some sensible rules, and * will also probably use 'current' */ @@ -427,6 +752,43 @@ int check_perms(struct inode *inode, int access_mode) return 0; /* anything goes! */ } +/* Called after all external refs are gone to clean up the inode. Once this is + * called, all dentries pointing here are already done (one of them triggered + * this via kref_put(). */ +void inode_release(struct kref *kref) +{ + struct inode *inode = container_of(kref, struct inode, i_kref); + /* If we still have links, just dealloc the in-memory inode. 
if we have no + * links, we need to delete it too (which calls destroy). */ + if (inode->i_nlink) + inode->i_sb->s_op->dealloc_inode(inode); + else + inode->i_sb->s_op->delete_inode(inode); + kref_put(&inode->i_sb->s_kref); + assert(inode->i_mapping == &inode->i_pm); + kmem_cache_free(inode_kcache, inode); + /* TODO: (BDEV) */ + // kref_put(inode->i_bdev->kref); /* assuming it's a bdev */ +} + +/* Fills in kstat with the stat information for the inode */ +void stat_inode(struct inode *inode, struct kstat *kstat) +{ + kstat->st_dev = inode->i_sb->s_dev; + kstat->st_ino = inode->i_ino; + kstat->st_mode = inode->i_mode; + kstat->st_nlink = inode->i_nlink; + kstat->st_uid = inode->i_uid; + kstat->st_gid = inode->i_gid; + kstat->st_rdev = inode->i_rdev; + kstat->st_size = inode->i_size; + kstat->st_blksize = inode->i_blksize; + kstat->st_blocks = inode->i_blocks; + kstat->st_atime = inode->i_atime; + kstat->st_mtime = inode->i_mtime; + kstat->st_ctime = inode->i_ctime; +} + /* File functions */ /* Read count bytes from the file into buf, starting at *offset, which is increased @@ -446,11 +808,11 @@ ssize_t generic_file_read(struct file *file, char *buf, size_t count, /* Consider pushing some error checking higher in the VFS */ if (!count) return 0; - if (*offset == file->f_inode->i_size) + if (*offset == file->f_dentry->d_inode->i_size) return 0; /* EOF */ /* Make sure we don't go past the end of the file */ - if (*offset + count > file->f_inode->i_size) { - count = file->f_inode->i_size - *offset; + if (*offset + count > file->f_dentry->d_inode->i_size) { + count = file->f_dentry->d_inode->i_size - *offset; } page_off = *offset & (PGSIZE - 1); first_idx = *offset >> PGSHIFT; @@ -462,8 +824,11 @@ ssize_t generic_file_read(struct file *file, char *buf, size_t count, error = file_load_page(file, i, &page); assert(!error); /* TODO: handle ENOMEM and friends */ copy_amt = MIN(PGSIZE - page_off, buf_end - buf); - /* TODO: think about this. 
if it's a user buffer, we're relying on - * current to detect whose it is (which should work for async calls). */ + /* TODO: (UMEM) think about this. if it's a user buffer, we're relying + * on current to detect whose it is (which should work for async calls). + * Also, need to propagate errors properly... Probably should do a + * user_mem_check, then free, and also to make a distinction between + * when the kernel wants a read/write (TODO: KFOP) */ if (current) { memcpy_to_user(current, buf, page2kva(page) + page_off, copy_amt); } else { @@ -498,8 +863,8 @@ ssize_t generic_file_write(struct file *file, const char *buf, size_t count, return 0; /* Extend the file. Should put more checks in here, and maybe do this per * page in the for loop below. */ - if (*offset + count > file->f_inode->i_size) - file->f_inode->i_size = *offset + count; + if (*offset + count > file->f_dentry->d_inode->i_size) + file->f_dentry->d_inode->i_size = *offset + count; page_off = *offset & (PGSIZE - 1); first_idx = *offset >> PGSHIFT; last_idx = (*offset + count) >> PGSHIFT; @@ -509,10 +874,11 @@ ssize_t generic_file_write(struct file *file, const char *buf, size_t count, error = file_load_page(file, i, &page); assert(!error); /* TODO: handle ENOMEM and friends */ copy_amt = MIN(PGSIZE - page_off, buf_end - buf); - /* TODO: think about this. if it's a user buffer, we're relying on - * current to detect whose it is (which should work for async calls). */ + /* TODO: (UMEM) (KFOP) think about this. if it's a user buffer, we're + * relying on current to detect whose it is (which should work for async + * calls). 
*/ if (current) { - memcpy_to_user(current, page2kva(page) + page_off, buf, copy_amt); + memcpy_from_user(current, page2kva(page) + page_off, buf, copy_amt); } else { memcpy(page2kva(page) + page_off, buf, copy_amt); } @@ -525,6 +891,392 @@ ssize_t generic_file_write(struct file *file, const char *buf, size_t count, return count; } +/* Directories usually use this for their read method, which is the way glibc + * currently expects us to do a readdir (short of doing linux's getdents). Will + * probably need work, based on whatever real programs want. */ +ssize_t generic_dir_read(struct file *file, char *u_buf, size_t count, + off_t *offset) +{ + struct kdirent dir_r = {0}, *dirent = &dir_r; + unsigned int num_dirents = count / sizeof(struct kdirent); + int retval = 1; + size_t amt_copied = 0; + char *buf_end = u_buf + count; + + if (!count) + return 0; + if (*offset % sizeof(struct kdirent)) { + printk("[kernel] the f_pos for a directory should be dirent-aligned\n"); + set_errno(EINVAL); + return -1; + } + /* for now, we need to tell readdir which dirent we want */ + dirent->d_off = *offset / sizeof(struct kdirent); + for (; (u_buf < buf_end) && (retval == 1); u_buf += sizeof(struct kdirent)){ + /* TODO: UMEM/KFOP (pin the u_buf in the syscall, ditch the local copy, + * get rid of this memcpy and reliance on current, etc). Might be + * tricky with the dirent->d_off */ + retval = file->f_op->readdir(file, dirent); + if (retval < 0) + break; + if (current) { + memcpy_to_user(current, u_buf, dirent, sizeof(struct dirent)); + } else { + memcpy(u_buf, dirent, sizeof(struct dirent)); + } + amt_copied += sizeof(struct dirent); + dirent->d_off++; + } + *offset += amt_copied; + return amt_copied; +} + +/* Opens the file, using permissions from current for lack of a better option. + * It will attempt to create the file if it does not exist and O_CREAT is + * specified. This will return 0 on failure, and set errno. 
TODO: There's some + * stuff that we don't do, esp related file truncating/creation. flags are for + * opening, the mode is for creating. The flags related to how to create + * (O_CREAT_FLAGS) are handled in this function, not in create_file(). + * + * It's tempting to split this into a do_file_create and a do_file_open, based + * on the O_CREAT flag, but the O_CREAT flag can be ignored if the file exists + * already and O_EXCL isn't specified. We could have open call create if it + * fails, but for now we'll keep it as is. */ +struct file *do_file_open(char *path, int flags, int mode) +{ + struct file *file = 0; + struct dentry *file_d; + struct inode *parent_i; + struct nameidata nd_r = {0}, *nd = &nd_r; + int error; + + /* this isn't quite right, due to the nature of O_CREAT */ + if (flags & O_CREAT) + nd->intent = LOOKUP_CREATE; + else + nd->intent = LOOKUP_OPEN; + /* get the parent, following links. this means you get the parent of the + * final link (which may not be in 'path' in the first place. */ + error = path_lookup(path, LOOKUP_PARENT | LOOKUP_FOLLOW, nd); + if (error) { + path_release(nd); + set_errno(-error); + return 0; + } + /* see if the target is there, handle accordingly */ + file_d = do_lookup(nd->dentry, nd->last.name); + if (!file_d) { + if (!(flags & O_CREAT)) { + path_release(nd); + set_errno(ENOENT); + return 0; + } + /* Create the inode/file. get a fresh dentry too: */ + file_d = get_dentry(nd->dentry->d_sb, nd->dentry, nd->last.name); + parent_i = nd->dentry->d_inode; + /* Note that the mode technically should only apply to future opens, + * but we apply it immediately. 
*/
+		if (current)
+			mode &= ~current->fs_env.umask;
+		if (create_file(parent_i, file_d, mode)) {
+			kref_put(&file_d->d_kref);
+			path_release(nd);
+			return 0;
+		}
+		dcache_put(file_d);
+	} else {	/* something already exists (might be a dir) */
+		if ((flags & O_CREAT) && (flags & O_EXCL)) {
+			/* wanted to create, not open, bail out */
+			kref_put(&file_d->d_kref);
+			path_release(nd);
+			set_errno(EEXIST);
+			return 0;
+		}
+	}
+	/* now open the file (freshly created or if it already existed).  At this
+	 * point, file_d is a refcnt'd dentry, regardless of which branch we took.
+	 * Whether or not dentry_open() succeeds, we put that dentry ref and
+	 * release the path before returning. */
+	if (flags & O_TRUNC)
+		warn("File truncation not supported yet.");
+	file = dentry_open(file_d, flags);	/* sets errno */
+	if (!file) {
+		kref_put(&file_d->d_kref);
+		path_release(nd);
+		return 0;
+	}
+	kref_put(&file_d->d_kref);
+	path_release(nd);
+	return file;
+}
+
+/* Path is the location of the symlink, sometimes called the "new path", and
+ * symname is who we link to, sometimes called the "old path".
+ *
+ * Returns 0 on success and -1 on failure.  errno is set in here when the
+ * parent lookup fails (to the negated path_lookup() error), when something
+ * already exists at path (EEXIST), and when we can't get a dentry (ENOMEM). */
+int do_symlink(char *path, const char *symname, int mode)
+{
+	struct dentry *sym_d;
+	struct inode *parent_i;
+	struct nameidata nd_r = {0}, *nd = &nd_r;
+	int error;
+
+	nd->intent = LOOKUP_CREATE;
+	/* get the parent, but don't follow links */
+	error = path_lookup(path, LOOKUP_PARENT, nd);
+	if (error) {
+		set_errno(-error);
+		path_release(nd);
+		return -1;
+	}
+	/* see if the target is already there, handle accordingly */
+	sym_d = do_lookup(nd->dentry, nd->last.name);
+	if (sym_d) {
+		set_errno(EEXIST);
+		kref_put(&sym_d->d_kref);
+		path_release(nd);
+		return -1;
+	}
+	/* Doesn't already exist, let's try to make it: */
+	sym_d = get_dentry(nd->dentry->d_sb, nd->dentry, nd->last.name);
+	if (!sym_d) {
+		set_errno(ENOMEM);
+		path_release(nd);
+		return -1;
+	}
+	parent_i = nd->dentry->d_inode;
+	/* TODO: mode should be & ~umask.
*/
+	if (create_symlink(parent_i, sym_d, symname, mode)) {
+		kref_put(&sym_d->d_kref);
+		path_release(nd);
+		return -1;
+	}
+	dcache_put(sym_d);
+	kref_put(&sym_d->d_kref);
+	path_release(nd);
+	return 0;
+}
+
+/* Makes a hard link for the file behind old_path to new_path.
+ *
+ * Returns 0 on success, or -1 on failure with errno set: EEXIST if new_path
+ * already exists, ENOMEM if we can't get a dentry for it, EPERM if old_path is
+ * not a regular file, EXDEV if the two are on different superblocks, or the
+ * negated error from path_lookup() or the parent's i_op->link().  All exits go
+ * through the out_* labels at the bottom, which put whatever was acquired up
+ * to that point. */
+int do_link(char *old_path, char *new_path)
+{
+	struct dentry *link_d, *old_d;
+	struct inode *inode, *parent_dir;
+	struct nameidata nd_r = {0}, *nd = &nd_r;
+	int error;
+	int retval = -1;
+
+	nd->intent = LOOKUP_CREATE;
+	/* get the absolute parent of the new_path */
+	error = path_lookup(new_path, LOOKUP_PARENT | LOOKUP_FOLLOW, nd);
+	if (error) {
+		set_errno(-error);
+		goto out_path_only;
+	}
+	parent_dir = nd->dentry->d_inode;
+	/* see if the new target is already there, handle accordingly */
+	link_d = do_lookup(nd->dentry, nd->last.name);
+	if (link_d) {
+		set_errno(EEXIST);
+		goto out_link_d;
+	}
+	/* Doesn't already exist, let's try to make it.  Still need to stitch it to
+	 * an inode and set its FS-specific stuff after this.*/
+	link_d = get_dentry(nd->dentry->d_sb, nd->dentry, nd->last.name);
+	if (!link_d) {
+		set_errno(ENOMEM);
+		goto out_path_only;
+	}
+	/* Now let's get the old_path target */
+	old_d = lookup_dentry(old_path, LOOKUP_FOLLOW);
+	if (!old_d)					/* errno set by lookup_dentry */
+		goto out_link_d;
+	/* For now, can only link to files */
+	if (old_d->d_inode->i_type != FS_I_FILE) {
+		set_errno(EPERM);
+		goto out_both_ds;
+	}
+	/* Must be on the same FS */
+	if (old_d->d_sb != link_d->d_sb) {
+		set_errno(EXDEV);
+		goto out_both_ds;
+	}
+	/* Do whatever FS specific stuff there is first (which is also a chance to
+	 * bail out).
*/
+	error = parent_dir->i_op->link(old_d, parent_dir, link_d);
+	if (error) {
+		set_errno(-error);
+		goto out_both_ds;
+	}
+	/* Finally stitch it up */
+	inode = old_d->d_inode;
+	kref_get(&inode->i_kref, 1);	/* for the pointer stored in link_d below */
+	link_d->d_inode = inode;
+	inode->i_nlink++;
+	TAILQ_INSERT_TAIL(&inode->i_dentry, link_d, d_alias);	/* weak ref */
+	dcache_put(link_d);
+	retval = 0;				/* Note the fall through to the exit paths */
+out_both_ds:
+	kref_put(&old_d->d_kref);
+out_link_d:
+	kref_put(&link_d->d_kref);
+out_path_only:
+	path_release(nd);
+	return retval;
+}
+
+/* Unlinks the non-directory at path from its parent directory.
+ *
+ * Returns 0 on success, or -1 on failure with errno set: ENOENT if there is
+ * nothing at path, EISDIR if it is a directory, or the negated error from
+ * path_lookup() or the parent's i_op->unlink(). */
+int do_unlink(char *path)
+{
+	struct dentry *dentry;
+	struct inode *parent_dir;
+	struct nameidata nd_r = {0}, *nd = &nd_r;
+	int error;
+	int retval = -1;
+
+	/* get the parent of the target, and don't follow a final link */
+	error = path_lookup(path, LOOKUP_PARENT, nd);
+	if (error) {
+		set_errno(-error);
+		goto out_path_only;
+	}
+	parent_dir = nd->dentry->d_inode;
+	/* make sure the target is there */
+	dentry = do_lookup(nd->dentry, nd->last.name);
+	if (!dentry) {
+		set_errno(ENOENT);
+		goto out_path_only;
+	}
+	/* Make sure the target is not a directory */
+	if (dentry->d_inode->i_type == FS_I_DIR) {
+		set_errno(EISDIR);
+		goto out_dentry;
+	}
+	/* Remove the dentry from its parent */
+	error = parent_dir->i_op->unlink(parent_dir, dentry);
+	if (error) {
+		set_errno(-error);
+		goto out_dentry;
+	}
+	kref_put(&dentry->d_parent->d_kref);
+	dentry->d_parent = 0;		/* so we don't double-decref it later */
+	dentry->d_inode->i_nlink--;	/* TODO: race here */
+	/* At this point, the dentry is unlinked from the FS, and the inode has one
+	 * less link.
When the in-memory objects (dentry, inode) are going to be
+	 * released (after all open files are closed, and maybe after entries are
+	 * evicted from the cache), then nlinks will get checked and the FS-file
+	 * will get removed from the disk */
+	retval = 0;				/* Note the fall through to the exit paths */
+out_dentry:
+	kref_put(&dentry->d_kref);
+out_path_only:
+	path_release(nd);
+	return retval;
+}
+
+/* Looks up path with an access intent and hands back whatever path_lookup()
+ * returned.  mode is not consulted yet, since there is no way to pass it down
+ * to the lookup.  The path is released whether or not the lookup worked. */
+int do_file_access(char *path, int mode)
+{
+	struct nameidata lookup = {0};
+	int err;
+
+	lookup.intent = LOOKUP_ACCESS;
+	err = path_lookup(path, 0, &lookup);
+	path_release(&lookup);
+	return err;
+}
+
+/* Sets the permission bits (mode & 0777) on the inode behind path.  Returns
+ * whatever path_lookup() returned; the inode is only touched when that is 0.
+ * There is no ownership check yet (see the disabled code). */
+int do_file_chmod(char *path, int mode)
+{
+	struct nameidata lookup = {0};
+	int err = path_lookup(path, 0, &lookup);
+
+	if (err == 0) {
+		#if 0
+		/* TODO: when we have notions of uid, check for the proc's uid */
+		if (lookup.dentry->d_inode->i_uid != UID_OF_ME)
+			err = -EPERM;
+		else
+		#endif
+			lookup.dentry->d_inode->i_mode = mode & 0777;
+	}
+	path_release(&lookup);
+	return err;
+}
+
+/* Opens and returns the file specified by dentry */
+struct file *dentry_open(struct dentry *dentry, int flags)
+{
+	struct inode *inode;
+	int desired_mode;
+	struct file *file = kmem_cache_alloc(file_kcache, 0);
+	if (!file) {
+		set_errno(ENOMEM);
+		return 0;
+	}
+	inode = dentry->d_inode;
+	/* Do the mode first, since we can still error out.
f_mode stores how the
+	 * OS file is open, which can be more restrictive than the i_mode */
+	switch (flags & (O_RDONLY | O_WRONLY | O_RDWR)) {
+		case O_RDONLY:
+			desired_mode = S_IRUSR;
+			break;
+		case O_WRONLY:
+			desired_mode = S_IWUSR;
+			break;
+		case O_RDWR:
+			desired_mode = S_IRUSR | S_IWUSR;
+			break;
+		default:
+			goto error_access;
+	}
+	if (check_perms(inode, desired_mode))
+		goto error_access;
+	file->f_mode = desired_mode;
+	/* one for the ref passed out, and *none* for the sb TAILQ */
+	kref_init(&file->f_kref, file_release, 1);
+	/* Add to the list of all files of this SB.  NOTE(review): file_release()
+	 * takes sb->s_lock around its TAILQ_REMOVE from s_files, but this insert
+	 * takes no lock - confirm that something else serializes it. */
+	TAILQ_INSERT_TAIL(&inode->i_sb->s_files, file, f_list);
+	kref_get(&dentry->d_kref, 1);
+	file->f_dentry = dentry;
+	kref_get(&inode->i_sb->s_mount->mnt_kref, 1);
+	file->f_vfsmnt = inode->i_sb->s_mount;	/* saving a ref to the vmnt...*/
+	file->f_op = inode->i_fop;
+	/* Don't store open mode or creation flags */
+	file->f_flags = flags & ~(O_ACCMODE | O_CREAT_FLAGS);
+	file->f_pos = 0;
+	file->f_uid = inode->i_uid;
+	file->f_gid = inode->i_gid;
+	file->f_error = 0;
+//	struct event_poll_tailq		f_ep_links;
+	spinlock_init(&file->f_ep_lock);
+	file->f_fs_info = 0;						/* prob overriden by the fs */
+	file->f_mapping = inode->i_mapping;
+	file->f_op->open(inode, file);	/* NOTE(review): any result is ignored */
+	return file;
+error_access:
+	set_errno(EACCES);
+	kmem_cache_free(file_kcache, file);
+	return 0;
+}
+
+/* Closes a file, fsync, whatever else is necessary.  Called when the kref hits
+ * 0.  Note that the file is not refcounted on the s_files list, nor is the
+ * f_mapping refcounted (it is pinned by the i_mapping).  This is the release
+ * function that dentry_open() registers with kref_init() on f_kref. */
+void file_release(struct kref *kref)
+{
+	struct file *file = container_of(kref, struct file, f_kref);
+
+	struct super_block *sb = file->f_dentry->d_sb;
+	spin_lock(&sb->s_lock);
+	TAILQ_REMOVE(&sb->s_files, file, f_list);
+	spin_unlock(&sb->s_lock);
+
+	/* TODO: fsync (BLK).  also, we may want to parallelize the blocking that
+	 * could happen in here (spawn kernel threads)...
*/ + file->f_op->release(file->f_dentry->d_inode, file); + /* Clean up the other refs we hold */ + kref_put(&file->f_dentry->d_kref); + kref_put(&file->f_vfsmnt->mnt_kref); + kmem_cache_free(file_kcache, file); +} + /* Page cache functions */ /* Looks up the index'th page in the page map, returning an incref'd reference, @@ -650,6 +1402,8 @@ int file_load_page(struct file *file, unsigned long index, struct page **pp) struct file *get_file_from_fd(struct files_struct *open_files, int file_desc) { struct file *retval = 0; + if (file_desc < 0) + return 0; spin_lock(&open_files->lock); if (file_desc < open_files->max_fdset) { if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, file_desc)) { @@ -658,7 +1412,7 @@ struct file *get_file_from_fd(struct files_struct *open_files, int file_desc) assert(file_desc < open_files->max_files); retval = open_files->fd[file_desc]; assert(retval); - atomic_inc(&retval->f_refcnt); + kref_get(&retval->f_kref, 1); } } spin_unlock(&open_files->lock); @@ -670,24 +1424,25 @@ struct file *get_file_from_fd(struct files_struct *open_files, int file_desc) * hasn't been thought through yet. 
*/ struct file *put_file_from_fd(struct files_struct *open_files, int file_desc) { - struct file *f = 0; + struct file *file = 0; + if (file_desc < 0) + return 0; spin_lock(&open_files->lock); if (file_desc < open_files->max_fdset) { if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, file_desc)) { /* while max_files and max_fdset might not line up, we should never * have a valid fdset higher than files */ assert(file_desc < open_files->max_files); - f = open_files->fd[file_desc]; + file = open_files->fd[file_desc]; open_files->fd[file_desc] = 0; - /* TODO: (REF) need to make sure we free if we hit 0 (might do this - * in the caller */ - if (f) - atomic_dec(&f->f_refcnt); - // if 0, drop, decref from higher, sync, whatever + CLR_BITMASK_BIT(open_files->open_fds->fds_bits, file_desc); + /* the if case is due to files (stdin) without a *file yet */ + if (file) + kref_put(&file->f_kref); } } spin_unlock(&open_files->lock); - return f; + return file; } /* Inserts the file in the files_struct, returning the corresponding new file @@ -702,8 +1457,8 @@ int insert_file(struct files_struct *open_files, struct file *file) slot = i; SET_BITMASK_BIT(open_files->open_fds->fds_bits, slot); assert(slot < open_files->max_files && open_files->fd[slot] == 0); + kref_get(&file->f_kref, 1); open_files->fd[slot] = file; - atomic_inc(&file->f_refcnt); if (slot >= open_files->next_fd) open_files->next_fd = slot + 1; break; @@ -713,3 +1468,185 @@ int insert_file(struct files_struct *open_files, struct file *file) spin_unlock(&open_files->lock); return slot; } + +/* Closes all open files. Mostly just a "put" for all files. If cloexec, it + * will only close files that are opened with O_CLOEXEC. 
*/
+void close_all_files(struct files_struct *open_files, bool cloexec)
+{
+	struct file *file;
+	spin_lock(&open_files->lock);
+	for (int i = 0; i < open_files->max_fdset; i++) {
+		if (GET_BITMASK_BIT(open_files->open_fds->fds_bits, i)) {
+			/* while max_files and max_fdset might not line up, we should never
+			 * have a valid fdset higher than files */
+			assert(i < open_files->max_files);
+			file = open_files->fd[i];
+			/* In cloexec mode, only files carrying O_CLOEXEC get closed.  This
+			 * has to be a mask test with &: OR-ing the flag in always gives a
+			 * non-zero value, which made this skip nothing.  A slot with no
+			 * struct file (see below) has no flags to look at, so it is left
+			 * open rather than dereferenced. */
+			if (cloexec && (!file || !(file->f_flags & O_CLOEXEC)))
+				continue;
+			open_files->fd[i] = 0;
+			/* the if case is due to files (stdin) without a *file yet */
+			if (file)
+				kref_put(&file->f_kref);
+			CLR_BITMASK_BIT(open_files->open_fds->fds_bits, i);
+		}
+	}
+	spin_unlock(&open_files->lock);
+}
+
+/* Inserts all of the files from src into dst, used by sys_fork().  Every fd
+ * that is set in src's bitmask is set in dst's bitmask at the same index, and
+ * dst gets its own ref on each struct file.  dst must not already have a file
+ * at any of those indices (asserted).  Both locks are held for the duration,
+ * src's taken first. */
+void clone_files(struct files_struct *src, struct files_struct *dst)
+{
+	struct file *file;
+	spin_lock(&src->lock);
+	spin_lock(&dst->lock);
+	for (int i = 0; i < src->max_fdset; i++) {
+		if (GET_BITMASK_BIT(src->open_fds->fds_bits, i)) {
+			/* while max_files and max_fdset might not line up, we should never
+			 * have a valid fdset higher than files */
+			assert(i < src->max_files);
+			file = src->fd[i];
+			SET_BITMASK_BIT(dst->open_fds->fds_bits, i);
+			assert(i < dst->max_files && dst->fd[i] == 0);
+			dst->fd[i] = file;
+			/* the if case is due to files (stdin) without a *file yet */
+			if (file)
+				kref_get(&file->f_kref, 1);
+		}
+	}
+	spin_unlock(&dst->lock);
+	spin_unlock(&src->lock);
+}
+
+/* Change the working directory of the given fs env (one per process, at this
+ * point).  Returns 0 for success, -ERROR for whatever error.
*/
+int do_chdir(struct fs_struct *fs_env, char *path)
+{
+	struct nameidata nd_r = {0}, *nd = &nd_r;
+	int retval;
+	retval = path_lookup(path, LOOKUP_DIRECTORY, nd);
+	if (!retval) {
+		/* nd->dentry is the place we want our PWD to be.  Get a ref for
+		 * fs_env->pwd first, then put the ref held on the old pwd. */
+		kref_get(&nd->dentry->d_kref, 1);
+		kref_put(&fs_env->pwd->d_kref);
+		fs_env->pwd = nd->dentry;
+	}
+	path_release(nd);
+	return retval;
+}
+
+/* Returns a null-terminated string of up to length cwd_l containing the
+ * absolute path of fs_env, (up to fs_env's root).  Be sure to kfree the char*
+ * "kfree_this" when you are done with it.  We do this since it's easier to
+ * build this string going backwards.  Note cwd_l is not a strlen, it's an
+ * absolute size.
+ *
+ * The pointer returned points somewhere inside the kmalloc'd buffer, which is
+ * why the buffer itself is passed back separately through kfree_this.
+ * Returns 0 with errno set on failure: ERANGE if cwd_l is too small, ENOMEM if
+ * the buffer can't be allocated.  *kfree_this is not written on those two
+ * early failures. */
+char *do_getcwd(struct fs_struct *fs_env, char **kfree_this, size_t cwd_l)
+{
+	struct dentry *dentry = fs_env->pwd;
+	size_t link_len;
+	char *path_start, *kbuf;
+
+	if (cwd_l < 2) {
+		set_errno(ERANGE);
+		return 0;
+	}
+	kbuf = kmalloc(cwd_l, 0);
+	if (!kbuf) {
+		set_errno(ENOMEM);
+		return 0;
+	}
+	*kfree_this = kbuf;
+	kbuf[cwd_l - 1] = '\0';
+	kbuf[cwd_l - 2] = '/';
+	/* for each dentry in the path, all the way back to the root of fs_env, we
+	 * grab the dentry name, push path_start back enough, and write in the name,
+	 * using /'s to terminate.  We skip the root, since we don't want it's
+	 * actual name, just "/", which is set before each loop.
*/
+	path_start = kbuf + cwd_l - 2;	/* the last byte written */
+	while (dentry != fs_env->root) {
+		link_len = dentry->d_name.len;		/* this does not count the \0 */
+		/* We need link_len bytes for the name plus one for its leading '/',
+		 * all of them in front of path_start. */
+		if ((size_t)(path_start - kbuf) < link_len + 1) {
+			kfree(kbuf);
+			*kfree_this = 0;	/* kbuf is gone, don't leave it dangling */
+			set_errno(ERANGE);
+			return 0;
+		}
+		/* The name goes directly in front of the '/' that path_start points
+		 * at, and that '/' terminates it.  Backing up one byte further than
+		 * the name needs would leave a byte between the two that nothing in
+		 * here ever writes. */
+		path_start -= link_len;
+		memcpy(path_start, dentry->d_name.name, link_len);
+		path_start--;
+		*path_start = '/';
+		dentry = dentry->d_parent;
+	}
+	return path_start;
+}
+
+/* Size of the indent buffer that ls_dash_r() hands to print_dir().  The last
+ * byte is always left as the terminating \0. */
+#define LS_R_INDENT_SZ 32
+
+/* Helper for ls_dash_r(): prints dentry's name, then every child of dentry,
+ * recursing into child directories.  buf is the indent string (one tab per
+ * level, LS_R_INDENT_SZ bytes, null-terminated) and depth is the number of
+ * tabs currently in it.  Directories that are too deep for the indent buffer
+ * are printed but not descended into.  Panics if the directory can't be
+ * opened or if a dirent has no dentry. */
+static void print_dir(struct dentry *dentry, char *buf, int depth)
+{
+	struct dentry *child_d;
+	/* NOTE(review): generic_dir_read() hands readdir() a struct kdirent;
+	 * confirm struct dirent is the right type here. */
+	struct dirent next;
+	struct file *dir;
+	int retval;
+	int child_num = 0;
+
+	/* Compare the whole type, like do_unlink() does.  The old test applied !
+	 * to i_type before the &, so it did not check for a directory at all. */
+	if (dentry->d_inode->i_type != FS_I_DIR) {
+		warn("Thought this was only directories!!");
+		return;
+	}
+	/* Print this dentry */
+	printk("%s%s/\n", buf, dentry->d_name.name);
+	/* Stop while there is still room for one more tab *and* the \0 after it,
+	 * or the %s prints below would run off the end of buf. */
+	if (depth >= LS_R_INDENT_SZ - 1)
+		return;
+	/* Set buffer for our kids */
+	buf[depth] = '\t';
+	dir = dentry_open(dentry, 0);
+	if (!dir)
+		panic("Filesystem seems inconsistent - unable to open a dir!");
+	/* Process every child, recursing on directories */
+	while (1) {
+		next.d_off = child_num++;
+		retval = dir->f_op->readdir(dir, &next);
+		if (retval >= 0) {
+			/* there is an entry, now get its dentry */
+			child_d = do_lookup(dentry, next.d_name);
+			if (!child_d)
+				panic("Inconsistent FS, dirent doesn't have a dentry!");
+			/* Recurse for directories, or just print the name for others */
+			switch (child_d->d_inode->i_type) {
+				case (FS_I_DIR):
+					print_dir(child_d, buf, depth + 1);
+					break;
+				case (FS_I_FILE):
+					printk("%s%s   size(B): %d nlink: %d\n", buf, next.d_name,
+					       child_d->d_inode->i_size, child_d->d_inode->i_nlink);
+					break;
+				case (FS_I_SYMLINK):
+					printk("%s%s -> %s\n", buf, next.d_name,
+					       child_d->d_inode->i_op->readlink(child_d));
+					break;
+				default:
+					warn("Look around you!  Unknown filetype!");
+			}
+			kref_put(&child_d->d_kref);
+		}
+		/* 0 came with an entry (handled above) but ends the loop, and so does
+		 * any negative return */
+		if (retval <= 0)
+			break;
+	}
+	/* Reset buffer to the way it was */
+	buf[depth] = '\0';
+	kref_put(&dir->f_kref);
+}
+
+/* Debugging.  Prints the directory tree under path with printk, one tab of
+ * indent per level.  Returns 0, or the path_lookup() error if path can't be
+ * looked up as a directory. */
+int ls_dash_r(char *path)
+{
+	struct nameidata nd_r = {0}, *nd = &nd_r;
+	int error;
+	char buf[LS_R_INDENT_SZ] = {0};
+
+	error = path_lookup(path, LOOKUP_ACCESS | LOOKUP_DIRECTORY, nd);
+	if (error) {
+		path_release(nd);
+		return error;
+	}
+	print_dir(nd->dentry, buf, 0);
+	path_release(nd);
+	return 0;
+}