17 DIRSIZE = STATFIXLEN + 32 * 4,
18 DIRREADLIM = 2048, /* should handle the largest reasonable directory entry */
19 DIRREADSIZE=8192, /* Just read a lot. Memory is cheap, lots of bandwidth,
20 * and RPCs are very expensive. At the same time,
21 * let's not yet exceed a common MSIZE. */
24 int newfd(struct chan *c, int oflags)
26 int ret = insert_obj_fdt(¤t->open_files, c, 0,
27 oflags & O_CLOEXEC ? FD_CLOEXEC : 0,
34 struct chan *fdtochan(struct fd_table *fdt, int fd, int mode, int chkmnt,
39 c = lookup_fd(fdt, fd, iref, FALSE);
41 /* We lost the info about why there was a problem (we used to track file
42 * group closed too, can add that in later). */
44 error("Bad FD %d\n", fd);
46 if (chkmnt && (c->flag & CMSG)) {
54 if ((mode & c->mode) != mode) {
58 error("FD access mode failure: chan mode 0x%x, wanted 0x%x",
64 long kchanio(void *vc, void *buf, int n, int mode)
77 r = devtab[c->type].read(c, buf, n, c->offset);
78 else if (mode == O_WRITE)
79 r = devtab[c->type].write(c, buf, n, c->offset);
81 error("kchanio: use only O_READ xor O_WRITE");
85 spin_unlock(&c->lock);
90 int openmode(uint32_t omode)
92 /* GIANT WARNING: if this ever throws, ipopen (and probably many others) will
93 * screw up refcnts of Qctl, err, data, etc */
95 /* this is the old plan9 style. i think they want to turn exec into read,
96 * and strip off anything higher, and just return the RD/WR style bits. not
97 * stuff like ORCLOSE. the lack of OEXCL might be a bug on their part (it's
98 * the only one of their non-RW-related flags that isn't masked out).
100 * Note that we no longer convert OEXEC/O_EXEC to O_READ, and instead return
101 * just the O_ACCMODE bits. */
102 if (o >= (OTRUNC | OCEXEC | ORCLOSE | OEXEC))
104 o &= ~(OTRUNC | OCEXEC | ORCLOSE);
111 /* no error checking (we have a large number of flags anyway), and we return the
112 * basic access modes (RD/WR/ETC) */
113 return omode & O_ACCMODE;
116 void fdclose(struct fd_table *fdt, int fd)
121 int syschdir(char *path)
132 c = namec(path, Atodir, 0, 0);
143 struct fd_table *fdt = ¤t->open_files;
150 * Take no reference on the chan because we don't really need the
151 * data structure, and are calling fdtochan only for error checks.
152 * fdclose takes care of processes racing through here.
154 fdtochan(fdt, fd, -1, 0, 0);
160 int syscreate(char *path, int mode, uint32_t perm)
171 openmode(mode & ~O_EXCL); /* error check only; OEXCL okay here */
172 c = namec(path, Acreate, mode, perm);
177 fd = newfd(c, mode); /* 9ns mode is the O_FLAGS and perm is glibc mode */
196 c = fdtochan(¤t->open_files, old, -1, 0, 1);
197 if (c->qid.type & QTAUTH) {
210 /* Could pass in the fdt instead of the proc, but we used to need to_proc so
211 * that we could claim a VFS FD. Careful, we don't close the old chan. */
212 int sys_dup_to(struct proc *from_proc, unsigned int from_fd,
213 struct proc *to_proc, unsigned int to_fd)
223 c = fdtochan(&from_proc->open_files, from_fd, -1, 0, 1);
224 if (c->qid.type & QTAUTH) {
228 ret = insert_obj_fdt(&to_proc->open_files, c, to_fd, 0, TRUE, FALSE);
229 /* drop the ref from fdtochan. if insert succeeded, there is one other ref
230 * stored in the FDT */
233 error("Can't insert FD %d into FDG", to_fd);
238 char *sysfd2path(int fd)
248 c = fdtochan(¤t->open_files, fd, -1, 0, 1);
250 if (c->name != NULL) {
251 s = kzmalloc(c->name->len + 1, 0);
256 memmove(s, c->name->s, c->name->len + 1);
263 int sysfauth(int fd, char *aname)
274 c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1);
280 ac = mntauth(c, aname);
282 /* at this point ac is responsible for keeping c alive */
301 int sysfversion(int fd, unsigned int msize, char *vers, unsigned int arglen)
312 /* check there's a NUL in the version string */
313 if (arglen == 0 || memchr(vers, 0, arglen) == 0)
316 c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1);
322 m = mntversion(c, vers, msize, arglen);
331 int syspipe(int fd[2])
336 static char *names[] = { "data", "data1" };
338 d = &devtab[devno("pipe", 0)];
344 /* need to remove from the fd table and make sure the chan is closed
345 * exactly once. if fd[i] >= 0, then the fd is valid (or it was!) and
346 * the fd table has the only ref (newfd() currently decrefs/consumes the
347 * reference). cclose() doesn't care if you pass it 0 (like kfree()). */
349 close_fd(¤t->open_files, fd[0]);
353 close_fd(¤t->open_files, fd[1]);
359 c[0] = namec("#pipe", Atodir, 0, 0);
361 if (walk(&c[0], &names[0], 1, 1, NULL) < 0)
363 if (walk(&c[1], &names[1], 1, 1, NULL) < 0)
365 c[0] = d->open(c[0], O_RDWR);
366 c[1] = d->open(c[1], O_RDWR);
367 fd[0] = newfd(c[0], 0);
370 fd[1] = newfd(c[1], 0);
377 int sysfwstat(int fd, uint8_t * buf, int n)
387 validstat(buf, n, 0);
388 c = fdtochan(¤t->open_files, fd, -1, 1, 1);
393 n = devtab[c->type].wstat(c, buf, n);
401 long bindmount(struct chan *c, char *old, int flag, char *spec)
407 if (flag > MMASK || (flag & MORDER) == (MBEFORE | MAFTER))
410 c1 = namec(old, Amount, 0, 0);
415 ret = cmount(c, c1, flag, spec);
422 int sysbind(char *new, char *old, int flags)
433 c0 = namec(new, Abind, 0, 0);
438 r = bindmount(c0, old, flags, "");
446 int sysmount(int fd, int afd, char *old, int flags, char *spec)
459 struct mntparam mntparam;
471 bc.c = fdtochan(¤t->open_files, fd, O_RDWR, 0, 1);
473 ac.c = fdtochan(¤t->open_files, afd, O_RDWR, 0, 1);
474 mntparam.chan = bc.c;
475 mntparam.authchan = ac.c;
476 mntparam.spec = spec;
477 mntparam.flags = flags;
478 c0.c = devtab[devno("mnt", 0)].attach((char *)&mntparam);
480 r = bindmount(c0.c, old, flags, spec);
489 int sysunmount(char *src_path, char *onto_path)
508 cmount.c = namec(onto_path, Amount, 0, 0);
509 if (src_path != NULL && src_path[0] != '\0') {
511 * This has to be namec(..., Aopen, ...) because
512 * if arg[0] is something like /srv/cs or /fd/0,
513 * opening it is the only way to get at the real
516 cmounted.c = namec(src_path, Aopen, O_READ, 0);
519 cunmount(cmount.c, cmounted.c);
526 int sysopen(char *path, int vfs_flags)
537 openmode(vfs_flags); /* error check only */
538 c = namec(path, Aopen, vfs_flags, 0);
543 fd = newfd(c, vfs_flags);
552 long unionread(struct chan *c, void *va, long n)
564 /* bring mount in sync with c->uri and c->umc */
565 for (i = 0; mount != NULL && i < c->uri; i++)
569 while (mount != NULL) {
570 /* Error causes component of union to be skipped */
572 /* normally we want to discard the error, but for our crude kdirent
573 * hack, we need to repeat unionread if we saw an Eshort */
575 if (!strcmp(current_errstr(), Eshort)) {
577 qunlock(&c->umqlock);
580 /* poperror done below for either branch */
582 if (c->umc == NULL) {
583 c->umc = cclone(mount->to);
584 c->umc = devtab[c->umc->type].open(c->umc,
588 nr = devtab[c->umc->type].read(c->umc, va, n, c->umc->offset);
590 nr = 0; /* dev.c can return -1 */
591 c->umc->offset += nr;
593 poperror(); /* pop regardless */
598 /* Advance to next element */
607 qunlock(&c->umqlock);
611 static void unionrewind(struct chan *c)
619 qunlock(&c->umqlock);
622 static long rread(int fd, void *va, long n, int64_t * offp)
629 /* dirty dirent hack */
637 c = fdtochan(¤t->open_files, fd, O_READ, 1, 1);
646 dir = c->qid.type & QTDIR;
648 /* kdirent hack: userspace is expecting kdirents, but all of 9ns
649 * produces Ms. Just save up what we don't use and append the
650 * new stuff later. Allocate DIRREADSIZE bytes for that purpose.
654 /* expecting only one dirent at a time, o/w we're busted */
655 assert(n >= sizeof(struct kdirent));
657 c->buf=kmalloc(DIRREADSIZE, KMALLOC_WAIT);
660 /* Attempt to extract an M, in case there was some already */
661 amt = convM2kdirent(c->buf, c->bufused, real_va, 0);
664 memmove(c->buf, c->buf + amt, c->bufused);
665 n = sizeof(struct kdirent);
670 printk("Well, sysread of a dir sucks.%s \n", current_errstr());
673 va = c->buf + c->bufused;
674 n = DIRREADSIZE - c->bufused;
677 /* this is the normal plan9 read */
679 n = unionread(c, va, n);
682 spin_lock(&c->lock); /* lock for int64_t assignment */
684 spin_unlock(&c->lock);
694 spin_unlock(&c->lock);
699 n = devtab[c->type].read(c, va, n, off);
707 spin_unlock(&c->lock);
710 /* dirty kdirent hack */
713 c->bufused = c->bufused + n;
714 /* extract an M from the front, then shift the remainder back */
715 amt = convM2kdirent(c->buf, c->bufused, real_va, 0);
717 memmove(c->buf, c->buf + amt, c->bufused);
718 n = amt ? sizeof(struct kdirent) : 0;
719 poperror(); /* matching our debugging waserror */
730 /* Reads exactly n bytes from chan c, starting at its offset. Can block, but if
731 * we get 0 back too soon (EOF or error), then we'll error out with Eshort.
732 * That might need a little work - if there was a previous error, then we
733 * clobbered it and only know Eshort but not why we completed early. */
734 void read_exactly_n(struct chan *c, void *vp, long n)
738 int total = 0, want = n;
742 nn = devtab[c->type].read(c, p, n, c->offset);
743 printd("readn: Got %d@%lld\n", nn, c->offset);
745 error("%s: wanted %d, got %d", Eshort, want, total);
748 spin_unlock(&c->lock);
755 long sysread(int fd, void *va, long n)
757 return rread(fd, va, n, NULL);
760 long syspread(int fd, void *va, long n, int64_t off)
762 return rread(fd, va, n, &off);
765 int sysremove(char *path)
775 c = namec(path, Aremove, 0, 0);
777 c->type = -1; /* see below */
781 devtab[c->type].remove(c);
783 * Remove clunks the fid, but we need to recover the Chan
784 * so fake it up. -1 aborts the dev's close.
794 int64_t sysseek(int fd, int64_t off, int whence)
805 c = fdtochan(¤t->open_files, fd, -1, 1, 1);
811 if (devtab[c->type].dc == '|')
816 if (c->qid.type & QTDIR) {
822 spin_lock(&c->lock); /* lock for int64_t assignment */
824 spin_unlock(&c->lock);
828 if (c->qid.type & QTDIR)
830 spin_lock(&c->lock); /* lock for read/write update */
833 spin_unlock(&c->lock);
837 spin_unlock(&c->lock);
841 if (c->qid.type & QTDIR)
843 dir = chandirstat(c);
845 error("internal error: stat error in seek");
850 spin_lock(&c->lock); /* lock for read/write update */
852 spin_unlock(&c->lock);
866 void validstat(uint8_t * s, int n, int slashok)
872 if (statcheck(s, n) < 0)
874 /* verify that name entry is acceptable */
875 s += STATFIXLEN - 4 * BIT16SZ; /* location of first string */
877 * s now points at count for first string.
878 * if it's too long, let the server decide; this is
879 * only for his protection anyway. otherwise
880 * we'd have to allocate and waserror.
884 if (m + 1 > sizeof buf) {
889 /* name could be '/' */
890 if (strcmp(buf, "/") != 0)
891 validname(buf, slashok);
894 int sysfstat(int fd, uint8_t *buf, int n)
904 c = fdtochan(¤t->open_files, fd, -1, 0, 1);
909 devtab[c->type].stat(c, buf, n);
918 int sysfstatakaros(int fd, struct kstat *ks)
923 buf = kmalloc(n, KMALLOC_WAIT);
924 n = sysfstat(fd, buf, n);
926 convM2kstat(buf, n, ks);
933 int sysstat(char *path, uint8_t *buf, int n)
943 c = namec(path, Aaccess, 0, 0);
948 devtab[c->type].stat(c, buf, n);
957 int sysstatakaros(char *path, struct kstat *ks)
962 buf = kmalloc(n, KMALLOC_WAIT);
963 n = sysstat(path, buf, n);
965 convM2kstat(buf, n, ks);
972 static long rwrite(int fd, void *va, long n, int64_t * offp)
984 c = fdtochan(¤t->open_files, fd, O_WRITE, 1, 1);
989 if (c->qid.type & QTDIR)
996 /* append changes the offset to the end, and even if we fail later, this
997 * change will persist */
998 if (c->flag & O_APPEND) {
999 dir = chandirstat(c);
1001 error("internal error: stat error in append write");
1002 spin_lock(&c->lock); /* legacy lock for int64 assignment */
1003 c->offset = dir->length;
1004 spin_unlock(&c->lock);
1007 spin_lock(&c->lock);
1010 spin_unlock(&c->lock);
1016 spin_lock(&c->lock);
1018 spin_unlock(&c->lock);
1024 m = devtab[c->type].write(c, va, n, off);
1027 if (offp == NULL && m < n) {
1028 spin_lock(&c->lock);
1030 spin_unlock(&c->lock);
1040 long syswrite(int fd, void *va, long n)
1042 return rwrite(fd, va, n, NULL);
1045 long syspwrite(int fd, void *va, long n, int64_t off)
1047 return rwrite(fd, va, n, &off);
1050 int syswstat(char *path, uint8_t * buf, int n)
1060 validstat(buf, n, 0);
1061 c = namec(path, Aaccess, 0, 0);
1066 n = devtab[c->type].wstat(c, buf, n);
1074 struct dir *chandirstat(struct chan *c)
1082 for (i = 0; i < 2; i++) { /* should work by the second try */
1083 d = kzmalloc(sizeof(struct dir) + nd, 0);
1084 buf = (uint8_t *) & d[1];
1090 n = devtab[c->type].stat(c, buf, nd);
1096 nd = GBIT16((uint8_t *) buf) + BIT16SZ; /* size needed to store whole stat buffer including count */
1098 convM2D(buf, n, d, (char *)&d[1]);
1101 /* else sizeof(Dir)+nd is plenty */
1108 struct dir *sysdirstat(char *name)
1119 c = namec(name, Aaccess, 0, 0);
1132 struct dir *sysdirfstat(int fd)
1143 c = fdtochan(¤t->open_files, fd, -1, 0, 1);
1156 int sysdirwstat(char *name, struct dir *dir)
1163 buf = kzmalloc(r, 0);
1164 convD2M(dir, buf, r);
1165 r = syswstat(name, buf, r);
1167 return r < 0 ? r : 0;
1170 int sysdirfwstat(int fd, struct dir *dir)
1177 buf = kzmalloc(r, 0);
1178 convD2M(dir, buf, r);
1179 r = sysfwstat(fd, buf, r);
1181 return r < 0 ? r : 0;
1184 static long dirpackage(uint8_t * buf, long ts, struct kdirent **d)
1188 long ss, i, n, nn, m = 0;
1196 * first find number of all stats, check they look like stats, & size all associated strings
1200 for (i = 0; i < ts; i += m) {
1201 m = BIT16SZ + GBIT16(&buf[i]);
1202 if (statcheck(&buf[i], m) < 0)
1209 error("bad directory format");
1211 *d = kzmalloc(n * sizeof(**d) + ss, 0);
1216 * then convert all buffers
1218 s = (char *)*d + n * sizeof(**d);
1220 for (i = 0; i < ts; i += m) {
1221 m = BIT16SZ + GBIT16((uint8_t *) & buf[i]);
1222 if (nn >= n || /*convM2D */ convM2kdirent(&buf[i], m, *d + nn, s) != m) {
1225 error("bad directory entry");
1234 long sysdirread(int fd, struct kdirent **d)
1245 buf = kzmalloc(DIRREADLIM, 0);
1252 ts = sysread(fd, buf, DIRREADLIM);
1254 ts = dirpackage(buf, ts, d);
1261 int sysiounit(int fd)
1267 c = fdtochan(¤t->open_files, fd, -1, 0, 1);
1271 return 0; /* n.b. */
1279 void print_chaninfo(struct chan *c)
1282 char buf[64] = { 0 };
1283 bool has_dev = c->type != -1;
1284 if (has_dev && !devtab[c->type].chaninfo) {
1285 printk("Chan type %d has no chaninfo!\n", c->type);
1288 printk("Chan pathname: %s ref %d, Dev: %s, Devinfo: %s",
1289 c->name ? c->name->s : "no cname",
1290 kref_refcnt(&c->ref),
1291 has_dev ? devtab[c->type].name : "no dev",
1292 has_dev ? devtab[c->type].chaninfo(c, buf, sizeof(buf)) : "");
1294 printk("qid.path: %p\n", c->qid.path);
1298 /* TODO: 9ns ns inheritance flags: Shared, copied, or empty. The old fgrp is
1299 * managed by the fd_table, which is handled outside this function. We share
1301 int plan9setup(struct proc *new_proc, struct proc *parent, int flags)
1304 struct proc *old_current;
1305 struct kref *new_dot_ref;
1308 printk("plan9setup failed, %s\n", current_errstr());
1313 /* We are probably spawned by the kernel directly, and have no parent to
1316 * TODO: One problem is namec wants a current set for things like
1317 * genbuf. So we'll use new_proc for this bootstrapping. Note
1318 * switch_to() also loads the cr3. */
1319 new_proc->pgrp = newpgrp();
1320 old_current = switch_to(new_proc);
1321 new_proc->slash = namec("#root", Atodir, 0, 0);
1322 if (!new_proc->slash)
1323 panic("no root device");
1324 switch_back(new_proc, old_current);
1325 /* Want the name to be "/" instead of "#root" */
1326 cnameclose(new_proc->slash->name);
1327 new_proc->slash->name = newcname("/");
1328 new_proc->dot = cclone(new_proc->slash);
1332 /* Shared semantics */
1333 kref_get(&parent->pgrp->ref, 1);
1334 new_proc->pgrp = parent->pgrp;
1335 /* copy semantics on / and . (doesn't make a lot of sense in akaros o/w) */
1336 /* / should never disappear while we hold a ref to parent */
1337 chan_incref(parent->slash);
1338 new_proc->slash = parent->slash;
1339 /* dot could change concurrently, and we could fail to gain a ref if whoever
1340 * decref'd dot triggered the release. if that did happen, parent->dot
1341 * should update and we can try again. */
1342 while (!(new_dot_ref = kref_get_not_zero(&parent->dot->ref, 1)))
1344 /* And now, we can't trust parent->dot, and need to determine our dot from
1345 * the ref we obtained. */
1346 new_proc->dot = container_of(new_dot_ref, struct chan, ref);
1351 /* Open flags, create modes, access types, file flags, and all that...
1353 * there are a bunch of things here:
1354 * 1) file creation flags (e.g. O_TRUNC)
1355 * 2) file status flags (e.g. O_APPEND)
1356 * 3) file open modes (e.g. O_RDWR)
1357 * 4) file descriptor flags (e.g. CLOEXEC)
1358 * 5) file creation mode (e.g. S_IRWXU)
1359 * the 1-4 are passed in via open's vfs_flags, and the 5 via mode only when
1362 * file creation flags (1) only matter when creating, but aren't permanent.
1363 * O_EXCL, O_DIRECTORY, O_TRUNC, etc.
1365 * file status flags (2) are per struct file/chan. stuff like O_APPEND,
1366 * O_ASYNC, etc. we convert those to an internal flag bit and store it in c->flag
1368 * the open mode (3) matters for a given FD/chan (chan->mode), and should be
1369 * stored in the chan. (c->mode) stuff like O_RDONLY.
1371 * the file descriptor flags (4) clearly are in the FD. note that the same
1372 * file/chan can be opened by two different FDs, with different flags. the only
1373 * one anyone uses is CLOEXEC. while exec may not last long in akaros, i can
1374 * imagine similar "never pass to children" flags/meanings.
1376 * the file creation mode (5) matters for the device's permissions; given this,
1377 * it should be stored in the device/inode. ACLs fall under this category.
1379 * finally, only certain categories can be edited afterwards: file status flags
1380 * (2), FD flags (4), and file permissions (5). */
1381 int fd_getfl(int fd)
1391 c = fdtochan(¤t->open_files, fd, -1, 0, 1);
1394 ret |= c->flag & CEXTERNAL_FLAGS;
1401 static bool cexternal_flags_differ(int set1, int set2, int flags)
1403 flags &= CEXTERNAL_FLAGS;
1404 return (set1 & flags) ^ (set2 & flags);
1407 int fd_setfl(int fd, int flags)
1416 c = fdtochan(¤t->open_files, fd, -1, 0, 1);
1417 if (cexternal_flags_differ(flags, c->flag, O_CLOEXEC)) {
1418 /* TODO: The whole CCEXEC / O_CLOEXEC on 9ns needs work */
1420 error("can't toggle O_CLOEXEC with setfl");
1422 if (cexternal_flags_differ(flags, c->flag, O_PATH)) {
1424 error("can't toggle O_PATH with setfl");
1426 if (cexternal_flags_differ(flags, c->flag, O_NONBLOCK)) {
1427 /* If we want to let them toggle NONBLOCK, it'd require a device op */
1429 error("can't set O_NONBLOCK, use a device-specific ctl command");
1431 c->flag = (c->flag & ~CEXTERNAL_FLAGS) | (flags & CEXTERNAL_FLAGS);