akaros/kern/drivers/dev/mnt.c
   1/* Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
   2 * Portions Copyright © 1997-1999 Vita Nuova Limited
   3 * Portions Copyright © 2000-2007 Vita Nuova Holdings Limited
   4 *                                (www.vitanuova.com)
   5 * Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
   6 *
   7 * Modified for the Akaros operating system:
   8 * Copyright (c) 2013-2014 The Regents of the University of California
   9 * Copyright (c) 2013-2015 Google Inc.
  10 *
  11 * Permission is hereby granted, free of charge, to any person obtaining a copy
  12 * of this software and associated documentation files (the "Software"), to deal
  13 * in the Software without restriction, including without limitation the rights
  14 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15 * copies of the Software, and to permit persons to whom the Software is
  16 * furnished to do so, subject to the following conditions:
  17 *
  18 * The above copyright notice and this permission notice shall be included in
  19 * all copies or substantial portions of the Software.
  20 *
  21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
  24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  27 * SOFTWARE. */
  28
  29#include <slab.h>
  30#include <kmalloc.h>
  31#include <kref.h>
  32#include <string.h>
  33#include <stdio.h>
  34#include <assert.h>
  35#include <error.h>
  36#include <cpio.h>
  37#include <pmap.h>
  38#include <smp.h>
  39#include <net/ip.h>
  40#include <smallidpool.h>
  41
  42struct dev mntdevtab;
  43
  44static char *devname(void)
  45{
  46        return mntdevtab.name;
  47}
  48
  49/*
  50 * References are managed as follows:
  51 * The channel to the server - a network connection or pipe - has one
  52 * reference for every Chan open on the server.  The server channel has
  53 * c->mux set to the Mnt used for muxing control to that server.  Mnts
  54 * have no reference count; they go away when c goes away.
  55 * Each channel derived from the mount point has mchan set to c,
  56 * and increfs/decrefs mchan to manage references on the server
  57 * connection.
  58 */
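/* A rough sketch of those relationships, using the names from this file
 * (nothing new, just the comment above in picture form):
 *
 *   chan derived from the mount point        server chan (network/pipe)
 *     ->mchan  ------------------------->      c
 *                                              c->mux  --> struct mnt
 *                                              mnt->c  --> c
 */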
  59
  60#define MAXRPC (IOHDRSZ+8192)
  61#define MAXTAG MAX_U16_POOL_SZ
  62
  63static __inline int isxdigit(int c)
  64{
  65        if ((c >= '0') && (c <= '9'))
  66                return 1;
  67        if ((c >= 'a') && (c <= 'f'))
  68                return 1;
  69        if ((c >= 'A') && (c <= 'F'))
  70                return 1;
  71        return 0;
  72}
  73
  74struct mntrpc {
  75        struct chan *c;         /* Channel for whom we are working */
  76        struct mntrpc *list;    /* Free/pending list */
  77        struct fcall request;   /* Outgoing file system protocol message */
  78        struct fcall reply;     /* Incoming reply */
  79        struct mnt *m;          /* Mount device during rpc */
  80        struct rendez r;        /* Place to hang out */
  81        uint8_t *rpc;           /* I/O Data buffer */
  82        unsigned int rpclen;    /* len of buffer */
  83        struct block *b;        /* reply blocks */
  84        char done;              /* Rpc completed */
  85        uint64_t stime;         /* start time for mnt statistics */
  86        uint32_t reqlen;        /* request length for mnt statistics */
  87        uint32_t replen;        /* reply length for mnt statistics */
  88        struct mntrpc *flushed; /* message this one flushes */
  89};
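/* Typical lifecycle of a struct mntrpc, as a summary of the code below:
 * mntralloc() -> fill in request -> mountrpc()/mountio() transmit and wait for
 * the tagged reply -> reply decoded into ->reply (data blocks hung off ->b) ->
 * mntfree().  Interrupted RPCs grow a chain of Tflushes via mntflushalloc(). */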
  90
   91/* Our TRUNC and remove-on-close flags differ from 9P's, so we'll need to
   92 * translate.  I got these flags from http://man.cat-v.org/plan_9/5/open */
  93#define MNT_9P_OPEN_OTRUNC              0x10
  94#define MNT_9P_OPEN_ORCLOSE             0x40
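/* For example (a sketch of the translation done in mntopencreate(), not new
 * behavior): an Akaros open with O_WRITE | O_TRUNC goes out as the 9P access
 * mode for write plus the OTRUNC bit (0x10); O_REMCLO adds ORCLOSE (0x40). */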
  95
  96struct Mntalloc {
  97        spinlock_t l;
  98        struct mnt *list;               /* Mount devices in use */
  99        struct mnt *mntfree;            /* Free list */
 100        struct mntrpc *rpcfree;
 101        int nrpcfree;
 102        int nrpcused;
 103        uint32_t id;
 104        struct u16_pool *tags;
 105} mntalloc;
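/* mntralloc() and mntfree() below keep a small cache of mntrpc structs on
 * rpcfree (capped at 10 entries), so most RPCs reuse a header and its I/O
 * buffer instead of hitting kzmalloc. */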
 106
 107void mattach(struct mnt *, struct chan *, char *unused_char_p_t);
 108struct mnt *mntchk(struct chan *);
 109void mntdirfix(uint8_t * unused_uint8_p_t, struct chan *);
 110struct mntrpc *mntflushalloc(struct mntrpc *, uint32_t);
 111void mntflushfree(struct mnt *, struct mntrpc *);
 112void mntfree(struct mntrpc *);
 113void mntgate(struct mnt *);
 114void mntpntfree(struct mnt *);
 115void mntqrm(struct mnt *, struct mntrpc *);
 116struct mntrpc *mntralloc(struct chan *, uint32_t);
 117size_t mntrdwr(int unused_int, struct chan *, void *, size_t, off64_t);
 118int mntrpcread(struct mnt *, struct mntrpc *);
 119void mountio(struct mnt *, struct mntrpc *);
 120void mountmux(struct mnt *, struct mntrpc *);
 121void mountrpc(struct mnt *, struct mntrpc *);
 122int rpcattn(void *);
 123struct chan *mntchan(void);
 124
 125void (*mntstats) (int unused_int, struct chan *, uint64_t, uint32_t);
 126
 127static void mntinit(void)
 128{
 129        mntalloc.id = 1;
 130        mntalloc.tags = create_u16_pool(MAXTAG);
 131        (void) get_u16(mntalloc.tags);  /* don't allow 0 as a tag */
 132        //fmtinstall('F', fcallfmt);
 133/*      fmtinstall('D', dirfmt); */
 134/*      fmtinstall('M', dirmodefmt);  */
 135}
 136
 137/*
 138 * Version is not multiplexed: message sent only once per connection.
 139 */
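/* A minimal sketch of the exchange below, in 9P wire terms (sizes in bytes,
 * strings carry a 2-byte length prefix); the tag is NOTAG since version is
 * not a multiplexed request:
 *
 *   -> size[4] Tversion tag[2] msize[4] version[s]
 *   <- size[4] Rversion tag[2] msize[4] version[s]
 *
 * The reply may only shrink msize, and the returned version string must agree
 * with what we offered; both are checked before the struct mnt is built. */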
 140long mntversion(struct chan *c, char *version, int msize, int returnlen)
 141{
 142        ERRSTACK(2);
 143        struct fcall f;
 144        uint8_t *msg;
 145        struct mnt *m;
 146        char *v;
 147        long k, l;
 148        uint64_t oo;
 149        char buf[128];
 150
 151        /* make sure no one else does this until we've established ourselves */
 152        qlock(&c->umqlock);
 153        if (waserror()) {
 154                qunlock(&c->umqlock);
 155                nexterror();
 156        }
 157
 158        /* defaults */
 159        if (msize == 0)
 160                msize = MAXRPC;
 161        if (msize > c->iounit && c->iounit != 0)
 162                msize = c->iounit;
 163        v = version;
 164        if (v == NULL || v[0] == '\0')
 165                v = VERSION9P;
 166
 167        /* validity */
 168        if (msize < 0)
 169                error(EFAIL, "bad iounit in version call");
 170        if (strncmp(v, VERSION9P, strlen(VERSION9P)) != 0)
 171                error(EFAIL, "bad 9P version specification");
 172
 173        m = c->mux;
 174
 175        if (m != NULL) {
 176                qunlock(&c->umqlock);
 177                poperror();
 178
 179                strlcpy(buf, m->version, sizeof(buf));
 180                k = strlen(buf);
 181                if (strncmp(buf, v, k) != 0) {
 182                        snprintf(buf, sizeof buf,
 183                                 "incompatible 9P versions %s %s", m->version,
 184                                 v);
 185                        error(EFAIL, buf);
 186                }
 187                if (returnlen > 0) {
 188                        if (returnlen < k)
 189                                error(ENAMETOOLONG, ERROR_FIXME);
 190                        memmove(version, buf, k);
 191                }
 192                return k;
 193        }
 194
 195        f.type = Tversion;
 196        f.tag = NOTAG;
 197        f.msize = msize;
 198        f.version = v;
 199        msg = kzmalloc(8192 + IOHDRSZ, 0);
 200        if (msg == NULL)
 201                exhausted("version memory");
 202        if (waserror()) {
 203                kfree(msg);
 204                nexterror();
 205        }
 206        k = convS2M(&f, msg, 8192 + IOHDRSZ);
 207        if (k == 0)
 208                error(EFAIL, "bad fversion conversion on send");
 209
 210        spin_lock(&c->lock);
 211        oo = c->offset;
 212        c->offset += k;
 213        spin_unlock(&c->lock);
 214
 215        l = devtab[c->type].write(c, msg, k, oo);
 216
 217        if (l < k) {
 218                spin_lock(&c->lock);
 219                c->offset -= k - l;
 220                spin_unlock(&c->lock);
 221                error(EFAIL, "short write in fversion");
 222        }
 223
 224        /* message sent; receive and decode reply */
 225        k = devtab[c->type].read(c, msg, 8192 + IOHDRSZ, c->offset);
 226        if (k <= 0)
 227                error(EFAIL, "EOF receiving fversion reply");
 228
 229        spin_lock(&c->lock);
 230        c->offset += k;
 231        spin_unlock(&c->lock);
 232
 233        l = convM2S(msg, k, &f);
 234        if (l != k)
 235                error(EFAIL, "bad fversion conversion on reply");
 236        if (f.type != Rversion) {
 237                if (f.type == Rerror)
 238                        error(EFAIL, f.ename);
 239                error(EFAIL, "unexpected reply type in fversion");
 240        }
 241        if (f.msize > msize)
 242                error(EFAIL, "server tries to increase msize in fversion");
 243        if (f.msize < 256 || f.msize > 1024 * 1024)
 244                error(EFAIL, "nonsense value of msize in fversion");
 245        if (strncmp(f.version, v, strlen(f.version)) != 0)
 246                error(EFAIL, "bad 9P version returned from server");
 247
 248        /* now build Mnt associated with this connection */
 249        spin_lock(&mntalloc.l);
 250        m = mntalloc.mntfree;
 251        if (m != 0)
 252                mntalloc.mntfree = m->list;
 253        else {
 254                m = kzmalloc(sizeof(struct mnt), 0);
 255                if (m == 0) {
 256                        spin_unlock(&mntalloc.l);
 257                        exhausted("mount devices");
 258                }
 259        }
 260        m->list = mntalloc.list;
 261        mntalloc.list = m;
 262        m->version = NULL;
 263        kstrdup(&m->version, f.version);
 264        m->id = mntalloc.id++;
 265        m->q = qopen(10 * MAXRPC, 0, NULL, NULL);
 266        m->msize = f.msize;
 267        spin_unlock(&mntalloc.l);
 268
 269        poperror();     /* msg */
 270        kfree(msg);
 271
 272        spin_lock(&m->lock);
 273        m->queue = 0;
 274        m->rip = 0;
 275
 276        c->flag |= CMSG;
 277        c->mux = m;
 278        m->c = c;
 279        spin_unlock(&m->lock);
 280
 281        poperror();     /* c */
 282        qunlock(&c->umqlock);
 283        k = strlen(f.version);
 284        if (returnlen > 0) {
 285                if (returnlen < k)
 286                        error(ENAMETOOLONG, ERROR_FIXME);
 287                memmove(version, f.version, k);
 288        }
 289
 290        return k;
 291}
 292
 293struct chan *mntauth(struct chan *c, char *spec)
 294{
 295        ERRSTACK(2);
 296        struct mnt *m;
 297        struct mntrpc *r;
 298
 299        m = c->mux;
 300
 301        if (m == NULL) {
 302                mntversion(c, VERSION9P, MAXRPC, 0);
 303                m = c->mux;
 304                if (m == NULL)
 305                        error(EINVAL, ERROR_FIXME);
 306        }
 307
 308        c = mntchan();
 309        if (waserror()) {
 310                /* Close must not be called since it will
 311                 * call mnt recursively
 312                 */
 313                chanfree(c);
 314                nexterror();
 315        }
 316
 317        r = mntralloc(0, m->msize);
 318
 319        if (waserror()) {
 320                mntfree(r);
 321                nexterror();
 322        }
 323
 324        r->request.type = Tauth;
 325        r->request.afid = c->fid;
 326        /* This assumes we're called from a syscall, which should always be
 327         * true. */
 328        if (!current_kthread->sysc)
 329                warn("Kthread %s didn't have a syscall, current is %s",
 330                     current_kthread->name, current ? current->progname : NULL);
 331        r->request.uname = current->user.name;
 332        r->request.aname = spec;
 333        mountrpc(m, r);
 334
 335        c->qid = r->reply.aqid;
 336        c->mchan = m->c;
 337        chan_incref(m->c);
 338        c->mqid = c->qid;
 339        c->mode = O_RDWR;
 340
 341        poperror();     /* r */
 342        mntfree(r);
 343
 344        poperror();     /* c */
 345
 346        return c;
 347
 348}
 349
 350static struct chan *mntattach(char *muxattach)
 351{
 352        ERRSTACK(2);
 353        struct mnt *m;
 354        struct chan *c;
 355        struct mntrpc *r;
 356        struct mntparam *params = (struct mntparam *)muxattach;
 357
 358        c = params->chan;
 359
 360        m = c->mux;
 361
 362        if (m == NULL) {
 363                mntversion(c, NULL, 0, 0);
 364                m = c->mux;
 365                if (m == NULL)
 366                        error(EINVAL, ERROR_FIXME);
 367        }
 368
 369        c = mntchan();
 370        if (waserror()) {
 371                /* Close must not be called since it will
 372                 * call mnt recursively
 373                 */
 374                chanfree(c);
 375                nexterror();
 376        }
 377
 378        r = mntralloc(0, m->msize);
 379
 380        if (waserror()) {
 381                mntfree(r);
 382                nexterror();
 383        }
 384
 385        r->request.type = Tattach;
 386        r->request.fid = c->fid;
 387        if (params->authchan == NULL)
 388                r->request.afid = NOFID;
 389        else
 390                r->request.afid = params->authchan->fid;
 391        /* This assumes we're called from a syscall, which should always be
 392         * true. */
 393        if (!current_kthread->sysc)
 394                warn("Kthread %s didn't have a syscall, current is %s",
 395                     current_kthread->name, current ? current->progname : NULL);
 396        r->request.uname = current->user.name;
 397        r->request.aname = params->spec;
 398        mountrpc(m, r);
 399
 400        c->qid = r->reply.qid;
 401        c->mchan = m->c;
 402        chan_incref(m->c);
 403        c->mqid = c->qid;
 404
 405        poperror();     /* r */
 406        mntfree(r);
 407
 408        poperror();     /* c */
 409        return c;
 410}
 411
 412struct chan *mntchan(void)
 413{
 414        struct chan *c;
 415
 416        c = devattach(devname(), 0);
 417        spin_lock(&mntalloc.l);
 418        c->dev = mntalloc.id++;
 419        spin_unlock(&mntalloc.l);
 420
 421        if (c->mchan)
 422                panic("mntchan non-zero %p", c->mchan);
 423        return c;
 424}
 425
 426static struct walkqid *mntwalk(struct chan *c, struct chan *nc, char **name,
 427                                                           unsigned int nname)
 428{
 429        ERRSTACK(2);
 430        volatile int alloc;
 431        int i;
 432        struct mnt *m;
 433        struct mntrpc *r;
 434        struct walkqid *wq;
 435
 436        if (nc != NULL)
 437                printd("mntwalk: nc != NULL\n");
 438        if (nname > MAXWELEM)
 439                error(EFAIL, "devmnt: too many name elements");
 440        alloc = 0;
 441        wq = kzmalloc(sizeof(struct walkqid) + nname * sizeof(struct qid),
 442                                  MEM_WAIT);
 443        if (waserror()) {
 444                if (alloc && wq->clone != NULL)
 445                        cclose(wq->clone);
 446                kfree(wq);
 447                poperror();
 448                return NULL;
 449        }
 450
 451        alloc = 0;
 452        m = mntchk(c);
 453        r = mntralloc(c, m->msize);
 454        if (nc == NULL) {
 455                nc = devclone(c);
  456                /* Until the other side accepts this fid, we can't mntclose it.
  457                 * Therefore set type to -1 for now.  Inferno set this to 0,
  458                 * assuming it was devroot; -1 lines up with chanrelease and
  459                 * newchan. */
 460                nc->type = -1;
 461                alloc = 1;
 462        }
 463        wq->clone = nc;
 464
 465        if (waserror()) {
 466                mntfree(r);
 467                nexterror();
 468        }
 469        r->request.type = Twalk;
 470        r->request.fid = c->fid;
 471        r->request.newfid = nc->fid;
 472        r->request.nwname = nname;
 473        memmove(r->request.wname, name, nname * sizeof(char *));
 474
 475        mountrpc(m, r);
 476
 477        if (r->reply.nwqid > nname)
 478                error(EFAIL, "too many QIDs returned by walk");
 479        if (r->reply.nwqid < nname) {
 480                if (alloc)
 481                        cclose(nc);
 482                wq->clone = NULL;
 483                if (r->reply.nwqid == 0) {
 484                        kfree(wq);
 485                        wq = NULL;
 486                        goto Return;
 487                }
 488        }
 489
 490        /* move new fid onto mnt device and update its qid */
 491        if (wq->clone != NULL) {
 492                if (wq->clone != c) {
 493                        wq->clone->type = c->type;
 494                        wq->clone->mchan = c->mchan;
 495                        chan_incref(c->mchan);
 496                }
 497                if (r->reply.nwqid > 0)
 498                        wq->clone->qid = r->reply.wqid[r->reply.nwqid - 1];
 499        }
 500        wq->nqid = r->reply.nwqid;
 501        for (i = 0; i < wq->nqid; i++)
 502                wq->qid[i] = r->reply.wqid[i];
 503
 504Return:
 505        poperror();
 506        mntfree(r);
 507        poperror();
 508        return wq;
 509}
 510
 511static size_t mntstat(struct chan *c, uint8_t *dp, size_t n)
 512{
 513        ERRSTACK(1);
 514        struct mnt *m;
 515        struct mntrpc *r;
 516
 517        if (n < BIT16SZ)
 518                error(EINVAL, ERROR_FIXME);
 519        m = mntchk(c);
 520        r = mntralloc(c, m->msize);
 521        if (waserror()) {
 522                mntfree(r);
 523                nexterror();
 524        }
 525        r->request.type = Tstat;
 526        r->request.fid = c->fid;
 527        mountrpc(m, r);
 528
 529        if (r->reply.nstat > n) {
 530                /* doesn't fit; just patch the count and return */
 531                PBIT16((uint8_t *) dp, r->reply.nstat);
 532                n = BIT16SZ;
 533        } else {
 534                n = r->reply.nstat;
 535                memmove(dp, r->reply.stat, n);
 536                validstat(dp, n, 0);
 537                mntdirfix(dp, c);
 538        }
 539        poperror();
 540        mntfree(r);
 541        return n;
 542}
 543
 544static struct chan *mntopencreate(int type, struct chan *c, char *name,
 545                                  int omode, uint32_t perm)
 546{
 547        ERRSTACK(1);
 548        struct mnt *m;
 549        struct mntrpc *r;
 550
 551        m = mntchk(c);
 552        r = mntralloc(c, m->msize);
 553        if (waserror()) {
 554                mntfree(r);
 555                nexterror();
 556        }
 557        r->request.type = type;
 558        r->request.fid = c->fid;
 559        r->request.mode = omode_to_9p_accmode(omode);
 560        if (omode & O_TRUNC)
 561                r->request.mode |= MNT_9P_OPEN_OTRUNC;
 562        if (omode & O_REMCLO)
 563                r->request.mode |= MNT_9P_OPEN_ORCLOSE;
 564        if (type == Tcreate) {
 565                r->request.perm = perm;
 566                r->request.name = name;
 567        }
 568        mountrpc(m, r);
 569
 570        c->qid = r->reply.qid;
 571        c->offset = 0;
 572        c->mode = openmode(omode);
 573        c->iounit = r->reply.iounit;
 574        if (c->iounit == 0 || c->iounit > m->msize - IOHDRSZ)
 575                c->iounit = m->msize - IOHDRSZ;
 576        c->flag |= COPEN;
 577        poperror();
 578        mntfree(r);
 579
 580        return c;
 581}
 582
 583static struct chan *mntopen(struct chan *c, int omode)
 584{
 585        return mntopencreate(Topen, c, NULL, omode, 0);
 586}
 587
 588static void mntcreate(struct chan *c, char *name, int omode, uint32_t perm,
 589                      char *ext)
 590{
 591        /* TODO: support extensions for e.g. symlinks */
 592        if (perm & DMSYMLINK)
 593                error(EINVAL, "#%s doesn't support symlinks", devname());
 594        mntopencreate(Tcreate, c, name, omode, perm);
 595}
 596
 597static void mntclunk(struct chan *c, int t)
 598{
 599        ERRSTACK(1);
 600        struct mnt *m;
 601        struct mntrpc *r;
 602
 603        m = mntchk(c);
 604        r = mntralloc(c, m->msize);
 605        if (waserror()) {
 606                mntfree(r);
 607                nexterror();
 608        }
 609
 610        r->request.type = t;
 611        r->request.fid = c->fid;
 612        mountrpc(m, r);
 613        mntfree(r);
 614        poperror();
 615}
 616
 617void muxclose(struct mnt *m)
 618{
 619        struct mntrpc *q, *r;
 620
 621        for (q = m->queue; q; q = r) {
 622                r = q->list;
 623                mntfree(q);
 624        }
 625        m->id = 0;
 626        kfree(m->version);
 627        m->version = NULL;
 628        mntpntfree(m);
 629}
 630
 631void mntpntfree(struct mnt *m)
 632{
 633        struct mnt *f, **l;
 634        struct queue *q;
 635
 636        spin_lock(&mntalloc.l);
 637        l = &mntalloc.list;
 638        for (f = *l; f; f = f->list) {
 639                if (f == m) {
 640                        *l = m->list;
 641                        break;
 642                }
 643                l = &f->list;
 644        }
 645        m->list = mntalloc.mntfree;
 646        mntalloc.mntfree = m;
 647        q = m->q;
 648        spin_unlock(&mntalloc.l);
 649
 650        qfree(q);
 651}
 652
 653static void mntclose(struct chan *c)
 654{
 655        mntclunk(c, Tclunk);
 656}
 657
 658static void mntremove(struct chan *c)
 659{
 660        mntclunk(c, Tremove);
 661}
 662
 663static size_t mntwstat(struct chan *c, uint8_t *dp, size_t n)
 664{
 665        ERRSTACK(1);
 666        struct mnt *m;
 667        struct mntrpc *r;
 668
 669        m = mntchk(c);
 670        r = mntralloc(c, m->msize);
 671        if (waserror()) {
 672                mntfree(r);
 673                nexterror();
 674        }
 675        r->request.type = Twstat;
 676        r->request.fid = c->fid;
 677        r->request.nstat = n;
 678        r->request.stat = dp;
 679        mountrpc(m, r);
 680        poperror();
 681        mntfree(r);
 682        return n;
 683}
 684
  685/* Servers should either return units of whole directory entries OR support
  686 * seeking to an arbitrary place.  One or the other; both are fine, but at
  687 * least one is required.  If they return a partial entry at the end of a
  688 * multi-entry result, we'll return a shorter read so that the next offset
  689 * stays aligned to an entry boundary.
  690 */
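/* A sketch of what the directory loop below relies on: each entry in a
 * directory read is a 9P stat record that starts with its own 2-byte size
 * (see http://man.cat-v.org/plan_9/5/stat, also referenced in mntdirfix):
 *
 *   size[2] type[2] dev[4] qid[13] mode[4] atime[4] mtime[4] length[8] ...
 *
 * so BIT16SZ + GBIT16(p) is the length of one whole entry, and any trailing
 * partial entry is trimmed from the byte count we return. */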
 691static size_t mntread(struct chan *c, void *buf, size_t n, off64_t off)
 692{
 693        uint8_t *p, *e;
 694        int nc, dirlen;
 695        int numdirent = 0;
 696
 697
 698        p = buf;
 699
 700        n = mntrdwr(Tread, c, buf, n, off);
 701
 702        if (c->qid.type & QTDIR) {
 703                for (e = &p[n]; p + BIT16SZ < e; p += dirlen) {
 704                        dirlen = BIT16SZ + GBIT16(p);
 705                        if (p + dirlen > e){
 706                                break;
 707                        }
 708                        validstat(p, dirlen, 0);
 709                        mntdirfix(p, c);
 710                        numdirent += dirlen;
 711                }
 712                if (p != e) {
 713                        //error(Esbadstat);
 714                        /* not really. Maybe the server supports
 715                         * arbitrary seek like go9p now does.
 716                         */
 717                        n = numdirent;
 718                }
 719        }
 720        return n;
 721}
 722
 723static size_t mntwrite(struct chan *c, void *buf, size_t n, off64_t off)
 724{
 725        return mntrdwr(Twrite, c, buf, n, off);
 726}
 727
 728size_t mntrdwr(int type, struct chan *c, void *buf, size_t n, off64_t off)
 729{
 730        ERRSTACK(1);
 731        struct mnt *m;
 732        struct mntrpc *r;       /* TO DO: volatile struct { Mntrpc *r; } r; */
 733        char *uba;
 734        uint32_t cnt, nr, nreq;
 735
 736        m = mntchk(c);
 737        uba = buf;
 738        cnt = 0;
 739        for (;;) {
 740                r = mntralloc(c, m->msize);
 741                if (waserror()) {
 742                        mntfree(r);
 743                        nexterror();
 744                }
 745                r->request.type = type;
 746                r->request.fid = c->fid;
 747                r->request.offset = off;
 748                r->request.data = uba;
 749                nr = n;
 750                if (nr > m->msize - IOHDRSZ)
 751                        nr = m->msize - IOHDRSZ;
 752                r->request.count = nr;
 753                mountrpc(m, r);
 754                nreq = r->request.count;
 755                nr = r->reply.count;
 756                if (nr > nreq)
 757                        nr = nreq;
 758
 759                if (type == Tread)
 760                        r->b = bl2mem((uint8_t *) uba, r->b, nr);
 761
 762                poperror();
 763                mntfree(r);
 764                off += nr;
 765                uba += nr;
 766                cnt += nr;
 767                n -= nr;
 768                if (nr != nreq || n == 0 /*|| current->killed */ )
 769                        break;
 770        }
 771        return cnt;
 772}
 773
 774void mountrpc(struct mnt *m, struct mntrpc *r)
 775{
 776        char *sn, *cn;
 777        int t;
 778        char *e;
 779
 780        r->reply.tag = 0;
 781        r->reply.type = Tmax;   /* can't ever be a valid message type */
 782
 783        mountio(m, r);
 784
 785        t = r->reply.type;
 786        switch (t) {
 787        case Rerror:
 788                /* in Akaros mode, first four characters
 789                 * are errno.
 790                 */
 791                e = r->reply.ename;
 792                /* If it is in the format "XXXX <at least one char>" */
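                /* For example (hypothetical values): an ename of
                 * "0016 no such file" parses as errno 0x16 with the
                 * message starting at ename[5], i.e. "no such file". */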
 793                if ((strlen(e) > 5) && isxdigit(e[0]) &&
 794                        isxdigit(e[1]) &&
 795                        isxdigit(e[2]) &&
 796                        isxdigit(e[3])) {
 797
 798                        int errno = strtoul(e, NULL, 16);
 799
 800                        error(errno, &r->reply.ename[5]);
 801                } else
 802                        error(EFAIL, r->reply.ename);
 803        case Rflush:
 804                error(EINTR, ERROR_FIXME);
 805        default:
 806                if (t == r->request.type + 1)
 807                        break;
 808                sn = "?";
 809                if (m->c->name != NULL)
 810                        sn = m->c->name->s;
 811                cn = "?";
 812                if (r->c != NULL && r->c->name != NULL)
 813                        cn = r->c->name->s;
 814                warn("mnt: mismatch from %s %s rep %p tag %d fid %d T%d R%d rp %d\n",
 815                     sn, cn, r, r->request.tag, r->request.fid, r->request.type,
 816                     r->reply.type, r->reply.tag);
 817                error(EPROTO, ERROR_FIXME);
 818        }
 819}
 820
 821static bool kth_proc_is_dying(struct kthread *kth)
 822{
 823        return kth->proc ? proc_is_dying(kth->proc) : false;
 824}
 825
 826void mountio(struct mnt *m, struct mntrpc *r)
 827{
 828        ERRSTACK(1);
 829        int n;
 830
 831        while (waserror()) {
 832                if (m->rip == current_kthread)
 833                        mntgate(m);
 834                /* Syscall aborts are like Plan 9 Eintr.  For those, we need to
 835                 * change the old request to a flush (mntflushalloc) and try
 836                 * again.  We'll always try to flush, and you can't get out
 837                 * until the flush either succeeds or errors out with a
 838                 * non-abort/Eintr error.
 839                 *
 840                 * This all means that regular aborts cannot break us out of
 841                 * here!  We can consider that policy in the future, if we need
 842                 * to.  Regardless, if the process is dying, we really do need
 843                 * to abort.  We might not always have a process (RKM
 844                 * chan_release), but in that case we're fine
 845                 * - we're not preventing a process from dying. */
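                /* Sketch of the resulting flow: request R is aborted ->
                 * waserror() fires with EINTR -> mntflushalloc() builds a
                 * Tflush naming R's tag and we loop to send it -> either the
                 * server answers (mountrpc() turns Rflush into EINTR) or a
                 * non-EINTR error / dying proc breaks us out above. */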
 846                if ((get_errno() != EINTR) ||
 847                    kth_proc_is_dying(current_kthread)) {
 848                        /* all other errors or dying, bail out! */
 849                        mntflushfree(m, r);
 850                        nexterror();
 851                }
 852                /* try again.  this is where you can get the "rpc tags" errstr.
 853                 */
 854                r = mntflushalloc(r, m->msize);
 855                /* need one for every waserror call; so this plus one outside */
 856                poperror();
 857        }
 858
 859        spin_lock(&m->lock);
 860        r->m = m;
 861        r->list = m->queue;
 862        m->queue = r;
 863        spin_unlock(&m->lock);
 864
 865        /* Transmit a file system rpc */
 866        if (m->msize == 0)
 867                panic("msize");
 868        n = convS2M(&r->request, r->rpc, m->msize);
 869        if (n < 0)
 870                panic("bad message type in mountio");
 871        if (devtab[m->c->type].write(m->c, r->rpc, n, 0) != n)
 872                error(EIO, ERROR_FIXME);
 873/*      r->stime = fastticks(NULL); */
 874        r->reqlen = n;
 875
 876        /* Gate readers onto the mount point one at a time */
 877        for (;;) {
 878                spin_lock(&m->lock);
 879                if (m->rip == 0)
 880                        break;
 881                spin_unlock(&m->lock);
 882                rendez_sleep(&r->r, rpcattn, r);
 883                if (r->done) {
 884                        poperror();
 885                        mntflushfree(m, r);
 886                        return;
 887                }
 888        }
 889        m->rip = current_kthread;
 890        spin_unlock(&m->lock);
 891        while (r->done == 0) {
 892                if (mntrpcread(m, r) < 0)
 893                        error(EIO, ERROR_FIXME);
 894                mountmux(m, r);
 895        }
 896        mntgate(m);
 897        poperror();
 898        mntflushfree(m, r);
 899}
 900
 901static int doread(struct mnt *m, int len)
 902{
 903        struct block *b;
 904
 905        while (qlen(m->q) < len) {
 906                b = devtab[m->c->type].bread(m->c, m->msize, 0);
 907                if (b == NULL)
 908                        return -1;
 909                if (blocklen(b) == 0) {
 910                        freeblist(b);
 911                        return -1;
 912                }
 913                qaddlist(m->q, b);
 914        }
 915        return 0;
 916}
 917
 918int mntrpcread(struct mnt *m, struct mntrpc *r)
 919{
 920        int i, t, len, hlen;
 921        struct block *b, **l, *nb;
 922
 923        r->reply.type = 0;
 924        r->reply.tag = 0;
 925
 926        /* read at least length, type, and tag and pullup to a single block */
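        /* (That fixed-size prefix is the standard 9P header: size[4] type[1]
         * tag[2], which is exactly BIT32SZ + BIT8SZ + BIT16SZ.) */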
 927        if (doread(m, BIT32SZ + BIT8SZ + BIT16SZ) < 0)
 928                return -1;
 929        nb = pullupqueue(m->q, BIT32SZ + BIT8SZ + BIT16SZ);
 930
 931        /* read in the rest of the message, avoid ridiculous (for now) message
 932         * sizes */
 933        len = GBIT32(nb->rp);
 934        if (len > m->msize) {
 935                qdiscard(m->q, qlen(m->q));
 936                return -1;
 937        }
 938        if (doread(m, len) < 0)
 939                return -1;
 940
 941        /* pullup the header (i.e. everything except data) */
 942        t = nb->rp[BIT32SZ];
 943        switch (t) {
 944                case Rread:
 945                        hlen = BIT32SZ + BIT8SZ + BIT16SZ + BIT32SZ;
 946                        break;
 947                default:
 948                        hlen = len;
 949                        break;
 950        }
 951        nb = pullupqueue(m->q, hlen);
 952
 953        if (convM2S(nb->rp, len, &r->reply) <= 0) {
 954                /* bad message, dump it */
 955                printd("mntrpcread: convM2S failed\n");
 956                qdiscard(m->q, len);
 957                return -1;
 958        }
 959
 960        /* TODO: this should use a qio helper directly.  qputback should have
  961         * the q locked, but I guess we assume we're the only one using it. */
 962
 963        /* hang the data off of the fcall struct */
 964        l = &r->b;
 965        *l = NULL;
 966        do {
 967                b = qget(m->q);
 968                /* TODO: have better block helpers for this and the memmove
 969                 * below */
 970                b = linearizeblock(b);
 971                if (hlen > 0) {
 972                        b->rp += hlen;
 973                        len -= hlen;
 974                        hlen = 0;
 975                }
 976                i = BLEN(b);
 977                if (i <= len) {
 978                        len -= i;
 979                        *l = b;
 980                        l = &(b->next);
 981                } else {
 982                        /* split block and put unused bit back */
 983                        nb = block_alloc(i - len, MEM_WAIT);
 984                        memmove(nb->wp, b->rp + len, i - len);
 985                        b->wp = b->rp + len;
 986                        nb->wp += i - len;
 987                        qputback(m->q, nb);
 988                        *l = b;
 989                        return 0;
 990                }
 991        } while (len > 0);
 992
 993        return 0;
 994}
 995
 996void mntgate(struct mnt *m)
 997{
 998        struct mntrpc *q;
 999
1000        spin_lock(&m->lock);
1001        m->rip = 0;
1002        for (q = m->queue; q; q = q->list) {
1003                if (q->done == 0)
1004                        if (rendez_wakeup(&q->r))
1005                                break;
1006        }
1007        spin_unlock(&m->lock);
1008}
1009
1010void mountmux(struct mnt *m, struct mntrpc *r)
1011{
1012        struct mntrpc **l, *q;
1013
1014        spin_lock(&m->lock);
1015        l = &m->queue;
1016        for (q = *l; q; q = q->list) {
1017                /* look for a reply to a message */
1018                if (q->request.tag == r->reply.tag) {
1019                        *l = q->list;
1020                        if (q != r) {
1021                                /*
1022                                 * Completed someone else.
1023                                 * Trade pointers to receive buffer.
1024                                 */
1025                                q->reply = r->reply;
1026                                q->b = r->b;
1027                                r->b = NULL;
1028                        }
1029                        q->done = 1;
1030                        spin_unlock(&m->lock);
1031                        if (mntstats != NULL)
1032                                (*mntstats) (q->request.type, m->c, q->stime,
1033                                             q->reqlen + r->replen);
1034                        if (q != r)
1035                                rendez_wakeup(&q->r);
1036                        return;
1037                }
1038                l = &q->list;
1039        }
1040        spin_unlock(&m->lock);
1041        if (r->reply.type == Rerror) {
1042                printd("unexpected reply tag %u; type %d (error %q)\n",
1043                       r->reply.tag, r->reply.type, r->reply.ename);
1044        } else {
1045                printd("unexpected reply tag %u; type %d\n", r->reply.tag,
1046                       r->reply.type);
1047        }
1048}
1049
1050/*
1051 * Create a new flush request and chain the previous
1052 * requests from it
1053 */
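/* Sketch of the chaining (hypothetical tags): if request R (tag 7) is
 * interrupted we allocate flush F1 with oldtag 7 and F1->flushed = R; if F1 is
 * interrupted too, F2 copies the oldtag (still 7) and points at F1, so
 * mntflushfree() can later walk F2 -> F1 -> R and tear the whole chain down. */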
1054struct mntrpc *mntflushalloc(struct mntrpc *r, uint32_t iounit)
1055{
1056        struct mntrpc *fr;
1057
1058        fr = mntralloc(0, iounit);
1059
1060        fr->request.type = Tflush;
1061        if (r->request.type == Tflush)
1062                fr->request.oldtag = r->request.oldtag;
1063        else
1064                fr->request.oldtag = r->request.tag;
1065        fr->flushed = r;
1066
1067        return fr;
1068}
1069
1070/*
1071 *  Free a chain of flushes.  Remove each unanswered
1072 *  flush and the original message from the unanswered
1073 *  request queue.  Mark the original message as done
 1074 *  and, if it hasn't been answered, set the reply type to
 1075 *  Rflush.
1076 */
1077void mntflushfree(struct mnt *m, struct mntrpc *r)
1078{
1079        struct mntrpc *fr;
1080
1081        while (r) {
1082                fr = r->flushed;
1083                if (!r->done) {
1084                        r->reply.type = Rflush;
1085                        mntqrm(m, r);
1086                }
1087                if (fr)
1088                        mntfree(r);
1089                r = fr;
1090        }
1091}
1092
1093static int alloctag(void)
1094{
1095        return get_u16(mntalloc.tags);
1096}
1097
1098static void freetag(int t)
1099{
1100        put_u16(mntalloc.tags, t);
1101}
1102
1103struct mntrpc *mntralloc(struct chan *c, uint32_t msize)
1104{
1105        struct mntrpc *new;
1106
1107        spin_lock(&mntalloc.l);
1108        new = mntalloc.rpcfree;
1109        if (new == NULL) {
1110                new = kzmalloc(sizeof(struct mntrpc), 0);
1111                if (new == NULL) {
1112                        spin_unlock(&mntalloc.l);
1113                        exhausted("mount rpc header");
1114                }
1115                rendez_init(&new->r);
1116                /*
1117                 * The header is split from the data buffer as
1118                 * mountmux may swap the buffer with another header.
1119                 */
1120                new->rpc = kzmalloc(msize, MEM_WAIT);
1121                if (new->rpc == NULL) {
1122                        kfree(new);
1123                        spin_unlock(&mntalloc.l);
1124                        exhausted("mount rpc buffer");
1125                }
1126                new->rpclen = msize;
1127                new->request.tag = alloctag();
1128                if (new->request.tag == NOTAG) {
1129                        kfree(new);
1130                        spin_unlock(&mntalloc.l);
1131                        exhausted("rpc tags");
1132                }
1133        } else {
1134                mntalloc.rpcfree = new->list;
1135                mntalloc.nrpcfree--;
1136                if (new->rpclen < msize) {
1137                        kfree(new->rpc);
1138                        new->rpc = kzmalloc(msize, MEM_WAIT);
1139                        if (new->rpc == NULL) {
1140                                kfree(new);
1141                                mntalloc.nrpcused--;
1142                                spin_unlock(&mntalloc.l);
1143                                exhausted("mount rpc buffer");
1144                        }
1145                        new->rpclen = msize;
1146                }
1147        }
1148        mntalloc.nrpcused++;
1149        spin_unlock(&mntalloc.l);
1150        new->c = c;
1151        new->done = 0;
1152        new->flushed = NULL;
1153        new->b = NULL;
1154        return new;
1155}
1156
1157void mntfree(struct mntrpc *r)
1158{
1159        if (r->b != NULL)
1160                freeblist(r->b);
1161        spin_lock(&mntalloc.l);
1162        if (mntalloc.nrpcfree >= 10) {
1163                kfree(r->rpc);
1164                freetag(r->request.tag);
1165                kfree(r);
1166        } else {
1167                r->list = mntalloc.rpcfree;
1168                mntalloc.rpcfree = r;
1169                mntalloc.nrpcfree++;
1170        }
1171        mntalloc.nrpcused--;
1172        spin_unlock(&mntalloc.l);
1173}
1174
1175void mntqrm(struct mnt *m, struct mntrpc *r)
1176{
1177        struct mntrpc **l, *f;
1178
1179        spin_lock(&m->lock);
1180        r->done = 1;
1181
1182        l = &m->queue;
1183        for (f = *l; f; f = f->list) {
1184                if (f == r) {
1185                        *l = r->list;
1186                        break;
1187                }
1188                l = &f->list;
1189        }
1190        spin_unlock(&m->lock);
1191}
1192
1193struct mnt *mntchk(struct chan *c)
1194{
1195        struct mnt *m;
1196
1197        /* This routine is mostly vestiges of prior lives; now it's just sanity
1198         * checking */
1199
1200        if (c->mchan == NULL)
1201                panic("mntchk 1: NULL mchan c %s\n", /*c2name(c)*/ "channame?");
1202
1203        m = c->mchan->mux;
1204
1205        if (m == NULL)
1206                printd("mntchk 2: NULL mux c %s c->mchan %s \n", c2name(c),
1207                           c2name(c->mchan));
1208
1209        /*
 1210         * Was it closed and reused?  (This used to be error(Eshutdown); now it can't happen.)
1211         */
1212        if (m->id == 0 || m->id >= c->dev)
1213                panic("mntchk 3: can't happen");
1214
1215        return m;
1216}
1217
1218/*
1219 * Rewrite channel type and dev for in-flight data to
1220 * reflect local values.  These entries are known to be
1221 * the first two in the Dir encoding after the count.
1222 */
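/* Sketch of the bytes touched (9P stat layout, per the link in the TODO
 * below): size[2] | type[2] | dev[4] | qid[13] | ...  Only type and dev are
 * overwritten, with this kernel's values for the channel. */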
1223void mntdirfix(uint8_t * dirbuf, struct chan *c)
1224{
1225        /* TODO: We used to use the device's char (dc), instead of the type.
1226         * not sure about the effects one way or the other.  This might be the
1227         * type[2] and dev[4] in a D (struct dir, see 9p's stat
1228         * (http://man.cat-v.org/plan_9/5/stat).  In which case, those should be
1229         * for the kernel's use.  Hopefully our kernel. */
1230        dirbuf += BIT16SZ;      /* skip count */
1231        PBIT16(dirbuf, c->type);
1232        dirbuf += BIT16SZ;
1233        PBIT32(dirbuf, c->dev);
1234}
1235
1236int rpcattn(void *v)
1237{
1238        struct mntrpc *r;
1239
1240        r = v;
1241        return r->done || r->m->rip == 0;
1242}
1243
1244struct dev mntdevtab __devtab = {
1245        .name = "mnt",
1246
1247        .reset = devreset,
1248        .init = mntinit,
1249        .shutdown = devshutdown,
1250        .attach = mntattach,
1251        .walk = mntwalk,
1252        .stat = mntstat,
1253        .open = mntopen,
1254        .create = mntcreate,
1255        .close = mntclose,
1256        .read = mntread,
1257        .bread = devbread,
1258        .write = mntwrite,
1259        .bwrite = devbwrite,
1260        .remove = mntremove,
1261        .wstat = mntwstat,
1262        .power = devpower,
1263        .chaninfo = devchaninfo,
1264};
1265