Emergency commit to fix a gitastrophe
[akaros.git] / kern / drivers / dev / proc.c
1 /* 
2  * This file is part of the UCB release of Plan 9. It is subject to the license
3  * terms in the LICENSE file found in the top-level directory of this
4  * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
5  * part of the UCB release of Plan 9, including this file, may be copied,
6  * modified, propagated, or distributed except according to the terms contained
7  * in the LICENSE file.
8  */
9
10 //#define DEBUG
11 /* proc on plan 9 has lots of capabilities, some of which we might
12  * want for akaros:
13  * debug control
14  * event tracing
15  * process control (no need for signal system call, etc.)
16  * textual status
17  * rather than excise code that won't work, I'm bracketing it with
18  * #if 0 until we know we don't want it
19  */
20 #include <vfs.h>
21 #include <kfs.h>
22 #include <slab.h>
23 #include <kmalloc.h>
24 #include <kref.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <assert.h>
28 #include <error.h>
29 #include <cpio.h>
30 #include <pmap.h>
31 #include <smp.h>
32 #include <arch/vmm/vmm.h>
33
/*
 * File types served by this device.  The value is what the QID() macro
 * extracts from a qid path, so the order of the enumerators must not change.
 * Qdir is the directory type; Qtrace and Qtracepids are the two top-level
 * files generated by procgen(); the rest name per-process files.
 */
enum {
        Qdir = 0,
        Qtrace,
        Qtracepids,
        Qns,
        Qargs,
        Qctl,
        Qfd,
        Qfpregs,
        Qkregs,
        Qmem,
        Qnote,
        Qnoteid,
        Qnotepg,
        Qproc,
        Qregs,
        Qsegment,
        Qstatus,
        Qtext,
        Qwait,
        Qprofile,
        Qsyscall,
        Qcore,
};
58
/*
 * Indices of the control commands listed in proccmd[].  Each proccmd[] row
 * names its index explicitly, so the table order and this order are
 * independent of each other.
 */
enum {
        CMclose = 0,
        CMclosefiles,
        CMfixedpri,
        CMhang,
        CMkill,
        CMnohang,
        CMnoswap,
        CMpri,
        CMprivate,
        CMprofile,
        CMstart,
        CMstartstop,
        CMstartsyscall,
        CMstop,
        CMwaitstop,
        CMwired,
        CMtrace,
        CMcore,
        CMvminit,
        CMvmstart,
        CMvmkill,
};
82
/*
 * Sizing constants.  Nevents is a power of two so that Emask can be used to
 * wrap an index into an Nevents-sized ring.  STATSIZE is the length reported
 * for the "status" entry in procdir[].
 */
enum {
        Nevents = 1 << 14,
        Emask = Nevents - 1,
        Ntracedpids = 1024,
        STATSIZE = 8 + 1 + 10 + 1 + 6 + 2,
};
89
90 /*
91  * Status, fd, and ns are left fully readable (0444) because of their use in debugging,
92  * particularly on shared servers.
93  * Arguably, ns and fd shouldn't be readable; if you'd prefer, change them to 0000
94  */
95 struct dirtab procdir[] = {
96         {"args", {Qargs}, 0, 0660},
97         {"ctl", {Qctl}, 0, 0660},
98         {"fd", {Qfd}, 0, 0444},
99         {"fpregs", {Qfpregs}, 0, 0000},
100         //  {"kregs",   {Qkregs},   sizeof(Ureg),       0600},
101         {"mem", {Qmem}, 0, 0000},
102         {"note", {Qnote}, 0, 0000},
103         {"noteid", {Qnoteid}, 0, 0664},
104         {"notepg", {Qnotepg}, 0, 0000},
105         {"ns", {Qns}, 0, 0444},
106         {"proc", {Qproc}, 0, 0400},
107         //  {"regs",        {Qregs},    sizeof(Ureg),       0000},
108         {"segment", {Qsegment}, 0, 0444},
109         {"status", {Qstatus}, STATSIZE, 0444},
110         {"text", {Qtext}, 0, 0000},
111         {"wait", {Qwait}, 0, 0400},
112         {"profile", {Qprofile}, 0, 0400},
113         {"syscall", {Qsyscall}, 0, 0400},
114         {"core", {Qcore}, 0, 0444},
115 };
116
117 static
118 struct cmdtab proccmd[] = {
119         {CMclose, "close", 2},
120         {CMclosefiles, "closefiles", 1},
121         {CMfixedpri, "fixedpri", 2},
122         {CMhang, "hang", 1},
123         {CMnohang, "nohang", 1},
124         {CMnoswap, "noswap", 1},
125         {CMkill, "kill", 1},
126         {CMpri, "pri", 2},
127         {CMprivate, "private", 1},
128         {CMprofile, "profile", 1},
129         {CMstart, "start", 1},
130         {CMstartstop, "startstop", 1},
131         {CMstartsyscall, "startsyscall", 1},
132         {CMstop, "stop", 1},
133         {CMwaitstop, "waitstop", 1},
134         {CMwired, "wired", 2},
135         {CMtrace, "trace", 0},
136         {CMcore, "core", 2},
137         {CMcore, "core", 2},
138         {CMcore, "core", 2},
139         {CMvminit, "vminit", 0},
140         {CMvmstart, "vmstart", 0},
141         {CMvmkill, "vmkill", 0},
142 };
143
144 /*
145  * struct qids are, in path:
146  *       5 bits of file type (qids above) (old comment said 4 here)
147  *      23 bits of process slot number + 1 (pid + 1 is stored)
148  *           in vers,
149  *      32 bits of pid, for consistency checking
150  * If notepg, c->pgrpid.path is pgrp slot, .vers is noteid.
151  */
/* Bit positions and masks for the fields packed into a qid path. */
#define QSHIFT          5       /* location in qid of proc slot # */
#define SLOTBITS        23      /* number of bits in the slot */
#define QIDMASK         ((1 << QSHIFT) - 1)
#define SLOTMASK        (((1 << SLOTBITS) - 1) << QSHIFT)

/* Accessors.  QID and SLOT look only at the low 32 bits of the path. */
/* QID: the file-type field held in the low QSHIFT bits. */
#define QID(q)          (((uint32_t)(q).path) & QIDMASK)
/* SLOT: the stored slot field minus one (the field holds slot + 1). */
#define SLOT(q) \
        (((((uint32_t)(q).path) >> QSHIFT) & ((1 << SLOTBITS) - 1)) - 1)
#define PID(q)          ((q).vers)
#define NOTEID(q)       ((q).vers)
161
162 static void procctlreq(struct proc *, char *, int);
163 static int procctlmemio(struct proc *, uintptr_t, int, void *, int);
164 //static struct chan*   proctext(struct chan*, struct proc*);
165 //static Segment* txt2data(struct proc*, Segment*);
166 //static int    procstopped(void*);
167 static void mntscan(struct mntwalk *, struct proc *);
168
169 //static Traceevent *tevents;
170 static char *tpids, *tpidsc, *tpidse;
171 static spinlock_t tlock;
172 static int topens;
173 static int tproduced, tconsumed;
174 //static void notrace(struct proc*, int, int64_t);
175
176 //void (*proctrace)(struct proc*, int, int64_t) = notrace;
177
/*
 * Disabled (#if 0) profiling clock hook; none of this is compiled.
 * When `up` is set, current->state is Running, and ur is a user-mode register
 * set, it adds TK2MS(1) to the clock field of the Tos found at
 * USTKTOP - sizeof(Tos) and passes the user PC to segclock().
 * Ureg, Timer, Tos and `up` are not declared in the visible part of this file.
 */
178 #if 0
179 static void profclock(Ureg * ur, Timer *)
180 {
181         Tos *tos;
182
183         if (up == NULL || current->state != Running)
184                 return;
185
186         /* user profiling clock */
187         if (userureg(ur)) {
188                 tos = (Tos *) (USTKTOP - sizeof(Tos));
189                 tos->clock += TK2MS(1);
190                 segclock(userpc(ur));
191         }
192 }
193 #endif
194 static int
195 procgen(struct chan *c, char *name, struct dirtab *tab, int unused, int s,
196                 struct dir *dp)
197 {
198         struct qid qid;
199         struct proc *p;
200         char *ename;
201
202         int pid;
203         uint32_t path, perm, len;
204         if (s == DEVDOTDOT) {
205                 mkqid(&qid, Qdir, 0, QTDIR);
206                 devdir(c, qid, "#p", 0, eve, 0555, dp);
207                 return 1;
208         }
209
210         if (c->qid.path == Qdir) {
211                 if (s == 0) {
212                         strncpy(get_cur_genbuf(), "trace", GENBUF_SZ);
213                         mkqid(&qid, Qtrace, -1, QTFILE);
214                         devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
215                         return 1;
216                 }
217                 if (s == 1) {
218                         strncpy(get_cur_genbuf(), "tracepids", GENBUF_SZ);
219                         mkqid(&qid, Qtracepids, -1, QTFILE);
220                         devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
221                         return 1;
222                 }
223                 s -= 2;
224                 if (name != NULL) {
225                         /* ignore s and use name to find pid */
226                         pid = strtol(name, &ename, 10);
227                         if (pid <= 0 || ename[0] != '\0')
228                                 return -1;
229                         p = pid2proc(pid);
230                         if (!p)
231                                 return -1;
232                         /* Need to update s, so that it's the correct 'index' for our proc
233                          * (aka, the pid).  We use s later when making the qid. */
234                         s = pid;
235                 } else {
236                         /* This is a shitty iterator, and the list isn't guaranteed to give
237                          * you the same ordering twice in a row. (procs come and go). */
238                         p = pid_nth(s);
239                         if (!p)
240                                 return -1;
241                         pid = p->pid;
242                 }
243
244                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%u", pid);
245                 /*
246                  * String comparison is done in devwalk so
247                  * name must match its formatted pid.
248                  */
249                 if (name != NULL && strcmp(name, get_cur_genbuf()) != 0) {
250                         printk("pid-name mismatch, name: %s, pid %d\n", name, pid);
251                         kref_put(&p->p_kref);
252                         return -1;
253                 }
254                 mkqid(&qid, (s + 1) << QSHIFT, pid, QTDIR);
255                 devdir(c, qid, get_cur_genbuf(), 0, p->user, DMDIR | 0555, dp);
256                 kref_put(&p->p_kref);
257                 return 1;
258         }
259         if (c->qid.path == Qtrace) {
260                 strncpy(get_cur_genbuf(), "trace", GENBUF_SZ);
261                 mkqid(&qid, Qtrace, -1, QTFILE);
262                 devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
263                 return 1;
264         }
265         if (c->qid.path == Qtracepids) {
266                 strncpy(get_cur_genbuf(), "tracepids", GENBUF_SZ);
267                 mkqid(&qid, Qtracepids, -1, QTFILE);
268                 devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
269                 return 1;
270         }
271         if (s >= ARRAY_SIZE(procdir))
272                 return -1;
273         if (tab)
274                 panic("procgen");
275
276         tab = &procdir[s];
277         /* path is everything other than the QID part.  Not sure from the orig code
278          * if they wanted just the pid part (SLOTMASK) or everything above QID */
279         path = c->qid.path & ~QIDMASK;  /* slot component */
280         if ((p = pid2proc(SLOT(c->qid))) == NULL)
281                 return -1;
282         perm = 0444 | tab->perm;
283 #if 0
284         if (perm == 0)
285                 perm = p->procmode;
286         else    /* just copy read bits */
287                 perm |= p->procmode & 0444;
288 #endif
289
290         len = tab->length;
291 #if 0
292         switch (QID(c->qid)) {
293                 case Qwait:
294                         len = p->nwait; /* incorrect size, but >0 means there's something to read */
295                         break;
296                 case Qprofile:
297                         q = p->seg[TSEG];
298                         if (q && q->profile) {
299                                 len = (q->top - q->base) >> LRESPROF;
300                                 len *= sizeof(*q->profile);
301                         }
302                         break;
303         }
304 #endif
305
306         mkqid(&qid, path | tab->qid.path, c->qid.vers, QTFILE);
307         devdir(c, qid, tab->name, len, p->user, perm, dp);
308         kref_put(&p->p_kref);
309         return 1;
310 }
311
/*
 * Disabled (#if 0) event-tracing support; none of this is compiled.
 *
 * notrace       empty placeholder hook.
 * _proctrace    under tlck, gives up if p->trace is 0, nobody has the trace
 *               file open (topens == 0), or Nevents events are outstanding;
 *               otherwise claims the next index and fills tevents[tp & Emask]
 *               with pid, event type, time (todget() when ts is 0) and core.
 * proctracepid  the first time a traced proc is seen (trace == 1) while a
 *               real hook is installed, marks it (trace = 2) and appends
 *               "pid text\n" to the tpids buffer under tlck.
 *
 * NOTE(review): the event slot is filled in after tlck is released.
 * Traceevent, Tevent, tevents, proctrace and `m` are not declared in the
 * visible part of this file.
 */
312 #if 0
313 static void notrace(struct proc *, Tevent, int64_t)
314 {
315 }
316
317 static spinlock_t tlck = SPINLOCK_INITIALIZER_IRQSAVE;
318
319 static void _proctrace(struct proc *p, Tevent etype, int64_t ts)
320 {
321         Traceevent *te;
322         int tp;
323
324         ilock(&tlck);
325         if (p->trace == 0 || topens == 0 || tproduced - tconsumed >= Nevents) {
326                 iunlock(&tlck);
327                 return;
328         }
329         tp = tproduced++;
330         iunlock(&tlck);
331
332         te = &tevents[tp & Emask];
333         te->pid = p->pid;
334         te->etype = etype;
335         if (ts == 0)
336                 te->time = todget(NULL);
337         else
338                 te->time = ts;
339         te->core = m->machno;
340 }
341
342 void proctracepid(struct proc *p)
343 {
344         if (p->trace == 1 && proctrace != notrace) {
345                 p->trace = 2;
346                 ilock(&tlck);
347                 tpidsc = seprint(tpidsc, tpidse, "%d %s\n", p->pid, p->text);
348                 iunlock(&tlck);
349         }
350 }
351
352 #endif
/*
 * Device init hook.  Currently does nothing: the whole body is under #if 0.
 * The disabled code warned when conf.nproc would not fit in the qid slot
 * field and registered profclock as a clock link.
 */
353 static void procinit(void)
354 {
355 #if 0
356         if (conf.nproc >= (SLOTMASK >> QSHIFT) - 1)
357                 printd("warning: too many procs for devproc\n");
358         addclock0link((void (*)(void))profclock, 113);  /* Relative prime to HZ */
359 #endif
360 }
361
/* Attach to the proc device: hands spec to devattach() with device char 'p'. */
static struct chan *procattach(char *spec)
{
        struct chan *root;

        root = devattach('p', spec);
        return root;
}
366
367 static struct walkqid *procwalk(struct chan *c, struct chan *nc, char **name,
368                                                                 int nname)
369 {
370         return devwalk(c, nc, name, nname, 0, 0, procgen);
371 }
372
373 static int procstat(struct chan *c, uint8_t * db, int n)
374 {
375         return devstat(c, db, n, 0, 0, procgen);
376 }
377
378 /*
379  *  none can't read or write state on other
380  *  processes.  This is to contain access of
381  *  servers running as none should they be
382  *  subverted by, for example, a stack attack.
383  */
/*
 * Currently a no-op: it returns immediately and p is unused.  The check it
 * is named for (refuse access when the caller's user is "none", the target
 * is another process, and the caller is not eve) is kept under #if 0 below.
 */
384 static void nonone(struct proc *p)
385 {
386         return;
387 #if 0
388         if (p == up)
389                 return;
390         if (strcmp(current->user, "none") != 0)
391                 return;
392         if (iseve())
393                 return;
394         error(Eperm);
395 #endif
396 }
397
/*
 * Open a file or directory of the proc device.
 *
 * Directories are passed straight to devopen().  Opening the top-level
 * "trace" and "tracepids" files always raises an error; their former bodies
 * are kept under #if 0.  For per-process files the process is looked up from
 * SLOT(c->qid) and its pid is checked against PID(c->qid); a mismatch or a
 * missing process raises Eprocdied.  A per-file switch then applies the mode
 * rules, c->qid.vers is set to the pid, and the open is completed by
 * devopen().
 *
 * The kref_put() on p (presumably pairing with a reference taken by
 * pid2proc()) happens on every exit after the lookup: in the waserror()
 * handler, in the default case (after poperror()), and before the normal
 * return.
 *
 * Returns the channel returned by devopen(); failures are reported through
 * error().
 */
398 static struct chan *procopen(struct chan *c, int omode)
399 {
400         ERRSTACK(2);
401         struct proc *p;
        /* NOTE(review): pg is only referenced from the #if 0 Qnotepg code. */
402         struct pgrp *pg;
403         struct chan *tc;
404         int pid;
405
406         if (c->qid.type & QTDIR)
407                 return devopen(c, omode, 0, 0, procgen);
408
409         if (QID(c->qid) == Qtrace) {
410                 error("proc: Qtrace: not yet");
411 #if 0
412                 if (omode != OREAD)
413                         error(Eperm);
414                 lock(&tlock);
415                 if (waserror()) {
416                         unlock(&tlock);
417                         nexterror();
418                 }
419                 if (topens > 0)
420                         error("already open");
421                 topens++;
422                 if (tevents == NULL) {
423                         tevents = (Traceevent *) kzmalloc(sizeof(Traceevent) * Nevents,
424                                                                                           KMALLOC_WAIT);
425                         if (tevents == NULL)
426                                 error(Enomem);
427                         tpids = kzmalloc(Ntracedpids * 20, KMALLOC_WAIT);
                        /* NOTE(review): this branch runs only when tpids is NULL, so
                         * kfree(tpids) frees nothing; it looks like tevents was meant
                         * -- confirm before re-enabling. */
428                         if (tpids == NULL) {
429                                 kfree(tpids);
430                                 tpids = NULL;
431                                 error(Enomem);
432                         }
433                         tpidsc = tpids;
434                         tpidse = tpids + Ntracedpids * 20;
435                         *tpidsc = 0;
436                         tproduced = tconsumed = 0;
437                 }
438                 proctrace = _proctrace;
439                 poperror();
440                 unlock(&tlock);
441
442                 c->mode = openmode(omode);
443                 c->flag |= COPEN;
444                 c->offset = 0;
445                 return c;
446 #endif
447         }
448         if (QID(c->qid) == Qtracepids) {
449                 error("Proc: Qtracepids: not yet");
450 #if 0
451                 if (omode != OREAD)
452                         error(Eperm);
453                 c->mode = openmode(omode);
454                 c->flag |= COPEN;
455                 c->offset = 0;
456                 return c;
457 #endif
458         }
459         if ((p = pid2proc(SLOT(c->qid))) == NULL)
460                 error(Eprocdied);
461         //qlock(&p->debug);
        /* From here on, any error() unwinds through this handler, which drops
         * the reference on p before propagating the error. */
462         if (waserror()) {
463                 //qunlock(&p->debug);
464                 kref_put(&p->p_kref);
465                 nexterror();
466         }
467         pid = PID(c->qid);
468         if (p->pid != pid)
469                 error(Eprocdied);
470
471         omode = openmode(omode);
472
473         switch (QID(c->qid)) {
                /* Qtext always errors; its former body is commented out below. */
474                 case Qtext:
475                         error("notyet");
476 /*
477                         if (omode != OREAD)
478                                 error(Eperm);
479                         tc = proctext(c, p);
480                         tc->offset = 0;
481                         poperror();
482                         qunlock(&p->debug);
483                         kref_put(&p->p_kref);
484                         cclose(c);
485                         return tc;
486 */
                /* These files may only be opened with omode == OREAD. */
487                 case Qproc:
488                 case Qsegment:
489                 case Qprofile:
490                 case Qfd:
491                         if (omode != OREAD)
492                                 error(Eperm);
493                         break;
494
                /* With the privatemem test commented out, Qnote and Qmem are
                 * refused unconditionally. */
495                 case Qnote:
496 //          if (p->privatemem)
497                         error(Eperm);
498                         break;
499
500                 case Qmem:
501 //          if (p->privatemem)
502                         error(Eperm);
503                         //nonone(p);
504                         break;
505
506                 case Qargs:
507                 case Qnoteid:
508                 case Qwait:
509                 case Qregs:
510                 case Qfpregs:
511                 case Qkregs:
512                 case Qsyscall:
513                 case Qcore:
514                         nonone(p);
515                         break;
516
                /* Qns: read-only; c->aux gets a zeroed struct mntwalk, which
                 * procclose() frees. */
517                 case Qns:
518                         if (omode != OREAD)
519                                 error(Eperm);
520                         c->aux = kzmalloc(sizeof(struct mntwalk), KMALLOC_WAIT);
521                         break;
522                 case Qstatus:
523                 case Qctl:
524                         break;
525                 case Qnotepg:
526                         error("not yet");
527 #if 0
528                         nonone(p);
529                         pg = p->pgrp;
530                         if (pg == NULL)
531                                 error(Eprocdied);
532                         if (omode != OWRITE || pg->pgrpid == 1)
533                                 error(Eperm);
534                         c->pgrpid.path = pg->pgrpid + 1;
535                         c->pgrpid.vers = p->noteid;
536 #endif
537                         break;
538
                /* Unknown file type: the handler is popped and the reference
                 * dropped by hand before raising Egreg, so the handler above
                 * does not drop it a second time. */
539                 default:
540                         poperror();
541                         //qunlock(&p->debug);
542                         kref_put(&p->p_kref);
543                         printk("procopen %#llux\n", c->qid.path);
544                         error(Egreg);
545         }
546
547         /* Affix pid to qid */
548 //  if (p->state != Dead)
549         c->qid.vers = p->pid;
550         /* make sure the process slot didn't get reallocated while we were playing */
551         //coherence();
552         /* TODO: think about what we really want here.  In akaros, we wouldn't have
553          * our pid changed like that. */
554         if (p->pid != pid)
555                 error(Eprocdied);
556
557         tc = devopen(c, omode, 0, 0, procgen);
558         poperror();
559         //qunlock(&p->debug);
560         kref_put(&p->p_kref);
561         return tc;
562 }
563
/*
 * wstat is not supported yet: every call raises an error.  The message used
 * to misname this function ("procwwstat"); it now says "procwstat".
 *
 * The former implementation is kept under #if 0.  It allowed the owner of the
 * process or eve to change the owner and mode of a process's files.
 */
static int procwstat(struct chan *c, uint8_t * db, int n)
{
        ERRSTACK(2);
        error("procwstat: not yet");
#if 0
        struct proc *p;
        struct dir *d;

        if (c->qid.type & QTDIR)
                error(Eperm);

        if (QID(c->qid) == Qtrace)
                return devwstat(c, db, n);

        if ((p = pid2proc(SLOT(c->qid))) == NULL)
                error(Eprocdied);
        nonone(p);
        d = NULL;
        qlock(&p->debug);
        if (waserror()) {
                qunlock(&p->debug);
                kref_put(&p->p_kref);
                kfree(d);
                nexterror();
        }

        if (p->pid != PID(c->qid))
                error(Eprocdied);

        if (strcmp(current->user, p->user) != 0 && strcmp(current->user, eve) != 0)
                error(Eperm);

        d = kzmalloc(sizeof(struct dir) + n, KMALLOC_WAIT);
        n = convM2D(db, n, &d[0], (char *)&d[1]);
        if (n == 0)
                error(Eshortstat);
        if (!emptystr(d->uid) && strcmp(d->uid, p->user) != 0) {
                if (strcmp(current->user, eve) != 0)
                        error(Eperm);
                else
                        kstrdup(&p->user, d->uid);
        }
        if (d->mode != ~0UL)
                p->procmode = d->mode & 0777;

        poperror();
        qunlock(&p->debug);
        kref_put(&p->p_kref);
        kfree(d);

        return n;
#endif
}
617
/*
 * Disabled (#if 0) helpers for rendering a process's open files; none of this
 * is compiled.
 *
 * procoffset    skips `offset` bytes of already-generated output: subtracts
 *               *np from offset and, if the offset falls inside the *np bytes
 *               at va, moves the unread tail to the front of va and stores
 *               its length in *np; otherwise *np becomes 0.  Returns the
 *               updated offset.
 * procqidwidth  number of characters needed to print c->qid.vers.
 * procfdprint   formats one line describing chan c (fd number, mode, device,
 *               qid, iounit, offset, name) into s, at most ns bytes.
 * procfds       builds the listing in a 256-byte stack buffer while holding
 *               p->debug and the fgrp lock, then copies n bytes to va after
 *               both are released.
 *
 * NOTE(review): procfds adds each snprint()/procfdprint() return value to n
 * without checking it against count -- confirm those return the number of
 * bytes actually written before re-enabling.
 */
618 #if 0
619 static long procoffset(long offset, char *va, int *np)
620 {
621         if (offset > 0) {
622                 offset -= *np;
623                 if (offset < 0) {
624                         memmove(va, va + *np + offset, -offset);
625                         *np = -offset;
626                 } else
627                         *np = 0;
628         }
629         return offset;
630 }
631
632 static int procqidwidth(struct chan *c)
633 {
634         char buf[32];
635
636         return sprint(buf, "%lu", c->qid.vers);
637 }
638
639 int procfdprint(struct chan *c, int fd, int w, char *s, int ns)
640 {
641         int n;
642
643         if (w == 0)
644                 w = procqidwidth(c);
645         n = snprint(s, ns,
646                                 "%3d %.2s %C %4ud (%.16llux %*lud %.2ux) %5ld %8lld %s\n", fd,
647                                 &"r w rw"[(c->mode & 3) << 1], c->dev->dc, c->devno,
648                                 c->qid.path, w, c->qid.vers, c->qid.type, c->iounit, c->offset,
649                                 c->name->s);
650         return n;
651 }
652
653 static int procfds(struct proc *p, char *va, int count, long offset)
654 {
655         ERRSTACK(2);
656         struct fgrp *f;
657         struct chan *c;
658         char buf[256];
659         int n, i, w, ww;
660         char *a;
661
662         /* print to buf to avoid holding fgrp lock while writing to user space */
663         if (count > sizeof buf)
664                 count = sizeof buf;
665         a = buf;
666
667         qlock(&p->debug);
668         f = p->fgrp;
669         if (f == NULL) {
670                 qunlock(&p->debug);
671                 return 0;
672         }
673         lock(f);
674         if (waserror()) {
675                 unlock(f);
676                 qunlock(&p->debug);
677                 nexterror();
678         }
679
680         n = readstr(0, a, count, p->dot->name->s);
681         n += snprint(a + n, count - n, "\n");
682         offset = procoffset(offset, a, &n);
683         /* compute width of qid.path */
684         w = 0;
685         for (i = 0; i <= f->maxfd; i++) {
686                 c = f->fd[i];
687                 if (c == NULL)
688                         continue;
689                 ww = procqidwidth(c);
690                 if (ww > w)
691                         w = ww;
692         }
693         for (i = 0; i <= f->maxfd; i++) {
694                 c = f->fd[i];
695                 if (c == NULL)
696                         continue;
697                 n += procfdprint(c, i, w, a + n, count - n);
698                 offset = procoffset(offset, a, &n);
699         }
700         poperror();
701         unlock(f);
702         qunlock(&p->debug);
703
704         /* copy result to user space, now that locks are released */
705         memmove(va, buf, n);
706
707         return n;
708 }
709 #endif
710 static void procclose(struct chan *c)
711 {
712         if (QID(c->qid) == Qtrace) {
713                 spin_lock(&tlock);
714                 if (topens > 0)
715                         topens--;
716                 /* ??
717                    if(topens == 0)
718                    proctrace = notrace;
719                  */
720                 spin_unlock(&tlock);
721         }
722         if (QID(c->qid) == Qns && c->aux != 0)
723                 kfree(c->aux);
724 }
725
726 void int2flag(int flag, char *s)
727 {
728         if (flag == 0) {
729                 *s = '\0';
730                 return;
731         }
732         *s++ = '-';
733         if (flag & MAFTER)
734                 *s++ = 'a';
735         if (flag & MBEFORE)
736                 *s++ = 'b';
737         if (flag & MCREATE)
738                 *s++ = 'c';
739         if (flag & MCACHE)
740                 *s++ = 'C';
741         *s = '\0';
742 }
743
/*
 * Disabled (#if 0) helpers for the "args" and "trace" files; none of this is
 * compiled.
 *
 * argcpy           allocates a buffer of at most 128 bytes and fills it by
 *                  walking backwards from p towards s over NUL-separated
 *                  strings, printing each one with "%q " through seprint().
 *                  Returns the new buffer (an empty string when p <= s).
 * procargs         on first use (p->setargs == 0) replaces p->args with the
 *                  argcpy() rendering and updates p->nargs; then prints
 *                  p->args into buf.
 * eventsavailable  true when tproduced > tconsumed.
 *
 * NOTE(review): in argcpy the inner scan moves p back to the start of the
 * previous string, so the strings appear to be emitted last-to-first --
 * confirm against the intended output order before re-enabling.
 */
744 #if 0
745 static char *argcpy(char *s, char *p)
746 {
747         char *t, *tp, *te;
748         int n;
749
750         n = p - s;
751         if (n > 128)
752                 n = 128;
753         if (n <= 0) {
754                 t = kzmalloc(1, KMALLOC_WAIT);
755                 *t = 0;
756                 return t;
757         }
758         t = kzmalloc(n, KMALLOC_WAIT);
759         tp = t;
760         te = t + n;
761
762         while (tp + 1 < te) {
763                 for (p--; p > s && p[-1] != 0; p--) ;
764                 tp = seprint(tp, te, "%q ", p);
765                 if (p == s)
766                         break;
767         }
768         if (*tp == ' ')
769                 *tp = 0;
770         return t;
771 }
772
773 static int procargs(struct proc *p, char *buf, int nbuf)
774 {
775         char *s;
776
777         if (p->setargs == 0) {
778                 s = argcpy(p->args, p->args + p->nargs);
779                 kfree(p->args);
780                 p->nargs = strlen(s);
781                 p->args = s;
782                 p->setargs = 1;
783         }
784         return snprint(buf, nbuf, "%s", p->args);
785 }
786
787 static int eventsavailable(void *)
788 {
789         return tproduced > tconsumed;
790 }
791 #endif
792 static long procread(struct chan *c, void *va, long n, int64_t off)
793 {
794         ERRSTACK(5);
795         struct proc *p;
796         long l, r;
797         int i, j, navail, pid, rsize;
798         char flag[10], *sps, *srv, statbuf[512];
799         uintptr_t offset, u;
800         int tesz;
801         uint8_t *rptr;
802         struct mntwalk *mw;
803
804         if (c->qid.type & QTDIR) {
805                 int nn;
806                 printd("procread: dir\n");
807                 nn = devdirread(c, va, n, 0, 0, procgen);
808                 printd("procread: %d\n", nn);
809                 return nn;
810         }
811
812         offset = off;
813 #if 0
814         if (QID(c->qid) == Qtrace) {
815                 if (!eventsavailable(NULL))
816                         return 0;
817
818                 rptr = va;
819                 tesz = BIT32SZ + BIT32SZ + BIT64SZ + BIT32SZ;
820                 navail = tproduced - tconsumed;
821                 if (navail > n / tesz)
822                         navail = n / tesz;
823                 while (navail > 0) {
824                         PBIT32(rptr, tevents[tconsumed & Emask].pid);
825                         rptr += BIT32SZ;
826                         PBIT32(rptr, tevents[tconsumed & Emask].etype);
827                         rptr += BIT32SZ;
828                         PBIT64(rptr, tevents[tconsumed & Emask].time);
829                         rptr += BIT64SZ;
830                         PBIT32(rptr, tevents[tconsumed & Emask].core);
831                         rptr += BIT32SZ;
832                         tconsumed++;
833                         navail--;
834                 }
835                 return rptr - (uint8_t *) va;
836         }
837
838         if (QID(c->qid) == Qtracepids)
839                 if (tpids == NULL)
840                         return 0;
841                 else
842                         return readstr(off, va, n, tpids);
843 #endif
844         if ((p = pid2proc(SLOT(c->qid))) == NULL)
845                 error(Eprocdied);
846         if (p->pid != PID(c->qid)) {
847                 kref_put(&p->p_kref);
848                 error(Eprocdied);
849         }
850         switch (QID(c->qid)) {
851                 default:
852                         kref_put(&p->p_kref);
853                         break;
854 #if 0
855 #warning check refcnting in here
856                 case Qargs:
857                         qlock(&p->debug);
858                         j = procargs(p, current->genbuf, sizeof current->genbuf);
859                         qunlock(&p->debug);
860                         kref_put(&p->p_kref);
861                         if (offset >= j)
862                                 return 0;
863                         if (offset + n > j)
864                                 n = j - offset;
865                         memmove(va, &current->genbuf[offset], n);
866                         return n;
867
868                 case Qsyscall:
869                         if (p->syscalltrace == NULL)
870                                 return 0;
871                         return readstr(offset, va, n, p->syscalltrace);
872
873                 case Qcore:
874                         i = 0;
875                         ac = p->ac;
876                         wired = p->wired;
877                         if (ac != NULL)
878                                 i = ac->machno;
879                         else if (wired != NULL)
880                                 i = wired->machno;
881                         snprint(statbuf, sizeof statbuf, "%d\n", i);
882                         return readstr(offset, va, n, statbuf);
883
884                 case Qmem:
885                         if (offset < KZERO
886                                 || (offset >= USTKTOP - USTKSIZE && offset < USTKTOP)) {
887                                 r = procctlmemio(p, offset, n, va, 1);
888                                 kref_put(&p->p_kref);
889                                 return r;
890                         }
891
892                         if (!iseve()) {
893                                 kref_put(&p->p_kref);
894                                 error(Eperm);
895                         }
896
897                         /* validate kernel addresses */
898                         if (offset < PTR2UINT(end)) {
899                                 if (offset + n > PTR2UINT(end))
900                                         n = PTR2UINT(end) - offset;
901                                 memmove(va, UINT2PTR(offset), n);
902                                 kref_put(&p->p_kref);
903                                 return n;
904                         }
905                         for (i = 0; i < nelem(conf.mem); i++) {
906                                 cm = &conf.mem[i];
907                                 /* klimit-1 because klimit might be zero! */
908                                 if (cm->kbase <= offset && offset <= cm->klimit - 1) {
909                                         if (offset + n >= cm->klimit - 1)
910                                                 n = cm->klimit - offset;
911                                         memmove(va, UINT2PTR(offset), n);
912                                         kref_put(&p->p_kref);
913                                         return n;
914                                 }
915                         }
916                         kref_put(&p->p_kref);
917                         error(Ebadarg);
918
919                 case Qprofile:
920                         s = p->seg[TSEG];
921                         if (s == 0 || s->profile == 0)
922                                 error("profile is off");
923                         i = (s->top - s->base) >> LRESPROF;
924                         i *= sizeof(*s->profile);
925                         if (offset >= i) {
926                                 kref_put(&p->p_kref);
927                                 return 0;
928                         }
929                         if (offset + n > i)
930                                 n = i - offset;
931                         memmove(va, ((char *)s->profile) + offset, n);
932                         kref_put(&p->p_kref);
933                         return n;
934
935                 case Qnote:
936                         qlock(&p->debug);
937                         if (waserror()) {
938                                 qunlock(&p->debug);
939                                 kref_put(&p->p_kref);
940                                 nexterror();
941                         }
942                         if (p->pid != PID(c->qid))
943                                 error(Eprocdied);
944                         if (n < 1)      /* must accept at least the '\0' */
945                                 error(Etoosmall);
946                         if (p->nnote == 0)
947                                 n = 0;
948                         else {
949                                 i = strlen(p->note[0].msg) + 1;
950                                 if (i > n)
951                                         i = n;
952                                 rptr = va;
953                                 memmove(rptr, p->note[0].msg, i);
954                                 rptr[i - 1] = '\0';
955                                 p->nnote--;
956                                 memmove(p->note, p->note + 1, p->nnote * sizeof(Note));
957                                 n = i;
958                         }
959                         if (p->nnote == 0)
960                                 p->notepending = 0;
961                         poperror();
962                         qunlock(&p->debug);
963                         kref_put(&p->p_kref);
964                         return n;
965
966                 case Qproc:
967                         if (offset >= sizeof(struct proc)) {
968                                 kref_put(&p->p_kref);
969                                 return 0;
970                         }
971                         if (offset + n > sizeof(struct proc))
972                                 n = sizeof(struct proc) - offset;
973                         memmove(va, ((char *)p) + offset, n);
974                         kref_put(&p->p_kref);
975                         return n;
976
977                 case Qregs:
978                         rptr = (uint8_t *) p->dbgreg;
979                         rsize = sizeof(Ureg);
980 regread:
981                         if (rptr == 0) {
982                                 kref_put(&p->p_kref);
983                                 error(Enoreg);
984                         }
985                         if (offset >= rsize) {
986                                 kref_put(&p->p_kref);
987                                 return 0;
988                         }
989                         if (offset + n > rsize)
990                                 n = rsize - offset;
991                         memmove(va, rptr + offset, n);
992                         kref_put(&p->p_kref);
993                         return n;
994
995                 case Qkregs:
996                         memset(&kur, 0, sizeof(Ureg));
997                         setkernur(&kur, p);
998                         rptr = (uint8_t *) & kur;
999                         rsize = sizeof(Ureg);
1000                         goto regread;
1001
1002                 case Qfpregs:
1003                         r = fpudevprocio(p, va, n, offset, 0);
1004                         kref_put(&p->p_kref);
1005                         return r;
1006
1007                 case Qstatus:
1008                         if (offset >= STATSIZE) {
1009                                 kref_put(&p->p_kref);
1010                                 return 0;
1011                         }
1012                         if (offset + n > STATSIZE)
1013                                 n = STATSIZE - offset;
1014
1015                         sps = p->psstate;
1016                         if (sps == 0)
1017                                 sps = statename[p->state];
1018                         memset(statbuf, ' ', sizeof statbuf);
1019                         j = 2 * KNAMELEN + 12;
1020                         snprint(statbuf, j + 1, "%-*.*s%-*.*s%-12.11s",
1021                                         KNAMELEN, KNAMELEN - 1, p->text,
1022                                         KNAMELEN, KNAMELEN - 1, p->user, sps);
1023
1024                         for (i = 0; i < 6; i++) {
1025                                 l = p->time[i];
1026                                 if (i == TReal)
1027                                         l = sys->ticks - l;
1028                                 l = TK2MS(l);
1029                                 readnum(0, statbuf + j + NUMSIZE * i, NUMSIZE, l, NUMSIZE);
1030                         }
1031                         /* ignore stack, which is mostly non-existent */
1032                         u = 0;
1033                         for (i = 1; i < NSEG; i++) {
1034                                 s = p->seg[i];
1035                                 if (s)
1036                                         u += s->top - s->base;
1037                         }
1038                         readnum(0, statbuf + j + NUMSIZE * 6, NUMSIZE, u >> 10u, NUMSIZE);      /* wrong size */
1039                         readnum(0, statbuf + j + NUMSIZE * 7, NUMSIZE, p->basepri, NUMSIZE);
1040                         readnum(0, statbuf + j + NUMSIZE * 8, NUMSIZE, p->priority,
1041                                         NUMSIZE);
1042
1043                         /*
1044                          * NIX: added # of traps, syscalls, and iccs
1045                          */
1046                         readnum(0, statbuf + j + NUMSIZE * 9, NUMSIZE, p->ntrap, NUMSIZE);
1047                         readnum(0, statbuf + j + NUMSIZE * 10, NUMSIZE, p->nintr, NUMSIZE);
1048                         readnum(0, statbuf + j + NUMSIZE * 11, NUMSIZE, p->nsyscall,
1049                                         NUMSIZE);
1050                         readnum(0, statbuf + j + NUMSIZE * 12, NUMSIZE, p->nicc, NUMSIZE);
1051                         readnum(0, statbuf + j + NUMSIZE * 13, NUMSIZE, p->nactrap,
1052                                         NUMSIZE);
1053                         readnum(0, statbuf + j + NUMSIZE * 14, NUMSIZE, p->nacsyscall,
1054                                         NUMSIZE);
1055                         memmove(va, statbuf + offset, n);
1056                         kref_put(&p->p_kref);
1057                         return n;
1058
1059                 case Qsegment:
1060                         j = 0;
1061                         for (i = 0; i < NSEG; i++) {
1062                                 sg = p->seg[i];
1063                                 if (sg == 0)
1064                                         continue;
1065                                 j += sprint(statbuf + j, "%-6s %c%c %p %p %4d\n",
1066                                                         sname[sg->type & SG_TYPE],
1067                                                         sg->type & SG_RONLY ? 'R' : ' ',
1068                                                         sg->profile ? 'P' : ' ',
1069                                                         sg->base, sg->top, sg->ref);
1070                         }
1071                         kref_put(&p->p_kref);
1072                         if (offset >= j)
1073                                 return 0;
1074                         if (offset + n > j)
1075                                 n = j - offset;
1076                         if (n == 0 && offset == 0)
1077                                 exhausted("segments");
1078                         memmove(va, &statbuf[offset], n);
1079                         return n;
1080
1081                 case Qwait:
1082                         if (!canqlock(&p->qwaitr)) {
1083                                 kref_put(&p->p_kref);
1084                                 error(Einuse);
1085                         }
1086
1087                         if (waserror()) {
1088                                 qunlock(&p->qwaitr);
1089                                 kref_put(&p->p_kref);
1090                                 nexterror();
1091                         }
1092
1093                         lock(&p->exl);
1094                         if (up == p && p->nchild == 0 && p->waitq == 0) {
1095                                 unlock(&p->exl);
1096                                 error(Enochild);
1097                         }
1098                         pid = p->pid;
1099                         while (p->waitq == 0) {
1100                                 unlock(&p->exl);
1101                                 rendez_sleep(&p->waitr, haswaitq, p);
1102                                 if (p->pid != pid)
1103                                         error(Eprocdied);
1104                                 lock(&p->exl);
1105                         }
1106                         wq = p->waitq;
1107                         p->waitq = wq->next;
1108                         p->nwait--;
1109                         unlock(&p->exl);
1110
1111                         poperror();
1112                         qunlock(&p->qwaitr);
1113                         kref_put(&p->p_kref);
1114                         n = snprint(va, n, "%d %lu %lud %lud %q",
1115                                                 wq->w.pid,
1116                                                 wq->w.time[TUser], wq->w.time[TSys], wq->w.time[TReal],
1117                                                 wq->w.msg);
1118                         kfree(wq);
1119                         return n;
1120 #endif
1121                 case Qstatus:{
1122                                 /* the extra 2 is paranoia */
1123                                 char buf[8 + 1 + PROC_PROGNAME_SZ + 1 + 10 + 1 + 6 + 2];
1124                                 snprintf(buf, sizeof(buf),
1125                                          "%8d %-*s %-10s %6d", p->pid, PROC_PROGNAME_SZ,
1126                                          p->progname, procstate2str(p->state),
1127                                          p->ppid);
1128                                 kref_put(&p->p_kref);
1129                                 return readstr(off, va, n, buf);
1130                         }
1131
1132                 case Qns:
1133                         //qlock(&p->debug);
1134                         if (waserror()) {
1135                                 //qunlock(&p->debug);
1136                                 kref_put(&p->p_kref);
1137                                 nexterror();
1138                         }
1139                         if (p->pgrp == NULL || p->pid != PID(c->qid))
1140                                 error(Eprocdied);
1141                         mw = c->aux;
1142                         if (mw->cddone) {
1143                                 poperror();
1144                                 //qunlock(&p->debug);
1145                                 kref_put(&p->p_kref);
1146                                 return 0;
1147                         }
1148                         mntscan(mw, p);
1149                         if (mw->mh == 0) {
1150                                 mw->cddone = 1;
1151                                 i = snprintf(va, n, "cd %s\n", p->dot->name->s);
1152                                 poperror();
1153                                 //qunlock(&p->debug);
1154                                 kref_put(&p->p_kref);
1155                                 return i;
1156                         }
1157                         int2flag(mw->cm->mflag, flag);
1158                         if (strcmp(mw->cm->to->name->s, "#M") == 0) {
1159                                 srv = srvname(mw->cm->to->mchan);
1160                                 i = snprintf(va, n, "mount %s %s %s %s\n", flag,
1161                                                          srv == NULL ? mw->cm->to->mchan->name->s : srv,
1162                                                          mw->mh->from->name->s,
1163                                                          mw->cm->spec ? mw->cm->spec : "");
1164                                 kfree(srv);
1165                         } else
1166                                 i = snprintf(va, n, "bind %s %s %s\n", flag,
1167                                                          mw->cm->to->name->s, mw->mh->from->name->s);
1168                         poperror();
1169                         //qunlock(&p->debug);
1170                         kref_put(&p->p_kref);
1171                         return i;
1172 #if 0
1173                 case Qnoteid:
1174                         r = readnum(offset, va, n, p->noteid, NUMSIZE);
1175                         kref_put(&p->p_kref);
1176                         return r;
1177                 case Qfd:
1178                         r = procfds(p, va, n, offset);
1179                         kref_put(&p->p_kref);
1180                         return r;
1181 #endif
1182         }
1183
1184         error(Egreg);
1185         return 0;       /* not reached */
1186 }
1187
1188 static void mntscan(struct mntwalk *mw, struct proc *p)
1189 {
1190         struct pgrp *pg;
1191         struct mount *t;
1192         struct mhead *f;
1193         int best, i, last, nxt;
1194
1195         pg = p->pgrp;
1196         rlock(&pg->ns);
1197
1198         nxt = 0;
1199         best = (int)(~0U >> 1); /* largest 2's complement int */
1200
1201         last = 0;
1202         if (mw->mh)
1203                 last = mw->cm->mountid;
1204
1205         for (i = 0; i < MNTHASH; i++) {
1206                 for (f = pg->mnthash[i]; f; f = f->hash) {
1207                         for (t = f->mount; t; t = t->next) {
1208                                 if (mw->mh == 0 || (t->mountid > last && t->mountid < best)) {
1209                                         mw->cm = t;
1210                                         mw->mh = f;
1211                                         best = mw->cm->mountid;
1212                                         nxt = 1;
1213                                 }
1214                         }
1215                 }
1216         }
1217         if (nxt == 0)
1218                 mw->mh = 0;
1219
1220         runlock(&pg->ns);
1221 }
1222
1223 static long procwrite(struct chan *c, void *va, long n, int64_t off)
1224 {
1225         ERRSTACK(2);
1226
1227         struct proc *p, *t;
1228         int i, id, l;
1229         char *args;
1230         uintptr_t offset;
1231
1232         if (c->qid.type & QTDIR)
1233                 error(Eisdir);
1234
1235         if ((p = pid2proc(SLOT(c->qid))) == NULL)
1236                 error(Eprocdied);
1237
1238         if (waserror()) {
1239                 kref_put(&p->p_kref);
1240                 nexterror();
1241         }
1242         if (p->pid != PID(c->qid))
1243                 error(Eprocdied);
1244
1245         offset = off;
1246
1247         switch (QID(c->qid)) {
1248 #if 0
1249                 case Qargs:
1250                         if (n == 0)
1251                                 error(Eshort);
1252                         if (n >= sizeof buf - strlen(p->text) - 1)
1253                                 error(Etoobig);
1254                         l = snprintf(buf, sizeof buf, "%s [%s]", p->text, (char *)va);
1255                         args = kzmalloc(l + 1, KMALLOC_WAIT);
1256                         if (args == NULL)
1257                                 error(Enomem);
1258                         memmove(args, buf, l);
1259                         args[l] = 0;
1260                         kfree(p->args);
1261                         p->nargs = l;
1262                         p->args = args;
1263                         p->setargs = 1;
1264                         break;
1265
1266                 case Qmem:
1267                         if (p->state != Stopped)
1268                                 error(Ebadctl);
1269
1270                         n = procctlmemio(p, offset, n, va, 0);
1271                         break;
1272
1273                 case Qregs:
1274                         if (offset >= sizeof(Ureg))
1275                                 n = 0;
1276                         else if (offset + n > sizeof(Ureg))
1277                                 n = sizeof(Ureg) - offset;
1278                         if (p->dbgreg == 0)
1279                                 error(Enoreg);
1280                         setregisters(p->dbgreg, (char *)(p->dbgreg) + offset, va, n);
1281                         break;
1282
1283                 case Qfpregs:
1284                         n = fpudevprocio(p, va, n, offset, 1);
1285                         break;
1286 #endif
1287                 case Qctl:
1288                         procctlreq(p, va, n);
1289                         break;
1290
1291                 default:
1292                         poperror();
1293                         kref_put(&p->p_kref);
1294                         error("unknown qid %#llux in procwrite\n", c->qid.path);
1295         }
1296         poperror();
1297         kref_put(&p->p_kref);
1298         return n;
1299
1300 }
1301
1302 struct dev procdevtab __devtab = {
1303         'p',
1304         "proc",
1305
1306         devreset,
1307         procinit,
1308         devshutdown,
1309         procattach,
1310         procwalk,
1311         procstat,
1312         procopen,
1313         devcreate,
1314         procclose,
1315         procread,
1316         devbread,
1317         procwrite,
1318         devbwrite,
1319         devremove,
1320         procwstat,
1321         devpower,
1322         devchaninfo,
1323 };
1324
1325 #if 0
1326 static struct chan *proctext(struct chan *c, struct proc *p)
1327 {
1328         ERRSTACK(2);
1329         struct chan *tc;
1330         Image *i;
1331         Segment *s;
1332
1333         s = p->seg[TSEG];
1334         if (s == 0)
1335                 error(Enonexist);
1336         if (p->state == Dead)
1337                 error(Eprocdied);
1338
1339         lock(s);
1340         i = s->image;
1341         if (i == 0) {
1342                 unlock(s);
1343                 error(Eprocdied);
1344         }
1345         unlock(s);
1346
1347         lock(i);
1348         if (waserror()) {
1349                 unlock(i);
1350                 nexterror();
1351         }
1352
1353         tc = i->c;
1354         if (tc == 0)
1355                 error(Eprocdied);
1356
1357         /* TODO: what do you want here?  you can't get a kref and have the new val
1358          * be 1.  Here is the old code: if (kref_get(&tc->ref, 1) == 1 || ... ) */
1359         if (kref_refcnt(&tc->ref, 1) == 1 || (tc->flag & COPEN) == 0
1360                 || tc->mode != OREAD) {
1361                 cclose(tc);
1362                 error(Eprocdied);
1363         }
1364
1365         if (p->pid != PID(c->qid)) {
1366                 cclose(tc);
1367                 error(Eprocdied);
1368         }
1369
1370         poperror();
1371         unlock(i);
1372
1373         return tc;
1374 }
1375
1376 /* TODO: this will fail at compile time, since we don't have a proc-wide rendez,
1377  * among other things, and we'll need to rewrite this for akaros */
1378 void procstopwait(struct proc *p, int ctl)
1379 {
1380         ERRSTACK(2);
1381         int pid;
1382
1383         if (p->pdbg)
1384                 error(Einuse);
1385         if (procstopped(p) || p->state == Broken)
1386                 return;
1387
1388         if (ctl != 0)
1389                 p->procctl = ctl;
1390         p->pdbg = up;
1391         pid = p->pid;
1392         qunlock(&p->debug);
1393         current->psstate = "Stopwait";
1394         if (waserror()) {
1395                 p->pdbg = 0;
1396                 qlock(&p->debug);
1397                 nexterror();
1398         }
1399         rendez_sleep(&current->sleep, procstopped, p);
1400         poperror();
1401         qlock(&p->debug);
1402         if (p->pid != pid)
1403                 error(Eprocdied);
1404 }
1405
1406 #endif
1407 static void procctlcloseone(struct proc *p, int fd)
1408 {
1409 // TODO: resolve this and sys_close
1410         struct file *file = get_file_from_fd(&p->open_files, fd);
1411         int retval = 0;
1412         printd("%s %d\n", __func__, fd);
1413         /* VFS */
1414         if (file) {
1415                 put_file_from_fd(&p->open_files, fd);
1416                 kref_put(&file->f_kref);        /* Drop the ref from get_file */
1417                 return;
1418         }
1419         /* 9ns, should also handle errors (bad FD, etc) */
1420         retval = sysclose(fd);
1421         return;
1422
1423         //sys_close(p, fd);
1424 }
1425
1426 void procctlclosefiles(struct proc *p, int all, int fd)
1427 {
1428         int i;
1429
1430         if (all)
1431                 for (i = 0; i < NR_FILE_DESC_MAX; i++)
1432                         procctlcloseone(p, i);
1433         else
1434                 procctlcloseone(p, fd);
1435 }
1436
1437 static void procctlreq(struct proc *p, char *va, int n)
1438 {
1439         ERRSTACK(1);
1440         int8_t irq_state = 0;
1441         int npc, pri, core;
1442         struct cmdbuf *cb;
1443         struct cmdtab *ct;
1444         int64_t time;
1445         char *e;
1446
1447         cb = parsecmd(va, n);
1448         if (waserror()) {
1449                 kfree(cb);
1450                 nexterror();
1451         }
1452
1453         ct = lookupcmd(cb, proccmd, ARRAY_SIZE(proccmd));
1454
1455         switch (ct->index) {
1456                 case CMvmstart:
1457                 case CMvmkill:
1458                 default:
1459                         error("nope\n");
1460                         break;
1461                 case CMtrace:
1462                         systrace_trace_pid(p);
1463                         break;
1464                 case CMclose:
1465                         procctlclosefiles(p, 0, atoi(cb->f[1]));
1466                         break;
1467                 case CMclosefiles:
1468                         procctlclosefiles(p, 1, 0);
1469                         break;
1470 #if 0
1471                         we may want this.Let us pause a proc.case CMhang:p->hang = 1;
1472                         break;
1473 #endif
1474                 case CMkill:
1475                         p = pid2proc(strtol(cb->f[1], 0, 0));
1476                         if (!p)
1477                                 error("No such proc\n");
1478
1479                         enable_irqsave(&irq_state);
1480                         proc_destroy(p);
1481                         disable_irqsave(&irq_state);
1482                         proc_decref(p);
1483                         /* this is a little ghetto. it's not fully free yet, but we are also
1484                          * slowing it down by messing with it, esp with the busy waiting on a
1485                          * hyperthreaded core. */
1486                         spin_on(p->env_cr3);
1487                         break;
1488                 case CMvminit:
1489                         break;
1490         }
1491         poperror();
1492         kfree(cb);
1493 }
1494
1495 #if 0
1496 static int procstopped(void *a)
1497 {
1498         struct proc *p = a;
1499         return p->state == Stopped;
1500 }
1501
1502 static int
1503 procctlmemio(struct proc *p, uintptr_t offset, int n, void *va, int read)
1504 {
1505         KMap *k;
1506         Pte *pte;
1507         Page *pg;
1508         Segment *s;
1509         uintptr_t soff, l;                      /* hmmmm */
1510         uint8_t *b;
1511         uintmem pgsz;
1512
1513         for (;;) {
1514                 s = seg(p, offset, 1);
1515                 if (s == 0)
1516                         error(Ebadarg);
1517
1518                 if (offset + n >= s->top)
1519                         n = s->top - offset;
1520
1521                 if (!read && (s->type & SG_TYPE) == SG_TEXT)
1522                         s = txt2data(p, s);
1523
1524                 s->steal++;
1525                 soff = offset - s->base;
1526                 if (waserror()) {
1527                         s->steal--;
1528                         nexterror();
1529                 }
1530                 if (fixfault(s, offset, read, 0, s->color) == 0)
1531                         break;
1532                 poperror();
1533                 s->steal--;
1534         }
1535         poperror();
1536         pte = s->map[soff / PTEMAPMEM];
1537         if (pte == 0)
1538                 panic("procctlmemio");
1539         pgsz = m->pgsz[s->pgszi];
1540         pg = pte->pages[(soff & (PTEMAPMEM - 1)) / pgsz];
1541         if (pagedout(pg))
1542                 panic("procctlmemio1");
1543
1544         l = pgsz - (offset & (pgsz - 1));
1545         if (n > l)
1546                 n = l;
1547
1548         k = kmap(pg);
1549         if (waserror()) {
1550                 s->steal--;
1551                 kunmap(k);
1552                 nexterror();
1553         }
1554         b = (uint8_t *) VA(k);
1555         b += offset & (pgsz - 1);
1556         if (read == 1)
1557                 memmove(va, b, n);      /* This can fault */
1558         else
1559                 memmove(b, va, n);
1560         poperror();
1561         kunmap(k);
1562
1563         /* Ensure the process sees text page changes */
1564         if (s->flushme)
1565                 memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));
1566
1567         s->steal--;
1568
1569         if (read == 0)
1570                 p->newtlb = 1;
1571
1572         return n;
1573 }
1574
1575 static Segment *txt2data(struct proc *p, Segment * s)
1576 {
1577         int i;
1578         Segment *ps;
1579
1580         ps = newseg(SG_DATA, s->base, s->size);
1581         ps->image = s->image;
1582         kref_get(&ps->image->ref, 1);
1583         ps->fstart = s->fstart;
1584         ps->flen = s->flen;
1585         ps->flushme = 1;
1586
1587         qlock(&p->seglock);
1588         for (i = 0; i < NSEG; i++)
1589                 if (p->seg[i] == s)
1590                         break;
1591         if (i == NSEG)
1592                 panic("segment gone");
1593
1594         qunlock(&s->lk);
1595         putseg(s);
1596         qlock(&ps->lk);
1597         p->seg[i] = ps;
1598         qunlock(&p->seglock);
1599
1600         return ps;
1601 }
1602
1603 Segment *data2txt(Segment * s)
1604 {
1605         Segment *ps;
1606
1607         ps = newseg(SG_TEXT, s->base, s->size);
1608         ps->image = s->image;
1609         kref_get(&ps->image->ref, 1);
1610         ps->fstart = s->fstart;
1611         ps->flen = s->flen;
1612         ps->flushme = 1;
1613
1614         return ps;
1615 }
1616 #endif