Fix strace flow control and data extraction issues
[akaros.git] / kern / drivers / dev / proc.c
1 /*
2  * This file is part of the UCB release of Plan 9. It is subject to the license
3  * terms in the LICENSE file found in the top-level directory of this
4  * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
5  * part of the UCB release of Plan 9, including this file, may be copied,
6  * modified, propagated, or distributed except according to the terms contained
7  * in the LICENSE file.
8  */
9
10 //#define DEBUG
11 /* proc on plan 9 has lots of capabilities, some of which we might
12  * want for akaros:
13  * debug control
14  * event tracing
15  * process control (no need for signal system call, etc.)
16  * textual status
17  * rather than excise code that won't work, I'm bracketing it with
18  * #if 0 until we know we don't want it
19  */
20 #include <vfs.h>
21 #include <kfs.h>
22 #include <slab.h>
23 #include <kmalloc.h>
24 #include <kref.h>
25 #include <string.h>
26 #include <stdio.h>
27 #include <assert.h>
28 #include <error.h>
29 #include <cpio.h>
30 #include <pmap.h>
31 #include <smp.h>
32 #include <arch/vmm/vmm.h>
33 #include <ros/vmm.h>
34
35 struct dev procdevtab;
36
37 static char *devname(void)
38 {
39         return procdevtab.name;
40 }
41
/*
 * File-type codes.  One of these lives in the low QSHIFT bits of every qid
 * path handed out by this device and is recovered with QID().  The values are
 * positional, so the order of the enumerators is significant.
 */
enum {
	/* the device root and the two files that sit directly in it */
	Qdir = 0,
	Qtrace,
	Qtracepids,

	/* files found inside a per-process directory (see procdir[]) */
	Qns,
	Qargs,
	Qctl,
	Qfd,
	Qfpregs,
	Qkregs,
	Qmem,
	Qnote,
	Qnoteid,
	Qnotepg,
	Qproc,
	Qregs,
	Qsegment,
	Qstatus,
	Qstrace,
	Qvmstatus,
	Qtext,
	Qwait,
	Qprofile,
	Qsyscall,
	Qcore,
};
68
/*
 * Indices of the control commands listed in proccmd[].  The values are
 * positional, so the order of the enumerators is significant.
 */
enum {
	CMclose = 0,
	CMclosefiles,
	CMfixedpri,
	CMhang,
	CMkill,
	CMnohang,
	CMnoswap,
	CMpri,
	CMprivate,
	CMprofile,
	CMstart,
	CMstartstop,
	CMstartsyscall,
	CMstop,
	CMtrace,
	CMwaitstop,
	CMwired,
	CMcore,

	/* virtual machine control */
	CMvminit,
	CMvmstart,
	CMvmkill,

	/* syscall tracing control */
	CMstraceme,
	CMstraceall,
	CMstraceoff,
};
95
/* Sizing constants for this device. */
enum {
	/* Nevents is a power of two so that Emask can be used as an index mask */
	Nevents = 1 << 14,
	Emask = Nevents - 1,
	Ntracedpids = 1 << 10,
	/* length reported for the per-process "status" file in procdir[] */
	STATSIZE = 8 + 1 + 10 + 1 + 6 + 2,
};
102
103 /*
104  * Status, fd, and ns are left fully readable (0444) because of their use in debugging,
105  * particularly on shared servers.
106  * Arguably, ns and fd shouldn't be readable; if you'd prefer, change them to 0000
107  */
108 struct dirtab procdir[] = {
109         {"args", {Qargs}, 0, 0660},
110         {"ctl", {Qctl}, 0, 0660},
111         {"fd", {Qfd}, 0, 0444},
112         {"fpregs", {Qfpregs}, 0, 0000},
113         //  {"kregs",   {Qkregs},   sizeof(Ureg),       0600},
114         {"mem", {Qmem}, 0, 0000},
115         {"note", {Qnote}, 0, 0000},
116         {"noteid", {Qnoteid}, 0, 0664},
117         {"notepg", {Qnotepg}, 0, 0000},
118         {"ns", {Qns}, 0, 0444},
119         {"proc", {Qproc}, 0, 0400},
120         //  {"regs",        {Qregs},    sizeof(Ureg),       0000},
121         {"segment", {Qsegment}, 0, 0444},
122         {"status", {Qstatus}, STATSIZE, 0444},
123         {"strace", {Qstrace}, 0, 0666},
124         {"vmstatus", {Qvmstatus}, 0, 0444},
125         {"text", {Qtext}, 0, 0000},
126         {"wait", {Qwait}, 0, 0400},
127         {"profile", {Qprofile}, 0, 0400},
128         {"syscall", {Qsyscall}, 0, 0400},
129         {"core", {Qcore}, 0, 0444},
130 };
131
132 static
133 struct cmdtab proccmd[] = {
134         {CMclose, "close", 2},
135         {CMclosefiles, "closefiles", 1},
136         {CMfixedpri, "fixedpri", 2},
137         {CMhang, "hang", 1},
138         {CMnohang, "nohang", 1},
139         {CMnoswap, "noswap", 1},
140         {CMkill, "kill", 1},
141         {CMpri, "pri", 2},
142         {CMprivate, "private", 1},
143         {CMprofile, "profile", 1},
144         {CMstart, "start", 1},
145         {CMstartstop, "startstop", 1},
146         {CMstartsyscall, "startsyscall", 1},
147         {CMstop, "stop", 1},
148         {CMtrace, "trace", 0},
149         {CMwaitstop, "waitstop", 1},
150         {CMwired, "wired", 2},
151         {CMcore, "core", 2},
152         {CMcore, "core", 2},
153         {CMcore, "core", 2},
154         {CMvminit, "vminit", 0},
155         {CMvmstart, "vmstart", 0},
156         {CMvmkill, "vmkill", 0},
157         {CMstraceme, "straceme", 0},
158         {CMstraceall, "straceall", 0},
159         {CMstraceoff, "straceoff", 0},
160 };
161
/*
 * Layout of a qid handed out by this device:
 *
 *   path:  the low QSHIFT (5) bits hold the file type, one of the Q* values
 *          above (an older comment said 4 bits);
 *          the next SLOTBITS (23) bits hold the process slot number + 1.
 *          The slot is the pid here, so pid + 1 is what gets stored.
 *   vers:  32 bits of pid, for consistency checking.
 *
 * If notepg, c->pgrpid.path is pgrp slot, .vers is noteid.
 *
 * NOTE(review): the masks are plain int expressions.  procgen() computes
 * path & ~QIDMASK; if qid.path is wider than int, an unsigned mask would not
 * sign-extend and would clear the upper bits.  Keep them signed.
 */
#define QSHIFT		5	/* location in qid of proc slot # */
#define SLOTBITS	23	/* number of bits in the slot */
#define QIDMASK		((1 << QSHIFT) - 1)
#define SLOTMASK	(((1 << SLOTBITS) - 1) << QSHIFT)

/* file type stored in the low bits of the path */
#define QID(q)		(((uint32_t)(q).path) & QIDMASK)
/* slot number; undoes the "+ 1" applied when the path was built */
#define SLOT(q)		(((((uint32_t)(q).path) & SLOTMASK) >> QSHIFT) - 1)
#define PID(q)		((q).vers)
#define NOTEID(q)	((q).vers)
179
180 static void procctlreq(struct proc *, char *, int);
181 static int procctlmemio(struct proc *, uintptr_t, int, void *, int);
182 //static struct chan*   proctext(struct chan*, struct proc*);
183 //static Segment* txt2data(struct proc*, Segment*);
184 //static int    procstopped(void*);
185 static void mntscan(struct mntwalk *, struct proc *);
186
187 //static Traceevent *tevents;
188 static char *tpids, *tpidsc, *tpidse;
189 static spinlock_t tlock;
190 static int topens;
191 static int tproduced, tconsumed;
192 //static void notrace(struct proc*, int, int64_t);
193
194 //void (*proctrace)(struct proc*, int, int64_t) = notrace;
195
196 #if 0
197 static void profclock(Ureg * ur, Timer *)
198 {
199         Tos *tos;
200
201         if (up == NULL || current->state != Running)
202                 return;
203
204         /* user profiling clock */
205         if (userureg(ur)) {
206                 tos = (Tos *) (USTKTOP - sizeof(Tos));
207                 tos->clock += TK2MS(1);
208                 segclock(userpc(ur));
209         }
210 }
211 #endif
212 static int
213 procgen(struct chan *c, char *name, struct dirtab *tab, int unused, int s,
214                 struct dir *dp)
215 {
216         struct qid qid;
217         struct proc *p;
218         char *ename;
219
220         int pid;
221         uint32_t path, perm, len;
222         if (s == DEVDOTDOT) {
223                 mkqid(&qid, Qdir, 0, QTDIR);
224                 devdir(c, qid, devname(), 0, eve, 0555, dp);
225                 return 1;
226         }
227
228         if (c->qid.path == Qdir) {
229                 if (s == 0) {
230                         strlcpy(get_cur_genbuf(), "trace", GENBUF_SZ);
231                         mkqid(&qid, Qtrace, -1, QTFILE);
232                         devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
233                         return 1;
234                 }
235                 if (s == 1) {
236                         strlcpy(get_cur_genbuf(), "tracepids", GENBUF_SZ);
237                         mkqid(&qid, Qtracepids, -1, QTFILE);
238                         devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
239                         return 1;
240                 }
241                 s -= 2;
242                 if (name != NULL) {
243                         /* ignore s and use name to find pid */
244                         pid = strtol(name, &ename, 10);
245                         if (pid <= 0 || ename[0] != '\0')
246                                 return -1;
247                         p = pid2proc(pid);
248                         if (!p)
249                                 return -1;
250                         /* Need to update s, so that it's the correct 'index' for our proc
251                          * (aka, the pid).  We use s later when making the qid. */
252                         s = pid;
253                 } else {
254                         /* This is a shitty iterator, and the list isn't guaranteed to give
255                          * you the same ordering twice in a row. (procs come and go). */
256                         p = pid_nth(s);
257                         if (!p)
258                                 return -1;
259                         pid = p->pid;
260                 }
261
262                 snprintf(get_cur_genbuf(), GENBUF_SZ, "%u", pid);
263                 /*
264                  * String comparison is done in devwalk so
265                  * name must match its formatted pid.
266                  */
267                 if (name != NULL && strcmp(name, get_cur_genbuf()) != 0) {
268                         printk("pid-name mismatch, name: %s, pid %d\n", name, pid);
269                         kref_put(&p->p_kref);
270                         return -1;
271                 }
272                 mkqid(&qid, (s + 1) << QSHIFT, pid, QTDIR);
273                 devdir(c, qid, get_cur_genbuf(), 0, p->user, DMDIR | 0555, dp);
274                 kref_put(&p->p_kref);
275                 return 1;
276         }
277         if (c->qid.path == Qtrace) {
278                 strlcpy(get_cur_genbuf(), "trace", GENBUF_SZ);
279                 mkqid(&qid, Qtrace, -1, QTFILE);
280                 devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
281                 return 1;
282         }
283         if (c->qid.path == Qtracepids) {
284                 strlcpy(get_cur_genbuf(), "tracepids", GENBUF_SZ);
285                 mkqid(&qid, Qtracepids, -1, QTFILE);
286                 devdir(c, qid, get_cur_genbuf(), 0, eve, 0444, dp);
287                 return 1;
288         }
289         if (s >= ARRAY_SIZE(procdir))
290                 return -1;
291         if (tab)
292                 panic("procgen");
293
294         tab = &procdir[s];
295         /* path is everything other than the QID part.  Not sure from the orig code
296          * if they wanted just the pid part (SLOTMASK) or everything above QID */
297         path = c->qid.path & ~QIDMASK;  /* slot component */
298         if ((p = pid2proc(SLOT(c->qid))) == NULL)
299                 return -1;
300         perm = 0444 | tab->perm;
301 #if 0
302         if (perm == 0)
303                 perm = p->procmode;
304         else    /* just copy read bits */
305                 perm |= p->procmode & 0444;
306 #endif
307
308         len = tab->length;
309 #if 0
310         switch (QID(c->qid)) {
311                 case Qwait:
312                         len = p->nwait; /* incorrect size, but >0 means there's something to read */
313                         break;
314                 case Qprofile:
315                         q = p->seg[TSEG];
316                         if (q && q->profile) {
317                                 len = (q->top - q->base) >> LRESPROF;
318                                 len *= sizeof(*q->profile);
319                         }
320                         break;
321         }
322 #endif
323
324         mkqid(&qid, path | tab->qid.path, c->qid.vers, QTFILE);
325         devdir(c, qid, tab->name, len, p->user, perm, dp);
326         kref_put(&p->p_kref);
327         return 1;
328 }
329
330 #if 0
331 static void notrace(struct proc *, Tevent, int64_t)
332 {
333 }
334
335 static spinlock_t tlck = SPINLOCK_INITIALIZER_IRQSAVE;
336
337 static void _proctrace(struct proc *p, Tevent etype, int64_t ts)
338 {
339         Traceevent *te;
340         int tp;
341
342         ilock(&tlck);
343         if (p->trace == 0 || topens == 0 || tproduced - tconsumed >= Nevents) {
344                 iunlock(&tlck);
345                 return;
346         }
347         tp = tproduced++;
348         iunlock(&tlck);
349
350         te = &tevents[tp & Emask];
351         te->pid = p->pid;
352         te->etype = etype;
353         if (ts == 0)
354                 te->time = todget(NULL);
355         else
356                 te->time = ts;
357         te->core = m->machno;
358 }
359
360 void proctracepid(struct proc *p)
361 {
362         if (p->trace == 1 && proctrace != notrace) {
363                 p->trace = 2;
364                 ilock(&tlck);
365                 tpidsc = seprint(tpidsc, tpidse, "%d %s\n", p->pid, p->text);
366                 iunlock(&tlck);
367         }
368 }
369
370 #endif
/* Device init hook.  There is nothing to set up at the moment: the nproc
 * sanity check and the profiling clock hookup are both compiled out. */
static void procinit(void)
{
#if 0
	if (conf.nproc >= (SLOTMASK >> QSHIFT) - 1)
		printd("warning: too many procs for devproc\n");
	addclock0link((void (*)(void))profclock, 113);  /* Relative prime to HZ */
#endif
}
379
/* Attach hook: hands spec to devattach() under this device's name and
 * returns the chan it produces. */
static struct chan *procattach(char *spec)
{
	struct chan *root = devattach(devname(), spec);

	return root;
}
384
385 static struct walkqid *procwalk(struct chan *c, struct chan *nc, char **name,
386                                                                 int nname)
387 {
388         return devwalk(c, nc, name, nname, 0, 0, procgen);
389 }
390
391 static int procstat(struct chan *c, uint8_t * db, int n)
392 {
393         return devstat(c, db, n, 0, 0, procgen);
394 }
395
/*
 * Intended policy: user "none" may not read or write the state of other
 * processes, so that a server running as none stays contained if it is
 * subverted (for example by a stack attack).
 *
 * The check is currently compiled out, so this is a no-op for every caller.
 */
static void nonone(struct proc *p)
{
	return;
#if 0
	if (p == up)
		return;
	if (strcmp(current->user, "none") != 0)
		return;
	if (iseve())
		return;
	error(EPERM, ERROR_FIXME);
#endif
}
415
416 static struct chan *procopen(struct chan *c, int omode)
417 {
418         ERRSTACK(2);
419         struct proc *p;
420         struct pgrp *pg;
421         struct chan *tc;
422         int pid;
423
424         if (c->qid.type & QTDIR)
425                 return devopen(c, omode, 0, 0, procgen);
426
427         if (QID(c->qid) == Qtrace) {
428                 error(ENOSYS, ERROR_FIXME);
429 #if 0
430                 if (omode != OREAD)
431                         error(EPERM, ERROR_FIXME);
432                 lock(&tlock);
433                 if (waserror()) {
434                         unlock(&tlock);
435                         nexterror();
436                 }
437                 if (topens > 0)
438                         error(EFAIL, "already open");
439                 topens++;
440                 if (tevents == NULL) {
441                         tevents = (Traceevent *) kzmalloc(sizeof(Traceevent) * Nevents,
442                                                                                           KMALLOC_WAIT);
443                         if (tevents == NULL)
444                                 error(ENOMEM, ERROR_FIXME);
445                         tpids = kzmalloc(Ntracedpids * 20, KMALLOC_WAIT);
446                         if (tpids == NULL) {
447                                 kfree(tpids);
448                                 tpids = NULL;
449                                 error(ENOMEM, ERROR_FIXME);
450                         }
451                         tpidsc = tpids;
452                         tpidse = tpids + Ntracedpids * 20;
453                         *tpidsc = 0;
454                         tproduced = tconsumed = 0;
455                 }
456                 proctrace = _proctrace;
457                 poperror();
458                 unlock(&tlock);
459
460                 c->mode = openmode(omode);
461                 c->flag |= COPEN;
462                 c->offset = 0;
463                 return c;
464 #endif
465         }
466         if (QID(c->qid) == Qtracepids) {
467                 error(ENOSYS, ERROR_FIXME);
468 #if 0
469                 if (omode != OREAD)
470                         error(EPERM, ERROR_FIXME);
471                 c->mode = openmode(omode);
472                 c->flag |= COPEN;
473                 c->offset = 0;
474                 return c;
475 #endif
476         }
477         if ((p = pid2proc(SLOT(c->qid))) == NULL)
478                 error(ESRCH, ERROR_FIXME);
479         //qlock(&p->debug);
480         if (waserror()) {
481                 //qunlock(&p->debug);
482                 kref_put(&p->p_kref);
483                 nexterror();
484         }
485         pid = PID(c->qid);
486         if (p->pid != pid)
487                 error(ESRCH, ERROR_FIXME);
488
489         omode = openmode(omode);
490
491         switch (QID(c->qid)) {
492                 case Qtext:
493                         error(ENOSYS, ERROR_FIXME);
494 /*
495                         if (omode != OREAD)
496                                 error(EPERM, ERROR_FIXME);
497                         tc = proctext(c, p);
498                         tc->offset = 0;
499                         poperror();
500                         qunlock(&p->debug);
501                         kref_put(&p->p_kref);
502                         cclose(c);
503                         return tc;
504 */
505                 case Qproc:
506                 case Qsegment:
507                 case Qprofile:
508                 case Qfd:
509                         if (omode != O_READ)
510                                 error(EPERM, ERROR_FIXME);
511                         break;
512
513                 case Qnote:
514 //          if (p->privatemem)
515                         error(EPERM, ERROR_FIXME);
516                         break;
517
518                 case Qmem:
519 //          if (p->privatemem)
520                         error(EPERM, ERROR_FIXME);
521                         //nonone(p);
522                         break;
523
524                 case Qargs:
525                 case Qnoteid:
526                 case Qwait:
527                 case Qregs:
528                 case Qfpregs:
529                 case Qkregs:
530                 case Qsyscall:
531                 case Qcore:
532                         nonone(p);
533                         break;
534
535                 case Qns:
536                         if (omode != O_READ)
537                                 error(EPERM, ERROR_FIXME);
538                         c->aux = kzmalloc(sizeof(struct mntwalk), KMALLOC_WAIT);
539                         break;
540                 case Qstatus:
541                 case Qvmstatus:
542                 case Qctl:
543                         break;
544
545                 case Qstrace:
546                         if (!p->strace)
547                                 error(ENOENT, "Process does not have tracing enabled");
548                         /* the ref we are upping is the one we put in __proc_free, which is
549                          * the one we got from CMstrace{on,me}.  We have a ref on p, so we
550                          * know we won't free until we decref the proc. */
551                         kref_get(&p->strace->users, 1);
552                         c->aux = p->strace;
553                         break;
554                 case Qnotepg:
555                         error(ENOSYS, ERROR_FIXME);
556 #if 0
557                         nonone(p);
558                         pg = p->pgrp;
559                         if (pg == NULL)
560                                 error(ESRCH, ERROR_FIXME);
561                         if (omode != OWRITE || pg->pgrpid == 1)
562                                 error(EPERM, ERROR_FIXME);
563                         c->pgrpid.path = pg->pgrpid + 1;
564                         c->pgrpid.vers = p->noteid;
565 #endif
566                         break;
567
568                 default:
569                         printk("procopen %#llux\n", c->qid.path);
570                         error(EINVAL, ERROR_FIXME);
571         }
572
573         /* Affix pid to qid */
574 //  if (p->state != Dead)
575         c->qid.vers = p->pid;
576         /* make sure the process slot didn't get reallocated while we were playing */
577         //coherence();
578         /* TODO: think about what we really want here.  In akaros, we wouldn't have
579          * our pid changed like that. */
580         if (p->pid != pid)
581                 error(ESRCH, ERROR_FIXME);
582
583         tc = devopen(c, omode, 0, 0, procgen);
584         poperror();
585         //qunlock(&p->debug);
586         kref_put(&p->p_kref);
587         return tc;
588 }
589
590 static int procwstat(struct chan *c, uint8_t * db, int n)
591 {
592         ERRSTACK(2);
593         error(ENOSYS, ERROR_FIXME);
594 #if 0
595         struct proc *p;
596         struct dir *d;
597
598         if (c->qid.type & QTDIR)
599                 error(EPERM, ERROR_FIXME);
600
601         if (QID(c->qid) == Qtrace)
602                 return devwstat(c, db, n);
603
604         if ((p = pid2proc(SLOT(c->qid))) == NULL)
605                 error(ESRCH, ERROR_FIXME);
606         nonone(p);
607         d = NULL;
608         qlock(&p->debug);
609         if (waserror()) {
610                 qunlock(&p->debug);
611                 kref_put(&p->p_kref);
612                 kfree(d);
613                 nexterror();
614         }
615
616         if (p->pid != PID(c->qid))
617                 error(ESRCH, ERROR_FIXME);
618
619         if (strcmp(current->user, p->user) != 0 && strcmp(current->user, eve) != 0)
620                 error(EPERM, ERROR_FIXME);
621
622         d = kzmalloc(sizeof(struct dir) + n, KMALLOC_WAIT);
623         n = convM2D(db, n, &d[0], (char *)&d[1]);
624         if (n == 0)
625                 error(ENOENT, ERROR_FIXME);
626         if (!emptystr(d->uid) && strcmp(d->uid, p->user) != 0) {
627                 if (strcmp(current->user, eve) != 0)
628                         error(EPERM, ERROR_FIXME);
629                 else
630                         kstrdup(&p->user, d->uid);
631         }
632         if (d->mode != ~0UL)
633                 p->procmode = d->mode & 0777;
634
635         poperror();
636         qunlock(&p->debug);
637         kref_put(&p->p_kref);
638         kfree(d);
639
640         return n;
641 #endif
642 }
643
644 #if 0
645 static long procoffset(long offset, char *va, int *np)
646 {
647         if (offset > 0) {
648                 offset -= *np;
649                 if (offset < 0) {
650                         memmove(va, va + *np + offset, -offset);
651                         *np = -offset;
652                 } else
653                         *np = 0;
654         }
655         return offset;
656 }
657
658 static int procqidwidth(struct chan *c)
659 {
660         char buf[32];
661
662         return sprint(buf, "%lu", c->qid.vers);
663 }
664
665 int procfdprint(struct chan *c, int fd, int w, char *s, int ns)
666 {
667         int n;
668
669         if (w == 0)
670                 w = procqidwidth(c);
671         n = snprint(s, ns,
672                                 "%3d %.2s %C %4ud (%.16llux %*lud %.2ux) %5ld %8lld %s\n", fd,
673                                 &"r w rw"[(c->mode & 3) << 1], c->dev->dc, c->devno,
674                                 c->qid.path, w, c->qid.vers, c->qid.type, c->iounit, c->offset,
675                                 c->name->s);
676         return n;
677 }
678
679 static int procfds(struct proc *p, char *va, int count, long offset)
680 {
681         ERRSTACK(2);
682         struct fgrp *f;
683         struct chan *c;
684         char buf[256];
685         int n, i, w, ww;
686         char *a;
687
688         /* print to buf to avoid holding fgrp lock while writing to user space */
689         if (count > sizeof buf)
690                 count = sizeof buf;
691         a = buf;
692
693         qlock(&p->debug);
694         f = p->fgrp;
695         if (f == NULL) {
696                 qunlock(&p->debug);
697                 return 0;
698         }
699         lock(f);
700         if (waserror()) {
701                 unlock(f);
702                 qunlock(&p->debug);
703                 nexterror();
704         }
705
706         n = readstr(0, a, count, p->dot->name->s);
707         n += snprint(a + n, count - n, "\n");
708         offset = procoffset(offset, a, &n);
709         /* compute width of qid.path */
710         w = 0;
711         for (i = 0; i <= f->maxfd; i++) {
712                 c = f->fd[i];
713                 if (c == NULL)
714                         continue;
715                 ww = procqidwidth(c);
716                 if (ww > w)
717                         w = ww;
718         }
719         for (i = 0; i <= f->maxfd; i++) {
720                 c = f->fd[i];
721                 if (c == NULL)
722                         continue;
723                 n += procfdprint(c, i, w, a + n, count - n);
724                 offset = procoffset(offset, a, &n);
725         }
726         poperror();
727         unlock(f);
728         qunlock(&p->debug);
729
730         /* copy result to user space, now that locks are released */
731         memmove(va, buf, n);
732
733         return n;
734 }
735 #endif
736 static void procclose(struct chan *c)
737 {
738         if (QID(c->qid) == Qtrace) {
739                 spin_lock(&tlock);
740                 if (topens > 0)
741                         topens--;
742                 /* ??
743                    if(topens == 0)
744                    proctrace = notrace;
745                  */
746                 spin_unlock(&tlock);
747         }
748         if (QID(c->qid) == Qsyscall) {
749                 if (c->aux)
750                         qclose(c->aux);
751                 c->aux = NULL;
752         }
753         if (QID(c->qid) == Qns && c->aux != 0)
754                 kfree(c->aux);
755         if (QID(c->qid) == Qstrace && c->aux != 0) {
756                 struct strace *s = c->aux;
757
758                 kref_put(&s->users);
759                 c->aux = NULL;
760         }
761 }
762
763 void int2flag(int flag, char *s)
764 {
765         if (flag == 0) {
766                 *s = '\0';
767                 return;
768         }
769         *s++ = '-';
770         if (flag & MAFTER)
771                 *s++ = 'a';
772         if (flag & MBEFORE)
773                 *s++ = 'b';
774         if (flag & MCREATE)
775                 *s++ = 'c';
776         if (flag & MCACHE)
777                 *s++ = 'C';
778         *s = '\0';
779 }
780
781 #if 0
782 static char *argcpy(char *s, char *p)
783 {
784         char *t, *tp, *te;
785         int n;
786
787         n = p - s;
788         if (n > 128)
789                 n = 128;
790         if (n <= 0) {
791                 t = kzmalloc(1, KMALLOC_WAIT);
792                 *t = 0;
793                 return t;
794         }
795         t = kzmalloc(n, KMALLOC_WAIT);
796         tp = t;
797         te = t + n;
798
799         while (tp + 1 < te) {
800                 for (p--; p > s && p[-1] != 0; p--) ;
801                 tp = seprint(tp, te, "%q ", p);
802                 if (p == s)
803                         break;
804         }
805         if (*tp == ' ')
806                 *tp = 0;
807         return t;
808 }
809
810 static int procargs(struct proc *p, char *buf, int nbuf)
811 {
812         char *s;
813
814         if (p->setargs == 0) {
815                 s = argcpy(p->args, p->args + p->nargs);
816                 kfree(p->args);
817                 p->nargs = strlen(s);
818                 p->args = s;
819                 p->setargs = 1;
820         }
821         return snprint(buf, nbuf, "%s", p->args);
822 }
823
824 static int eventsavailable(void *)
825 {
826         return tproduced > tconsumed;
827 }
828 #endif
829 static long procread(struct chan *c, void *va, long n, int64_t off)
830 {
831         ERRSTACK(5);
832         struct proc *p;
833         long l, r;
834         int i, j, navail, pid, rsize;
835         char flag[10], *sps, *srv, statbuf[512];
836         uintptr_t offset, u;
837         int tesz;
838         uint8_t *rptr;
839         struct mntwalk *mw;
840         struct strace *s;
841
842         if (c->qid.type & QTDIR) {
843                 int nn;
844                 printd("procread: dir\n");
845                 nn = devdirread(c, va, n, 0, 0, procgen);
846                 printd("procread: %d\n", nn);
847                 return nn;
848         }
849
850         offset = off;
851 #if 0
852         if (QID(c->qid) == Qtrace) {
853                 if (!eventsavailable(NULL))
854                         return 0;
855
856                 rptr = va;
857                 tesz = BIT32SZ + BIT32SZ + BIT64SZ + BIT32SZ;
858                 navail = tproduced - tconsumed;
859                 if (navail > n / tesz)
860                         navail = n / tesz;
861                 while (navail > 0) {
862                         PBIT32(rptr, tevents[tconsumed & Emask].pid);
863                         rptr += BIT32SZ;
864                         PBIT32(rptr, tevents[tconsumed & Emask].etype);
865                         rptr += BIT32SZ;
866                         PBIT64(rptr, tevents[tconsumed & Emask].time);
867                         rptr += BIT64SZ;
868                         PBIT32(rptr, tevents[tconsumed & Emask].core);
869                         rptr += BIT32SZ;
870                         tconsumed++;
871                         navail--;
872                 }
873                 return rptr - (uint8_t *) va;
874         }
875
876         if (QID(c->qid) == Qtracepids)
877                 if (tpids == NULL)
878                         return 0;
879                 else
880                         return readstr(off, va, n, tpids);
881 #endif
882         /* Some shit in proc doesn't need to grab the reference.  For strace, we
883          * already have the chan open, and all we want to do is read the queue,
884          * which exists because of our kref on it. */
885         switch (QID(c->qid)) {
886                 case Qstrace:
887                         s = c->aux;
888                         n = qread(s->q, va, n);
889                         return n;
890         }
891
892         if ((p = pid2proc(SLOT(c->qid))) == NULL)
893                 error(ESRCH, "%d: no such process", SLOT(c->qid));
894         if (p->pid != PID(c->qid)) {
895                 kref_put(&p->p_kref);
896                 error(ESRCH, "weird: p->pid is %d, PID(c->qid) is %d: mismatch",
897                       p->pid, PID(c->qid));
898         }
899         switch (QID(c->qid)) {
900                 default:
901                         kref_put(&p->p_kref);
902                         break;
903 #if 0
904 #warning check refcnting in here
905                 case Qargs:
906                         qlock(&p->debug);
907                         j = procargs(p, current->genbuf, sizeof current->genbuf);
908                         qunlock(&p->debug);
909                         kref_put(&p->p_kref);
910                         if (offset >= j)
911                                 return 0;
912                         if (offset + n > j)
913                                 n = j - offset;
914                         memmove(va, &current->genbuf[offset], n);
915                         return n;
916
917                 case Qsyscall:
918                         if (p->syscalltrace == NULL)
919                                 return 0;
920                         return readstr(offset, va, n, p->syscalltrace);
921
922                 case Qcore:
923                         i = 0;
924                         ac = p->ac;
925                         wired = p->wired;
926                         if (ac != NULL)
927                                 i = ac->machno;
928                         else if (wired != NULL)
929                                 i = wired->machno;
930                         snprint(statbuf, sizeof statbuf, "%d\n", i);
931                         return readstr(offset, va, n, statbuf);
932
933                 case Qmem:
934                         if (offset < KZERO
935                                 || (offset >= USTKTOP - USTKSIZE && offset < USTKTOP)) {
936                                 r = procctlmemio(p, offset, n, va, 1);
937                                 kref_put(&p->p_kref);
938                                 return r;
939                         }
940
941                         if (!iseve()) {
942                                 kref_put(&p->p_kref);
943                                 error(EPERM, ERROR_FIXME);
944                         }
945
946                         /* validate kernel addresses */
947                         if (offset < PTR2UINT(end)) {
948                                 if (offset + n > PTR2UINT(end))
949                                         n = PTR2UINT(end) - offset;
950                                 memmove(va, UINT2PTR(offset), n);
951                                 kref_put(&p->p_kref);
952                                 return n;
953                         }
954                         for (i = 0; i < nelem(conf.mem); i++) {
955                                 cm = &conf.mem[i];
956                                 /* klimit-1 because klimit might be zero! */
957                                 if (cm->kbase <= offset && offset <= cm->klimit - 1) {
958                                         if (offset + n >= cm->klimit - 1)
959                                                 n = cm->klimit - offset;
960                                         memmove(va, UINT2PTR(offset), n);
961                                         kref_put(&p->p_kref);
962                                         return n;
963                                 }
964                         }
965                         kref_put(&p->p_kref);
966                         error(EINVAL, ERROR_FIXME);
967
968                 case Qprofile:
969                         s = p->seg[TSEG];
970                         if (s == 0 || s->profile == 0)
971                                 error(EFAIL, "profile is off");
972                         i = (s->top - s->base) >> LRESPROF;
973                         i *= sizeof(*s->profile);
974                         if (offset >= i) {
975                                 kref_put(&p->p_kref);
976                                 return 0;
977                         }
978                         if (offset + n > i)
979                                 n = i - offset;
980                         memmove(va, ((char *)s->profile) + offset, n);
981                         kref_put(&p->p_kref);
982                         return n;
983
984                 case Qnote:
985                         qlock(&p->debug);
986                         if (waserror()) {
987                                 qunlock(&p->debug);
988                                 kref_put(&p->p_kref);
989                                 nexterror();
990                         }
991                         if (p->pid != PID(c->qid))
992                                 error(ESRCH, ERROR_FIXME);
993                         if (n < 1)      /* must accept at least the '\0' */
994                                 error(ENAMETOOLONG, ERROR_FIXME);
995                         if (p->nnote == 0)
996                                 n = 0;
997                         else {
998                                 i = strlen(p->note[0].msg) + 1;
999                                 if (i > n)
1000                                         i = n;
1001                                 rptr = va;
1002                                 memmove(rptr, p->note[0].msg, i);
1003                                 rptr[i - 1] = '\0';
1004                                 p->nnote--;
1005                                 memmove(p->note, p->note + 1, p->nnote * sizeof(Note));
1006                                 n = i;
1007                         }
1008                         if (p->nnote == 0)
1009                                 p->notepending = 0;
1010                         poperror();
1011                         qunlock(&p->debug);
1012                         kref_put(&p->p_kref);
1013                         return n;
1014
1015                 case Qproc:
1016                         if (offset >= sizeof(struct proc)) {
1017                                 kref_put(&p->p_kref);
1018                                 return 0;
1019                         }
1020                         if (offset + n > sizeof(struct proc))
1021                                 n = sizeof(struct proc) - offset;
1022                         memmove(va, ((char *)p) + offset, n);
1023                         kref_put(&p->p_kref);
1024                         return n;
1025
1026                 case Qregs:
1027                         rptr = (uint8_t *) p->dbgreg;
1028                         rsize = sizeof(Ureg);
1029 regread:
1030                         if (rptr == 0) {
1031                                 kref_put(&p->p_kref);
1032                                 error(ENODATA, ERROR_FIXME);
1033                         }
1034                         if (offset >= rsize) {
1035                                 kref_put(&p->p_kref);
1036                                 return 0;
1037                         }
1038                         if (offset + n > rsize)
1039                                 n = rsize - offset;
1040                         memmove(va, rptr + offset, n);
1041                         kref_put(&p->p_kref);
1042                         return n;
1043
1044                 case Qkregs:
1045                         memset(&kur, 0, sizeof(Ureg));
1046                         setkernur(&kur, p);
1047                         rptr = (uint8_t *) & kur;
1048                         rsize = sizeof(Ureg);
1049                         goto regread;
1050
1051                 case Qfpregs:
1052                         r = fpudevprocio(p, va, n, offset, 0);
1053                         kref_put(&p->p_kref);
1054                         return r;
1055
1056                 case Qstatus:
1057                         if (offset >= STATSIZE) {
1058                                 kref_put(&p->p_kref);
1059                                 return 0;
1060                         }
1061                         if (offset + n > STATSIZE)
1062                                 n = STATSIZE - offset;
1063
1064                         sps = p->psstate;
1065                         if (sps == 0)
1066                                 sps = statename[p->state];
1067                         memset(statbuf, ' ', sizeof statbuf);
1068                         j = 2 * KNAMELEN + 12;
1069                         snprint(statbuf, j + 1, "%-*.*s%-*.*s%-12.11s",
1070                                         KNAMELEN, KNAMELEN - 1, p->text,
1071                                         KNAMELEN, KNAMELEN - 1, p->user, sps);
1072
1073                         for (i = 0; i < 6; i++) {
1074                                 l = p->time[i];
1075                                 if (i == TReal)
1076                                         l = sys->ticks - l;
1077                                 l = TK2MS(l);
1078                                 readnum(0, statbuf + j + NUMSIZE * i, NUMSIZE, l, NUMSIZE);
1079                         }
1080                         /* ignore stack, which is mostly non-existent */
1081                         u = 0;
1082                         for (i = 1; i < NSEG; i++) {
1083                                 s = p->seg[i];
1084                                 if (s)
1085                                         u += s->top - s->base;
1086                         }
1087                         readnum(0, statbuf + j + NUMSIZE * 6, NUMSIZE, u >> 10u, NUMSIZE);      /* wrong size */
1088                         readnum(0, statbuf + j + NUMSIZE * 7, NUMSIZE, p->basepri, NUMSIZE);
1089                         readnum(0, statbuf + j + NUMSIZE * 8, NUMSIZE, p->priority,
1090                                         NUMSIZE);
1091
1092                         /*
1093                          * NIX: added # of traps, syscalls, and iccs
1094                          */
1095                         readnum(0, statbuf + j + NUMSIZE * 9, NUMSIZE, p->ntrap, NUMSIZE);
1096                         readnum(0, statbuf + j + NUMSIZE * 10, NUMSIZE, p->nintr, NUMSIZE);
1097                         readnum(0, statbuf + j + NUMSIZE * 11, NUMSIZE, p->nsyscall,
1098                                         NUMSIZE);
1099                         readnum(0, statbuf + j + NUMSIZE * 12, NUMSIZE, p->nicc, NUMSIZE);
1100                         readnum(0, statbuf + j + NUMSIZE * 13, NUMSIZE, p->nactrap,
1101                                         NUMSIZE);
1102                         readnum(0, statbuf + j + NUMSIZE * 14, NUMSIZE, p->nacsyscall,
1103                                         NUMSIZE);
1104                         memmove(va, statbuf + offset, n);
1105                         kref_put(&p->p_kref);
1106                         return n;
1107
1108                 case Qsegment:
1109                         j = 0;
1110                         for (i = 0; i < NSEG; i++) {
1111                                 sg = p->seg[i];
1112                                 if (sg == 0)
1113                                         continue;
1114                                 j += sprint(statbuf + j, "%-6s %c%c %p %p %4d\n",
1115                                                         sname[sg->type & SG_TYPE],
1116                                                         sg->type & SG_RONLY ? 'R' : ' ',
1117                                                         sg->profile ? 'P' : ' ',
1118                                                         sg->base, sg->top, sg->ref);
1119                         }
1120                         kref_put(&p->p_kref);
1121                         if (offset >= j)
1122                                 return 0;
1123                         if (offset + n > j)
1124                                 n = j - offset;
1125                         if (n == 0 && offset == 0)
1126                                 exhausted("segments");
1127                         memmove(va, &statbuf[offset], n);
1128                         return n;
1129
1130                 case Qwait:
1131                         if (!canqlock(&p->qwaitr)) {
1132                                 kref_put(&p->p_kref);
1133                                 error(EBUSY, ERROR_FIXME);
1134                         }
1135
1136                         if (waserror()) {
1137                                 qunlock(&p->qwaitr);
1138                                 kref_put(&p->p_kref);
1139                                 nexterror();
1140                         }
1141
1142                         lock(&p->exl);
1143                         if (up == p && p->nchild == 0 && p->waitq == 0) {
1144                                 unlock(&p->exl);
1145                                 error(ECHILD, ERROR_FIXME);
1146                         }
1147                         pid = p->pid;
1148                         while (p->waitq == 0) {
1149                                 unlock(&p->exl);
1150                                 rendez_sleep(&p->waitr, haswaitq, p);
1151                                 if (p->pid != pid)
1152                                         error(ESRCH, ERROR_FIXME);
1153                                 lock(&p->exl);
1154                         }
1155                         wq = p->waitq;
1156                         p->waitq = wq->next;
1157                         p->nwait--;
1158                         unlock(&p->exl);
1159
1160                         poperror();
1161                         qunlock(&p->qwaitr);
1162                         kref_put(&p->p_kref);
1163                         n = snprint(va, n, "%d %lu %lud %lud %q",
1164                                                 wq->w.pid,
1165                                                 wq->w.time[TUser], wq->w.time[TSys], wq->w.time[TReal],
1166                                                 wq->w.msg);
1167                         kfree(wq);
1168                         return n;
1169 #endif
1170                 case Qstatus:{
1171                                 /* the old code grew the stack and was hideous.
1172                                  * status is not a high frequency operation; just malloc. */
1173                                 char *buf = kmalloc(4096, KMALLOC_WAIT);
1174                                 char *s = buf, *e = buf + 4096;
1175                                 int i;
1176
1177                                 s = seprintf(s, e,
1178                                          "%8d %-*s %-10s %6d", p->pid, PROC_PROGNAME_SZ,
1179                                          p->progname, procstate2str(p->state),
1180                                          p->ppid);
1181                                 if (p->strace)
1182                                         s = seprintf(s, e, " %d trace users %d traced procs",
1183                                                      kref_refcnt(&p->strace->users),
1184                                                      kref_refcnt(&p->strace->procs));
1185                                 kref_put(&p->p_kref);
1186                                 i = readstr(off, va, n, buf);
1187                                 kfree(buf);
1188                                 return i;
1189                         }
1190
1191                 case Qvmstatus:
1192                         {
1193                                 char buf[50*65 + 2];
1194                                 int i, offset;
1195                                 offset=0;
1196                                 offset += snprintf(buf+offset, sizeof(buf)-offset, "{\n");
1197                                 for (i = 0; i < 65; i++) {
1198                                         if (p->vmm.vmexits[i] != 0) {
1199                                                 offset += snprintf(buf+offset, sizeof(buf)-offset,
1200                                                                    "\"%s\":\"%lld\",\n",
1201                                                                    VMX_EXIT_REASON_NAMES[i],
1202                                                                    p->vmm.vmexits[i]);
1203                                         }
1204                                 }
1205                                 offset += snprintf(buf+offset, sizeof(buf)-offset, "}\n");
1206                                 kref_put(&p->p_kref);
1207                                 return readstr(off, va, n, buf);
1208                         }
1209                 case Qns:
1210                         //qlock(&p->debug);
1211                         if (waserror()) {
1212                                 //qunlock(&p->debug);
1213                                 kref_put(&p->p_kref);
1214                                 nexterror();
1215                         }
1216                         if (p->pgrp == NULL || p->pid != PID(c->qid))
1217                                 error(ESRCH, ERROR_FIXME);
1218                         mw = c->aux;
1219                         if (mw->cddone) {
1220                                 poperror();
1221                                 //qunlock(&p->debug);
1222                                 kref_put(&p->p_kref);
1223                                 return 0;
1224                         }
1225                         mntscan(mw, p);
1226                         if (mw->mh == 0) {
1227                                 mw->cddone = 1;
1228                                 i = snprintf(va, n, "cd %s\n", p->dot->name->s);
1229                                 poperror();
1230                                 //qunlock(&p->debug);
1231                                 kref_put(&p->p_kref);
1232                                 return i;
1233                         }
1234                         int2flag(mw->cm->mflag, flag);
1235                         if (strcmp(mw->cm->to->name->s, "#M") == 0) {
1236                                 srv = srvname(mw->cm->to->mchan);
1237                                 i = snprintf(va, n, "mount %s %s %s %s\n", flag,
1238                                                          srv == NULL ? mw->cm->to->mchan->name->s : srv,
1239                                                          mw->mh->from->name->s,
1240                                                          mw->cm->spec ? mw->cm->spec : "");
1241                                 kfree(srv);
1242                         } else
1243                                 i = snprintf(va, n, "bind %s %s %s\n", flag,
1244                                                          mw->cm->to->name->s, mw->mh->from->name->s);
1245                         poperror();
1246                         //qunlock(&p->debug);
1247                         kref_put(&p->p_kref);
1248                         return i;
1249 #if 0
1250                 case Qnoteid:
1251                         r = readnum(offset, va, n, p->noteid, NUMSIZE);
1252                         kref_put(&p->p_kref);
1253                         return r;
1254                 case Qfd:
1255                         r = procfds(p, va, n, offset);
1256                         kref_put(&p->p_kref);
1257                         return r;
1258 #endif
1259         }
1260
1261
1262         error(EINVAL, "QID %d did not match any QIDs for #proc", QID(c->qid));
1263         return 0;       /* not reached */
1264 }
1265
1266 static void mntscan(struct mntwalk *mw, struct proc *p)
1267 {
1268         struct pgrp *pg;
1269         struct mount *t;
1270         struct mhead *f;
1271         int best, i, last, nxt;
1272
1273         pg = p->pgrp;
1274         rlock(&pg->ns);
1275
1276         nxt = 0;
1277         best = (int)(~0U >> 1); /* largest 2's complement int */
1278
1279         last = 0;
1280         if (mw->mh)
1281                 last = mw->cm->mountid;
1282
1283         for (i = 0; i < MNTHASH; i++) {
1284                 for (f = pg->mnthash[i]; f; f = f->hash) {
1285                         for (t = f->mount; t; t = t->next) {
1286                                 if (mw->mh == 0 || (t->mountid > last && t->mountid < best)) {
1287                                         mw->cm = t;
1288                                         mw->mh = f;
1289                                         best = mw->cm->mountid;
1290                                         nxt = 1;
1291                                 }
1292                         }
1293                 }
1294         }
1295         if (nxt == 0)
1296                 mw->mh = 0;
1297
1298         runlock(&pg->ns);
1299 }
1300
1301 static long procwrite(struct chan *c, void *va, long n, int64_t off)
1302 {
1303         ERRSTACK(2);
1304
1305         struct proc *p, *t;
1306         int i, id, l;
1307         char *args;
1308         uintptr_t offset;
1309
1310         if (c->qid.type & QTDIR)
1311                 error(EISDIR, ERROR_FIXME);
1312
1313         if ((p = pid2proc(SLOT(c->qid))) == NULL)
1314                 error(ESRCH, ERROR_FIXME);
1315
1316         if (waserror()) {
1317                 kref_put(&p->p_kref);
1318                 nexterror();
1319         }
1320         if (p->pid != PID(c->qid))
1321                 error(ESRCH, ERROR_FIXME);
1322
1323         offset = off;
1324
1325         switch (QID(c->qid)) {
1326 #if 0
1327                 case Qargs:
1328                         if (n == 0)
1329                                 error(EINVAL, ERROR_FIXME);
1330                         if (n >= sizeof buf - strlen(p->text) - 1)
1331                                 error(E2BIG, ERROR_FIXME);
1332                         l = snprintf(buf, sizeof buf, "%s [%s]", p->text, (char *)va);
1333                         args = kzmalloc(l + 1, KMALLOC_WAIT);
1334                         if (args == NULL)
1335                                 error(ENOMEM, ERROR_FIXME);
1336                         memmove(args, buf, l);
1337                         args[l] = 0;
1338                         kfree(p->args);
1339                         p->nargs = l;
1340                         p->args = args;
1341                         p->setargs = 1;
1342                         break;
1343
1344                 case Qmem:
1345                         if (p->state != Stopped)
1346                                 error(EINVAL, ERROR_FIXME);
1347
1348                         n = procctlmemio(p, offset, n, va, 0);
1349                         break;
1350
1351                 case Qregs:
1352                         if (offset >= sizeof(Ureg))
1353                                 n = 0;
1354                         else if (offset + n > sizeof(Ureg))
1355                                 n = sizeof(Ureg) - offset;
1356                         if (p->dbgreg == 0)
1357                                 error(ENODATA, ERROR_FIXME);
1358                         setregisters(p->dbgreg, (char *)(p->dbgreg) + offset, va, n);
1359                         break;
1360
1361                 case Qfpregs:
1362                         n = fpudevprocio(p, va, n, offset, 1);
1363                         break;
1364 #endif
1365                 case Qctl:
1366                         procctlreq(p, va, n);
1367                         break;
1368
1369                 /* this lets your write a marker into the data stream,
1370                  * which is a very powerful tool. */
1371                 case Qstrace:
1372                         assert(c->aux);
1373                         /* it is possible that the q hungup and is closed.  that would be
1374                          * the case if all of the procs closed and decref'd.  if the q is
1375                          * closed, qwrite() will throw an error. */
1376                         n = qwrite(((struct strace*)c->aux)->q, va, n);
1377                         break;
1378                 default:
1379                         error(EFAIL, "unknown qid %#llux in procwrite\n", c->qid.path);
1380         }
1381         poperror();
1382         kref_put(&p->p_kref);
1383         return n;
1384 }
1385
1386 struct dev procdevtab __devtab = {
1387         .name = "proc",
1388
1389         .reset = devreset,
1390         .init = procinit,
1391         .shutdown = devshutdown,
1392         .attach = procattach,
1393         .walk = procwalk,
1394         .stat = procstat,
1395         .open = procopen,
1396         .create = devcreate,
1397         .close = procclose,
1398         .read = procread,
1399         .bread = devbread,
1400         .write = procwrite,
1401         .bwrite = devbwrite,
1402         .remove = devremove,
1403         .wstat = procwstat,
1404         .power = devpower,
1405         .chaninfo = devchaninfo,
1406 };
1407
1408 #if 0
/* Returns the chan stored in the image of p's text segment (p->seg[TSEG]).
 *
 * Throws ENOENT if p has no text segment, and ESRCH if p is Dead, the segment
 * has no image, the image has no chan, the chan fails the open/read-only
 * checks below, or p's pid no longer matches c's qid.
 *
 * NOTE(review): this is disabled (#if 0) code carried over from Plan 9; see
 * the TODO below about the reference-count check. */
static struct chan *proctext(struct chan *c, struct proc *p)
{
        ERRSTACK(2);
        struct chan *tc;
        Image *i;
        Segment *s;

        s = p->seg[TSEG];
        if (s == 0)
                error(ENOENT, ERROR_FIXME);
        if (p->state == Dead)
                error(ESRCH, ERROR_FIXME);

        /* Read the image pointer under the segment lock. */
        lock(s);
        i = s->image;
        if (i == 0) {
                unlock(s);
                error(ESRCH, ERROR_FIXME);
        }
        unlock(s);

        /* The image stays locked for the remaining checks; the error handler
         * releases it if any of them throws. */
        lock(i);
        if (waserror()) {
                unlock(i);
                nexterror();
        }

        tc = i->c;
        if (tc == 0)
                error(ESRCH, ERROR_FIXME);

        /* TODO: what do you want here?  you can't get a kref and have the new val
         * be 1.  Here is the old code: if (kref_get(&tc->ref, 1) == 1 || ... ) */
        if (kref_refcnt(&tc->ref, 1) == 1 || (tc->flag & COPEN) == 0
                || tc->mode != OREAD) {
                cclose(tc);
                error(ESRCH, ERROR_FIXME);
        }

        /* Make sure p is still the process the chan refers to. */
        if (p->pid != PID(c->qid)) {
                cclose(tc);
                error(ESRCH, ERROR_FIXME);
        }

        poperror();
        unlock(i);

        return tc;
}
1458
/* TODO: this will fail at compile time, since we don't have a proc-wide rendez,
 * among other things, and we'll need to rewrite this for akaros */
/* Waits until p is stopped.  Returns immediately if p is already stopped or
 * Broken.  If ctl is nonzero it is stored in p->procctl first.
 *
 * Throws EBUSY if p->pdbg is already set, and ESRCH if p's pid changed while
 * sleeping.
 *
 * p->debug is unlocked before sleeping and re-locked on both the normal and
 * the error path (presumably the caller holds it on entry -- confirm). */
void procstopwait(struct proc *p, int ctl)
{
        ERRSTACK(2);
        int pid;

        if (p->pdbg)
                error(EBUSY, ERROR_FIXME);
        if (procstopped(p) || p->state == Broken)
                return;

        if (ctl != 0)
                p->procctl = ctl;
        p->pdbg = up;
        /* Remember the pid so a change during the sleep can be detected. */
        pid = p->pid;
        qunlock(&p->debug);
        current->psstate = "Stopwait";
        if (waserror()) {
                /* Sleep was aborted: clear pdbg and retake the lock before
                 * propagating the error. */
                p->pdbg = 0;
                qlock(&p->debug);
                nexterror();
        }
        rendez_sleep(&current->sleep, procstopped, p);
        poperror();
        qlock(&p->debug);
        if (p->pid != pid)
                error(ESRCH, ERROR_FIXME);
}
1488
1489 #endif
1490 static void procctlcloseone(struct proc *p, int fd)
1491 {
1492 // TODO: resolve this and sys_close
1493         struct file *file = get_file_from_fd(&p->open_files, fd);
1494         int retval = 0;
1495         printd("%s %d\n", __func__, fd);
1496         /* VFS */
1497         if (file) {
1498                 put_file_from_fd(&p->open_files, fd);
1499                 kref_put(&file->f_kref);        /* Drop the ref from get_file */
1500                 return;
1501         }
1502         /* 9ns, should also handle errors (bad FD, etc) */
1503         retval = sysclose(fd);
1504         return;
1505
1506         //sys_close(p, fd);
1507 }
1508
1509 void procctlclosefiles(struct proc *p, int all, int fd)
1510 {
1511         int i;
1512
1513         if (all)
1514                 for (i = 0; i < NR_FILE_DESC_MAX; i++)
1515                         procctlcloseone(p, i);
1516         else
1517                 procctlcloseone(p, fd);
1518 }
1519
1520 static void strace_shutdown(struct kref *a)
1521 {
1522         struct strace *strace = container_of(a, struct strace, procs);
1523         static const char base_msg[] = "Traced ~%lu syscs, Dropped %lu";
1524         size_t msg_len = NUMSIZE64 * 2 + sizeof(base_msg);
1525         char *msg = kmalloc(msg_len, 0);
1526
1527         if (msg)
1528                 snprintf(msg, msg_len, base_msg, strace->appx_nr_sysc,
1529                          atomic_read(&strace->nr_drops));
1530         qhangup(strace->q, msg);
1531         kfree(msg);
1532 }
1533
1534 static void strace_release(struct kref *a)
1535 {
1536         struct strace *strace = container_of(a, struct strace, users);
1537
1538         qfree(strace->q);
1539         kfree(strace);
1540 }
1541
1542 static void procctlreq(struct proc *p, char *va, int n)
1543 {
1544         ERRSTACK(1);
1545         int8_t irq_state = 0;
1546         int npc, pri, core;
1547         struct cmdbuf *cb;
1548         struct cmdtab *ct;
1549         int64_t time;
1550         char *e;
1551         struct strace *strace;
1552
1553         cb = parsecmd(va, n);
1554         if (waserror()) {
1555                 kfree(cb);
1556                 nexterror();
1557         }
1558
1559         ct = lookupcmd(cb, proccmd, ARRAY_SIZE(proccmd));
1560
1561         switch (ct->index) {
1562         case CMstraceall:
1563         case CMstraceme:
1564                 /* common allocation.  if we inherited, we might have one already */
1565                 if (!p->strace) {
1566                         strace = kzmalloc(sizeof(*p->strace), KMALLOC_WAIT);
1567                         strace->q = qopen(65536, Qdropoverflow|Qcoalesce, NULL, NULL);
1568                         /* both of these refs are put when the proc is freed.  procs is for
1569                          * every process that has this p->strace.  users is procs + every
1570                          * user (e.g. from open()).
1571                          *
1572                          * it is possible to kref_put the procs kref in proc_destroy, which
1573                          * would make strace's job easier (no need to do an async wait on
1574                          * the child), and we wouldn't need to decref p in
1575                          * procread(Qstrace).  But the downside is that proc_destroy races
1576                          * with us here with the kref initialization. */
1577                         kref_init(&strace->procs, strace_shutdown, 1);
1578                         kref_init(&strace->users, strace_release, 1);
1579                         if (!atomic_cas_ptr((void**)&p->strace, 0, strace)) {
1580                                 /* someone else won the race and installed strace. */
1581                                 qfree(strace->q);
1582                                 kfree(strace);
1583                                 error(EAGAIN, "Concurrent strace init, try again");
1584                         }
1585                 }
1586                 break;
1587         }
1588
1589         /* actually do the command. */
1590         switch (ct->index) {
1591         case CMvmstart:
1592         case CMvmkill:
1593         default:
1594                 error(EFAIL, "Command not implemented");
1595                 break;
1596         case CMtrace:
1597                 systrace_trace_pid(p);
1598                 break;
1599         case CMclose:
1600                 procctlclosefiles(p, 0, atoi(cb->f[1]));
1601                 break;
1602         case CMclosefiles:
1603                 procctlclosefiles(p, 1, 0);
1604                 break;
1605 #if 0
1606                 we may want this.Let us pause a proc.case CMhang:p->hang = 1;
1607                 break;
1608 #endif
1609         case CMkill:
1610                 p = pid2proc(strtol(cb->f[1], 0, 0));
1611                 if (!p)
1612                         error(EFAIL, "No such proc\n");
1613
1614                 enable_irqsave(&irq_state);
1615                 proc_destroy(p);
1616                 disable_irqsave(&irq_state);
1617                 proc_decref(p);
1618                 /* this is a little ghetto. it's not fully free yet, but we are also
1619                  * slowing it down by messing with it, esp with the busy waiting on a
1620                  * hyperthreaded core. */
1621                 spin_on(p->env_cr3);
1622                 break;
1623         case CMvminit:
1624                 break;
1625         case CMstraceme:
1626                 p->strace_on = TRUE;
1627                 p->strace_inherit = FALSE;
1628                 break;
1629         case CMstraceall:
1630                 p->strace_on = TRUE;
1631                 p->strace_inherit = TRUE;
1632                 break;
1633         case CMstraceoff:
1634                 p->strace_on = FALSE;
1635                 p->strace_inherit = FALSE;
1636                 break;
1637         }
1638         poperror();
1639         kfree(cb);
1640 }
1641
1642 #if 0
1643 static int procstopped(void *a)
1644 {
1645         struct proc *p = a;
1646         return p->state == Stopped;
1647 }
1648
/* Copies bytes between the kernel buffer va and process p's memory at address
 * 'offset'.  With read == 1 the copy goes from the process into va; otherwise
 * from va into the process.
 *
 * Returns the number of bytes actually copied, which may be less than n: the
 * count is clamped to the end of the containing segment and to the end of the
 * page containing 'offset'.  Throws EINVAL if no segment contains 'offset'.
 *
 * NOTE(review): disabled (#if 0) Plan 9 code; it relies on Segment/Pte/KMap
 * helpers that are not visible in this file. */
static int
procctlmemio(struct proc *p, uintptr_t offset, int n, void *va, int read)
{
        KMap *k;
        Pte *pte;
        Page *pg;
        Segment *s;
        uintptr_t soff, l;                      /* hmmmm */
        uint8_t *b;
        uintmem pgsz;

        /* Retry until fixfault() returns 0.  On that successful exit the loop
         * is left with s->steal still raised and the error handler still
         * pushed; both are undone further down. */
        for (;;) {
                s = seg(p, offset, 1);
                if (s == 0)
                        error(EINVAL, ERROR_FIXME);

                /* Do not run past the end of the segment. */
                if (offset + n >= s->top)
                        n = s->top - offset;

                /* Writes aimed at a text segment go to the segment returned by
                 * txt2data() instead. */
                if (!read && (s->type & SG_TYPE) == SG_TEXT)
                        s = txt2data(p, s);

                s->steal++;
                soff = offset - s->base;
                if (waserror()) {
                        s->steal--;
                        nexterror();
                }
                if (fixfault(s, offset, read, 0, s->color) == 0)
                        break;
                poperror();
                s->steal--;
        }
        /* Pops the handler left pushed by the iteration that broke out. */
        poperror();
        pte = s->map[soff / PTEMAPMEM];
        if (pte == 0)
                panic("procctlmemio");
        pgsz = m->pgsz[s->pgszi];
        pg = pte->pages[(soff & (PTEMAPMEM - 1)) / pgsz];
        if (pagedout(pg))
                panic("procctlmemio1");

        /* l = bytes remaining in the page from 'offset'; never copy across a
         * page boundary. */
        l = pgsz - (offset & (pgsz - 1));
        if (n > l)
                n = l;

        k = kmap(pg);
        if (waserror()) {
                s->steal--;
                kunmap(k);
                nexterror();
        }
        b = (uint8_t *) VA(k);
        b += offset & (pgsz - 1);
        if (read == 1)
                memmove(va, b, n);      /* This can fault */
        else
                memmove(b, va, n);
        poperror();
        kunmap(k);

        /* Ensure the process sees text page changes */
        if (s->flushme)
                memset(pg->cachectl, PG_TXTFLUSH, sizeof(pg->cachectl));

        s->steal--;

        if (read == 0)
                p->newtlb = 1;

        return n;
}
1721
/*
 * Swap the segment s in p's segment table for a newly created SG_DATA
 * segment covering the same base and size and sharing the same image (an
 * extra reference is taken on the image).  The new segment is marked
 * flushme.
 *
 * On entry s->lk is unlocked by this function, so the caller is presumably
 * holding it; s is then handed to putseg().  The replacement segment is
 * returned with its own lk locked.  Panics with "segment gone" if s is not
 * found in p->seg[].
 */
1722 static Segment *txt2data(struct proc *p, Segment * s)
1723 {
1724         Segment *copy = newseg(SG_DATA, s->base, s->size);
1725         int slot = 0;
1726
1727         copy->flushme = 1;
1728         copy->flen = s->flen;
1729         copy->fstart = s->fstart;
1730         copy->image = s->image;
1731         kref_get(&copy->image->ref, 1);
1732
             /* Find the table slot that currently holds s, under seglock. */
1733         qlock(&p->seglock);
1734         while (slot < NSEG && p->seg[slot] != s) {
1735                 slot++;
1736         }
1737         if (slot >= NSEG) {
1738                 panic("segment gone");
1739         }
1740
             /* Hand-off: release and drop the old segment, then lock the new
              * one before publishing it in the table. */
1741         qunlock(&s->lk);
1742         putseg(s);
1743         qlock(&copy->lk);
1744         p->seg[slot] = copy;
1745         qunlock(&p->seglock);
1746
1747         return copy;
1748 }
1749
/*
 * Create a new SG_TEXT segment with the same base and size as s.  The new
 * segment shares s's image (taking an additional reference on it), copies
 * fstart and flen from s, and is marked flushme.  s itself is not modified;
 * the new segment is returned.
 */
1750 Segment *data2txt(Segment * s)
1751 {
1752         Segment *text = newseg(SG_TEXT, s->base, s->size);
1753
1754         text->flushme = 1;
1755         text->flen = s->flen;
1756         text->fstart = s->fstart;
1757
             /* Share the backing image and account for the new user of it. */
1758         text->image = s->image;
1759         kref_get(&text->image->ref, 1);
1760
1761         return text;
1762 }
1763 #endif