0554b45a041a5a37405b0db8019d80e19c323239
[akaros.git] / kern / drivers / dev / vm.c
1 //#define DEBUG
2 /* Copyright 2014 Google Inc.
3  * Copyright (c) 2013 The Regents of the University of California
4  * Barret Rhoden <brho@cs.berkeley.edu>
5  * See LICENSE for details.
6  *
7  * devvm/#V: a device for VMs
8  *
9  */
10
11 #include <kmalloc.h>
12 #include <string.h>
13 #include <stdio.h>
14 #include <assert.h>
15 #include <error.h>
16 #include <pmap.h>
17 #include <sys/queue.h>
18 #include <smp.h>
19 #include <kref.h>
20 #include <atomic.h>
21 #include <alarm.h>
22 #include <event.h>
23 #include <umem.h>
24 #include <devalarm.h>
25 #include <arch/types.h>
26 #include <arch/vm.h>
27 #include <arch/emulate.h>
28 #include <arch/vmdebug.h>
29
/* qid path types.  One of these values is stored in the low bits of every
 * qid path handed out by this device (see TYPE() and QID()).  Numbering
 * starts at 1. */
enum {
	Qtopdir = 1,	/* the root directory of the device */
	Qclone = 2,	/* top-level "clone" file */
	Qstat = 3,	/* top-level "stat" file */
	Qvmdir = 4,	/* a per-VM "vm%d" directory */
	Qctl = 5,	/* "ctl" file inside a VM directory */
	Qimage = 6,	/* "image" file inside a VM directory */
};
39
/* The QID is the TYPE and the index into the vms array: the low ID_SHIFT bits
 * of the qid path hold the type (one of the Q* values) and the bits above
 * them hold the array index.
 * We reserve the right to make it an id later.
 */
#define ID_SHIFT 5
/* Per-VM bookkeeping; one element of the vms array.
 * vm's have an image.
 * Note that the image can be read even as it is running. */
struct vm {
	/* reference count; initialized in vmopen with vm_release as its release
	 * method */
	struct kref kref;
	/* should this be an array of pages? Hmm. */
	void *image;
	/* size passed to readmem() together with image when "image" is read */
	unsigned long imagesize;
	/* 0-based id from newvmid(); used for the "vm%d" directory name, returned
	 * by reads of "ctl", and passed to vmx_create_vcpu() */
	int id;
	/* arch-specific VM state returned by vmx_open() */
	struct litevm *archvm;
};
54
/* Held while vmopen/vm_release resize or reshuffle the vms array and while
 * vmgen looks up an entry in it. */
static spinlock_t vmlock;
/* array, not linked list. We expect few, might as well be cache friendly. */
static struct vm *vms = NULL;
/* number of valid entries in vms */
static int nvm = 0;
/* set to 1 by vminit when vmx_init() returns 0; vmattach refuses to attach
 * while it is 0 */
static int vmok = 0;

/* ID allocator: newvmid() reads vmid's reference count as the next id and then
 * bumps it, all under vmidlock.  The initializer presumably sets the count to
 * 1 (TODO confirm against struct kref's layout). */
static spinlock_t vmidlock[1];
static struct kref vmid[1] = { {(void *)1, fake_release} };
63
64 static inline struct vm *
65 QID2VM(struct qid q)
66 {
67         return &vms[((q).path >> ID_SHIFT)];
68 }
69
70 static inline int 
71 TYPE(struct qid q)
72 {
73         return ((q).path & ((1 << ID_SHIFT) - 1));
74 }
75
76 static inline int QID(int index, int type)
77 {
78         return ((index << ID_SHIFT) | type);
79 }
80
81 /* we'll need this somewhere more generic. */
82 static void readn(struct chan *c, void *vp, long n)
83 {
84         print_func_entry();
85         char *p;
86         long nn;
87         int total = 0, want = n;
88
89         p = vp;
90         while (n > 0) {
91                 nn = devtab[c->type].read(c, p, n, c->offset);
92                 printk("readn: Got %d@%lld\n", nn, c->offset);
93                 if (nn == 0)
94                         error("%s: wanted %d, got %d", Eshort, total, want);
95                 c->offset += nn;
96                 p += nn;
97                 n -= nn;
98                 total += nn;
99         }
100         print_func_exit();
101 }
102
103 /* not called yet.  -- we have to unlink the vm */
104 static void vm_release(struct kref *kref)
105 {
106         print_func_entry();
107         struct vm *v = container_of(kref, struct vm, kref);
108         spin_lock_irqsave(&vmlock);
109         /* cute trick. Save the last element of the array in place of the
110          * one we're deleting. Reduce nvm. Don't realloc; that way, next
111          * time we add a vm the allocator will just return.
112          * Well, this is stupid, because when we do this, we break
113          * the QIDs, which have pointers embedded in them.
114          * darn it, may have to use a linked list. Nope, will probably
115          * just walk the array until we find a matching id. Still ... yuck.
116          */
117         if (v != &vms[nvm - 1]) {
118                 /* free the image ... oops */
119                 /* get rid of the kref. */
120                 *v = vms[nvm - 1];
121         }
122         nvm--;
123         spin_unlock(&vmlock);
124         print_func_exit();
125 }
126
127 /* VM ids run in the range 1..infinity. But vmx.c wants them
128  * 0-based.
129  */
130 static int newvmid(void)
131 {
132         print_func_entry();
133         int id;
134         spin_lock_irqsave(vmidlock);
135         id = kref_refcnt(vmid);
136         kref_get(vmid, 1);
137         spin_unlock(vmidlock);
138         print_func_exit();
139         return id - 1;
140 }
141
142 static int vmgen(struct chan *c, char *entry_name,
143                                  struct dirtab *unused, int unused_nr_dirtab,
144                                  int s, struct dir *dp)
145 {
146         print_func_entry();
147         struct qid q;
148         struct vm *vm_i;
149         printd("GEN s %d\n", s);
150         /* Whether we're in one dir or at the top, .. still takes us to the top. */
151         if (s == DEVDOTDOT) {
152                 mkqid(&q, Qtopdir, 0, QTDIR);
153                 devdir(c, c->qid, "#V", 0, eve, 0555, dp);
154                 print_func_exit();
155                 return 1;
156         }
157         printd("TYPE %d\n", TYPE(c->qid));
158         switch (TYPE(c->qid)) {
159                 case Qtopdir:
160                         printd("Qtopdir s %d nvm %d\n", s, nvm);
161                         /* Generate elements for the top level dir.  We support clone, stat,
162                          * vm dirs at the top level */
163                         if (s == 0) {
164                                 mkqid(&q, Qclone, 0, QTFILE);
165                                 devdir(c, q, "clone", 0, eve, 0666, dp);
166                                 print_func_exit();
167                                 return 1;
168                         }
169                         s--;
170                         if (s == 0) {
171                                 mkqid(&q, Qstat, 0, QTFILE);
172                                 devdir(c, q, "stat", 0, eve, 0666, dp);
173                                 print_func_exit();
174                                 return 1;
175                         }
176                         s--;    /* 1 -> 0th element, 2 -> 1st element, etc */
177                         spin_lock_irqsave(&vmlock);
178                         if (s >= nvm) {
179                                 printd("DONE qtopdir\n");
180                                 spin_unlock(&vmlock);
181                                 print_func_exit();
182                                 return -1;
183                         }
184                         vm_i = &vms[s];
185                         snprintf(get_cur_genbuf(), GENBUF_SZ, "vm%d", vm_i->id);
186                         spin_unlock(&vmlock);
187                         printk("clone vm_i is %p\n", vm_i);
188                         mkqid(&q, QID(s, Qvmdir), 0, QTDIR);
189                         devdir(c, q, get_cur_genbuf(), 0, eve, 0555, dp);
190                         print_func_exit();
191                         return 1;
192                 case Qvmdir:
193                         /* Gen the contents of the vm dirs */
194                         s += Qctl;      /* first time through, start on Qctl */
195                         switch (s) {
196                                 case Qctl:
197                                         mkqid(&q, QID(s-Qctl, Qctl), 0, QTFILE);
198                                         devdir(c, q, "ctl", 0, eve, 0666, dp);
199                                         print_func_exit();
200                                         return 1;
201                                 case Qimage:
202                                         mkqid(&q, QID(s-Qctl, Qimage), 0, QTFILE);
203                                         devdir(c, q, "image", 0, eve, 0666, dp);
204                                         print_func_exit();
205                                         return 1;
206                         }
207                         print_func_exit();
208                         return -1;
209                         /* Need to also provide a direct hit for Qclone and all other files (at
210                          * all levels of the hierarchy).  Every file is both
211                          * generated (via the s increments in their respective directories) and
212                          * directly gen-able.  devstat() will call gen with a specific path in
213                          * the qid.  In these cases, we make a dir for whatever they are asking
214                          * for.  Note the qid stays the same.  I think this is what the old
215                          * plan9 comments above devgen were talking about for (ii).
216                          *
217                          * We don't need to do this for the directories - devstat will look for
218                          * the a directory by path and fail.  Then it will manually build the
219                          * stat output (check the -1 case in devstat). */
220                 case Qclone:
221                         devdir(c, c->qid, "clone", 0, eve, 0666, dp);
222                         print_func_exit();
223                         return 1;
224                 case Qstat:
225                         devdir(c, c->qid, "stat", 0, eve, 0444, dp);
226                         print_func_exit();
227                         return 1;
228                 case Qctl:
229                         devdir(c, c->qid, "ctl", 0, eve, 0666, dp);
230                         print_func_exit();
231                         return 1;
232                 case Qimage:
233                         devdir(c, c->qid, "image", 0, eve, 0666, dp);
234                         print_func_exit();
235                         return 1;
236         }
237         print_func_exit();
238         return -1;
239 }
240
241 static void vminit(void)
242 {
243         print_func_entry();
244         int i;
245         spinlock_init_irqsave(&vmlock);
246         spinlock_init_irqsave(vmidlock);
247         i = vmx_init();
248         if (i == 0)
249                 vmok = 1;
250         printk("vminit: litevm_init returns %d\n", i);
251
252         print_func_exit();
253 }
254
255 static struct chan *vmattach(char *spec)
256 {
257         print_func_entry();
258         if (!vmok)
259                 error("No VMs available");
260         struct chan *c = devattach('V', spec);
261         mkqid(&c->qid, Qtopdir, 0, QTDIR);
262         print_func_exit();
263         return c;
264 }
265
266 static struct walkqid *vmwalk(struct chan *c, struct chan *nc, char **name,
267                                                           int nname)
268 {
269         print_func_entry();
270         print_func_exit();
271         return devwalk(c, nc, name, nname, 0, 0, vmgen);
272 }
273
274 static int vmstat(struct chan *c, uint8_t * db, int n)
275 {
276         print_func_entry();
277         print_func_exit();
278         return devstat(c, db, n, 0, 0, vmgen);
279 }
280
281 /* It shouldn't matter if p = current is DYING.  We'll eventually fail to insert
282  * the open chan into p's fd table, then decref the chan. */
283 static struct chan *vmopen(struct chan *c, int omode)
284 {
285         print_func_entry();
286         ERRSTACK(1);
287         struct vm *v = QID2VM(c->qid);
288         printk("vmopen: v is %p\n", v);
289         if (waserror()) {
290                 nexterror();
291         }
292         switch (TYPE(c->qid)) {
293                 case Qtopdir:
294                 case Qvmdir:
295                         if (omode & ORCLOSE)
296                                 error(Eperm);
297                         if (!IS_RDONLY(omode))
298                                 error(Eisdir);
299                         break;
300                 case Qclone:
301                         spin_lock_irqsave(&vmlock);
302                         vms = krealloc(vms, sizeof(vms[0]) * (nvm + 1), 0);
303                         v = &vms[nvm];
304                         nvm++;
305                         spin_unlock(&vmlock);
306                         kref_init(&v->kref, vm_release, 1);
307                         v->id = newvmid();
308                         mkqid(&c->qid, QID(nvm, Qctl), 0, QTFILE);
309                         c->aux = v;
310                         printd("New VM id %d\n", v->id);
311                         v->archvm = vmx_open();
312                         if (!v->archvm) {
313                                 printk("vm_open failed\n");
314                                 error("vm_open failed");
315                         }
316                         if (vmx_create_vcpu(v->archvm, v->id) < 0) {
317                                 printk("vm_create failed");
318                                 error("vm_create failed");
319                         }
320                         printk("Qclone open: id %d, v is %p, v->archvm is %p\n", 
321                                         nvm-1,
322                                         v, v->archvm);
323                         break;
324                 case Qstat:
325                         break;
326                 case Qctl:
327                 case Qimage:
328                         c->aux = QID2VM(c->qid);
329                         printk("open qctl: aux (vm) is %p\n", c->aux);
330                         break;
331         }
332         c->mode = openmode(omode);
333         /* Assumes c is unique (can't be closed concurrently */
334         c->flag |= COPEN;
335         c->offset = 0;
336         poperror();
337         print_func_exit();
338         return c;
339 }
340
341 static void vmcreate(struct chan *c, char *name, int omode, uint32_t perm)
342 {
343         print_func_entry();
344         error(Eperm);
345         print_func_exit();
346 }
347
348 static void vmremove(struct chan *c)
349 {
350         print_func_entry();
351         error(Eperm);
352         print_func_exit();
353 }
354
/* Wstat hook: not supported; always raises "No vmwstat". */
static int vmwstat(struct chan *c, uint8_t *dp, int n)
{
	print_func_entry();

	error("No vmwstat");

	print_func_exit();
	return 0;
}
362
363 static void vmclose(struct chan *c)
364 {
365         print_func_entry();
366         struct vm *v = c->aux;
367         if (!v) {
368                 print_func_exit();
369                 return;
370         }
371         /* There are more closes than opens.  For instance, sysstat doesn't open,
372          * but it will close the chan it got from namec.  We only want to clean
373          * up/decref chans that were actually open. */
374         if (!(c->flag & COPEN)) {
375                 print_func_exit();
376                 return;
377         }
378         switch (TYPE(c->qid)) {
379                         /* for now, leave the VM active even when we close ctl */
380                 case Qctl:
381                         break;
382                 case Qimage:
383                         kref_put(&v->kref);
384                         break;
385         }
386         print_func_exit();
387 }
388
389 static long vmread(struct chan *c, void *ubuf, long n, int64_t offset)
390 {
391         print_func_entry();
392         struct vm *v = c->aux;
393         printd("VMREAD\n");
394         switch (TYPE(c->qid)) {
395                 case Qtopdir:
396                 case Qvmdir:
397                         print_func_exit();
398                         return devdirread(c, ubuf, n, 0, 0, vmgen);
399                 case Qstat:
400                         print_func_exit();
401                         return readnum(offset, ubuf, n, nvm, NUMSIZE32);
402                 case Qctl:
403                         assert(v);
404                         print_func_exit();
405                         return readnum(offset, ubuf, n, v->id, NUMSIZE32);
406                 case Qimage:
407                         assert(v);
408                         print_func_exit();
409                         return readmem(offset, ubuf, n, v->image, v->imagesize);
410                 default:
411                         panic("Bad QID %p in devvm", c->qid.path);
412         }
413         print_func_exit();
414         return 0;
415 }
416
417 static long vmwrite(struct chan *c, void *ubuf, long n, int64_t unused)
418 {
419         print_func_entry();
420         ERRSTACK(3);
421         char buf[32];
422         struct cmdbuf *cb;
423         struct vm *vm;
424         struct litevm *litevm;
425         uint64_t hexval;
426         printd("vmwrite(%p, %p, %d)\n", c, ubuf, n);
427         switch (TYPE(c->qid)) {
428                 case Qtopdir:
429                 case Qvmdir:
430                 case Qstat:
431                         error(Eperm);
432                 case Qctl:
433                         vm = c->aux;
434                         litevm = vm->archvm;
435                         printk("qctl: vm is %p, litevm is %p\n", vm, litevm);
436                         cb = parsecmd(ubuf, n);
437                         if (waserror()) {
438                                 kfree(cb);
439                                 nexterror();
440                         }
441                         if (!strcmp(cb->f[0], "run")) {
442                                 int ret;
443                                 if (cb->nf != 4)
444                                         error("usage: run vcpu emulated mmio_completed");
445                                 struct litevm_run vmr;
446                                 vmr.vcpu = strtoul(cb->f[1], NULL, 0);
447                                 vmr.emulated = strtoul(cb->f[2], NULL, 0);
448                                 vmr.mmio_completed = strtoul(cb->f[3], NULL, 0);
449                                 ret = vm_run(litevm, &vmr);
450                                 printk("vm_run returns %d\n", ret);
451                                 print_func_exit();
452                                 return ret;
453                         } else if (!strcmp(cb->f[0], "stop")) {
454                                 error("can't stop a vm yet");
455                         } else if (!strcmp(cb->f[0], "mapmem")) {
456                                 struct chan *file;
457                                 void *v;
458                                 vm = c->aux;
459                                 uint64_t filesize;
460                                 struct litevm_memory_region vmr;
461                                 int got;
462
463                                 if (cb->nf != 6)
464                                         error("usage: mapmem file slot flags addr size");
465                                 vmr.slot = strtoul(cb->f[2], NULL, 0);
466                                 vmr.flags = strtoul(cb->f[3], NULL, 0);
467                                 vmr.guest_phys_addr = strtoul(cb->f[4], NULL, 0);
468                                 filesize = strtoul(cb->f[5], NULL, 0);
469                                 vmr.memory_size = (filesize + 4095) & ~4095ULL;
470
471                                 file = namec(cb->f[1], Aopen, OREAD, 0);
472                                 printk("after namec file is %p\n", file);
473                                 if (waserror()) {
474                                         cclose(file);
475                                         nexterror();
476                                 }
477                                 /* at some point we want to mmap from the kernel
478                                  * but we don't have that yet. This all needs
479                                  * rethinking but the abstractions of kvm do too.
480                                  */
481                                 v = kmalloc(vmr.memory_size, KMALLOC_WAIT);
482                                 if (waserror()) {
483                                         kfree(v);
484                                         nexterror();
485                                 }
486
487                                 readn(file, v, filesize);
488                                 vmr.init_data = v;
489
490                                 if (vm_set_memory_region(litevm, &vmr))
491                                         error("vm_set_memory_region failed");
492                                 poperror();
493                                 poperror();
494                                 kfree(v);
495                                 cclose(file);
496
497                         } else if (!strcmp(cb->f[0], "region")) {
498                                 void *v;
499                                 struct litevm_memory_region vmr;
500                                 if (cb->nf != 5)
501                                         error("usage: mapmem slot flags addr size");
502                                 vmr.slot = strtoul(cb->f[1], NULL, 0);
503                                 vmr.flags = strtoul(cb->f[2], NULL, 0);
504                                 vmr.guest_phys_addr = strtoul(cb->f[3], NULL, 0);
505                                 vmr.memory_size = strtoul(cb->f[4], NULL, 0);
506                                 if (vm_set_memory_region(litevm, &vmr))
507                                         error("vm_set_memory_region failed");
508                         } else {
509                                 error("%s: not implemented", cb->f[0]);
510                         }
511                         kfree(cb);
512                         poperror();
513                         break;
514                 case Qimage:
515                         error("can't write an image yet");
516                         break;
517                 default:
518                         panic("Bad QID %p in devvm", c->qid.path);
519         }
520         print_func_exit();
521         return n;
522 }
523
/* Device table entry for the VM device: device character 'V', name "vm".
 * The vm* handlers are the ones defined in this file; the dev* entries are
 * generic routines defined elsewhere.  The initializer is positional, so the
 * order of the entries must match the field order of struct dev. */
struct dev vmdevtab __devtab = {
	'V',
	"vm",

	devreset,
	vminit,
	devshutdown,
	vmattach,
	vmwalk,
	vmstat,
	vmopen,
	vmcreate,
	vmclose,
	vmread,
	devbread,
	vmwrite,
	devbwrite,
	vmremove,
	vmwstat,
	devpower,
//  devconfig,
	devchaninfo,
};