Move the kernel to start at 16M+2M.
[akaros.git] / tests / vmm / vmrunkernel.c
1 #include <stdio.h> 
2 #include <pthread.h>
3 #include <sys/types.h>
4 #include <sys/stat.h>
5 #include <fcntl.h>
6 #include <parlib/arch/arch.h>
7 #include <parlib/ros_debug.h>
8 #include <unistd.h>
9 #include <errno.h>
10 #include <dirent.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <ros/syscall.h>
14 #include <sys/mman.h>
15 #include <vmm/coreboot_tables.h>
16 #include <vmm/vmm.h>
17 #include <ros/arch/mmu.h>
18 #include <ros/vmx.h>
19 #include <parlib/uthread.h>
20 #include <vmm/virtio.h>
21 #include <vmm/virtio_mmio.h>
22 #include <vmm/virtio_ids.h>
23 #include <vmm/virtio_config.h>
24
/* The intent of this test is to run the "kernel" in the negative address
 * space -- or so we hope. */
int *mmap_blob;
unsigned long long stack[1024];
volatile int shared = 0;
volatile int quit = 0;
int mcp = 1;

/* Total hack: a resume countdown, so that control can be regained if the VM
 * runs away.  Starts at the largest unsigned value. */
unsigned int maxresume = ~0u;

/* Size helpers, and the layout of the statically allocated kernel area. */
#define MiB (1u << 20)
#define GiB (1u << 30)
#define GKERNBASE (16 * MiB)
#define KERNSIZE (128 * MiB + GKERNBASE)
uint8_t _kernel[KERNSIZE];

/* Page-table pointers filled in by main(). */
unsigned long long *p512;
unsigned long long *p1;
unsigned long long *p2m;

void **my_retvals;
int nr_threads = 3;
int debug = 0;

/* Unlike Linux, this shared struct is for both host and guest.  Kept for
 * reference, the queue was once meant to be created like this:
 *      struct virtqueue *constoguest =
 *              vring_new_virtqueue(0, 512, 8192, 0, inpages, NULL, NULL, "test");
 */
uint64_t virtio_mmio_base = 0x100000000;
50
51 void *consout(void *arg)
52 {
53         char *line, *consline, *outline;
54         static struct scatterlist out[] = { {NULL, sizeof(outline)}, };
55         static struct scatterlist in[] = { {NULL, sizeof(line)}, };
56         static struct scatterlist iov[32];
57         struct virtio_threadarg *a = arg;
58         static unsigned int inlen, outlen, conslen;
59         struct virtqueue *v = a->arg->virtio;
60         fprintf(stderr, "talk thread ..\n");
61         uint16_t head, gaveit = 0, gotitback = 0;
62         uint32_t vv;
63         int i;
64         int num;
65         if (debug) {
66                 printf("----------------------- TT a %p\n", a);
67                 printf("talk thread ttargs %x v %x\n", a, v);
68         }
69         
70         for(num = 0;;num++) {
71                 /* host: use any buffers we should have been sent. */
72                 head = wait_for_vq_desc(v, iov, &outlen, &inlen);
73                 if (debug)
74                         printf("CCC: vq desc head %d, gaveit %d gotitback %d\n", head, gaveit, gotitback);
75                 for(i = 0; debug && i < outlen + inlen; i++)
76                         printf("CCC: v[%d/%d] v %p len %d\n", i, outlen + inlen, iov[i].v, iov[i].length);
77                 /* host: if we got an output buffer, just output it. */
78                 for(i = 0; i < outlen; i++) {
79                         num++;
80                         int j;
81                         for (j = 0; j < iov[i].length; j++)
82                                 printf("%c", ((char *)iov[i].v)[j]);
83                 }
84                 
85                 if (debug)
86                         printf("CCC: outlen is %d; inlen is %d\n", outlen, inlen);
87                 /* host: fill in the writeable buffers. */
88                 /* why we're getting these I don't know. */
89                 for (i = outlen; i < outlen + inlen; i++) {
90                         if (debug) fprintf(stderr, "CCC: send back empty writeable");
91                         iov[i].length = 0;
92                 }
93                 if (debug) printf("CCC: call add_used\n");
94                 /* host: now ack that we used them all. */
95                 add_used(v, head, outlen+inlen);
96                 if (debug) printf("CCC: DONE call add_used\n");
97         }
98         fprintf(stderr, "All done\n");
99         return NULL;
100 }
101
102 void *consin(void *arg)
103 {
104         struct virtio_threadarg *a = arg;
105         char *line, *outline;
106         static char consline[128];
107         static struct scatterlist iov[32];
108         static struct scatterlist out[] = { {NULL, sizeof(outline)}, };
109         static struct scatterlist in[] = { {NULL, sizeof(line)}, };
110
111         static unsigned int inlen, outlen, conslen;
112         struct virtqueue *v = a->arg->virtio;
113         fprintf(stderr, "consin thread ..\n");
114         uint16_t head, gaveit = 0, gotitback = 0;
115         uint32_t vv;
116         int i;
117         int num;
118         
119         if (debug) printf("Spin on console being read, print num queues, halt\n");
120
121         for(num = 0;! quit;num++) {
122                 int debug = 1;
123                 /* host: use any buffers we should have been sent. */
124                 head = wait_for_vq_desc(v, iov, &outlen, &inlen);
125                 if (debug)
126                         printf("vq desc head %d, gaveit %d gotitback %d\n", head, gaveit, gotitback);
127                 for(i = 0; debug && i < outlen + inlen; i++)
128                         printf("v[%d/%d] v %p len %d\n", i, outlen + inlen, iov[i].v, iov[i].length);
129                 if (debug)
130                         printf("outlen is %d; inlen is %d\n", outlen, inlen);
131                 /* host: fill in the writeable buffers. */
132                 for (i = outlen; i < outlen + inlen; i++) {
133                         /* host: read a line. */
134                         memset(consline, 0, 128);
135                         if (fgets(consline, 4096-256, stdin) == NULL) {
136                                 exit(0);
137                         } 
138                         if (debug) printf("GOT A LINE:%s:\n", consline);
139                         if (strlen(consline) < 3 && consline[0] == 'q' ) {
140                                 quit = 1;
141                                 break;
142                         }
143
144                         memmove(iov[i].v, consline, strlen(consline)+ 1);
145                         iov[i].length = strlen(consline) + 1;
146                 }
147                 if (debug) printf("call add_used\n");
148                 /* host: now ack that we used them all. */
149                 add_used(v, head, outlen+inlen);
150                 if (debug) printf("DONE call add_used\n");
151         }
152         fprintf(stderr, "All done\n");
153         return NULL;
154 }
155
156 static struct vqdev vqdev= {
157 name: "console",
158 dev: VIRTIO_ID_CONSOLE,
159 device_features: 0, /* Can't do it: linux console device does not support it. VIRTIO_F_VERSION_1*/
160 numvqs: 2,
161 vqs: {
162                 {name: "consin", maxqnum: 64, f: &consin, arg: (void *)0},
163                 {name: "consout", maxqnum: 64, f: consout, arg: (void *)0},
164         }
165 };
166
167 int main(int argc, char **argv)
168 {
169         uint64_t virtiobase = 0x100000000ULL;
170         void *rsdp = (void *)0;
171         struct vmctl vmctl;
172         int amt;
173         int vmmflags = 0; // Disabled probably forever. VMM_VMCALL_PRINTF;
174         uint64_t entry = 0x1200000, kerneladdress = 0x1200000;
175         int nr_gpcs = 1;
176         int fd = open("#c/vmctl", O_RDWR), ret;
177         void * x;
178         int kfd = -1;
179         static char cmd[512];
180         void *coreboot_tables = (void *) 0x1165000;
181
182         // mmap is not working for us at present.
183         if ((uint64_t)_kernel > GKERNBASE) {
184                 printf("kernel array @%p is above , GKERNBASE@%p sucks\n", _kernel, GKERNBASE);
185                 exit(1);
186         }
187         memset(_kernel, 0, sizeof(_kernel));
188
189         if (fd < 0) {
190                 perror("#cons/sysctl");
191                 exit(1);
192         }
193         argc--,argv++;
194         // switches ...
195         // Sorry, I don't much like the gnu opt parsing code.
196         while (1) {
197                 if (*argv[0] != '-')
198                         break;
199                 switch(argv[0][1]) {
200                 case 'd':
201                         debug++;
202                         break;
203                 case 'v':
204                         vmmflags |= VMM_VMCALL_PRINTF;
205                         break;
206                 case 'm':
207                         argc--,argv++;
208                         maxresume = strtoull(argv[0], 0, 0);
209                         break;
210                 default:
211                         printf("BMAFR\n");
212                         break;
213                 }
214                 argc--,argv++;
215         }
216         if (argc < 1) {
217                 fprintf(stderr, "Usage: %s vmimage [-n (no vmcall printf)] [coreboot_tables [loadaddress [entrypoint]]]\n", argv[0]);
218                 exit(1);
219         }
220         if (argc > 1)
221                 coreboot_tables = (void *) strtoull(argv[1], 0, 0);
222         if (argc > 2)
223                 kerneladdress = strtoull(argv[2], 0, 0);
224         if (argc > 3)
225                 entry = strtoull(argv[3], 0, 0);
226         kfd = open(argv[0], O_RDONLY);
227         if (kfd < 0) {
228                 perror(argv[0]);
229                 exit(1);
230         }
231         // read in the kernel.
232         x = (void *)kerneladdress;
233         for(;;) {
234                 amt = read(kfd, x, 1048576);
235                 if (amt < 0) {
236                         perror("read");
237                         exit(1);
238                 }
239                 if (amt == 0) {
240                         break;
241                 }
242                 x += amt;
243         }
244         fprintf(stderr, "Read in %d bytes\n", x-kerneladdress);
245
246         fprintf(stderr, "Run with %d cores and vmmflags 0x%x\n", nr_gpcs, vmmflags);
247         if (ros_syscall(SYS_setup_vmm, nr_gpcs, vmmflags, 0, 0, 0, 0) != nr_gpcs) {
248                 perror("Guest pcore setup failed");
249                 exit(1);
250         }
251         /* blob that is faulted in from the EPT first.  we need this to be in low
252          * memory (not above the normal mmap_break), so the EPT can look it up.
253          * Note that we won't get 4096.  The min is 1MB now, and ld is there. */
254         mmap_blob = mmap((int*)4096, PGSIZE, PROT_READ | PROT_WRITE,
255                          MAP_ANONYMOUS, -1, 0);
256         if (mmap_blob == MAP_FAILED) {
257                 perror("Unable to mmap");
258                 exit(1);
259         }
260
261         mcp = 1;
262         if (mcp) {
263                 my_retvals = malloc(sizeof(void*) * nr_threads);
264                 if (!my_retvals)
265                         perror("Init threads/malloc");
266
267                 pthread_can_vcore_request(FALSE);       /* 2LS won't manage vcores */
268                 pthread_need_tls(FALSE);
269                 pthread_mcp_init();                                     /* gives us one vcore */
270                 vcore_request(nr_threads - 1);          /* ghetto incremental interface */
271                 for (int i = 0; i < nr_threads; i++) {
272                         x = __procinfo.vcoremap;
273                         printf("%p\n", __procinfo.vcoremap);
274                         printf("Vcore %d mapped to pcore %d\n", i,
275                                 __procinfo.vcoremap[i].pcoreid);
276                 }
277         }
278
279         ret = syscall(33, 1);
280         if (ret < 0) {
281                 perror("vm setup");
282                 exit(1);
283         }
284         ret = posix_memalign((void **)&p512, 4096, 3*4096);
285         printf("memalign is %p\n", p512);
286         if (ret) {
287                 perror("ptp alloc");
288                 exit(1);
289         }
290         p1 = &p512[512];
291         p2m = &p512[1024];
292         uint64_t kernbase = 0; //0xffffffff80000000;
293         uint64_t highkernbase = 0xffffffff80000000;
294         p512[PML4(kernbase)] = (unsigned long long)p1 | 7;
295         p1[PML3(kernbase)] = /*0x87; */(unsigned long long)p2m | 7;
296         p512[PML4(highkernbase)] = (unsigned long long)p1 | 7;
297         p1[PML3(highkernbase)] = /*0x87; */(unsigned long long)p2m | 7;
298 #define _2MiB (0x200000)
299         int i;
300         for (i = 0; i < 512; i++) {
301                 p2m[PML2(kernbase + i * _2MiB)] = 0x87 | i * _2MiB;
302         }
303
304         kernbase >>= (0+12);
305         kernbase <<= (0 + 12);
306         uint8_t *kernel = (void *)GKERNBASE;
307         //write_coreboot_table(coreboot_tables, ((void *)VIRTIOBASE) /*kernel*/, KERNSIZE + 1048576);
308         hexdump(stdout, coreboot_tables, 512);
309         printf("kernbase for pml4 is 0x%llx and entry is %llx\n", kernbase, entry);
310         printf("p512 %p p512[0] is 0x%lx p1 %p p1[0] is 0x%x\n", p512, p512[0], p1, p1[0]);
311         vmctl.command = REG_RSP_RIP_CR3;
312         vmctl.cr3 = (uint64_t) p512;
313         vmctl.regs.tf_rip = entry;
314         vmctl.regs.tf_rsp = (uint64_t) &stack[1024];
315         if (mcp) {
316                 /* set up virtio bits, which depend on threads being enabled. */
317                 register_virtio_mmio(&vqdev, virtio_mmio_base);
318         }
319         printf("threads started\n");
320         printf("Writing command :%s:\n", cmd);
321
322         ret = write(fd, &vmctl, sizeof(vmctl));
323         if (ret != sizeof(vmctl)) {
324                 perror(cmd);
325         }
326         while (1) {
327                 void showstatus(FILE *f, struct vmctl *v);
328                 int c;
329                 uint8_t byte;
330                 vmctl.command = REG_RIP;
331                 if (maxresume-- == 0)
332                         debug = 1;
333                 if (debug) {
334                         printf("RIP %p, shutdown 0x%x\n", vmctl.regs.tf_rip, vmctl.shutdown);
335                         showstatus(stdout, &vmctl);
336                         printf("RESUME?\n");
337                         c = getchar();
338                         if (c == 'q')
339                                 break;
340                 }
341                 if (vmctl.shutdown == SHUTDOWN_EPT_VIOLATION) {
342                         uint64_t gpa;
343                         uint64_t *regp;
344                         uint8_t regx;
345                         int store;
346                         if (decode(&vmctl, &gpa, &regx, &regp, &store)) {
347                                 quit = 1;
348                                 break;
349                         }
350                         if ((gpa & ~0xfffULL) == virtiobase) {
351                                 if (debug) printf("DO SOME VIRTIO\n");
352                                 virtio_mmio(&vmctl, gpa, regx, regp, store);
353                         } else if (gpa == 0x40e) {
354                                 *regp = (uint64_t) rsdp;
355                         } else {
356                                 printf("EPT violation: can't handle %p\n", gpa);
357                                 quit = 1;
358                                 break;
359                         }
360                         vmctl.shutdown = 0;
361                         vmctl.gpa = 0;
362                         vmctl.command = REG_ALL;
363                 } else if (vmctl.shutdown == SHUTDOWN_UNHANDLED_EXIT_REASON) {
364                         switch(vmctl.ret_code){
365                         case  EXIT_REASON_VMCALL:
366                                 byte = vmctl.regs.tf_rdi;
367                                 printf("%c", byte);
368                                 if (byte == '\n') printf("%c", 'V');
369                                 vmctl.regs.tf_rip += 3;
370                                 break;
371                         case EXIT_REASON_EXTERNAL_INTERRUPT:
372                                 fprintf(stderr, "XINT\n");
373                                 // Just inject a GPF for now. See what shakes.
374                                 vmctl.interrupt = 0x80000302; // b0d;
375                                 vmctl.command = RESUME;
376                                 break;
377                         case EXIT_REASON_IO_INSTRUCTION:
378                                 printf("IO @ %p\n", vmctl.regs.tf_rip);
379                                 io(&vmctl);
380                                 break;
381                         case EXIT_REASON_HLT:
382                                 printf("\n================== Guest halted. RIP. =======================\n");
383                                 quit = 1;
384                                 break;
385                         default:
386                                 fprintf(stderr, "Don't know how to handle exit %d\n", vmctl.ret_code);
387                                 quit = 1;
388                                 break;
389                         }
390                 }
391                 if (debug) printf("at bottom of switch, quit is %d\n", quit);
392                 if (quit)
393                         break;
394                 if (debug) printf("NOW DO A RESUME\n");
395                 ret = write(fd, &vmctl, sizeof(vmctl));
396                 if (ret != sizeof(vmctl)) {
397                         perror(cmd);
398                 }
399         }
400
401         /* later. 
402         for (int i = 0; i < nr_threads-1; i++) {
403                 int ret;
404                 if (pthread_join(my_threads[i], &my_retvals[i]))
405                         perror("pth_join failed");
406                 printf("%d %d\n", i, ret);
407         }
408  */
409
410         return 0;
411 }