akaros/tests/vmm/vmrunkernel.c
<<
>>
Prefs
   1#include <stdio.h>
   2#include <pthread.h>
   3#include <sys/types.h>
   4#include <sys/stat.h>
   5#include <fcntl.h>
   6#include <parlib/arch/arch.h>
   7#include <parlib/ros_debug.h>
   8#include <unistd.h>
   9#include <gelf.h>
  10#include <errno.h>
  11#include <libelf.h>
  12#include <dirent.h>
  13#include <stdlib.h>
  14#include <string.h>
  15#include <ros/syscall.h>
  16#include <sys/mman.h>
  17#include <vmm/vmm.h>
  18#include <vmm/acpi/acpi.h>
  19#include <ros/arch/mmu.h>
  20#include <ros/arch/membar.h>
  21#include <ros/vmm.h>
  22#include <parlib/uthread.h>
  23#include <vmm/linux_bootparam.h>
  24#include <getopt.h>
  25#include <parlib/alarm.h>
  26
  27#include <vmm/virtio.h>
  28#include <vmm/virtio_blk.h>
  29#include <vmm/virtio_mmio.h>
  30#include <vmm/virtio_ids.h>
  31#include <vmm/virtio_config.h>
  32#include <vmm/virtio_console.h>
  33#include <vmm/virtio_net.h>
  34#include <vmm/virtio_lguest_console.h>
  35
  36#include <vmm/sched.h>
  37#include <vmm/net.h>
  38#include <sys/eventfd.h>
  39#include <sys/uio.h>
  40#include <parlib/opts.h>
  41
  42struct virtual_machine local_vm = {.mtx = UTH_MUTEX_INIT},
  43                            *vm = &local_vm;
  44
  45struct vmm_gpcore_init *gpcis;
  46
  47void vapic_status_dump(FILE *f, void *vapic);
  48
  49#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1)
  50#error "Get a gcc newer than 4.4.0"
  51#else
  52#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
  53#endif
  54
  55static void virtio_poke_guest(uint8_t vec, uint32_t dest)
  56{
  57        if (dest < vm->nr_gpcs) {
  58                vmm_interrupt_guest(vm, dest, vec);
  59                return;
  60        }
  61        if (dest != 0xffffffff)
  62                panic("INVALID DESTINATION: 0x%02x\n", dest);
  63
  64        for (int i = 0; i < vm->nr_gpcs; i++)
  65                vmm_interrupt_guest(vm, i, vec);
  66}
  67
  68static struct virtio_mmio_dev cons_mmio_dev = {
  69        .poke_guest = virtio_poke_guest,
  70};
  71
  72static struct virtio_console_config cons_cfg;
  73static struct virtio_console_config cons_cfg_d;
  74
  75static struct virtio_vq_dev cons_vqdev = {
  76        .name = "console",
  77        .dev_id = VIRTIO_ID_CONSOLE,
  78        .dev_feat =
  79        (1ULL << VIRTIO_F_VERSION_1) | (1 << VIRTIO_RING_F_INDIRECT_DESC),
  80        .num_vqs = 2,
  81        .cfg = &cons_cfg,
  82        .cfg_d = &cons_cfg_d,
  83        .cfg_sz = sizeof(struct virtio_console_config),
  84        .transport_dev = &cons_mmio_dev,
  85        .vqs = {
  86                {
  87                        .name = "cons_receiveq",
  88                        .qnum_max = 64,
  89                        .srv_fn = cons_receiveq_fn,
  90                        .vqdev = &cons_vqdev
  91                },
  92                {
  93                        .name = "cons_transmitq",
  94                        .qnum_max = 64,
  95                        .srv_fn = cons_transmitq_fn,
  96                        .vqdev = &cons_vqdev
  97                },
  98        }
  99};
 100
 101static struct virtio_mmio_dev net_mmio_dev = {
 102        .poke_guest = virtio_poke_guest,
 103};
 104
 105static struct virtio_net_config net_cfg = {
 106        .max_virtqueue_pairs = 1
 107};
 108static struct virtio_net_config net_cfg_d = {
 109        .max_virtqueue_pairs = 1
 110};
 111
 112static struct virtio_vq_dev net_vqdev = {
 113        .name = "network",
 114        .dev_id = VIRTIO_ID_NET,
 115        .dev_feat = (1ULL << VIRTIO_F_VERSION_1 | 1 << VIRTIO_NET_F_MAC),
 116
 117        .num_vqs = 2,
 118        .cfg = &net_cfg,
 119        .cfg_d = &net_cfg_d,
 120        .cfg_sz = sizeof(struct virtio_net_config),
 121        .transport_dev = &net_mmio_dev,
 122        .vqs = {
 123                {
 124                        .name = "net_receiveq",
 125                        .qnum_max = 64,
 126                        .srv_fn = net_receiveq_fn,
 127                        .vqdev = &net_vqdev
 128                },
 129                {
 130                        .name = "net_transmitq",
 131                        .qnum_max = 64,
 132                        .srv_fn = net_transmitq_fn,
 133                        .vqdev = &net_vqdev
 134                },
 135        }
 136};
 137
 138static struct virtio_mmio_dev blk_mmio_dev = {
 139        .poke_guest = virtio_poke_guest,
 140};
 141
 142static struct virtio_blk_config blk_cfg = {
 143};
 144
 145static struct virtio_blk_config blk_cfg_d = {
 146};
 147
 148static struct virtio_vq_dev blk_vqdev = {
 149        .name = "block",
 150        .dev_id = VIRTIO_ID_BLOCK,
 151        .dev_feat = (1ULL << VIRTIO_F_VERSION_1),
 152
 153        .num_vqs = 1,
 154        .cfg = &blk_cfg,
 155        .cfg_d = &blk_cfg_d,
 156        .cfg_sz = sizeof(struct virtio_blk_config),
 157        .transport_dev = &blk_mmio_dev,
 158        .vqs = {
 159                {
 160                        .name = "blk_request",
 161                        .qnum_max = 64,
 162                        .srv_fn = blk_request,
 163                        .vqdev = &blk_vqdev
 164                },
 165        }
 166};
 167
 168/* Parse func: given a line of text, it sets any vnet options */
 169static void __parse_vnet_opts(char *_line)
 170{
 171        char *eq, *spc;
 172
 173        /* Check all bools first */
 174        if (!strcmp(_line, "snoop")) {
 175                vnet_snoop = TRUE;
 176                return;
 177        }
 178        if (!strcmp(_line, "map_diagnostics")) {
 179                vnet_map_diagnostics = TRUE;
 180                return;
 181        }
 182        if (!strcmp(_line, "real_address")) {
 183                vnet_real_ip_addrs = TRUE;
 184                return;
 185        }
 186        /* Numeric fields, must have an = */
 187        eq = strchr(_line, '=');
 188        if (!eq)
 189                return;
 190        *eq++ = 0;
 191        /* Drop spaces before =.  atoi trims any spaces after =. */
 192        while ((spc = strrchr(_line, ' ')))
 193                *spc = 0;
 194        if (!strcmp(_line, "nat_timeout")) {
 195                vnet_nat_timeout = atoi(eq);
 196                return;
 197        }
 198}
 199
 200static void set_vnet_opts(char *net_opts)
 201{
 202        if (parse_opts_file(net_opts, __parse_vnet_opts))
 203                perror("parse opts file");
 204}
 205
 206/* Parse func: given a line of text, it builds any vnet port forwardings. */
 207static void __parse_vnet_port_fwds(char *_line)
 208{
 209        char *tok, *tok_save = 0;
 210        char *proto, *host_port, *guest_port;
 211
 212        tok = strtok_r(_line, ":", &tok_save);
 213        if (!tok)
 214                return;
 215        if (strcmp(tok, "port"))
 216                return;
 217        tok = strtok_r(NULL, ":", &tok_save);
 218        if (!tok) {
 219                fprintf(stderr, "%s, port with no proto!", __func__);
 220                return;
 221        }
 222        proto = tok;
 223        tok = strtok_r(NULL, ":", &tok_save);
 224        if (!tok) {
 225                fprintf(stderr, "%s, port with no host port!", __func__);
 226                return;
 227        }
 228        host_port = tok;
 229        tok = strtok_r(NULL, ":", &tok_save);
 230        if (!tok) {
 231                fprintf(stderr, "%s, port with no guest port!", __func__);
 232                return;
 233        }
 234        guest_port = tok;
 235        vnet_port_forward(proto, host_port, guest_port);
 236}
 237
 238static void set_vnet_port_fwds(char *net_opts)
 239{
 240        if (parse_opts_file(net_opts, __parse_vnet_port_fwds))
 241                perror("parse opts file");
 242}
 243
 244/* We map the APIC-access page, the per core Virtual APIC page and the
 245 * per core Posted Interrupt Descriptors.
 246 * Note: check if the PID/PIR needs to be a 4k page. */
 247void alloc_intr_pages(void)
 248{
 249        void *a_page;
 250        void *pages, *pir;
 251
 252        a_page = mmap((void *)APIC_GPA, PGSIZE, PROT_READ | PROT_WRITE,
 253                      MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 254
 255        if (a_page != (void *)APIC_GPA) {
 256                perror("Could not mmap APIC");
 257                exit(1);
 258        }
 259        /* The VM should never actually read from this page. */
 260        for (int i = 0; i < PGSIZE/4; i++)
 261                ((uint32_t *)a_page)[i] = 0xDEADBEEF;
 262
 263        /* Allocate VAPIC and PIR pages. */
 264        pages = mmap((void*)0, vm->nr_gpcs * 2 * PGSIZE, PROT_READ | PROT_WRITE,
 265                     MAP_POPULATE | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 266        if (pages == MAP_FAILED) {
 267                perror("Unable to map VAPIC and PIR pages.");
 268                exit(1);
 269        }
 270
 271        /* We use the first vm->nr_gpcs pages for the VAPIC, and the second set
 272         * for the PIRs. Each VAPIC and PIR gets its own 4k page. */
 273        pir = pages + (vm->nr_gpcs * PGSIZE);
 274
 275        /* Set the addresses in the gpcis.  These gpcis get copied into the
 276         * guest_threads during their construction. */
 277        for (int i = 0; i < vm->nr_gpcs; i++) {
 278                gpcis[i].posted_irq_desc = pir + (PGSIZE * i);
 279                gpcis[i].vapic_addr = pages + (PGSIZE * i);
 280                gpcis[i].apic_addr = a_page;
 281                gpcis[i].fsbase = 0;
 282                gpcis[i].gsbase = 0;
 283
 284                /* Set APIC ID. */
 285                ((uint32_t *)gpcis[i].vapic_addr)[0x20/4] = i;
 286                /* Set APIC VERSION. */
 287                ((uint32_t *)gpcis[i].vapic_addr)[0x30/4] = 0x01060015;
 288                /* Set LOGICAL APIC ID. */
 289                ((uint32_t *)gpcis[i].vapic_addr)[0xD0/4] = 1 << i;
 290        }
 291}
 292
 293/* This rams all cores with a valid timer vector and initial count
 294 * with a timer interrupt. Used only for debugging/as a temporary workaround */
 295void *inject_timer_spurious(void *args)
 296{
 297        struct vmm_gpcore_init *curgpci;
 298        uint32_t initial_count;
 299        uint8_t vector;
 300
 301        for (int i = 0; i < vm->nr_gpcs; i++) {
 302                curgpci = gth_to_gpci(gpcid_to_gth(vm, i));
 303                vector = ((uint32_t *)curgpci->vapic_addr)[0x32] & 0xff;
 304                initial_count = ((uint32_t *)curgpci->vapic_addr)[0x38];
 305                if (initial_count && vector)
 306                        vmm_interrupt_guest(vm, i, vector);
 307        }
 308        return 0;
 309}
 310
 311/* This injects the timer interrupt to the guest. */
 312void *inject_timer(void *args)
 313{
 314        struct guest_thread *gth = (struct guest_thread*)args;
 315        struct vmm_gpcore_init *gpci = gth_to_gpci(gth);
 316        uint8_t vector = ((uint32_t *)gpci->vapic_addr)[0x32] & 0xff;
 317
 318        vmm_interrupt_guest(vm, gth->gpc_id, vector);
 319        return 0;
 320}
 321
 322/* This handler must never call set_alarm after interrupting the guest,
 323 * otherwise the guest could try to write to the timer msrs and cause a
 324 * race condition. */
 325void timer_alarm_handler(struct alarm_waiter *waiter)
 326{
 327        uint8_t vector;
 328        uint32_t initial_count;
 329        uint32_t divide_config_reg;
 330        uint32_t multiplier;
 331        uint32_t timer_mode;
 332        struct guest_thread *gth = (struct guest_thread*)waiter->data;
 333        struct vmm_gpcore_init *gpci = gth_to_gpci(gth);
 334
 335        vector = ((uint32_t *)gpci->vapic_addr)[0x32] & 0xff;
 336        timer_mode = (((uint32_t *)gpci->vapic_addr)[0x32] >> 17) & 0x03;
 337        initial_count = ((uint32_t *)gpci->vapic_addr)[0x38];
 338        divide_config_reg = ((uint32_t *)gpci->vapic_addr)[0x3E];
 339
 340        /* Don't blame me for this. Look at the intel manual
 341         * Vol 3 10.5.4 APIC Timer */
 342        multiplier = (((divide_config_reg & 0x08) >> 1) |
 343                      (divide_config_reg & 0x03)) + 1;
 344        multiplier &= 0x07;
 345
 346        if (vector && initial_count && timer_mode == 0x01) {
 347                /* This is periodic, we reset the alarm */
 348                set_awaiter_rel(waiter, initial_count << multiplier);
 349                set_alarm(waiter);
 350        }
 351
 352        /* We spin up a task to inject the timer because vmm_interrupt_guest
 353         * may block and we can't do that from vcore context. */
 354        vmm_run_task(vm, inject_timer, gth);
 355}
 356
 357/* This sets up the structs for each of the guest pcore's timers, but
 358 * doesn't actually start the alarms until the core writes all the reasonable
 359 * values to the x2apic msrs. */
 360void init_timer_alarms(void)
 361{
 362        for (uint64_t i = 0; i < vm->nr_gpcs; i++) {
 363                struct alarm_waiter *timer_alarm =
 364                        malloc(sizeof(struct alarm_waiter));
 365                struct guest_thread *gth = gpcid_to_gth(vm, i);
 366
 367                /* TODO: consider a struct to bundle a bunch of things, not just
 368                 * timer_alarm. */
 369                gth->user_data = (void *)timer_alarm;
 370                timer_alarm->data = gth;
 371                init_awaiter(timer_alarm, timer_alarm_handler);
 372        }
 373}
 374
 375int main(int argc, char **argv)
 376{
 377        int debug = 0;
 378        unsigned long long memsize = GiB;
 379        uintptr_t memstart = MinMemory;
 380        uintptr_t memend;
 381        struct boot_params *bp;
 382        char cmdline_default[512] = {0};
 383        char *cmdline_extra = "\0";
 384        char *cmdline;
 385        void *a = (void *)0xe0000;
 386        int vmmflags = 0;
 387        uint64_t entry = 0;
 388        int ret;
 389        struct vm_trapframe *vm_tf;
 390        char *cmdlinep;
 391        int cmdlinesz, len, cmdline_fd;
 392        char *disk_image_file = NULL;
 393        int c;
 394        struct stat stat_result;
 395        int num_read;
 396        int option_index;
 397        char *smbiostable = NULL;
 398        char *net_opts = NULL;
 399        uint64_t num_pcs = 1;
 400        bool is_greedy = FALSE;
 401        bool is_scp = FALSE;
 402        char *initrd = NULL;
 403        uint64_t initrd_start = 0, initrd_size = 0;
 404        uint64_t kernel_max_address;
 405
 406        static struct option long_options[] = {
 407                {"debug",         no_argument,       0, 'd'},
 408                {"vmm_vmcall",    no_argument,       0, 'v'},
 409                {"maxresume",     required_argument, 0, 'R'},
 410                {"memsize",       required_argument, 0, 'm'},
 411                {"memstart",      required_argument, 0, 'M'},
 412                {"cmdline_extra", required_argument, 0, 'c'},
 413                {"greedy",        no_argument,       0, 'g'},
 414                {"initrd",        required_argument, 0, 'i'},
 415                {"scp",           no_argument,       0, 's'},
 416                {"image_file",    required_argument, 0, 'f'},
 417                {"cmdline",       required_argument, 0, 'k'},
 418                {"net",           required_argument, 0, 'n'},
 419                {"num_cores",     required_argument, 0, 'N'},
 420                {"smbiostable",   required_argument, 0, 't'},
 421                {"help",          no_argument,       0, 'h'},
 422                {0, 0, 0, 0}
 423        };
 424
 425        if ((uintptr_t)__procinfo.program_end >= MinMemory) {
 426                fprintf(stderr,
 427                        "Panic: vmrunkernel binary extends into guest memory\n");
 428                exit(1);
 429        }
 430
 431        vm->low4k = malloc(PGSIZE);
 432        memset(vm->low4k, 0xff, PGSIZE);
 433        vm->low4k[0x40e] = 0;
 434        vm->low4k[0x40f] = 0;
 435        // Why is this here? Because the static initializer is getting
 436        // set to 1.  Yes, 1. This might be part of the weirdness
 437        // Barrett is reporting with linker sets. So let's leave it
 438        // here until we trust our toolchain.
 439        if (memsize != GiB)
 440                fprintf(stderr, "static initializers are broken\n");
 441        memsize = GiB;
 442
 443        while ((c = getopt_long(argc, argv, "dvi:m:M:c:gsf:k:N:n:t:hR:",
 444                                long_options, &option_index)) != -1) {
 445                switch (c) {
 446                case 'd':
 447                        debug++;
 448                        break;
 449                case 'v':
 450                        vmmflags |= VMM_CTL_FL_KERN_PRINTC;
 451                        break;
 452                case 'm':
 453                        memsize = strtoull(optarg, 0, 0);
 454                        break;
 455                case 'M':
 456                        memstart = strtoull(optarg, 0, 0);
 457                        break;
 458                case 'c':
 459                        cmdline_extra = optarg;
 460                case 'g':       /* greedy */
 461                        parlib_never_yield = TRUE;
 462                        if (is_scp) {
 463                                fprintf(stderr,
 464                                        "Can't be both greedy and an SCP\n");
 465                                exit(1);
 466                        }
 467                        is_greedy = TRUE;
 468                        break;
 469                case 's':       /* scp */
 470                        parlib_wants_to_be_mcp = FALSE;
 471                        if (is_greedy) {
 472                                fprintf(stderr,
 473                                        "Can't be both greedy and an SCP\n");
 474                                exit(1);
 475                        }
 476                        is_scp = TRUE;
 477                        break;
 478                case 'f':       /* file to pass to blk_init */
 479                        disk_image_file = optarg;
 480                        break;
 481                case 'i':
 482                        initrd = optarg;
 483                        break;
 484                case 'k':       /* specify file to get cmdline args from */
 485                        cmdline_fd = open(optarg, O_RDONLY);
 486                        if (cmdline_fd < 0) {
 487                                fprintf(stderr, "failed to open file: %s\n",
 488                                        optarg);
 489                                exit(1);
 490                        }
 491                        if (stat(optarg, &stat_result) == -1) {
 492                                fprintf(stderr, "stat of %s failed\n", optarg);
 493                                exit(1);
 494                        }
 495                        len = stat_result.st_size;
 496                        if (len > 512) {
 497                                fprintf(stderr, "command line options exceed 512 bytes!");
 498                                exit(1);
 499                        }
 500                        num_read = read(cmdline_fd, cmdline_default, len);
 501                        if (num_read != len) {
 502                                fprintf(stderr, "read failed len was : %d, num_read was: %d\n",
 503                                        len, num_read);
 504                                exit(1);
 505                        }
 506                        close(cmdline_fd);
 507                        break;
 508                case 't':
 509                        smbiostable = optarg;
 510                        break;
 511                case 'n':
 512                        net_opts = optarg;
 513                        break;
 514                case 'N':
 515                        num_pcs = strtoull(optarg, 0, 0);
 516                        break;
 517                case 'h':
 518                default:
 519                        // Sadly, the getopt_long struct does
 520                        // not have a pointer to help text.
 521                        for (int i = 0;
 522                             i <
 523                             sizeof(long_options) / sizeof(long_options[0]) - 1;
 524                             i++) {
 525                                struct option *l = &long_options[i];
 526
 527                                fprintf(stderr, "%s or %c%s\n", l->name, l->val,
 528                                        l->has_arg ? " <arg>" : "");
 529                        }
 530                        exit(0);
 531                }
 532        }
 533
 534        if (strlen(cmdline_default) == 0) {
 535                fprintf(stderr,
 536                        "WARNING: No command line parameter file specified.\n");
 537        }
 538        argc -= optind;
 539        argv += optind;
 540        if (argc < 1) {
 541                fprintf(stderr, "Usage: %s vmimage [-n (no vmcall printf)]\n",
 542                        argv[0]);
 543                exit(1);
 544        }
 545
 546        // Set vm->nr_gpcs before it's referenced in the struct setups below.
 547        vm->nr_gpcs = num_pcs;
 548        /* These are only used to be passed to vmm_init, which makes copies
 549         * internally */
 550        gpcis = (struct vmm_gpcore_init *)
 551                        malloc(num_pcs * sizeof(struct vmm_gpcore_init));
 552        alloc_intr_pages();
 553
 554        memend = memstart + memsize - 1;
 555        if (memend >= BRK_START) {
 556                fprintf(stderr,
 557                        "memstart 0x%llx memsize 0x%llx -> 0x%llx is too large; overlaps BRK_START at %p\n",
 558                        memstart, memsize, memstart + memsize, BRK_START);
 559                exit(1);
 560        }
 561
 562        mmap_memory(vm, memstart, memsize);
 563
 564        entry = load_elf(argv[0], 0, &kernel_max_address, NULL);
 565        if (entry == 0) {
 566                fprintf(stderr, "Unable to load kernel %s\n", argv[0]);
 567                exit(1);
 568        }
 569
 570        a = setup_biostables(vm, a, smbiostable);
 571
 572        bp = a;
 573        a = init_e820map(vm, bp);
 574
 575        if (initrd) {
 576                initrd_start = ROUNDUP(kernel_max_address, PGSIZE);
 577                initrd_size = setup_initrd(initrd, (void *)initrd_start,
 578                                           memend - initrd_start + 1);
 579                if (initrd_size <= 0) {
 580                        fprintf(stderr, "Unable to load initrd %s\n", initrd);
 581                        exit(1);
 582                }
 583
 584                bp->hdr.ramdisk_image = initrd_start;
 585                bp->hdr.ramdisk_size = initrd_size;
 586                bp->hdr.root_dev = 0x100;
 587                bp->hdr.type_of_loader = 0xff;
 588        }
 589
 590        /* The MMIO address of the console device is really the address of an
 591         * unbacked EPT page: accesses to this page will cause a page fault that
 592         * traps to the host, which will examine the fault, see it was for the
 593         * known MMIO address, and fulfill the MMIO read or write on the guest's
 594         * behalf accordingly. We place the virtio space at 512 GB higher than
 595         * the guest physical memory to avoid a full page table walk. */
 596        uintptr_t virtio_mmio_base_addr_hint;
 597        uintptr_t virtio_mmio_base_addr;
 598
 599        virtio_mmio_base_addr_hint =
 600            ROUNDUP((bp->e820_map[bp->e820_entries - 1].addr +
 601                     bp->e820_map[bp->e820_entries - 1].size),
 602                     PML4_PTE_REACH);
 603
 604        /* mmap with prot_none so we don't accidentally mmap something else
 605         * here.
 606         * We give space for 512 devices right now.
 607         * TODO(ganshun): Make it dynamic based on number of virtio devices. */
 608        virtio_mmio_base_addr =
 609            (uintptr_t) mmap((void *) virtio_mmio_base_addr_hint, 512 * PGSIZE,
 610                             PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
 611
 612        if (!virtio_mmio_base_addr || virtio_mmio_base_addr >= BRK_START) {
 613                /* Either we were unable to mmap at all or we mapped it too
 614                 * high. */
 615                panic("Unable to mmap protect space for virtio devices, got 0x%016x",
 616                      virtio_mmio_base_addr);
 617        }
 618
 619        cons_mmio_dev.addr =
 620                virtio_mmio_base_addr + PGSIZE * VIRTIO_MMIO_CONSOLE_DEV;
 621        cons_mmio_dev.vqdev = &cons_vqdev;
 622        vm->virtio_mmio_devices[VIRTIO_MMIO_CONSOLE_DEV] = &cons_mmio_dev;
 623
 624        net_mmio_dev.addr =
 625                virtio_mmio_base_addr + PGSIZE * VIRTIO_MMIO_NETWORK_DEV;
 626        net_mmio_dev.vqdev = &net_vqdev;
 627        vm->virtio_mmio_devices[VIRTIO_MMIO_NETWORK_DEV] = &net_mmio_dev;
 628
 629        if (disk_image_file != NULL) {
 630                blk_mmio_dev.addr =
 631                        virtio_mmio_base_addr + PGSIZE * VIRTIO_MMIO_BLOCK_DEV;
 632                blk_mmio_dev.vqdev = &blk_vqdev;
 633                vm->virtio_mmio_devices[VIRTIO_MMIO_BLOCK_DEV] = &blk_mmio_dev;
 634                blk_init_fn(&blk_vqdev, disk_image_file);
 635        }
 636
 637        set_vnet_opts(net_opts);
 638        vnet_init(vm, &net_vqdev);
 639        set_vnet_port_fwds(net_opts);
 640
 641        /* Set the kernel command line parameters */
 642        a += 4096;
 643        cmdline = a;
 644        a += 4096;
 645
 646        bp->hdr.cmd_line_ptr = (uintptr_t) cmdline;
 647
 648        len = snprintf(cmdline, 4096, "%s %s", cmdline_default, cmdline_extra);
 649
 650        cmdlinesz = 4096 - len;
 651        cmdlinep = cmdline + len;
 652
 653        for (int i = 0; i < VIRTIO_MMIO_MAX_NUM_DEV; i++) {
 654                if (vm->virtio_mmio_devices[i] == NULL)
 655                        continue;
 656
 657                /* Append all the virtio mmio base addresses. */
 658
 659                /* Since the lower number irqs are no longer being used, the
 660                 * irqs can now be assigned starting from 0.  */
 661                vm->virtio_mmio_devices[i]->irq = i;
 662                len = snprintf(cmdlinep, cmdlinesz,
 663                               "\n virtio_mmio.device=1K@0x%llx:%lld",
 664                               vm->virtio_mmio_devices[i]->addr,
 665                               vm->virtio_mmio_devices[i]->irq);
 666                if (len >= cmdlinesz) {
 667                        fprintf(stderr, "Too many arguments to the linux command line.");
 668                        exit(1);
 669                }
 670                cmdlinesz -= len;
 671                cmdlinep += len;
 672        }
 673
 674        /* Set maxcpus to the number of cores we're giving the guest. */
 675        len = snprintf(cmdlinep, cmdlinesz,
 676                       "\n maxcpus=%lld\n possible_cpus=%lld", vm->nr_gpcs,
 677                       vm->nr_gpcs);
 678        if (len >= cmdlinesz) {
 679                fprintf(stderr,
 680                        "Too many arguments to the linux command line.");
 681                exit(1);
 682        }
 683        cmdlinesz -= len;
 684        cmdlinep += len;
 685
 686        ret = vmm_init(vm, gpcis, vmmflags);
 687        assert(!ret);
 688        free(gpcis);
 689
 690        init_timer_alarms();
 691
 692        setup_paging(vm);
 693
 694        vm_tf = gpcid_to_vmtf(vm, 0);
 695        vm_tf->tf_cr3 = (uint64_t) vm->root;
 696        vm_tf->tf_rip = entry;
 697        vm_tf->tf_rsp = 0xe0000;
 698        vm_tf->tf_rsi = (uint64_t) bp;
 699        vm_tf->tf_rflags = FL_RSVD_1;
 700        vm->up_gpcs = 1;
 701        start_guest_thread(gpcid_to_gth(vm, 0));
 702
 703        uthread_sleep_forever();
 704        return 0;
 705}
 706