Updates from vmm-akaros
authorMichael Taufen <mtaufen@gmail.com>
Wed, 10 Feb 2016 17:37:58 +0000 (09:37 -0800)
committerBarret Rhoden <brho@cs.berkeley.edu>
Tue, 16 Feb 2016 22:28:48 +0000 (17:28 -0500)
Boot params
e820 info
Use copy_vmctl_to_vmtf() in __build_vm_ctx_cb()
Inject GPF on unsupported MSR access
Add linux_bootparam.h

Signed-off-by: Michael Taufen <mtaufen@gmail.com>
[ pragma once, static_assert->parlib_static_assert ]
Signed-off-by: Barret Rhoden <brho@cs.berkeley.edu>
kern/arch/x86/trap.c
tests/vmm/vmrunkernel.c
user/vmm/include/linux_bootparam.h [new file with mode: 0644]

index 6cb72cb..3bd995a 100644 (file)
@@ -367,13 +367,13 @@ static void trap_dispatch(struct hw_trapframe *hw_tf)
                         * the same).  See set_current_ctx() for more info. */
                        if (!in_kernel(hw_tf))
                                hw_tf = &pcpui->cur_ctx->tf.hw_tf;
-                       printd("bad opcode, eip: %p, next 3 bytes: %x %x %x\n", ip, 
-                              *(uint8_t*)(ip + 0), 
-                              *(uint8_t*)(ip + 1), 
-                              *(uint8_t*)(ip + 2)); 
+                       printd("bad opcode, eip: %p, next 3 bytes: %x %x %x\n", ip,
+                              *(uint8_t*)(ip + 0),
+                              *(uint8_t*)(ip + 1),
+                              *(uint8_t*)(ip + 2));
                        /* rdtscp: 0f 01 f9 */
-                       if (*(uint8_t*)(ip + 0) == 0x0f, 
-                           *(uint8_t*)(ip + 1) == 0x01, 
+                       if (*(uint8_t*)(ip + 0) == 0x0f,
+                           *(uint8_t*)(ip + 1) == 0x01,
                            *(uint8_t*)(ip + 2) == 0xf9) {
                                x86_fake_rdtscp(hw_tf);
                                pcpui->__lock_checking_enabled++;       /* for print debugging */
index 2dd3778..07e325a 100644 (file)
@@ -1,4 +1,4 @@
-#include <stdio.h> 
+#include <stdio.h>
 #include <pthread.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <ros/arch/mmu.h>
 #include <ros/vmm.h>
 #include <parlib/uthread.h>
+#include <vmm/linux_bootparam.h>
 #include <vmm/virtio.h>
 #include <vmm/virtio_mmio.h>
 #include <vmm/virtio_ids.h>
 #include <vmm/virtio_config.h>
 
+
+
+void showstatus(FILE *f, struct vmctl *v);
+
 int msrio(struct vmctl *vcpu, uint32_t opcode);
 
 struct vmctl vmctl;
@@ -34,65 +39,6 @@ uth_mutex_t the_ball;
 pthread_t vm_thread;
 void (*old_thread_refl)(struct uthread *uth, struct user_context *ctx);
 
-/* callback, runs in vcore context.  this sets up our initial context.  once we
- * become runnable again, we'll run the first bits of the vm ctx.  after that,
- * our context will be stopped and started and will just run whatever the guest
- * VM wants.  we'll never come back to this code or to run_vm(). */
-static void __build_vm_ctx_cb(struct uthread *uth, void *arg)
-{
-       struct pthread_tcb *pthread = (struct pthread_tcb*)uth;
-       struct vmctl *vmctl = (struct vmctl*)arg;
-       struct vm_trapframe *vm_tf;
-
-       __pthread_generic_yield(pthread);
-       pthread->state = PTH_BLK_YIELDING;
-
-       memset(&uth->u_ctx, 0, sizeof(struct user_context));
-       uth->u_ctx.type = ROS_VM_CTX;
-       vm_tf = &uth->u_ctx.tf.vm_tf;
-
-       vm_tf->tf_guest_pcoreid = 0;    /* assuming only 1 guest core */
-       vm_tf->tf_cr3 = vmctl->cr3;
-       vm_tf->tf_rip = vmctl->regs.tf_rip;
-       vm_tf->tf_rsp = vmctl->regs.tf_rsp;
-
-       /* other HW/GP regs are 0, which should be fine.  the FP state is still
-        * whatever we were running before, though this is pretty much unnecessary.
-        * we mostly don't want crazy crap in the uth->as, and a non-current_uthread
-        * VM ctx is supposed to have something in their FP state (like HW ctxs). */
-       save_fp_state(&uth->as);
-       uth->flags |= UTHREAD_FPSAVED | UTHREAD_SAVED;
-
-       uthread_runnable(uth);
-}
-
-static void *run_vm(void *arg)
-{
-       struct vmctl *vmctl = (struct vmctl*)arg;
-
-       assert(vmctl->command == REG_RSP_RIP_CR3);
-       /* We need to hack our context, so that next time we run, we're a VM ctx */
-       uthread_yield(FALSE, __build_vm_ctx_cb, arg);
-}
-
-static void vmm_thread_refl_fault(struct uthread *uth,
-                                  struct user_context *ctx)
-{
-       struct pthread_tcb *pthread = (struct pthread_tcb*)uth;
-
-       /* Hack to call the original pth 2LS op */
-       if (!ctx->type == ROS_VM_CTX) {
-               old_thread_refl(uth, ctx);
-               return;
-       }
-       __pthread_generic_yield(pthread);
-       /* normally we'd handle the vmexit here.  to work within the existing
-        * framework, we just wake the controller thread.  It'll look at our ctx
-        * then make us runnable again */
-       pthread->state = PTH_BLK_MUTEX;
-       uth_mutex_unlock(the_ball);             /* wake the run_vmthread */
-}
-
 static void copy_vmtf_to_vmctl(struct vm_trapframe *vm_tf, struct vmctl *vmctl)
 {
        vmctl->cr3 = vm_tf->tf_cr3;
@@ -153,6 +99,66 @@ static void copy_vmctl_to_vmtf(struct vmctl *vmctl, struct vm_trapframe *vm_tf)
        /* Don't care about the rest of the fields.  The kernel only writes them */
 }
 
+/* callback, runs in vcore context.  this sets up our initial context.  once we
+ * become runnable again, we'll run the first bits of the vm ctx.  after that,
+ * our context will be stopped and started and will just run whatever the guest
+ * VM wants.  we'll never come back to this code or to run_vm().
+ *
+ * uth: the yielding uthread; it is cast to its enclosing struct pthread_tcb.
+ * arg: the struct vmctl * that run_vm() handed to uthread_yield(); it supplies
+ *      the initial guest state copied into the VM trapframe. */
+static void __build_vm_ctx_cb(struct uthread *uth, void *arg)
+{
+       struct pthread_tcb *pthread = (struct pthread_tcb*)uth;
+       struct vmctl *vmctl = (struct vmctl*)arg;
+       struct vm_trapframe *vm_tf;
+
+       /* do the generic pthread yield bookkeeping, then mark us as yielding */
+       __pthread_generic_yield(pthread);
+       pthread->state = PTH_BLK_YIELDING;
+
+       /* wipe the saved user context and retag it as a VM context */
+       memset(&uth->u_ctx, 0, sizeof(struct user_context));
+       uth->u_ctx.type = ROS_VM_CTX;
+       vm_tf = &uth->u_ctx.tf.vm_tf;
+
+       vm_tf->tf_guest_pcoreid = 0;    /* assuming only 1 guest core */
+
+       /* fill the trapframe from the controller's vmctl */
+       copy_vmctl_to_vmtf(vmctl, vm_tf);
+
+       /* other HW/GP regs are 0, which should be fine.  the FP state is still
+        * whatever we were running before, though this is pretty much unnecessary.
+        * we mostly don't want crazy crap in the uth->as, and a non-current_uthread
+        * VM ctx is supposed to have something in their FP state (like HW ctxs).
+        * NOTE(review): the "regs are 0" remark predates the call to
+        * copy_vmctl_to_vmtf() above; registers presumably now come from vmctl --
+        * confirm against that helper. */
+       save_fp_state(&uth->as);
+       uth->flags |= UTHREAD_FPSAVED | UTHREAD_SAVED;
+
+       uthread_runnable(uth);
+}
+
+/* Start routine (pthread-style signature) whose context becomes the guest's.
+ *
+ * arg: pointer to a struct vmctl; its command must be REG_RSP_RIP_CR3.
+ *
+ * Yields with __build_vm_ctx_cb() as the callback, which rewrites this
+ * uthread's saved context into a VM context.  According to that callback's
+ * comment we never resume here; the return below only keeps this non-void
+ * function well-formed should the yield ever come back.
+ *
+ * Returns NULL (not expected to be reached). */
+static void *run_vm(void *arg)
+{
+       struct vmctl *vmctl = (struct vmctl*)arg;
+
+       assert(vmctl->command == REG_RSP_RIP_CR3);
+       /* We need to hack our context, so that next time we run, we're a VM ctx */
+       uthread_yield(FALSE, __build_vm_ctx_cb, arg);
+       return NULL;
+}
+
+/* Fault-reflection handler for uthreads.
+ *
+ * uth: the uthread whose context faulted; cast to its struct pthread_tcb.
+ * ctx: the reflected user context.
+ *
+ * Anything that is not a VM context is forwarded to old_thread_refl, the
+ * original pthread 2LS op.  A VM context is treated as a vmexit: the thread
+ * is yielded, marked blocked, and the controller is woken via the_ball. */
+static void vmm_thread_refl_fault(struct uthread *uth,
+                                  struct user_context *ctx)
+{
+       struct pthread_tcb *pthread = (struct pthread_tcb*)uth;
+
+       /* Hack to call the original pth 2LS op.  This must be a real
+        * inequality test: "!ctx->type == ROS_VM_CTX" parses as
+        * "(!ctx->type) == ROS_VM_CTX", which compares a 0/1 value against the
+        * constant instead of checking the context type. */
+       if (ctx->type != ROS_VM_CTX) {
+               old_thread_refl(uth, ctx);
+               return;
+       }
+       __pthread_generic_yield(pthread);
+       /* normally we'd handle the vmexit here.  to work within the existing
+        * framework, we just wake the controller thread.  It'll look at our ctx
+        * then make us runnable again */
+       pthread->state = PTH_BLK_MUTEX;
+       uth_mutex_unlock(the_ball);             /* wake the run_vmthread */
+}
+
+
+
 /* this will start the vm thread, and return when the thread has blocked,
  * with the right info in vmctl. */
 static void run_vmthread(struct vmctl *vmctl)
@@ -183,12 +189,15 @@ static void run_vmthread(struct vmctl *vmctl)
        copy_vmtf_to_vmctl(&vm_thread->uthread.u_ctx.tf.vm_tf, vmctl);
 }
 
-/* Kind of sad what a total clusterf the pc world is. By 1999, you could just scan the hardware 
- * and work it out. But 2005, that was no longer possible. How sad. 
- * so we have to fake acpi to make it all work. !@#$!@#$#.
+/* By 1999, you could just scan the hardware
+ * and work it out. But by 2005, that was no longer possible. How sad.
+ * So we have to fake ACPI to make it all work.
  * This will be copied to memory at 0xe0000, so the kernel can find it.
  */
-/* assume they're all 256 bytes long just to make it easy. Just have pointers that point to aligned things. */
+
+/* assume they're all 256 bytes long just to make it easy.
+ * Just have pointers that point to aligned things.
+ */
 
 struct acpi_table_rsdp rsdp = {
        .signature = "RSD PTR ",
@@ -234,7 +243,7 @@ struct acpi_table_madt madt = {
                .asl_compiler_id = "RON ",
                .asl_compiler_revision = 0,
        },
-       
+
        .address = 0xfee00000ULL,
 };
 
@@ -307,7 +316,7 @@ int nr_threads = 4;
 int debug = 0;
 int resumeprompt = 0;
 /* unlike Linux, this shared struct is for both host and guest. */
-//     struct virtqueue *constoguest = 
+//     struct virtqueue *constoguest =
 //             vring_new_virtqueue(0, 512, 8192, 0, inpages, NULL, NULL, "test");
 uint64_t virtio_mmio_base = 0x100000000ULL;
 
@@ -347,11 +356,12 @@ void *consout(void *arg)
        uint32_t vv;
        int i;
        int num;
+
        if (debug) {
                fprintf(stderr, "----------------------- TT a %p\n", a);
                fprintf(stderr, "talk thread ttargs %x v %x\n", a, v);
        }
-       
+
        for(num = 0;;num++) {
                //int debug = 1;
                /* host: use any buffers we should have been sent. */
@@ -388,7 +398,7 @@ void *consout(void *arg)
        return NULL;
 }
 
-// FIXME. 
+// FIXME.
 volatile int consdata = 0;
 
 void *consin(void *arg)
@@ -429,7 +439,7 @@ void *consin(void *arg)
                        memset(consline, 0, 128);
                        if (read(0, consline, 1) < 0) {
                                exit(0);
-                       } 
+                       }
                        if (debug) fprintf(stderr, "CONSIN: GOT A LINE:%s:\n", consline);
                        if (debug) fprintf(stderr, "CONSIN: OUTLEN:%d:\n", outlen);
                        if (strlen(consline) < 3 && consline[0] == 'q' ) {
@@ -498,8 +508,8 @@ static uint8_t acpi_tb_checksum(uint8_t *buffer, uint32_t length)
 static void gencsum(uint8_t *target, void *data, int len)
 {
        uint8_t csum;
-       // blast target to zero so it does not get counted (it might be in the struct we checksum) 
-       // And, yes, it is, goodness.
+       // blast target to zero so it does not get counted
+       // (it might be in the struct we checksum) And, yes, it is, goodness.
        fprintf(stderr, "gencsum %p target %p source %d bytes\n", target, data, len);
        *target = 0;
        csum  = acpi_tb_checksum((uint8_t *)data, len);
@@ -537,6 +547,8 @@ static void set_posted_interrupt(int vector)
 
 int main(int argc, char **argv)
 {
+       struct boot_params *bp;
+       char *cmdline;
        uint64_t *p64;
        void *a = (void *)0xe0000;
        struct acpi_table_rsdp *r;
@@ -544,7 +556,7 @@ int main(int argc, char **argv)
        struct acpi_table_madt *m;
        struct acpi_table_xsdt *x;
        uint64_t virtiobase = 0x100000000ULL;
-       // lowmem is a bump allocated pointer to 2M at the "physbase" of memory 
+       // lowmem is a bump allocated pointer to 2M at the "physbase" of memory
        void *lowmem = (void *) 0x1000000;
        //struct vmctl vmctl;
        int amt;
@@ -566,6 +578,7 @@ int main(int argc, char **argv)
        fprintf(stderr, "%p %p %p %p\n", PGSIZE, PGSHIFT, PML1_SHIFT,
                        PML1_PTE_REACH);
 
+
        // mmap is not working for us at present.
        if ((uint64_t)_kernel > GKERNBASE) {
                fprintf(stderr, "kernel array @%p is above , GKERNBASE@%p sucks\n", _kernel, GKERNBASE);
@@ -581,7 +594,7 @@ int main(int argc, char **argv)
        //Place mmap(Gan)
        a_page = mmap((void *)0xfee00000, PGSIZE, PROT_READ | PROT_WRITE,
                              MAP_POPULATE | MAP_ANONYMOUS, -1, 0);
-       fprintf(stderr, "a_page mmap pointer %p", a_page);
+       fprintf(stderr, "a_page mmap pointer %p\n", a_page);
 
        if (a_page == (void *) -1) {
                perror("Could not mmap APIC");
@@ -738,7 +751,7 @@ int main(int argc, char **argv)
        memset(a, 0, 4096);
        a += 4096;
        gpci.vapic_addr = a;
-       //vmctl.vapic = (uint64_t) a_page;      
+       //vmctl.vapic = (uint64_t) a_page;
        memset(a, 0, 4096);
        ((uint32_t *)a)[0x30/4] = 0x01060014;
        p64 = a;
@@ -748,6 +761,51 @@ int main(int argc, char **argv)
        a += 4096;
        gpci.apic_addr = (void*)0xfee00000;
 
+       /* Allocate memory for, and zero the bootparams
+        * page before writing to it, or Linux thinks
+        * we're talking crazy.
+        */
+       a += 4096;
+       bp = a;
+       memset(bp, 0, 4096);
+
+       /* Set the kernel command line parameters */
+       a += 4096;
+       cmdline = a;
+       a += 4096;
+       bp->hdr.cmd_line_ptr = (uintptr_t) cmdline;
+       sprintf(cmdline, "earlyprintk=vmcall,keep"
+                            " console=hvc0"
+                            " virtio_mmio.device=1M@0x100000000:32"
+                            " nosmp"
+                            " maxcpus=1"
+                            " acpi.debug_layer=0x2"
+                            " acpi.debug_level=0xffffffff"
+                            " apic=debug"
+                            " noexec=off"
+                            " nohlt"
+                            " init=/bin/sh"
+                            " lapic=notscdeadline"
+                            " lapictimerfreq=1000"
+                            " pit=none");
+
+
+       /* Put the e820 memory region information in the boot_params */
+       bp->e820_entries = 3;
+       int e820i = 0;
+
+       bp->e820_map[e820i].addr = 0;
+       bp->e820_map[e820i].size = 16 * 1048576;
+       bp->e820_map[e820i++].type = E820_RESERVED;
+
+       bp->e820_map[e820i].addr = 16 * 1048576;
+       bp->e820_map[e820i].size = 128 * 1048576;
+       bp->e820_map[e820i++].type = E820_RAM;
+
+       bp->e820_map[e820i].addr = 0xf0000000;
+       bp->e820_map[e820i].size = 0x10000000;
+       bp->e820_map[e820i++].type = E820_RESERVED;
+
        if (ros_syscall(SYS_vmm_setup, nr_gpcs, &gpci, vmmflags, 0, 0, 0) !=
            nr_gpcs) {
                perror("Guest pcore setup failed");
@@ -810,13 +868,14 @@ int main(int argc, char **argv)
        vmctl.cr3 = (uint64_t) p512;
        vmctl.regs.tf_rip = entry;
        vmctl.regs.tf_rsp = (uint64_t) &stack[1024];
+       vmctl.regs.tf_rsi = (uint64_t) bp;
        if (mcp) {
                /* set up virtio bits, which depend on threads being enabled. */
                register_virtio_mmio(&vqdev, virtio_mmio_base);
        }
        fprintf(stderr, "threads started\n");
        fprintf(stderr, "Writing command :%s:\n", cmd);
-       
+
        if (debug)
                vapic_status_dump(stderr, (void *)gpci.vapic_addr);
 
@@ -826,7 +885,7 @@ int main(int argc, char **argv)
                vapic_status_dump(stderr, (void *)gpci.vapic_addr);
 
        while (1) {
-               void showstatus(FILE *f, struct vmctl *v);
+
                int c;
                uint8_t byte;
                vmctl.command = REG_RIP;
@@ -923,10 +982,25 @@ int main(int argc, char **argv)
                        case EXIT_REASON_MSR_WRITE:
                        case EXIT_REASON_MSR_READ:
                                fprintf(stderr, "Do an msr\n");
-                               quit = msrio(&vmctl, vmctl.ret_code);
-                               if (quit) {
+                               if (msrio(&vmctl, vmctl.ret_code)) {
+                                       // uh-oh, msrio failed
+                                       // well, hand back a GP fault which is what Intel does
                                        fprintf(stderr, "MSR FAILED: RIP %p, shutdown 0x%x\n", vmctl.regs.tf_rip, vmctl.shutdown);
                                        showstatus(stderr, &vmctl);
+
+                                       // Use event injection through vmctl to send
+                                       // a general protection fault
+                                       // vmctl.interrupt gets written to the VM-Entry
+                                       // Interruption-Information Field by vmx
+                                       vmctl.interrupt = (1 << 31) // "Valid" bit
+                                                       | (0 << 12) // Reserved by Intel
+                                                       | (1 << 11) // Deliver-error-code bit (set if event pushes error code to stack)
+                                                       | (3 << 8)  // Event type (3 is "hardware exception")
+                                                       | 13;       // Interrupt/exception vector (13 is "general protection fault")
+                                       run_vmthread(&vmctl);
+                               } else {
+                                       vmctl.regs.tf_rip += 2;
+                                       run_vmthread(&vmctl);
                                }
                                break;
                        case EXIT_REASON_MWAIT_INSTRUCTION:
@@ -957,9 +1031,9 @@ int main(int argc, char **argv)
                                //fprintf(stderr, "RIP %p, shutdown 0x%x\n", vmctl.regs.tf_rip, vmctl.shutdown);
                                //showstatus(stderr, &vmctl);
                                break;
-                       case EXIT_REASON_APIC_ACCESS:                           
+                       case EXIT_REASON_APIC_ACCESS:
                                if (1 || debug)fprintf(stderr, "APIC READ EXIT\n");
-                               
+
                                uint64_t gpa, *regp, val;
                                uint8_t regx;
                                int store, size;
@@ -1006,7 +1080,7 @@ int main(int argc, char **argv)
                run_vmthread(&vmctl);
        }
 
-       /* later. 
+       /* later.
        for (int i = 0; i < nr_threads-1; i++) {
                int ret;
                if (pthread_join(my_threads[i], &my_retvals[i]))
diff --git a/user/vmm/include/linux_bootparam.h b/user/vmm/include/linux_bootparam.h
new file mode 100644 (file)
index 0000000..da1778e
--- /dev/null
@@ -0,0 +1,217 @@
+/* Copyright (C) 1991-2016, the Linux Kernel authors
+ *
+ * This source code is licensed under the GNU General Public License
+ * Version 2. See the file COPYING for more details.
+ *
+ * Part of this code originates from Linux kernel files:
+ *
+ * linux/arch/x86/include/uapi/asm/bootparam.h
+ * linux/arch/x86/include/uapi/asm/e820.h
+ *
+ * These files are missing copyright headers, but are supposed to be
+ * governed by the overall Linux copyright.
+ */
+
+#pragma once
+
+#define E820MAX (128)
+/* From Linux e820.h */
+#define E820NR  0x1e8       /* # entries in E820MAP */
+
+#define E820_RAM    1
+#define E820_RESERVED   2
+#define E820_ACPI   3
+#define E820_NVS    4
+#define E820_UNUSABLE   5
+
+/* One memory range of the e820 map.  Packed, so each entry occupies
+ * 8 + 8 + 4 = 20 bytes with no trailing padding. */
+struct e820entry {
+    uint64_t addr;  /* start of memory segment */
+    uint64_t size;  /* size of memory segment */
+    uint32_t type;  /* type of memory segment (one of the E820_* values) */
+} __attribute__((packed));
+
+/* A counted table of e820 entries with room for E820MAX of them.  Unlike
+ * struct e820entry, this struct is not declared packed. */
+struct e820map {
+    uint32_t nr_map;                /* presumably # of valid entries in map[] -- confirm */
+    struct e820entry map[E820MAX];
+};
+
+/* from linux bootparam.h */
+
+/* setup_data types */
+#define SETUP_NONE          0
+#define SETUP_E820_EXT          1
+#define SETUP_DTB           2
+#define SETUP_PCI           3
+#define SETUP_EFI           4
+
+/* ram_size flags */
+#define RAMDISK_IMAGE_START_MASK    0x07FF
+#define RAMDISK_PROMPT_FLAG     0x8000
+#define RAMDISK_LOAD_FLAG       0x4000
+
+/* loadflags */
+#define LOADED_HIGH (1<<0)
+#define KASLR_FLAG  (1<<1)
+#define QUIET_FLAG  (1<<5)
+#define KEEP_SEGMENTS   (1<<6)
+#define CAN_USE_HEAP    (1<<7)
+
+/* xloadflags */
+#define XLF_KERNEL_64           (1<<0)
+#define XLF_CAN_BE_LOADED_ABOVE_4G  (1<<1)
+#define XLF_EFI_HANDOVER_32     (1<<2)
+#define XLF_EFI_HANDOVER_64     (1<<3)
+#define XLF_EFI_KEXEC           (1<<4)
+
+//#include <linux/types.h>
+//#include <linux/screen_info.h>
+//#include <linux/apm_bios.h>
+//#include <linux/edd.h>
+//#include <asm/e820.h>
+//#include <asm/ist.h>
+//#include <video/edid.h>
+
+/* extensible setup data list node
+ *
+ * A 16-byte header followed by a variable-length payload.  The payload is a
+ * C99 flexible array member rather than a GNU zero-length array; the size and
+ * member offsets are the same. */
+struct setup_data {
+    uint64_t next;      /* link to the next node; presumably an address, 0 ends the list -- confirm */
+    uint32_t type;      /* one of the SETUP_* setup_data types */
+    uint32_t len;       /* presumably the byte length of data[] -- confirm */
+    uint8_t data[];     /* payload, starts at offset 16 */
+};
+
+/* Linux setup header (from Linux bootparam.h).  It is embedded in
+ * struct boot_params as `hdr`, at offset 0x1f1.  The struct is packed: the
+ * fields below add up to 0x77 (119) bytes, and their order and widths are
+ * part of the layout, so do not reorder or resize them. */
+struct setup_header {
+    uint8_t setup_sects;
+    uint16_t    root_flags;
+    uint32_t    syssize;
+    uint16_t    ram_size;       /* see the "ram_size flags" (RAMDISK_*) above */
+    uint16_t    vid_mode;
+    uint16_t    root_dev;
+    uint16_t    boot_flag;
+    uint16_t    jump;
+    uint32_t    header;
+    uint16_t    version;
+    uint32_t    realmode_swtch;
+    uint16_t    start_sys;
+    uint16_t    kernel_version;
+    uint8_t type_of_loader;
+    uint8_t loadflags;          /* see the "loadflags" bits above */
+    uint16_t    setup_move_size;
+    uint32_t    code32_start;
+    uint32_t    ramdisk_image;
+    uint32_t    ramdisk_size;
+    uint32_t    bootsect_kludge;
+    uint16_t    heap_end_ptr;
+    uint8_t ext_loader_ver;
+    uint8_t ext_loader_type;
+    uint32_t    cmd_line_ptr;   /* only 32 bits wide: a wider pointer stored here is truncated */
+    uint32_t    initrd_addr_max;
+    uint32_t    kernel_alignment;
+    uint8_t relocatable_kernel;
+    uint8_t min_alignment;
+    uint16_t    xloadflags;     /* see the "xloadflags" (XLF_*) bits above */
+    uint32_t    cmdline_size;
+    uint32_t    hardware_subarch;
+    uint64_t    hardware_subarch_data;
+    uint32_t    payload_offset;
+    uint32_t    payload_length;
+    uint64_t    setup_data;
+    uint64_t    pref_address;
+    uint32_t    init_size;
+    uint32_t    handover_offset;
+} __attribute__((packed));
+
+/* 2 + 14 = 16 (0x10) bytes.  struct boot_params in this header does not embed
+ * this type; it reserves a raw 0x10-byte array named sys_desc_table instead. */
+struct sys_desc_table {
+    uint16_t length;
+    uint8_t  table[14];
+};
+
+/* Gleaned from OFW's set-parameters in cpu/x86/pc/linux.fth
+ *
+ * Four packed 32-bit fields, 16 (0x10) bytes in total.  struct boot_params in
+ * this header does not embed this type; it reserves a raw 0x10-byte array in
+ * its place. */
+struct olpc_ofw_header {
+    uint32_t ofw_magic; /* OFW signature */
+    uint32_t ofw_version;
+    uint32_t cif_handler;   /* callback into OFW */
+    uint32_t irq_desc_table;
+} __attribute__((packed));
+
+/* Eight 32-bit fields, 32 (0x20) bytes in total.  struct boot_params in this
+ * header does not embed this type; it reserves a raw 0x20-byte array named
+ * efi_info instead. */
+struct efi_info {
+    uint32_t efi_loader_signature;
+    uint32_t efi_systab;
+    uint32_t efi_memdesc_size;
+    uint32_t efi_memdesc_version;
+    uint32_t efi_memmap;
+    uint32_t efi_memmap_size;
+    uint32_t efi_systab_hi;
+    uint32_t efi_memmap_hi;
+};
+
+/* The so-called "zeropage"
+ *
+ * Packed image of the Linux boot parameters.  Sub-structures whose Linux
+ * types are not available here are replaced by raw byte arrays of the same
+ * size; the original member is kept as a // comment above each one.  The
+ * trailing hex comments give each member's byte offset.  With the padding
+ * below, the members add up to 0x1000 (4096) bytes. */
+struct boot_params {
+    //struct screen_info screen_info;           /* 0x000 */
+    uint8_t screen_info[0x40];
+    //struct apm_bios_info apm_bios_info;       /* 0x040 */
+    uint8_t apm_bios_info[0x14];
+    uint8_t  _pad2[4];                  /* 0x054 */
+    uint64_t  tboot_addr;               /* 0x058 */
+    //struct ist_info ist_info;         /* 0x060 */
+    uint8_t ist_info[0x10];
+    uint8_t  _pad3[16];             /* 0x070 */
+    uint8_t  hd0_info[16];  /* obsolete! */     /* 0x080 */
+    uint8_t  hd1_info[16];  /* obsolete! */     /* 0x090 */
+    //struct sys_desc_table sys_desc_table;     /* 0x0a0 */
+    uint8_t sys_desc_table[0x10];
+    //struct olpc_ofw_header olpc_ofw_header;       /* 0x0b0 */
+    /* NOTE(review): spelled "ofs" here but "ofw" in the commented-out member
+     * above; looks like a typo.  Left as is, since renaming changes the API. */
+    uint8_t olpc_ofs_header[0x10];
+    uint32_t ext_ramdisk_image;         /* 0x0c0 */
+    uint32_t ext_ramdisk_size;              /* 0x0c4 */
+    uint32_t ext_cmd_line_ptr;              /* 0x0c8 */
+    uint8_t  _pad4[116];                /* 0x0cc */
+    //struct edid_info edid_info;           /* 0x140 */
+    uint8_t edid_info[0x1c0-0x140];
+    //struct efi_info efi_info;         /* 0x1c0 */
+    uint8_t efi_info[0x20];
+    uint32_t alt_mem_k;             /* 0x1e0 */
+    uint32_t scratch;       /* Scratch field! */    /* 0x1e4 */
+    uint8_t  e820_entries;              /* 0x1e8 */
+    uint8_t  eddbuf_entries;                /* 0x1e9 */
+    uint8_t  edd_mbr_sig_buf_entries;           /* 0x1ea */
+    uint8_t  kbd_status;                /* 0x1eb */
+    uint8_t  _pad5[3];                  /* 0x1ec */
+    /*
+     * The sentinel is set to a nonzero value (0xff) in header.S.
+     *
+     * A bootloader is supposed to only take setup_header and put
+     * it into a clean boot_params buffer. If it turns out that
+     * it is clumsy or too generous with the buffer, it most
+     * probably will pick up the sentinel variable too. The fact
+     * that this variable then is still 0xff will let kernel
+     * know that some variables in boot_params are invalid and
+     * kernel should zero out certain portions of boot_params.
+     */
+    uint8_t  sentinel;                  /* 0x1ef */
+    uint8_t  _pad6[1];                  /* 0x1f0 */
+    struct setup_header hdr;    /* setup header */  /* 0x1f1 */
+    /* pad from the end of hdr up to offset 0x290 */
+    uint8_t  _pad7[0x290-0x1f1-sizeof(struct setup_header)];
+    uint32_t edd_mbr_sig_buffer[/*EDD_MBR_SIG_MAX*/ (0x2d0-0x290)/sizeof(uint32_t)];    /* 0x290 */
+    struct e820entry e820_map[E820MAX];     /* 0x2d0 */
+    uint8_t  _pad8[48];             /* 0xcd0 */
+    //struct edd_info eddbuf[EDDMAXNR];     /* 0xd00 */
+    uint8_t eddbuf[0xeec-0xd00];
+    uint8_t  _pad9[276];                /* 0xeec */
+} __attribute__((packed));
+
+/* x86 sub-architecture identifiers, numbered consecutively from 0;
+ * X86_NR_SUBARCHS is the count of identifiers before it.
+ * NOTE(review): presumably the values for setup_header.hardware_subarch --
+ * confirm. */
+enum {
+    X86_SUBARCH_PC        = 0,
+    X86_SUBARCH_LGUEST    = 1,
+    X86_SUBARCH_XEN       = 2,
+    X86_SUBARCH_INTEL_MID = 3,
+    X86_SUBARCH_CE4100    = 4,
+    X86_NR_SUBARCHS       = 5,
+};
+
+#include <parlib/assert.h>
+
+/* sorry, need this here.
+ *
+ * Compile-time layout checks for struct boot_params.  This function exists
+ * only to hold the static asserts and is never meant to be called.  It is
+ * `static inline` so that files including this header without calling it do
+ * not get an unused-function warning. */
+static inline void do_not_call_bootparam_asserts(void)
+{
+    /* e820_entries must sit at the offset Linux names E820NR */
+    parlib_static_assert(offsetof(struct boot_params, e820_entries) == E820NR);
+    /* the setup header starts at 0x1f1 */
+    parlib_static_assert(offsetof(struct boot_params, hdr) == 0x1f1);
+    parlib_static_assert(offsetof(struct boot_params, e820_map) == 0x2d0);
+    /* the whole zeropage is 4096 bytes */
+    parlib_static_assert(sizeof(struct boot_params) == 0x1000);
+}