Andrew's port to sparc
author Andrew Waterman <waterman@r53.millennium.berkeley.edu>
Thu, 30 Jul 2009 21:48:32 +0000 (14:48 -0700)
committer Barret Rhoden <brho@cs.berkeley.edu>
Thu, 30 Jul 2009 23:03:35 +0000 (16:03 -0700)
143 files changed:
GNUmakefile
arch/i386/boot/Makefrag [new file with mode: 0644]
arch/i386/boot/boot.S [new file with mode: 0644]
arch/i386/boot/main.c [new file with mode: 0644]
arch/i386/boot/sign.pl [new file with mode: 0644]
arch/i386/include/apic.h [new file with mode: 0644]
arch/i386/include/arch.h [new file with mode: 0644]
arch/i386/include/atomic.h [new file with mode: 0644]
arch/i386/include/console.h [new file with mode: 0644]
arch/i386/include/kbdreg.h [new file with mode: 0644]
arch/i386/include/mmu.h [new file with mode: 0644]
arch/i386/include/smp.h [new file with mode: 0644]
arch/i386/include/trap.h [new file with mode: 0644]
arch/i386/include/types.h [new file with mode: 0644]
arch/i386/include/x86.h [new file with mode: 0644]
arch/i386/kernel.ld [new file with mode: 0644]
arch/i386/src/Makefrag [new file with mode: 0644]
arch/i386/src/apic.c [new file with mode: 0644]
arch/i386/src/console.c [new file with mode: 0644]
arch/i386/src/cpuinfo.c [new file with mode: 0644]
arch/i386/src/entry.S [new file with mode: 0644]
arch/i386/src/env.c [new file with mode: 0644]
arch/i386/src/kclock.c [new file with mode: 0644]
arch/i386/src/kdebug.c [new file with mode: 0644]
arch/i386/src/pmap.c [new file with mode: 0644]
arch/i386/src/smp.c [new file with mode: 0644]
arch/i386/src/smp_boot.c [new file with mode: 0644]
arch/i386/src/smp_entry.S [new file with mode: 0644]
arch/i386/src/trap.c [new file with mode: 0644]
arch/i386/src/trapentry.S [new file with mode: 0644]
arch/sparc/boot/Makefrag [new file with mode: 0644]
arch/sparc/include/arch.h [new file with mode: 0644]
arch/sparc/include/atomic.h [new file with mode: 0644]
arch/sparc/include/console.h [new file with mode: 0644]
arch/sparc/include/frontend.h [new file with mode: 0644]
arch/sparc/include/kbdreg.h [new file with mode: 0644]
arch/sparc/include/mmu.h [new file with mode: 0644]
arch/sparc/include/smp.h [new file with mode: 0644]
arch/sparc/include/sparc.h [new file with mode: 0644]
arch/sparc/include/timer.h [new file with mode: 0644]
arch/sparc/include/trap.h [new file with mode: 0644]
arch/sparc/include/trap_table.h [new file with mode: 0644]
arch/sparc/include/types.h [new file with mode: 0644]
arch/sparc/kernel.ld [new file with mode: 0644]
arch/sparc/src/Makefrag [new file with mode: 0644]
arch/sparc/src/boot.c [new file with mode: 0644]
arch/sparc/src/console.c [new file with mode: 0644]
arch/sparc/src/cpuinfo.c [new file with mode: 0644]
arch/sparc/src/entry.S [new file with mode: 0644]
arch/sparc/src/env.c [new file with mode: 0644]
arch/sparc/src/env.c.save [new file with mode: 0644]
arch/sparc/src/env.c.save.1 [new file with mode: 0644]
arch/sparc/src/frontend.c [new file with mode: 0644]
arch/sparc/src/pmap.c [new file with mode: 0644]
arch/sparc/src/smp.c [new file with mode: 0644]
arch/sparc/src/spillfill.S [new file with mode: 0644]
arch/sparc/src/timer.c [new file with mode: 0644]
arch/sparc/src/trap.c [new file with mode: 0644]
arch/sparc/src/trap_entry.S [new file with mode: 0644]
arch/sparc/src/trap_table.S [new file with mode: 0644]
foo [new file with mode: 0644]
include/arch/apic.h [deleted file]
include/arch/atomic.h [deleted file]
include/arch/console.h [deleted file]
include/arch/elf.h [deleted file]
include/arch/kbdreg.h [deleted file]
include/arch/mmu.h [deleted file]
include/arch/multiboot.h [deleted file]
include/arch/smp.h [deleted file]
include/arch/stab.h [deleted file]
include/arch/timer.h [deleted file]
include/arch/types.h [deleted file]
include/arch/x86.h [deleted file]
include/atomic.h
include/elf.h [new file with mode: 0644]
include/env.h
include/monitor.h
include/multiboot.h [new file with mode: 0644]
include/pmap.h
include/ros/env.h
include/ros/memlayout.h
include/ros/ring_buffer.h
include/ros/timer.h [new file with mode: 0644]
include/ros/trap.h [deleted file]
include/smp.h [new file with mode: 0644]
include/stab.h [new file with mode: 0644]
include/stdarg.h
include/stdio.h
include/string.h
include/testing.h
include/trap.h
kern/boot/Makefrag [deleted file]
kern/boot/boot.S [deleted file]
kern/boot/main.c [deleted file]
kern/boot/sign.pl [deleted file]
kern/kernel.ld [deleted file]
kern/src/Makefrag
kern/src/apic.c [deleted file]
kern/src/arch [new symlink]
kern/src/atomic.c
kern/src/console.c [deleted file]
kern/src/entry.S [deleted file]
kern/src/env.c
kern/src/init.c
kern/src/kclock.c [deleted file]
kern/src/kdebug.c [deleted file]
kern/src/manager.c
kern/src/monitor.c
kern/src/multiboot.c [new file with mode: 0644]
kern/src/pmap.c
kern/src/printf.c
kern/src/printfmt.c
kern/src/smp.c
kern/src/smp_entry.S [deleted file]
kern/src/string.c
kern/src/syscall.c
kern/src/testing.c
kern/src/timer.c
kern/src/trap.c [deleted file]
kern/src/trapentry.S [deleted file]
kern/src/workqueue.c
user/apps/roslib/Makefrag
user/apps/roslib/apps.ld [deleted file]
user/apps/roslib/apps_i386.ld [new file with mode: 0644]
user/apps/roslib/apps_sparc.ld [new file with mode: 0644]
user/apps/roslib/fptest.c [new file with mode: 0644]
user/apps/roslib/measurements.c
user/apps/roslib/proctests.c
user/parlib/src/Makefrag
user/roslib/inc/atomic.h
user/roslib/inc/lib.h
user/roslib/inc/measure.h
user/roslib/inc/stdarg.h
user/roslib/src/Makefrag
user/roslib/src/atomic.c
user/roslib/src/entry.S [deleted file]
user/roslib/src/entry_i386.S [new file with mode: 0644]
user/roslib/src/entry_sparc.S [new file with mode: 0644]
user/roslib/src/panic.c
user/roslib/src/syscall.c
user/roslib/src/syscall_i386.c [new file with mode: 0644]
user/roslib/src/syscall_sparc.c [new file with mode: 0644]
user/roslib/src/timer.c

index cd4bad1..ac851c6 100644 (file)
@@ -8,6 +8,7 @@
 OBJDIR := obj
 
 TOP_DIR := .
+ARCH_DIR := $(TOP_DIR)/arch
 INCLUDE_DIR := $(TOP_DIR)/include
 UNAME=$(shell uname -m)
 V = @
@@ -47,15 +48,15 @@ NM      := $(GCCPREFIX)nm
 PERL    := perl
 
 # User defined constants passed on the command line 
-ARCH ?= NONE
+TARGET_ARCH ?= i386
 
 # Universal compiler flags
 # -fno-builtin is required to avoid refs to undefined functions in the kernel.
 # Only optimize to -O1 to discourage inlining, which complicates backtraces.
-CFLAGS := $(CFLAGS) -D$(ARCH) 
-CFLAGS += -O -pipe -MD -fno-builtin -fno-stack-protector -gstabs
-CFLAGS += -Wall -Wno-format -Wno-unused
-CFLAGS += -nostdinc -Igccinclude/i386
+CFLAGS := $(CFLAGS) -D$(TARGET_ARCH)
+CFLAGS += -O2 -pipe -MD -fno-builtin -fno-stack-protector -gstabs
+CFLAGS += -Wall -Wno-format -Wno-unused -Wno-attributes
+CFLAGS += -nostdinc -Igccinclude/$(TARGET_ARCH)
 
 # Universal loader flags
 LDFLAGS := -nostdlib
@@ -64,17 +65,29 @@ LDFLAGS := -nostdlib
 GCC_LIB := $(shell $(CC) -print-libgcc-file-name)
 
 # 64 Bit specific flags / definitions
-ifeq ($(UNAME),x86_64)
-       CFLAGS += -m32
-       LDFLAGS += -melf_i386
-       GCC_LIB = $(shell $(CC) -print-libgcc-file-name | sed 's/libgcc.a/32\/libgcc.a/')
+ifeq ($(TARGET_ARCH),i386)
+       ifeq ($(UNAME),x86_64)
+               CFLAGS += -m32
+               LDFLAGS += -melf_i386
+               GCC_LIB = $(shell $(CC) -print-libgcc-file-name | sed 's/libgcc.a/32\/libgcc.a/')
+       endif
 endif
 
 # List of directories that the */Makefrag makefile fragments will add to
 OBJDIRS :=
 
 # Make sure that 'all' is the first target
-all:
+all: symlinks
+
+kern/boot/Makefrag: symlinks
+
+symlinks:
+       -unlink include/arch
+       ln -s ../arch/$(TARGET_ARCH)/include/ include/arch
+       -unlink kern/src/arch
+       ln -s ../../arch/$(TARGET_ARCH)/src/ kern/src/arch
+       -unlink kern/boot
+       ln -s ../arch/$(TARGET_ARCH)/boot/ kern/boot
 
 # Include Makefrags for subdirectories
 include user/Makefrag
diff --git a/arch/i386/boot/Makefrag b/arch/i386/boot/Makefrag
new file mode 100644 (file)
index 0000000..7a57309
--- /dev/null
@@ -0,0 +1,32 @@
+#
+# Makefile fragment for the ROS kernel.
+# This is NOT a complete makefile;
+# you must run GNU make in the top-level directory
+# where the GNUmakefile is located.
+#
+
+KERN_BOOT_DIR := $(KERN_DIR)/boot
+OBJDIRS += $(KERN_BOOT_DIR)
+
+KERN_BOOT_CFLAGS  := $(KERN_CFLAGS) -Os
+KERN_BOOT_LDFLAGS := $(KERN_LDFLAGS) -N -e start -Ttext 0x7C00
+KERN_BOOT_OBJS    := $(OBJDIR)/$(KERN_DIR)/boot.o \
+                     $(OBJDIR)/$(KERN_DIR)/main.o
+
+$(OBJDIR)/$(KERN_DIR)/%.o: $(KERN_BOOT_DIR)/%.c
+       @echo + cc [BOOT] $<
+       @mkdir -p $(@D)
+       $(V)$(CC) $(KERN_BOOT_CFLAGS) -c -o $@ $<
+
+$(OBJDIR)/$(KERN_DIR)/%.o: $(KERN_BOOT_DIR)/%.S
+       @echo + as [BOOT] $<
+       @mkdir -p $(@D)
+       $(V)$(CC) $(KERN_BOOT_CFLAGS) -c -o $@ $<
+
+$(OBJDIR)/$(KERN_DIR)/boot: $(KERN_BOOT_OBJS)
+       @echo + ld [BOOT] $<
+       $(V)$(LD) $(KERN_BOOT_LDFLAGS) -o $@.out $^
+       $(V)$(OBJDUMP) -S $@.out >$@.asm
+       $(V)$(OBJCOPY) -S -O binary $@.out $@
+       $(V)perl $(KERN_BOOT_DIR)/sign.pl $(OBJDIR)/$(KERN_DIR)/boot
+
diff --git a/arch/i386/boot/boot.S b/arch/i386/boot/boot.S
new file mode 100644 (file)
index 0000000..22c2fef
--- /dev/null
@@ -0,0 +1,98 @@
+#include <arch/mmu.h>
+       
+.set PROT_MODE_CSEG,0x8                # code segment selector
+.set PROT_MODE_DSEG,0x10        # data segment selector
+.set CR0_PE_ON,0x1             # protected mode enable flag
+       
+###############################################################################
+# ENTRY POINT  
+#   This code should be stored in the first sector of the hard disk.
+#   After the BIOS initializes the hardware on startup or system reset,
+#   it loads this code at physical address 0x7c00 - 0x7d00 (512 bytes).
+#   Then the BIOS jumps to the beginning of it, address 0x7c00,
+#   while running in 16-bit real-mode (8086 compatibility mode).
+#   The Code Segment register (CS) is initially zero on entry.
+#      
+# This code switches into 32-bit protected mode so that all of
+# memory can be accessed, then calls into C.
+###############################################################################
+       
+.globl start                                   # Entry point   
+start:         .code16                         # This runs in real mode
+               cli                             # Disable interrupts
+               cld                             # String operations increment
+
+               # Set up the important data segment registers (DS, ES, SS).
+               xorw    %ax,%ax                 # Segment number zero
+               movw    %ax,%ds                 # -> Data Segment
+               movw    %ax,%es                 # -> Extra Segment
+               movw    %ax,%ss                 # -> Stack Segment
+
+               # Set up the stack pointer, growing downward from 0x7c00.
+               movw    $start,%sp              # Stack Pointer
+       
+# Enable A20:
+#   For fascinating historical reasons (related to the fact that
+#   the earliest 8086-based PCs could only address 1MB of physical memory
+#   and subsequent 80286-based PCs wanted to retain maximum compatibility),
+#   physical address line 20 is tied to low when the machine boots.
+#   Obviously this a bit of a drag for us, especially when trying to
+#   address memory above 1MB.  This code undoes this.
+       
+seta20.1:      inb     $0x64,%al               # Get status
+               testb   $0x2,%al                # Busy?
+               jnz     seta20.1                # Yes
+               movb    $0xd1,%al               # Command: Write
+               outb    %al,$0x64               #  output port
+seta20.2:      inb     $0x64,%al               # Get status
+               testb   $0x2,%al                # Busy?
+               jnz     seta20.2                # Yes
+               movb    $0xdf,%al               # Enable
+               outb    %al,$0x60               #  A20
+
+# Switch from real to protected mode:
+#   Up until now, there's been no protection, so we've gotten along perfectly
+#   well without explicitly telling the processor how to translate addresses.
+#   When we switch to protected mode, this is no longer true!
+#   We need at least to set up some "segments" that tell the processor it's
+#   OK to run code at any address, or write to any address.
+#   The 'gdt' and 'gdtdesc' tables below define these segments.
+#   This code loads them into the processor.
+#   We need this setup to ensure the transition to protected mode is smooth.
+
+real_to_prot:  cli                     # Don't allow interrupts: mandatory,
+                                       # since we didn't set up an interrupt
+                                       # descriptor table for handling them
+               lgdt    gdtdesc         # load GDT: mandatory in protected mode
+               movl    %cr0, %eax      # Turn on protected mode
+               orl     $CR0_PE_ON, %eax
+               movl    %eax, %cr0
+
+               # CPU magic: jump to relocation, flush prefetch queue, and
+               # reload %cs.  Has the effect of just jmp to the next
+               # instruction, but simultaneously loads CS with
+               # $PROT_MODE_CSEG.
+               ljmp    $PROT_MODE_CSEG, $protcseg
+       
+               # we've switched to 32-bit protected mode; tell the assembler
+               # to generate code for that mode
+protcseg:      .code32
+               # Set up the protected-mode data segment registers
+               movw    $PROT_MODE_DSEG, %ax    # Our data segment selector
+               movw    %ax, %ds                # -> DS: Data Segment
+               movw    %ax, %es                # -> ES: Extra Segment
+               movw    %ax, %fs                # -> FS
+               movw    %ax, %gs                # -> GS
+               movw    %ax, %ss                # -> SS: Stack Segment
+       
+               call cmain                      # finish the boot load from C.
+                                               # cmain() should not return
+spin:          jmp spin                        # ..but in case it does, spin
+       
+               .p2align 2                      # force 4 byte alignment
+gdt:           SEG_NULL                                # null seg
+               SEG(STA_X|STA_R, 0x0, 0xffffffff)       # code seg
+               SEG(STA_W, 0x0, 0xffffffff)             # data seg
+       
+gdtdesc:       .word   0x17                    # sizeof(gdt) - 1
+               .long   gdt                     # address gdt
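
A quick arithmetic check, as a sketch rather than part of the commit: the gdt
above holds three 8-byte descriptors (null, code, data), so gdtdesc's limit is
sizeof(gdt) - 1 = 3*8 - 1 = 0x17, matching the .word above.  The same
pseudo-descriptor shape reappears in C as pseudodesc_t in
arch/i386/include/mmu.h:

    /* Sketch: the lgdt pseudo-descriptor in C terms (names illustrative). */
    #include <stdint.h>

    struct gdt_pseudodesc {
        uint16_t pd_lim;            /* sizeof(gdt) - 1 */
        uint32_t pd_base;           /* linear address of gdt */
    } __attribute__((packed));

    #define BOOT_GDT_ENTRIES 3      /* null, code, data */
    _Static_assert(BOOT_GDT_ENTRIES * 8 - 1 == 0x17,
                   "limit must match gdtdesc's .word 0x17");
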
diff --git a/arch/i386/boot/main.c b/arch/i386/boot/main.c
new file mode 100644 (file)
index 0000000..d2a7738
--- /dev/null
@@ -0,0 +1,134 @@
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+#include <arch/x86.h>
+#include <arch/arch.h>
+#include <elf.h>
+
+/**********************************************************************
+ * This a dirt simple boot loader, whose sole job is to boot
+ * an elf kernel image from the first IDE hard disk.
+ *
+ * DISK LAYOUT
+ *  * This program(boot.S and main.c) is the bootloader.  It should
+ *    be stored in the first sector of the disk.
+ * 
+ *  * The 2nd sector onward holds the kernel image.
+ *     
+ *  * The kernel image must be in ELF format.
+ *
+ * BOOT UP STEPS       
+ *  * when the CPU boots it loads the BIOS into memory and executes it
+ *
+ *  * the BIOS initializes devices, sets up the interrupt routines, and
+ *    reads the first sector of the boot device(e.g., hard-drive) 
+ *    into memory and jumps to it.
+ *
+ *  * Assuming this boot loader is stored in the first sector of the
+ *    hard-drive, this code takes over...
+ *
+ *  * control starts in boot.S -- which sets up protected mode,
+ *    and a stack so C code can run, then calls cmain()
+ *
+ *  * cmain() in this file takes over, reads in the kernel and jumps to it.
+ **********************************************************************/
+
+#define SECTSIZE       512
+#define ELFHDR         ((elf_t *) 0x10000) // scratch space
+
+void readsect(void*, uint32_t);
+void readseg(uint32_t, uint32_t, uint32_t);
+
+void
+cmain(void)
+{
+       proghdr_t *ph, *eph;
+
+       // read 1st page off disk
+       readseg((uint32_t) ELFHDR, SECTSIZE*8, 0);
+
+       // is this a valid ELF?
+       if (ELFHDR->e_magic != ELF_MAGIC)
+               goto bad;
+
+       // load each program segment (ignores ph flags)
+       ph = (proghdr_t *) ((uint8_t *) ELFHDR + ELFHDR->e_phoff);
+       eph = ph + ELFHDR->e_phnum;
+       for (; ph < eph; ph++)
+               readseg(ph->p_va, ph->p_memsz, ph->p_offset);
+
+       // call the entry point from the ELF header
+       // note: does not return!
+       ((void (*)(void)) (ELFHDR->e_entry & 0x0FFFFFFF))();
+
+bad:
+       outw(0x8A00, 0x8A00);
+       outw(0x8A00, 0x8E00);
+       while (1)
+               /* do nothing */;
+}
+
+// Read 'count' bytes at 'offset' from kernel into virtual address 'va'.
+// Might copy more than asked
+void
+readseg(uint32_t va, uint32_t count, uint32_t offset)
+{
+       uint32_t end_va;
+
+       va &= 0x0FFFFFFF;
+       end_va = va + count;
+       
+       // round down to sector boundary
+       va &= ~(SECTSIZE - 1);
+
+       // translate from bytes to sectors, and kernel starts at sector 1
+       offset = (offset / SECTSIZE) + 1;
+
+       // If this is too slow, we could read lots of sectors at a time.
+       // We'd write more to memory than asked, but it doesn't matter --
+       // we load in increasing order.
+       while (va < end_va) {
+               readsect((uint8_t*) va, offset);
+               va += SECTSIZE;
+               offset++;
+       }
+}
+
+void
+waitdisk(void)
+{
+       // wait for disk ready
+       while ((inb(0x1F7) & 0xC0) != 0x40)
+               /* do nothing */;
+}
+
+void
+readsect(void *dst, uint32_t offset)
+{
+       // wait for disk to be ready
+       waitdisk();
+
+       /* The IDE disk interface is programmed through a block of I/O
+          ports, addresses 0x1F0-0x1F7, accessed with the special
+          inb/outb instructions, as demonstrated in the following
+          code.  The sector offset is 28 bits long.
+       */
+
+       outb(0x1F2, 1);                         // number of sectors to read
+       outb(0x1F3, offset);                    // bits 0-7 (low bits) of 28-bit offset
+       outb(0x1F4, offset >> 8);               // bits 8-15 of 28-bit offset
+       outb(0x1F5, offset >> 16);              // bits 16-23 of 28-bit offset
+       outb(0x1F6, (offset >> 24) | 0xE0);     // bits 24-27 of 28-bit offset
+                                               // bit 28 (= 0) means Disk 0
+                                               // other bits (29-31) must be set to one
+       outb(0x1F7, 0x20);                      // cmd 0x20 - read sectors
+
+       // wait for disk to be ready
+       waitdisk();
+
+       // read a sector
+       insl(0x1F0, dst, SECTSIZE/4);
+}
+
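readsect() above programs the disk with 28-bit LBA addressing: the sector
number is spread across ports 0x1F3-0x1F6, with 0xE0 in the top nibble of
0x1F6 selecting drive 0 in LBA mode.  A sketch of just that split, assuming
the same outb() from arch/x86.h that main.c uses (the helper name is
illustrative, not from the commit):

    /* Sketch: how the 28-bit sector number maps onto the IDE ports. */
    static void ide_select_sector(uint32_t lba)   /* lba < (1 << 28) */
    {
        outb(0x1F2, 1);                           /* read one sector */
        outb(0x1F3, lba & 0xFF);                  /* LBA bits 0-7 */
        outb(0x1F4, (lba >> 8) & 0xFF);           /* LBA bits 8-15 */
        outb(0x1F5, (lba >> 16) & 0xFF);          /* LBA bits 16-23 */
        outb(0x1F6, ((lba >> 24) & 0x0F) | 0xE0); /* bits 24-27, drive 0, LBA */
    }
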
diff --git a/arch/i386/boot/sign.pl b/arch/i386/boot/sign.pl
new file mode 100644 (file)
index 0000000..8a65e9f
--- /dev/null
@@ -0,0 +1,19 @@
+#!/bin/perl
+
+open(SIG, $ARGV[0]) || die "open $ARGV[0]: $!";
+
+$n = sysread(SIG, $buf, 1000);
+
+if($n > 510){
+       print STDERR "boot block too large: $n bytes (max 510)\n";
+       exit 1;
+}
+
+print STDERR "boot block is $n bytes (max 510)\n";
+
+$buf .= "\0" x (510-$n);
+$buf .= "\x55\xAA";
+
+open(SIG, ">$ARGV[0]") || die "open >$ARGV[0]: $!";
+print SIG $buf;
+close SIG;
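
The BIOS treats a sector as bootable only if its last two bytes are 0x55 0xAA,
which is why sign.pl pads the loader to 510 bytes and appends that signature.
The invariant, as a C sketch (not part of the commit):

    /* Sketch: the check a BIOS effectively performs on a boot sector. */
    #include <stdint.h>

    static int boot_sector_valid(const uint8_t sector[512])
    {
        return sector[510] == 0x55 && sector[511] == 0xAA;
    }
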
diff --git a/arch/i386/include/apic.h b/arch/i386/include/apic.h
new file mode 100644 (file)
index 0000000..1df0369
--- /dev/null
@@ -0,0 +1,229 @@
+/*
+ * Copyright (c) 2009 The Regents of the University of California
+ * See LICENSE for details.
+ */
+
+#ifndef ROS_KERN_APIC_H
+#define ROS_KERN_APIC_H
+
+/* 
+ * Functions and definitions for dealing with the APIC and PIC, specific to
+ * Intel.  Does not handle an x2APIC.
+ */
+
+#include <arch/mmu.h>
+#include <arch/x86.h>
+
+// PIC
+#define PIC1_CMD                                       0x20
+#define PIC1_DATA                                      0x21
+#define PIC2_CMD                                       0xA0
+#define PIC2_DATA                                      0xA1
+// These are also hardcoded into the IRQ_HANDLERs of arch/i386/src/trapentry.S
+#define PIC1_OFFSET                                    0x20
+#define PIC2_OFFSET                                    0x28
+#define PIC_EOI                                                0x20
+
+// Local APIC
+#define LAPIC_BASE                                     0xfee00000 // this is the default, can be changed
+#define LAPIC_EOI                                      (LAPIC_BASE + 0x0b0)
+#define LAPIC_SPURIOUS                         (LAPIC_BASE + 0x0f0)
+#define LAPIC_VERSION                          (LAPIC_BASE + 0x030)
+#define LAPIC_ERROR                                    (LAPIC_BASE + 0x280)
+#define LAPIC_ID                                       (LAPIC_BASE + 0x020)
+#define LAPIC_LOGICAL_ID                       (LAPIC_BASE + 0x0d0)
+// LAPIC Local Vector Table
+#define LAPIC_LVT_TIMER                                (LAPIC_BASE + 0x320)
+#define LAPIC_LVT_LINT0                                (LAPIC_BASE + 0x350)
+#define LAPIC_LVT_LINT1                                (LAPIC_BASE + 0x360)
+#define LAPIC_LVT_ERROR                                (LAPIC_BASE + 0x370)
+#define LAPIC_LVT_PERFMON                      (LAPIC_BASE + 0x340)
+#define LAPIC_LVT_THERMAL                      (LAPIC_BASE + 0x330)
+#define LAPIC_LVT_MASK                         0x00010000
+// LAPIC Timer
+#define LAPIC_TIMER_INIT                       (LAPIC_BASE + 0x380)
+#define LAPIC_TIMER_CURRENT                    (LAPIC_BASE + 0x390)
+#define LAPIC_TIMER_DIVIDE                     (LAPIC_BASE + 0x3e0)
+#define LAPIC_TIMER_DEFAULT_VECTOR     0xeb
+#define LAPIC_TIMER_DEFAULT_DIVISOR    0xa // This is 128.  Ref SDM 3.a 9.6.4
+// IPI Interrupt Command Register
+#define LAPIC_IPI_ICR_LOWER                    (LAPIC_BASE + 0x300)
+#define LAPIC_IPI_ICR_UPPER                    (LAPIC_BASE + 0x310)
+
+// IOAPIC
+#define IOAPIC_BASE                                    0xfec00000 // this is the default, can be changed
+
+// PIT (Programmable Interval Timer)
+#define        TIMER_REG_CNTR0 0       /* timer 0 counter port */
+#define        TIMER_REG_CNTR1 1       /* timer 1 counter port */
+#define        TIMER_REG_CNTR2 2       /* timer 2 counter port */
+#define        TIMER_REG_MODE  3       /* timer mode port */
+#define        TIMER_SEL0      0x00    /* select counter 0 */
+#define        TIMER_SEL1      0x40    /* select counter 1 */
+#define        TIMER_SEL2      0x80    /* select counter 2 */
+#define        TIMER_INTTC     0x00    /* mode 0, intr on terminal cnt */
+#define        TIMER_ONESHOT   0x02    /* mode 1, one shot */
+#define        TIMER_RATEGEN   0x04    /* mode 2, rate generator */
+#define        TIMER_SQWAVE    0x06    /* mode 3, square wave */
+#define        TIMER_SWSTROBE  0x08    /* mode 4, s/w triggered strobe */
+#define        TIMER_HWSTROBE  0x0a    /* mode 5, h/w triggered strobe */
+#define        TIMER_LATCH     0x00    /* latch counter for reading */
+#define        TIMER_LSB       0x10    /* r/w counter LSB */
+#define        TIMER_MSB       0x20    /* r/w counter MSB */
+#define        TIMER_16BIT     0x30    /* r/w counter 16 bits, LSB first */
+#define        TIMER_BCD       0x01    /* count in BCD */
+
+#define PIT_FREQ                                       1193182
+
+#define IO_TIMER1   0x40        /* 8253 Timer #1 */
+#define TIMER_CNTR0 (IO_TIMER1 + TIMER_REG_CNTR0)
+#define TIMER_CNTR1 (IO_TIMER1 + TIMER_REG_CNTR1)
+#define TIMER_CNTR2 (IO_TIMER1 + TIMER_REG_CNTR2)
+#define TIMER_MODE  (IO_TIMER1 + TIMER_REG_MODE)
+
+typedef struct system_timing {
+       uint64_t tsc_freq;
+       uint64_t bus_freq;
+       uint16_t pit_divisor;
+       uint8_t pit_mode;
+} system_timing_t;
+
+extern system_timing_t system_timing;
+
+void pic_remap(void);
+void pic_mask_irq(uint8_t irq);
+void pic_unmask_irq(uint8_t irq);
+void __lapic_set_timer(uint32_t ticks, uint8_t vec, bool periodic, uint8_t div);
+void lapic_set_timer(uint32_t usec, bool periodic);
+uint32_t lapic_get_default_id(void);
+// PIT related
+void pit_set_timer(uint32_t freq, uint32_t mode);
+void timer_init(void);
+void udelay(uint64_t usec);
+void udelay_pit(uint64_t usec);
+// TODO: right now timer defaults to TSC
+uint64_t gettimer(void);
+inline uint64_t getfreq(void);
+
+static inline void pic_send_eoi(uint32_t irq);
+static inline void lapic_send_eoi(void);
+static inline uint32_t lapic_get_version(void);
+static inline uint32_t lapic_get_error(void);
+static inline uint32_t lapic_get_id(void);
+static inline uint8_t lapic_get_logid(void);
+static inline void lapic_set_logid(uint8_t id);
+static inline void lapic_disable(void);
+static inline void lapic_enable(void);
+static inline void lapic_wait_to_send(void);
+static inline void send_init_ipi(void);
+static inline void send_startup_ipi(uint8_t vector);
+static inline void send_self_ipi(uint8_t vector);
+static inline void send_broadcast_ipi(uint8_t vector);
+static inline void send_all_others_ipi(uint8_t vector);
+static inline void send_ipi(uint8_t dest, bool logical_mode, uint8_t vector);
+
+#define mask_lapic_lvt(entry) \
+       write_mmreg32(entry, read_mmreg32(entry) | LAPIC_LVT_MASK)
+#define unmask_lapic_lvt(entry) \
+       write_mmreg32(entry, read_mmreg32(entry) & ~LAPIC_LVT_MASK)
+
+static inline void pic_send_eoi(uint32_t irq)
+{
+       // IRQs 8 and up are chained through the slave PIC, which needs its own EOI
+       if (irq > 7)
+               outb(PIC2_CMD, PIC_EOI);
+       outb(PIC1_CMD, PIC_EOI);
+}
+
+static inline void lapic_send_eoi(void)
+{
+       write_mmreg32(LAPIC_EOI, 0);
+}
+
+static inline uint32_t lapic_get_version(void)
+{
+       return read_mmreg32(LAPIC_VERSION);     
+}
+
+static inline uint32_t lapic_get_error(void)
+{
+       write_mmreg32(LAPIC_ERROR, 0xdeadbeef);
+       return read_mmreg32(LAPIC_ERROR);
+}
+
+static inline uint32_t lapic_get_id(void)
+{
+       return read_mmreg32(LAPIC_ID) >> 24;
+}
+
+static inline uint8_t lapic_get_logid(void)
+{
+       return read_mmreg32(LAPIC_LOGICAL_ID) >> 24;
+}
+
+static inline void lapic_set_logid(uint8_t id)
+{
+       write_mmreg32(LAPIC_LOGICAL_ID, id << 24);
+}
+
+/* There are a couple of ways to disable the LAPIC.  The MSR route doesn't
+ * seem to work in KVM, and it is also fairly permanent.
+ */
+static inline void lapic_disable(void)
+{
+       write_mmreg32(LAPIC_SPURIOUS, read_mmreg32(LAPIC_SPURIOUS) & 0xffffefff);
+       //write_msr(IA32_APIC_BASE, read_msr(IA32_APIC_BASE) & ~MSR_APIC_ENABLE);
+}
+
+/* Spins until previous IPIs are delivered.  Not sure if we want it inlined
+ * Also not sure when we really need to do this. 
+ */
+static inline void lapic_wait_to_send(void)
+{
+       // cpu_relax() lives in arch/arch.h, which includes this header, so
+       // open-code the pause here to avoid a circular include.
+       while(read_mmreg32(LAPIC_IPI_ICR_LOWER) & 0x1000)
+               asm volatile("pause" : : : "memory");
+}
+
+static inline void lapic_enable(void)
+{
+       write_mmreg32(LAPIC_SPURIOUS, read_mmreg32(LAPIC_SPURIOUS) | 0x00000100);
+}
+
+static inline void send_init_ipi(void)
+{
+       write_mmreg32(LAPIC_IPI_ICR_LOWER, 0x000c4500);
+}
+
+static inline void send_startup_ipi(uint8_t vector)
+{
+       write_mmreg32(LAPIC_IPI_ICR_LOWER, 0x000c4600 | vector);
+}
+
+static inline void send_self_ipi(uint8_t vector)
+{
+       write_mmreg32(LAPIC_IPI_ICR_LOWER, 0x00044000 | vector);
+}
+
+static inline void send_broadcast_ipi(uint8_t vector)
+{
+       write_mmreg32(LAPIC_IPI_ICR_LOWER, 0x00084000 | vector);
+}
+
+static inline void send_all_others_ipi(uint8_t vector)
+{
+       write_mmreg32(LAPIC_IPI_ICR_LOWER, 0x000c4000 | vector);
+}
+
+static inline void send_ipi(uint8_t dest, bool logical_mode, uint8_t vector)
+{
+       write_mmreg32(LAPIC_IPI_ICR_UPPER, dest << 24);
+       write_mmreg32(LAPIC_IPI_ICR_LOWER, 0x00004000 | (logical_mode << 11) | vector);
+}
+
+/* To change the LAPIC Base (not recommended):
+       msr_val = read_msr(IA32_APIC_BASE);
+       msr_val = msr_val & ~MSR_APIC_BASE_ADDRESS | 0xfaa00000;
+       write_msr(IA32_APIC_BASE, msr_val);
+*/
+#endif /* ROS_KERN_APIC_H */
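
The IPI constants above pack several fields of the ICR into one literal.  A
sketch decomposing them (field positions per the Intel SDM; the macro names
are illustrative, not from the commit):

    /* Sketch: the ICR fields hidden in literals like 0x000c4500. */
    #define ICR_VECTOR(v)        ((v) & 0xff)
    #define ICR_DELIVERY_INIT    (0x5 << 8)    /* 0x500 */
    #define ICR_DELIVERY_STARTUP (0x6 << 8)    /* 0x600 */
    #define ICR_LEVEL_ASSERT     (1 << 14)     /* 0x4000 */
    #define ICR_SHORTHAND_SELF   (0x1 << 18)   /* 0x40000 */
    #define ICR_SHORTHAND_ALL    (0x2 << 18)   /* 0x80000, incl. self */
    #define ICR_SHORTHAND_OTHERS (0x3 << 18)   /* 0xc0000, excl. self */

    /* send_init_ipi():    0xc0000 | 0x4000 | 0x500          == 0x000c4500
     * send_startup_ipi(): 0xc0000 | 0x4000 | 0x600 | vector == 0x000c4600 | v
     * send_self_ipi():    0x40000 | 0x4000         | vector == 0x00044000 | v
     */
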
diff --git a/arch/i386/include/arch.h b/arch/i386/include/arch.h
new file mode 100644 (file)
index 0000000..aae9cc9
--- /dev/null
@@ -0,0 +1,156 @@
+#ifndef ROS_INC_ARCH_H
+#define ROS_INC_ARCH_H
+
+#include <arch/x86.h>
+#include <arch/types.h>
+#include <arch/apic.h>
+#include <arch/trap.h>
+
+/* Arch Constants */
+#define MAX_NUM_CPUS                           255
+
+static __inline void breakpoint(void) __attribute__((always_inline));
+static __inline void invlpg(void *addr) __attribute__((always_inline));
+static __inline void tlbflush(void) __attribute__((always_inline));
+static __inline uint64_t read_tsc(void) __attribute__((always_inline));
+static __inline uint64_t read_tsc_serialized(void) __attribute__((always_inline));
+static __inline void enable_irq(void) __attribute__((always_inline));
+static __inline void disable_irq(void) __attribute__((always_inline));
+static __inline void enable_irqsave(int8_t* state) __attribute__((always_inline));
+static __inline void disable_irqsave(int8_t* state) __attribute__((always_inline));
+static __inline void cpu_relax(void) __attribute__((always_inline));
+static __inline void cpu_halt(void) __attribute__((always_inline));
+static __inline void clflush(uintptr_t* addr) __attribute__((always_inline));
+static __inline int irq_is_enabled(void) __attribute__((always_inline));
+static __inline void cache_flush(void) __attribute__((always_inline));
+static __inline void reboot(void) __attribute__((always_inline)) __attribute__((noreturn));
+
+void print_cpuinfo(void);
+void show_mapping(uintptr_t start, size_t size);
+void backtrace(void);
+
+static __inline void
+breakpoint(void)
+{
+       __asm __volatile("int3");
+}
+
+static __inline void 
+invlpg(void *addr)
+{ 
+       __asm __volatile("invlpg (%0)" : : "r" (addr) : "memory");
+}  
+
+static __inline void
+tlbflush(void)
+{
+       uint32_t cr3;
+       __asm __volatile("movl %%cr3,%0" : "=r" (cr3));
+       __asm __volatile("movl %0,%%cr3" : : "r" (cr3));
+}
+
+static __inline uint64_t
+read_tsc(void)
+{
+       uint64_t tsc;
+       __asm __volatile("rdtsc" : "=A" (tsc));
+       return tsc;
+}
+
+static __inline uint64_t 
+read_tsc_serialized(void)
+{
+    uint64_t tsc;
+       cpuid(0, 0, 0, 0, 0);
+       tsc = read_tsc();
+       return tsc;
+}
+
+static __inline void
+enable_irq(void)
+{
+       asm volatile("sti");
+}
+
+static __inline void
+disable_irq(void)
+{
+       asm volatile("cli");
+}
+
+static __inline void
+enable_irqsave(int8_t* state)
+{
+       // *state tracks the number of nested enables and disables
+       // initial value of state: 0 = first run / no favorite
+       // > 0 means more enabled calls have been made
+       // < 0 means more disabled calls have been made
+       // Mostly doing this so we can call disable_irqsave first if we want
+
+       // one side or another "gets a point" if interrupts were already the
+       // way it wanted to go.  o/w, state stays at 0.  if the state was not 0
+       // then, enabling/disabling isn't even an option.  just increment/decrement
+
+       // if enabling is winning or tied, make sure it's enabled
+       if ((*state == 0) && !irq_is_enabled())
+               enable_irq();
+       else
+               (*state)++;
+}
+
+static __inline void
+disable_irqsave(int8_t* state)
+{
+       if ((*state == 0) && irq_is_enabled())
+               disable_irq();
+       else 
+               (*state)--;
+}
+
+static __inline void
+cpu_relax(void)
+{
+       // in case the compiler doesn't serialize for pause, the "memory" clobber
+       // makes sure no memory access is reordered around this instruction.
+       asm volatile("pause" : : : "memory");
+}
+
+static __inline void
+cpu_halt(void)
+{
+       asm volatile("hlt" : : : "memory");
+}
+
+static __inline void
+clflush(uintptr_t* addr)
+{
+       asm volatile("clflush %0" : : "m"(*addr));
+}
+
+static __inline int
+irq_is_enabled(void)
+{
+       return read_eflags() & FL_IF;
+}
+
+static __inline uint32_t
+core_id(void)
+{
+       return lapic_get_id();
+}
+
+static __inline void
+cache_flush(void)
+{
+        wbinvd();
+}
+
+static __inline void
+reboot(void)
+{
+       outb(0x92, 0x3);
+       asm volatile ("movl $0, %esp; int $0");
+       while(1);
+}
+
+#endif /* !ROS_INC_ARCH_H */
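
The enable_irqsave()/disable_irqsave() counter protocol above lets saved
pairs nest in either order.  A usage sketch, assuming interrupts start out
enabled (not from the commit):

    void example_critical_section(void)
    {
        int8_t irq_state = 0;

        disable_irqsave(&irq_state);  /* irqs go off; state stays 0 */
        disable_irqsave(&irq_state);  /* nested: state becomes -1 */
        enable_irqsave(&irq_state);   /* state back to 0, irqs still off */
        enable_irqsave(&irq_state);   /* irqs back on */
    }
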
diff --git a/arch/i386/include/atomic.h b/arch/i386/include/atomic.h
new file mode 100644 (file)
index 0000000..9ba650d
--- /dev/null
@@ -0,0 +1,63 @@
+#ifndef ROS_INCLUDE_ATOMIC_H
+#define ROS_INCLUDE_ATOMIC_H
+
+#include <arch/types.h>
+
+#define mb() {rmb(); wmb();}
+#define rmb() ({ asm volatile("lfence"); })
+#define wmb() // x86 doesn't reorder stores with other stores, so nothing needed
+
+//linux style atomic ops
+typedef struct {volatile uint32_t real_num;} atomic_t;
+#define atomic_read(atom) ((atom)->real_num)
+#define atomic_set(atom, val) (((atom)->real_num) = (val))
+#define atomic_init(i) {(i)}
+//and the atomic incs, etc take an atomic_t ptr, deref inside
+
+static inline void atomic_inc(atomic_t* number);
+static inline void atomic_dec(atomic_t* number);
+static inline void atomic_andb(volatile uint8_t* number, uint8_t mask);
+static inline void spin_lock(volatile uint32_t* lock);
+static inline void spin_unlock(volatile uint32_t* lock);
+
+/* Inlined functions declared above */
+
+// need to do this with pointers and deref.  %0 needs to be the memory address
+static inline void atomic_inc(atomic_t* number)
+{
+       asm volatile("lock incl %0" : "=m"(number->real_num) : : "cc");
+}
+
+static inline void atomic_dec(atomic_t* number)
+{
+       asm volatile("lock decl %0" : "=m"(number->real_num) : : "cc");
+}
+
+static inline void atomic_andb(volatile uint8_t* number, uint8_t mask)
+{
+       asm volatile("lock andb %1,%0" : "+m"(*number) : "r"(mask) : "cc");
+}
+
+
+static inline void spin_lock(volatile uint32_t* lock)
+{
+       asm volatile(
+                       "1:                       "
+                       "       cmpb $0, %0;          "
+                       "       je 2f;                "
+                       "       pause;                "
+                       "       jmp 1b;               "
+                       "2:                       " 
+                       "       movb $1, %%al;        "
+                       "       xchgb %%al, %0;       "
+                       "       cmpb $0, %%al;        "
+                       "       jne 1b;               "
+               : : "m"(*lock) : "eax", "cc");
+}
+
+static inline void spin_unlock(volatile uint32_t* lock)
+{
+       *lock = 0;
+}
+
+#endif /* !ROS_INCLUDE_ATOMIC_H */
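
spin_lock() above is a test-and-test-and-set loop: it spins read-only (with
pause) until the lock byte looks free, then races with an atomic xchgb and
retries on loss.  The same shape using a GCC builtin instead of inline asm
(a sketch, not the commit's code):

    static inline void spin_lock_sketch(volatile uint32_t* lock)
    {
        volatile uint8_t* b = (volatile uint8_t*)lock;
        while (1) {
            while (*b)                     /* test: read-only spin */
                asm volatile("pause");
            if (__sync_lock_test_and_set(b, 1) == 0)
                return;                    /* the xchg won the lock */
        }
    }
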
diff --git a/arch/i386/include/console.h b/arch/i386/include/console.h
new file mode 100644 (file)
index 0000000..c00fa58
--- /dev/null
@@ -0,0 +1,29 @@
+/* See COPYRIGHT for copyright information. */
+
+#ifndef _CONSOLE_H_
+#define _CONSOLE_H_
+#ifndef ROS_KERNEL
+# error "This is a ROS kernel header; user programs should not #include it"
+#endif
+
+#include <arch/types.h>
+
+#define MONO_BASE      0x3B4
+#define MONO_BUF       0xB0000
+#define CGA_BASE       0x3D4
+#define CGA_BUF                0xB8000
+
+#define CRT_ROWS       25
+#define CRT_COLS       80
+#define CRT_SIZE       (CRT_ROWS * CRT_COLS)
+
+void cons_init(void);
+void cons_putc(int c);
+int cons_getc(void);
+
+void kbd_intr(void); // irq 1
+void serial_intr(void); // irq 4
+void serial_send_byte(uint8_t b);
+int serial_read_byte(void);
+
+#endif /* _CONSOLE_H_ */
diff --git a/arch/i386/include/kbdreg.h b/arch/i386/include/kbdreg.h
new file mode 100644 (file)
index 0000000..0c7ffea
--- /dev/null
@@ -0,0 +1,83 @@
+#ifndef ROS_KBDREG_H
+#define ROS_KBDREG_H
+
+// Special keycodes
+#define KEY_HOME       0xE0
+#define KEY_END                0xE1
+#define KEY_UP         0xE2
+#define KEY_DN         0xE3
+#define KEY_LF         0xE4
+#define KEY_RT         0xE5
+#define KEY_PGUP       0xE6
+#define KEY_PGDN       0xE7
+#define KEY_INS                0xE8
+#define KEY_DEL                0xE9
+
+
+/* This is i8042reg.h + kbdreg.h from NetBSD. */
+
+#define        KBSTATP         0x64    /* kbd controller status port(I) */
+#define         KBS_DIB        0x01    /* kbd data in buffer */
+#define         KBS_IBF        0x02    /* kbd input buffer full */
+#define         KBS_WARM       0x04    /* kbd system flag (warm boot) */
+#define         KBS_OCMD       0x08    /* kbd output buffer has command */
+#define         KBS_NOSEC      0x10    /* kbd security lock not engaged */
+#define         KBS_TERR       0x20    /* kbd transmission error */
+#define         KBS_RERR       0x40    /* kbd receive error */
+#define         KBS_PERR       0x80    /* kbd parity error */
+
+#define        KBCMDP          0x64    /* kbd controller port(O) */
+#define         KBC_RAMREAD    0x20    /* read from RAM */
+#define         KBC_RAMWRITE   0x60    /* write to RAM */
+#define         KBC_AUXDISABLE 0xa7    /* disable auxiliary port */
+#define         KBC_AUXENABLE  0xa8    /* enable auxiliary port */
+#define         KBC_AUXTEST    0xa9    /* test auxiliary port */
+#define         KBC_KBDECHO    0xd2    /* echo to keyboard port */
+#define         KBC_AUXECHO    0xd3    /* echo to auxiliary port */
+#define         KBC_AUXWRITE   0xd4    /* write to auxiliary port */
+#define         KBC_SELFTEST   0xaa    /* start self-test */
+#define         KBC_KBDTEST    0xab    /* test keyboard port */
+#define         KBC_KBDDISABLE 0xad    /* disable keyboard port */
+#define         KBC_KBDENABLE  0xae    /* enable keyboard port */
+#define         KBC_PULSE0     0xfe    /* pulse output bit 0 */
+#define         KBC_PULSE1     0xfd    /* pulse output bit 1 */
+#define         KBC_PULSE2     0xfb    /* pulse output bit 2 */
+#define         KBC_PULSE3     0xf7    /* pulse output bit 3 */
+
+#define        KBDATAP         0x60    /* kbd data port(I) */
+#define        KBOUTP          0x60    /* kbd data port(O) */
+
+#define        K_RDCMDBYTE     0x20
+#define        K_LDCMDBYTE     0x60
+
+#define        KC8_TRANS       0x40    /* convert to old scan codes */
+#define        KC8_MDISABLE    0x20    /* disable mouse */
+#define        KC8_KDISABLE    0x10    /* disable keyboard */
+#define        KC8_IGNSEC      0x08    /* ignore security lock */
+#define        KC8_CPU         0x04    /* exit from protected mode reset */
+#define        KC8_MENABLE     0x02    /* enable mouse interrupt */
+#define        KC8_KENABLE     0x01    /* enable keyboard interrupt */
+#define        CMDBYTE         (KC8_TRANS|KC8_CPU|KC8_MENABLE|KC8_KENABLE)
+
+/* keyboard commands */
+#define        KBC_RESET       0xFF    /* reset the keyboard */
+#define        KBC_RESEND      0xFE    /* request the keyboard resend the last byte */
+#define        KBC_SETDEFAULT  0xF6    /* resets keyboard to its power-on defaults */
+#define        KBC_DISABLE     0xF5    /* as per KBC_SETDEFAULT, but also disable key scanning */
+#define        KBC_ENABLE      0xF4    /* enable key scanning */
+#define        KBC_TYPEMATIC   0xF3    /* set typematic rate and delay */
+#define        KBC_SETTABLE    0xF0    /* set scancode translation table */
+#define        KBC_MODEIND     0xED    /* set mode indicators(i.e. LEDs) */
+#define        KBC_ECHO        0xEE    /* request an echo from the keyboard */
+
+/* keyboard responses */
+#define        KBR_EXTENDED    0xE0    /* extended key sequence */
+#define        KBR_RESEND      0xFE    /* needs resend of command */
+#define        KBR_ACK         0xFA    /* received a valid command */
+#define        KBR_OVERRUN     0x00    /* flooded */
+#define        KBR_FAILURE     0xFD    /* diagnostic failure */
+#define        KBR_BREAK       0xF0    /* break code prefix - sent on key release */
+#define        KBR_RSTDONE     0xAA    /* reset complete */
+#define        KBR_ECHO        0xEE    /* echo response */
+
+#endif /* !ROS_KBDREG_H */
diff --git a/arch/i386/include/mmu.h b/arch/i386/include/mmu.h
new file mode 100644 (file)
index 0000000..804260d
--- /dev/null
@@ -0,0 +1,353 @@
+#ifndef ROS_INC_MMU_H
+#define ROS_INC_MMU_H
+
+/*
+ * This file contains definitions for the x86 memory management unit (MMU),
+ * including paging- and segmentation-related data structures and constants,
+ * the %cr0, %cr4, and %eflags registers, and traps.
+ */
+
+/*
+ *
+ *     Part 1.  Paging data structures and constants.
+ *
+ */
+
+// A linear address 'la' has a three-part structure as follows:
+//
+// +--------10------+-------10-------+---------12----------+
+// | Page Directory |   Page Table   | Offset within Page  |
+// |      Index     |      Index     |                     |
+// +----------------+----------------+---------------------+
+//  \--- PDX(la) --/ \--- PTX(la) --/ \---- PGOFF(la) ----/
+//  \----------- PPN(la) -----------/
+//
+// The PDX, PTX, PGOFF, and PPN macros decompose linear addresses as shown.
+// To construct a linear address la from PDX(la), PTX(la), and PGOFF(la),
+// use PGADDR(PDX(la), PTX(la), PGOFF(la)).
+
+// page number field of address
+#define PPN(la)                (((uintptr_t) (la)) >> PTXSHIFT)
+#define VPN(la)                PPN(la)         // used to index into vpt[]
+
+// page directory index
+#define PDX(la)                ((((uintptr_t) (la)) >> PDXSHIFT) & 0x3FF)
+#define VPD(la)                PDX(la)         // used to index into vpd[]
+
+// page table index
+#define PTX(la)                ((((uintptr_t) (la)) >> PTXSHIFT) & 0x3FF)
+
+// offset in page
+#define PGOFF(la)      (((uintptr_t) (la)) & 0xFFF)
+
+// offset in jumbo page
+#define JPGOFF(la)     (((uintptr_t) (la)) & 0x003FFFFF)
+
+// construct PTE from PPN and flags
+#define PTE(ppn, flags) ((ppn) << PTXSHIFT | (flags))
+
+// construct linear address from indexes and offset
+#define PGADDR(d, t, o)        ((void*SNT) ((d) << PDXSHIFT | (t) << PTXSHIFT | (o)))
+
+// Page directory and page table constants.
+#define NPDENTRIES     1024            // page directory entries per page directory
+#define NPTENTRIES     1024            // page table entries per page table
+
+#define PGSIZE         4096            // bytes mapped by a page
+#define JPGSIZE                (4096*1024)     // bytes mapped by a jumbo page (4MB)
+#define PGSHIFT                12              // log2(PGSIZE)
+
+#define PTSIZE         (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry
+#define PTSHIFT                22              // log2(PTSIZE)
+
+#define PTXSHIFT       12              // offset of PTX in a linear address
+#define PDXSHIFT       22              // offset of PDX in a linear address
+
+// Page table/directory entry flags.
+#define PTE_P          0x001   // Present
+#define PTE_W          0x002   // Writeable
+#define PTE_U          0x004   // User
+#define PTE_PWT                0x008   // Write-Through
+#define PTE_PCD                0x010   // Cache-Disable
+#define PTE_A          0x020   // Accessed
+#define PTE_D          0x040   // Dirty
+#define PTE_PS         0x080   // Page Size (only applies to PDEs)
+#define PTE_PAT                0x080   // PAT (only applies to second layer PTEs)
+#define PTE_G          0x100   // Global Page
+
+// commonly used access modes
+#define PTE_KERN_RW    PTE_W           // Kernel Read/Write
+#define PTE_KERN_RO    0               // Kernel Read-Only
+#define PTE_USER_RW    (PTE_W | PTE_U) // Kernel/User Read/Write
+#define PTE_USER_RO    PTE_U           // Kernel/User Read-Only
+
+
+#define VALID_USER_PERMS(perm) \
+       (((perm) == PTE_U) || ((perm) == (PTE_U | PTE_W))) 
+
+// The PTE_AVAIL bits aren't used by the kernel or interpreted by the
+// hardware, so user processes are allowed to set them arbitrarily.
+#define PTE_AVAIL      0xE00   // Available for software use
+
+// Only flags in PTE_USER may be used in system calls.
+#define PTE_USER       (PTE_AVAIL | PTE_P | PTE_W | PTE_U)
+
+// address in page table entry
+#define PTE_ADDR(pte)  ((physaddr_t) (pte) & ~0xFFF)
+
+// Control Register flags
+#define CR0_PE         0x00000001      // Protection Enable
+#define CR0_MP         0x00000002      // Monitor coProcessor
+#define CR0_EM         0x00000004      // Emulation
+#define CR0_TS         0x00000008      // Task Switched
+#define CR0_ET         0x00000010      // Extension Type
+#define CR0_NE         0x00000020      // Numeric Error
+#define CR0_WP         0x00010000      // Write Protect
+#define CR0_AM         0x00040000      // Alignment Mask
+#define CR0_NW         0x20000000      // Not Writethrough - more tricky than it sounds
+#define CR0_CD         0x40000000      // Cache Disable
+#define CR0_PG         0x80000000      // Paging
+
+// These two relate to the cacheability (L1, etc) of the page directory
+#define CR3_PWT                0x00000008      // Page directory caching write through
+#define CR3_PCD                0x00000010      // Page directory caching disabled
+
+#define CR4_VME                0x00000001      // V86 Mode Extensions
+#define CR4_PVI                0x00000002      // Protected-Mode Virtual Interrupts
+#define CR4_TSD                0x00000004      // Time Stamp Disable
+#define CR4_DE         0x00000008      // Debugging Extensions
+#define CR4_PSE                0x00000010      // Page Size Extensions
+#define CR4_PAE                0x00000020      // Physical Address Extensions
+#define CR4_MCE                0x00000040      // Machine Check Enable
+#define CR4_PGE                0x00000080      // Global Pages Enabled
+#define CR4_PCE                0x00000100      // Performance counter enable
+#define CR4_OSFXSR     0x00000200      // OS support for FXSAVE/FXRSTOR
+#define CR4_OSXMME     0x00000400      // OS support for unmasked SIMD FP exceptions
+#define CR4_VMXE       0x00002000      // VMX enable
+#define CR4_SMXE       0x00004000      // SMX enable
+#define CR4_OSXSAVE    0x00040000      // XSAVE and processor extended states-enabled
+
+// Eflags register
+#define FL_CF          0x00000001      // Carry Flag
+#define FL_PF          0x00000004      // Parity Flag
+#define FL_AF          0x00000010      // Auxiliary carry Flag
+#define FL_ZF          0x00000040      // Zero Flag
+#define FL_SF          0x00000080      // Sign Flag
+#define FL_TF          0x00000100      // Trap Flag
+#define FL_IF          0x00000200      // Interrupt Flag
+#define FL_DF          0x00000400      // Direction Flag
+#define FL_OF          0x00000800      // Overflow Flag
+#define FL_IOPL_MASK   0x00003000      // I/O Privilege Level bitmask
+#define FL_IOPL_0      0x00000000      //   IOPL == 0
+#define FL_IOPL_1      0x00001000      //   IOPL == 1
+#define FL_IOPL_2      0x00002000      //   IOPL == 2
+#define FL_IOPL_3      0x00003000      //   IOPL == 3
+#define FL_NT          0x00004000      // Nested Task
+#define FL_RF          0x00010000      // Resume Flag
+#define FL_VM          0x00020000      // Virtual 8086 mode
+#define FL_AC          0x00040000      // Alignment Check
+#define FL_VIF         0x00080000      // Virtual Interrupt Flag
+#define FL_VIP         0x00100000      // Virtual Interrupt Pending
+#define FL_ID          0x00200000      // ID flag
+
+// Page fault error codes
+#define FEC_PR         0x1     // Page fault caused by protection violation
+#define FEC_WR         0x2     // Page fault caused by a write
+#define FEC_U          0x4     // Page fault occurred while in user mode
+
+
+/*
+ *
+ *     Part 2.  Segmentation data structures and constants.
+ *
+ */
+
+// Global descriptor numbers
+#define GD_KT     0x08     // kernel text
+#define GD_KD     0x10     // kernel data
+#define GD_UT     0x18     // user text
+#define GD_UD     0x20     // user data
+#define GD_TSS    0x28     // Task segment selector
+
+#ifdef __ASSEMBLER__
+
+/*
+ * Macros to build GDT entries in assembly.
+ */
+#define SEG_NULL                                               \
+       .word 0, 0;                                             \
+       .byte 0, 0, 0, 0
+#define SEG(type,base,lim)                                     \
+       .word (((lim) >> 12) & 0xffff), ((base) & 0xffff);      \
+       .byte (((base) >> 16) & 0xff), (0x90 | (type)),         \
+               (0xC0 | (((lim) >> 28) & 0xf)), (((base) >> 24) & 0xff)
+
+#else  // not __ASSEMBLER__
+
+#include <arch/types.h>
+
+// Segment Descriptors
+typedef struct Segdesc {
+       unsigned sd_lim_15_0 : 16;  // Low bits of segment limit
+       unsigned sd_base_15_0 : 16; // Low bits of segment base address
+       unsigned sd_base_23_16 : 8; // Middle bits of segment base address
+       unsigned sd_type : 4;       // Segment type (see STS_ constants)
+       unsigned sd_s : 1;          // 0 = system, 1 = application
+       unsigned sd_dpl : 2;        // Descriptor Privilege Level
+       unsigned sd_p : 1;          // Present
+       unsigned sd_lim_19_16 : 4;  // High bits of segment limit
+       unsigned sd_avl : 1;        // Unused (available for software use)
+       unsigned sd_rsv1 : 1;       // Reserved
+       unsigned sd_db : 1;         // 0 = 16-bit segment, 1 = 32-bit segment
+       unsigned sd_g : 1;          // Granularity: limit scaled by 4K when set
+       unsigned sd_base_31_24 : 8; // High bits of segment base address
+} segdesc_t;
+// Null segment
+#define SEG_NULL       (segdesc_t){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+// Segment that is loadable but faults when used
+#define SEG_FAULT      (segdesc_t){ 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0 }
+// Normal segment
+#define SEG(type, base, lim, dpl) (segdesc_t)                                          \
+{ ((lim) >> 12) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff,      \
+    type, 1, dpl, 1, (unsigned) (lim) >> 28, 0, 0, 1, 1,                       \
+    (unsigned) (base) >> 24 }
+#define SEG16(type, base, lim, dpl) (segdesc_t)                                                \
+{ (lim) & 0xffff, (base) & 0xffff, ((base) >> 16) & 0xff,                      \
+    type, 1, dpl, 1, (unsigned) (lim) >> 16, 0, 0, 1, 0,                       \
+    (unsigned) (base) >> 24 }
+
+#endif /* !__ASSEMBLER__ */
+
+// Application segment type bits
+#define STA_X          0x8         // Executable segment
+#define STA_E          0x4         // Expand down (non-executable segments)
+#define STA_C          0x4         // Conforming code segment (executable only)
+#define STA_W          0x2         // Writeable (non-executable segments)
+#define STA_R          0x2         // Readable (executable segments)
+#define STA_A          0x1         // Accessed
+
+// System segment type bits
+#define STS_T16A       0x1         // Available 16-bit TSS
+#define STS_LDT                0x2         // Local Descriptor Table
+#define STS_T16B       0x3         // Busy 16-bit TSS
+#define STS_CG16       0x4         // 16-bit Call Gate
+#define STS_TG         0x5         // Task Gate
+#define STS_IG16       0x6         // 16-bit Interrupt Gate
+#define STS_TG16       0x7         // 16-bit Trap Gate
+#define STS_T32A       0x9         // Available 32-bit TSS
+#define STS_T32B       0xB         // Busy 32-bit TSS
+#define STS_CG32       0xC         // 32-bit Call Gate
+#define STS_IG32       0xE         // 32-bit Interrupt Gate
+#define STS_TG32       0xF         // 32-bit Trap Gate
+
+#define SEG_COUNT      6               // Number of segments in the steady state
+
+/*
+ *
+ *     Part 3.  Traps.
+ *
+ */
+
+#ifndef __ASSEMBLER__
+
+// Task state segment format (as described by the Pentium architecture book)
+typedef struct Taskstate {
+       uint32_t ts_link;       // Old ts selector
+       uintptr_t ts_esp0;      // Stack pointers and segment selectors
+       uint16_t ts_ss0;        //   after an increase in privilege level
+       uint16_t ts_padding1;
+       uintptr_t ts_esp1;
+       uint16_t ts_ss1;
+       uint16_t ts_padding2;
+       uintptr_t ts_esp2;
+       uint16_t ts_ss2;
+       uint16_t ts_padding3;
+       physaddr_t ts_cr3;      // Page directory base
+       uintptr_t ts_eip;       // Saved state from last task switch
+       uint32_t ts_eflags;
+       uint32_t ts_eax;        // More saved state (registers)
+       uint32_t ts_ecx;
+       uint32_t ts_edx;
+       uint32_t ts_ebx;
+       uintptr_t ts_esp;
+       uintptr_t ts_ebp;
+       uint32_t ts_esi;
+       uint32_t ts_edi;
+       uint16_t ts_es;         // Even more saved state (segment selectors)
+       uint16_t ts_padding4;
+       uint16_t ts_cs;
+       uint16_t ts_padding5;
+       uint16_t ts_ss;
+       uint16_t ts_padding6;
+       uint16_t ts_ds;
+       uint16_t ts_padding7;
+       uint16_t ts_fs;
+       uint16_t ts_padding8;
+       uint16_t ts_gs;
+       uint16_t ts_padding9;
+       uint16_t ts_ldt;
+       uint16_t ts_padding10;
+       uint16_t ts_t;          // Trap on task switch
+       uint16_t ts_iomb;       // I/O map base address
+} taskstate_t;
+
+// Gate descriptors for interrupts and traps
+typedef struct Gatedesc {
+       unsigned gd_off_15_0 : 16;   // low 16 bits of offset in segment
+       unsigned gd_ss : 16;         // segment selector
+       unsigned gd_args : 5;        // # args, 0 for interrupt/trap gates
+       unsigned gd_rsv1 : 3;        // reserved(should be zero I guess)
+       unsigned gd_type : 4;        // type(STS_{TG,IG32,TG32})
+       unsigned gd_s : 1;           // must be 0 (system)
+       unsigned gd_dpl : 2;         // DPL - highest ring allowed to use this
+       unsigned gd_p : 1;           // Present
+       unsigned gd_off_31_16 : 16;  // high bits of offset in segment
+} gatedesc_t;
+
+// Set up a normal interrupt/trap gate descriptor.
+// - istrap: 1 for a trap (= exception) gate, 0 for an interrupt gate.
+//   - interrupt gates automatically disable interrupts (cli)
+// - sel: Code segment selector for interrupt/trap handler
+// - off: Offset in code segment for interrupt/trap handler
+// - dpl: Descriptor Privilege Level -
+//       the privilege level required for software to invoke
+//       this interrupt/trap gate explicitly using an int instruction.
+#define SETGATE(gate, istrap, sel, off, dpl)                   \
+{                                                              \
+       (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff;         \
+       (gate).gd_ss = (sel);                                   \
+       (gate).gd_args = 0;                                     \
+       (gate).gd_rsv1 = 0;                                     \
+       (gate).gd_type = (istrap) ? STS_TG32 : STS_IG32;        \
+       (gate).gd_s = 0;                                        \
+       (gate).gd_dpl = (dpl);                                  \
+       (gate).gd_p = 1;                                        \
+       (gate).gd_off_31_16 = (uint32_t) (off) >> 16;           \
+}
+
+// Set up a call gate descriptor.
+#define SETCALLGATE(gate, ss, off, dpl)                        \
+{                                                              \
+       (gate).gd_off_15_0 = (uint32_t) (off) & 0xffff;         \
+       (gate).gd_ss = (ss);                                    \
+       (gate).gd_args = 0;                                     \
+       (gate).gd_rsv1 = 0;                                     \
+       (gate).gd_type = STS_CG32;                              \
+       (gate).gd_s = 0;                                        \
+       (gate).gd_dpl = (dpl);                                  \
+       (gate).gd_p = 1;                                        \
+       (gate).gd_off_31_16 = (uint32_t) (off) >> 16;           \
+}
+
+// Pseudo-descriptors used for LGDT, LLDT and LIDT instructions.
+typedef struct Pseudodesc {
+       uint16_t pd_lim;                // Limit
+       uint32_t pd_base;               // Base address
+} __attribute__ ((packed)) pseudodesc_t;
+
+extern segdesc_t (COUNT(SEG_COUNT) gdt)[];
+extern pseudodesc_t gdt_pd;
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* !ROS_INC_MMU_H */
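
A worked example of the address-decomposition macros above (a sketch, not
from the commit): the linear address 0x00801234 splits into directory index
2, table index 1, and page offset 0x234.

    #include <assert.h>

    void pdx_example(void)
    {
        uintptr_t la = 0x00801234;

        assert(PDX(la)   == 2);        /* la >> 22 */
        assert(PTX(la)   == 1);        /* (la >> 12) & 0x3FF */
        assert(PGOFF(la) == 0x234);    /* la & 0xFFF */
        /* PGADDR(2, 1, 0x234) reassembles the same address */
    }
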
diff --git a/arch/i386/include/smp.h b/arch/i386/include/smp.h
new file mode 100644 (file)
index 0000000..8eedef4
--- /dev/null
@@ -0,0 +1,14 @@
+#ifndef ROS_ARCH_SMP_H
+#define ROS_ARCH_SMP_H
+
+#include <atomic.h>
+
+// be careful changing this, esp if you go over 16
+#define NUM_HANDLER_WRAPPERS           5
+
+typedef struct HandlerWrapper {
+       checklist_t* cpu_list;
+       uint8_t vector;
+} handler_wrapper_t;
+
+#endif
diff --git a/arch/i386/include/trap.h b/arch/i386/include/trap.h
new file mode 100644 (file)
index 0000000..00767cb
--- /dev/null
@@ -0,0 +1,86 @@
+#ifndef ROS_INCLUDE_ARCH_TRAP_H
+#define ROS_INCLUDE_ARCH_TRAP_H
+
+#define MSR_IA32_SYSENTER_CS 0x174
+#define MSR_IA32_SYSENTER_ESP 0x175
+#define MSR_IA32_SYSENTER_EIP 0x176
+
+// Trap numbers
+// These are processor defined:
+#define T_DIVIDE     0         // divide error
+#define T_DEBUG      1         // debug exception
+#define T_NMI        2         // non-maskable interrupt
+#define T_BRKPT      3         // breakpoint
+#define T_OFLOW      4         // overflow
+#define T_BOUND      5         // bounds check
+#define T_ILLOP      6         // illegal opcode
+#define T_DEVICE     7         // device not available 
+#define T_DBLFLT     8         // double fault
+/* #define T_COPROC  9 */      // reserved (not generated by recent processors)
+#define T_TSS       10         // invalid task switch segment
+#define T_SEGNP     11         // segment not present
+#define T_STACK     12         // stack exception
+#define T_GPFLT     13         // general protection fault
+#define T_PGFLT     14         // page fault
+/* #define T_RES    15 */      // reserved
+#define T_FPERR     16         // floating point error
+#define T_ALIGN     17         // alignment check
+#define T_MCHK      18         // machine check
+#define T_SIMDERR   19         // SIMD floating point error
+
+// These are arbitrarily chosen, but with care not to overlap
+// processor defined exceptions or interrupt vectors.
+#define T_SYSCALL   0x80                       // system call
+#define T_DEFAULT   0xdeadbeef         // catchall
+
+#ifndef __ASSEMBLER__
+
+#include <arch/types.h>
+#include <arch/mmu.h>
+
+/* The kernel's interrupt descriptor table */
+extern gatedesc_t idt[];
+extern taskstate_t ts;
+
+typedef struct PushRegs {
+       /* registers as pushed by pusha */
+       uint32_t reg_edi;
+       uint32_t reg_esi;
+       uint32_t reg_ebp;
+       uint32_t reg_oesp;              /* Useless */
+       uint32_t reg_ebx;
+       uint32_t reg_edx;
+       uint32_t reg_ecx;
+       uint32_t reg_eax;
+} push_regs_t;
+
+typedef struct Trapframe {
+       push_regs_t tf_regs;
+       uint16_t tf_es;
+       uint16_t tf_padding1;
+       uint16_t tf_ds;
+       uint16_t tf_padding2;
+       uint32_t tf_trapno;
+       /* below here defined by x86 hardware */
+       uint32_t tf_err;
+       uintptr_t tf_eip;
+       uint16_t tf_cs;
+       uint16_t tf_padding3;
+       uint32_t tf_eflags;
+       /* below here only when crossing rings, such as from user to kernel */
+       uintptr_t tf_esp;
+       uint16_t tf_ss;
+       uint16_t tf_padding4;
+} trapframe_t;
+
+typedef struct AncillaryState {
+       uint32_t silly; // remove this when you actually use this struct
+} ancillary_state_t;
+
+#endif /* !__ASSEMBLER__ */
+
+// Must equal 'sizeof(trapframe_t)'.
+// A static_assert in kern/trap.c checks this.
+#define SIZEOF_STRUCT_TRAPFRAME        0x44
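+// (Byte count: 8 pusha registers plus es/pad, ds/pad, trapno, err, eip,
+// cs/pad, eflags, esp, ss/pad = 8*4 + 9*4 = 68 = 0x44.)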
+
+#endif /* !ROS_INC_ARCH_TRAP_H */
diff --git a/arch/i386/include/types.h b/arch/i386/include/types.h
new file mode 100644 (file)
index 0000000..0e65d64
--- /dev/null
@@ -0,0 +1,152 @@
+#ifndef ROS_INC_TYPES_H
+#define ROS_INC_TYPES_H
+
+#define LITTLE_ENDIAN
+
+#ifndef NULL
+#define NULL ((void*) 0)
+#endif
+
+#ifndef TRUE
+#define TRUE   1
+#endif
+
+#ifndef FALSE
+#define FALSE  0
+#endif
+
+#define CHECK_FLAG(flags,bit)   ((flags) & (1 << (bit)))
+
+// Represents true-or-false values
+typedef int bool;
+
+// Explicitly-sized versions of integer types
+typedef __signed char int8_t;
+typedef unsigned char uint8_t;
+typedef short int16_t;
+typedef unsigned short uint16_t;
+typedef int int32_t;
+typedef unsigned int uint32_t;
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+
+// Pointers and addresses are 32 bits long.
+// We use pointer types to represent virtual addresses,
+// uintptr_t to represent the numerical values of virtual addresses,
+// and physaddr_t to represent physical addresses.
+typedef int32_t intptr_t;
+typedef uint32_t uintptr_t;
+typedef uint32_t physaddr_t;
+
+// Registers are 32 bits long
+typedef int32_t intreg_t;
+typedef uint32_t uintreg_t;
+
+// Page numbers are 32 bits long.
+typedef uint32_t ppn_t;
+
+// size_t is used for memory object sizes.
+typedef uint32_t size_t;
+// ssize_t is a signed version of size_t, used for functions that may need
+// to return an error.
+typedef int32_t ssize_t;
+
+// off_t is used for file offsets and lengths.
+typedef int32_t off_t;
+
+// Efficient min and max operations
+#define MIN(_a, _b)                                            \
+({                                                             \
+       typeof(_a) __a = (_a);                                  \
+       typeof(_b) __b = (_b);                                  \
+       __a <= __b ? __a : __b;                                 \
+})
+#define MAX(_a, _b)                                            \
+({                                                             \
+       typeof(_a) __a = (_a);                                  \
+       typeof(_b) __b = (_b);                                  \
+       __a >= __b ? __a : __b;                                 \
+})
+
+// Rounding operations (efficient when n is a power of 2)
+// Round down to the nearest multiple of n
+#define ROUNDDOWN(a, n)                                                \
+({                                                             \
+       uint32_t __a = (uint32_t) (a);                          \
+       (typeof(a)) (__a - __a % (n));                          \
+})
+// Round up to the nearest multiple of n
+#define ROUNDUP(a, n)                                          \
+({                                                             \
+       uint32_t __n = (uint32_t) (n);                          \
+       (typeof(a)) (ROUNDDOWN((uint32_t) (a) + __n - 1, __n)); \
+})
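+// Worked example, assuming the usual 4KB pages (PGSIZE == 0x1000):
+// ROUNDDOWN(0x1234, 0x1000) == 0x1000 and ROUNDUP(0x1234, 0x1000) == 0x2000.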
+
+// Return the offset of 'member' relative to the beginning of a struct type
+#ifndef offsetof
+#define offsetof(type, member)  ((size_t) (&((type*)0)->member))
+#endif
+
+// Ivy currently can only handle 63 bits (OCaml thing), so use this to make
+// a uint64_t programmatically
+#define UINT64(upper, lower) ( (((uint64_t)(upper)) << 32) | (lower) )
+
+/*********************** Bitmask stuff **********************/
+#define BYTES_FOR_BITMASK(size) (((size) - 1) / 8 + 1)
+#define BYTES_FOR_BITMASK_WITH_CHECK(size) ((size) ? ((size) - (1)) / (8) + (1) : (0))
+#define DECL_BITMASK(name, size) uint8_t (name)[BYTES_FOR_BITMASK((size))]
+
+#define GET_BITMASK_BIT(name, bit) (((name)[(bit)/8] & (1 << ((bit) % 8))) ? 1 : 0)
+#define SET_BITMASK_BIT(name, bit) ((name)[(bit)/8] |= (1 << ((bit) % 8)))
+#define CLR_BITMASK_BIT(name, bit) ((name)[(bit)/8] &= ~(1 << ((bit) % 8)))
+#define SET_BITMASK_BIT_ATOMIC(name, bit) (atomic_orb(&(name)[(bit)/8], (1 << ((bit) % 8))))
+#define CLR_BITMASK_BIT_ATOMIC(name, bit) (atomic_andb(&(name)[(bit)/8], ~(1 << ((bit) % 8))))
+
+#define CLR_BITMASK(name, size) \
+({ \
+       {TRUSTEDBLOCK \
+       memset((void*)((uintptr_t)(name)), 0, BYTES_FOR_BITMASK((size))); \
+       } \
+})
+
+#define FILL_BITMASK(name, size) \
+({ \
+       {TRUSTEDBLOCK \
+       memset((void*)((uintptr_t)(name)), 255, BYTES_FOR_BITMASK((size))); \
+       } \
+       (name)[BYTES_FOR_BITMASK((size))-1] >>= (((size) % 8) ? (8 - ((size) % 8)) : 0 ); \
+}) 
+
+#define COPY_BITMASK(newmask, oldmask, size) \
+({ \
+       {TRUSTEDBLOCK \
+       memcpy((void*)((uintptr_t)(newmask)), \
+           (void*)((uintptr_t)(oldmask)), \
+           BYTES_FOR_BITMASK((size))); \
+       } \
+})
+
+// this checks the entire last byte, so keep it 0 in the other macros
+#define BITMASK_IS_CLEAR(name, size) ({ \
+       uint32_t __n = BYTES_FOR_BITMASK((size)); \
+       bool clear = 1; \
+       while (__n-- > 0) { \
+               if ((name)[__n]) { \
+                       clear = 0; \
+                       break;\
+               }\
+       } \
+       clear; })
+
+#define PRINT_BITMASK(name, size) { \
+       int i;  \
+       for (i = 0; i < BYTES_FOR_BITMASK(size); i++) { \
+               int j;  \
+               for (j = 0; j < 8; j++) \
+                       printk("%x", ((name)[i] >> j) & 1);     \
+       } \
+       printk("\n"); \
+}
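+// Illustrative usage sketch -- the name and size below are made up:
+//
+//   DECL_BITMASK(mask, 37);       // 5 bytes; bits 37-39 must stay clear
+//   CLR_BITMASK(mask, 37);
+//   SET_BITMASK_BIT(mask, 4);
+//   assert(!BITMASK_IS_CLEAR(mask, 37));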
+/**************************************************************/
+
+#endif /* !ROS_INC_TYPES_H */
diff --git a/arch/i386/include/x86.h b/arch/i386/include/x86.h
new file mode 100644 (file)
index 0000000..c44d770
--- /dev/null
@@ -0,0 +1,317 @@
+#ifndef ROS_INC_X86_H
+#define ROS_INC_X86_H
+
+#include <arch/types.h>
+#include <arch/mmu.h>
+
+/* Model Specific Registers */
+#define IA32_APIC_BASE                         0x1b
+#define IA32_MTRR_DEF_TYPE                     0x2ff
+#define IA32_MTRR_PHYSBASE0                    0x200
+#define IA32_MTRR_PHYSMASK0                    0x201
+#define IA32_MTRR_PHYSBASE1                    0x202
+#define IA32_MTRR_PHYSMASK1                    0x203
+#define IA32_MTRR_PHYSBASE2                    0x204
+#define IA32_MTRR_PHYSMASK2                    0x205
+#define IA32_MTRR_PHYSBASE3                    0x206
+#define IA32_MTRR_PHYSMASK3                    0x207
+#define IA32_MTRR_PHYSBASE4                    0x208
+#define IA32_MTRR_PHYSMASK4                    0x209
+#define IA32_MTRR_PHYSBASE5                    0x20a
+#define IA32_MTRR_PHYSMASK5                    0x20b
+#define IA32_MTRR_PHYSBASE6                    0x20c
+#define IA32_MTRR_PHYSMASK6                    0x20d
+#define IA32_MTRR_PHYSBASE7                    0x20e
+#define IA32_MTRR_PHYSMASK7                    0x20f
+
+#define MSR_APIC_ENABLE                                0x00000800
+#define MSR_APIC_BASE_ADDRESS          0x0000000FFFFFF000
+
+/* CPUID */
+#define CPUID_PSE_SUPPORT                      0x00000008
+
+/* Arch Constants */
+#define MAX_NUM_CPUS                           255
+
+static __inline uint8_t inb(int port) __attribute__((always_inline));
+static __inline void insb(int port, void *addr, int cnt) __attribute__((always_inline));
+static __inline uint16_t inw(int port) __attribute__((always_inline));
+static __inline void insw(int port, void *addr, int cnt) __attribute__((always_inline));
+static __inline uint32_t inl(int port) __attribute__((always_inline));
+static __inline void insl(int port, void *addr, int cnt) __attribute__((always_inline));
+static __inline void outb(int port, uint8_t data) __attribute__((always_inline));
+static __inline void outsb(int port, const void *addr, int cnt) __attribute__((always_inline));
+static __inline void outw(int port, uint16_t data) __attribute__((always_inline));
+static __inline void outsw(int port, const void *addr, int cnt) __attribute__((always_inline));
+static __inline void outsl(int port, const void *addr, int cnt) __attribute__((always_inline));
+static __inline void outl(int port, uint32_t data) __attribute__((always_inline));
+static __inline void lidt(void *p) __attribute__((always_inline));
+static __inline void lldt(uint16_t sel) __attribute__((always_inline));
+static __inline void ltr(uint16_t sel) __attribute__((always_inline));
+static __inline void lcr0(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr0(void) __attribute__((always_inline));
+static __inline uint32_t rcr2(void) __attribute__((always_inline));
+static __inline void lcr3(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr3(void) __attribute__((always_inline));
+static __inline void lcr4(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr4(void) __attribute__((always_inline));
+static __inline uint32_t read_eflags(void) __attribute__((always_inline));
+static __inline void write_eflags(uint32_t eflags) __attribute__((always_inline));
+static __inline uint32_t read_ebp(void) __attribute__((always_inline));
+static __inline uint32_t read_esp(void) __attribute__((always_inline));
+static __inline void cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp);
+static __inline uint64_t read_msr(uint32_t reg) __attribute__((always_inline));
+static __inline void write_msr(uint32_t reg, uint64_t val) __attribute__((always_inline));
+static __inline uint32_t read_mmreg32(uint32_t reg) __attribute__((always_inline));
+static __inline void write_mmreg32(uint32_t reg, uint32_t val) __attribute__((always_inline));
+static __inline void wbinvd(void) __attribute__((always_inline));
+
+static __inline uint8_t
+inb(int port)
+{
+       uint8_t data;
+       __asm __volatile("inb %w1,%0" : "=a" (data) : "d" (port));
+       return data;
+}
+
+static __inline void
+insb(int port, void *addr, int cnt)
+{
+       __asm __volatile("cld\n\trepne\n\tinsb"                 :
+                        "=D" (addr), "=c" (cnt)                :
+                        "d" (port), "0" (addr), "1" (cnt)      :
+                        "memory", "cc");
+}
+
+static __inline uint16_t
+inw(int port)
+{
+       uint16_t data;
+       __asm __volatile("inw %w1,%0" : "=a" (data) : "d" (port));
+       return data;
+}
+
+static __inline void
+insw(int port, void *addr, int cnt)
+{
+       __asm __volatile("cld\n\trepne\n\tinsw"                 :
+                        "=D" (addr), "=c" (cnt)                :
+                        "d" (port), "0" (addr), "1" (cnt)      :
+                        "memory", "cc");
+}
+
+static __inline uint32_t
+inl(int port)
+{
+       uint32_t data;
+       __asm __volatile("inl %w1,%0" : "=a" (data) : "d" (port));
+       return data;
+}
+
+static __inline void
+insl(int port, void *addr, int cnt)
+{
+       __asm __volatile("cld\n\trepne\n\tinsl"                 :
+                        "=D" (addr), "=c" (cnt)                :
+                        "d" (port), "0" (addr), "1" (cnt)      :
+                        "memory", "cc");
+}
+
+static __inline void
+outb(int port, uint8_t data)
+{
+       __asm __volatile("outb %0,%w1" : : "a" (data), "d" (port));
+}
+
+static __inline void
+outsb(int port, const void *addr, int cnt)
+{
+       __asm __volatile("cld\n\trepne\n\toutsb"                :
+                        "=S" (addr), "=c" (cnt)                :
+                        "d" (port), "0" (addr), "1" (cnt)      :
+                        "cc");
+}
+
+static __inline void
+outw(int port, uint16_t data)
+{
+       __asm __volatile("outw %0,%w1" : : "a" (data), "d" (port));
+}
+
+static __inline void
+outsw(int port, const void *addr, int cnt)
+{
+       __asm __volatile("cld\n\trepne\n\toutsw"                :
+                        "=S" (addr), "=c" (cnt)                :
+                        "d" (port), "0" (addr), "1" (cnt)      :
+                        "cc");
+}
+
+static __inline void
+outsl(int port, const void *addr, int cnt)
+{
+       __asm __volatile("cld\n\trepne\n\toutsl"                :
+                        "=S" (addr), "=c" (cnt)                :
+                        "d" (port), "0" (addr), "1" (cnt)      :
+                        "cc");
+}
+
+static __inline void
+outl(int port, uint32_t data)
+{
+       __asm __volatile("outl %0,%w1" : : "a" (data), "d" (port));
+}
+
+static __inline void
+lidt(void *p)
+{
+       __asm __volatile("lidt (%0)" : : "r" (p));
+}
+
+static __inline void
+lldt(uint16_t sel)
+{
+       __asm __volatile("lldt %0" : : "r" (sel));
+}
+
+static __inline void
+ltr(uint16_t sel)
+{
+       __asm __volatile("ltr %0" : : "r" (sel));
+}
+
+static __inline void
+lcr0(uint32_t val)
+{
+       __asm __volatile("movl %0,%%cr0" : : "r" (val));
+}
+
+static __inline uint32_t
+rcr0(void)
+{
+       uint32_t val;
+       __asm __volatile("movl %%cr0,%0" : "=r" (val));
+       return val;
+}
+
+static __inline uint32_t
+rcr2(void)
+{
+       uint32_t val;
+       __asm __volatile("movl %%cr2,%0" : "=r" (val));
+       return val;
+}
+
+static __inline void
+lcr3(uint32_t val)
+{
+       __asm __volatile("movl %0,%%cr3" : : "r" (val));
+}
+
+static __inline uint32_t
+rcr3(void)
+{
+       uint32_t val;
+       __asm __volatile("movl %%cr3,%0" : "=r" (val));
+       return val;
+}
+
+static __inline void
+lcr4(uint32_t val)
+{
+       __asm __volatile("movl %0,%%cr4" : : "r" (val));
+}
+
+static __inline uint32_t
+rcr4(void)
+{
+       uint32_t cr4;
+       __asm __volatile("movl %%cr4,%0" : "=r" (cr4));
+       return cr4;
+}
+
+static __inline uint32_t
+read_eflags(void)
+{
+        uint32_t eflags;
+        __asm __volatile("pushfl; popl %0" : "=r" (eflags));
+        return eflags;
+}
+
+static __inline void
+write_eflags(uint32_t eflags)
+{
+        __asm __volatile("pushl %0; popfl" : : "r" (eflags));
+}
+
+static __inline uint32_t
+read_ebp(void)
+{
+        uint32_t ebp;
+        __asm __volatile("movl %%ebp,%0" : "=r" (ebp));
+        return ebp;
+}
+
+static __inline uint32_t
+read_esp(void)
+{
+        uint32_t esp;
+        __asm __volatile("movl %%esp,%0" : "=r" (esp));
+        return esp;
+}
+
+static __inline void
+cpuid(uint32_t info, uint32_t *eaxp, uint32_t *ebxp, uint32_t *ecxp, uint32_t *edxp)
+{
+       uint32_t eax, ebx, ecx, edx;
+       asm volatile("cpuid" 
+               : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+               : "a" (info));
+       if (eaxp)
+               *eaxp = eax;
+       if (ebxp)
+               *ebxp = ebx;
+       if (ecxp)
+               *ecxp = ecx;
+       if (edxp)
+               *edxp = edx;
+}
+
+// Might need an mfence around rdmsr.  Supposedly wrmsr serializes, but not
+// for the x2APIC.
+static __inline uint64_t
+read_msr(uint32_t reg)
+{
+       uint32_t edx, eax;
+       asm volatile("rdmsr; mfence" : "=d"(edx), "=a"(eax) : "c"(reg));
+       return (uint64_t)edx << 32 | eax;
+}
+
+static __inline void
+write_msr(uint32_t reg, uint64_t val)
+{
+       asm volatile("wrmsr" : : "d"((uint32_t)(val >> 32)),
+                                "a"((uint32_t)(val & 0xFFFFFFFF)), 
+                                "c"(reg));
+}
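+
+// Sketch of a typical read-modify-write with these helpers, using the
+// IA32_APIC_BASE MSR and enable bit defined above (for illustration only):
+//
+//   uint64_t apic_base = read_msr(IA32_APIC_BASE);
+//   write_msr(IA32_APIC_BASE, apic_base | MSR_APIC_ENABLE);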
+
+static __inline void
+write_mmreg32(uint32_t reg, uint32_t val)
+{
+       {TRUSTEDBLOCK *((volatile uint32_t*)reg) = val; }
+       //the C ends up producing better asm than this:
+       //asm volatile("movl %0, (%1)" : : "r"(val), "r"(reg));
+}
+
+static __inline uint32_t
+read_mmreg32(uint32_t reg)
+{
+       {TRUSTEDBLOCK return *((volatile uint32_t*)reg); }
+}
+
+static __inline void
+wbinvd(void)
+{
+       asm volatile("wbinvd");
+}
+
+#endif /* !ROS_INC_X86_H */
diff --git a/arch/i386/kernel.ld b/arch/i386/kernel.ld
new file mode 100644 (file)
index 0000000..03b5d9a
--- /dev/null
@@ -0,0 +1,62 @@
+/* Simple linker script for the ROS kernel.
+   See the GNU ld 'info' manual ("info ld") to learn the syntax. */
+
+OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
+OUTPUT_ARCH(i386)
+ENTRY(_start)
+
+SECTIONS
+{
+	/* Link the kernel at virtual address 0xC01000C0, but load it at
+	   physical address 0x001000C0 */
+
+       .text 0xC01000C0 : AT(0x001000C0) {
+               *(.text .stub .text.* .gnu.linkonce.t.*)
+       }
+
+       PROVIDE(etext = .);     /* Define the 'etext' symbol to this value */
+
+       .rodata : {
+               *(.rodata .rodata.* .gnu.linkonce.r.*)
+       }
+
+       /* Include debugging information in kernel memory */
+       .stab : {
+               PROVIDE(stab = .);
+               PROVIDE(__STAB_BEGIN__ = .);
+               *(.stab);
+               PROVIDE(estab = .);
+               PROVIDE(__STAB_END__ = .);
+               BYTE(0)         /* Force the linker to allocate space
+                                  for this section */
+       }
+
+       .stabstr : {
+               PROVIDE(stabstr = .);
+               PROVIDE(__STABSTR_BEGIN__ = .);
+               *(.stabstr);
+               PROVIDE(estabstr = .);
+               PROVIDE(__STABSTR_END__ = .);
+               BYTE(0)         /* Force the linker to allocate space
+                                  for this section */
+       }
+
+       /* Adjust the address for the data segment to the next page */
+       . = ALIGN(0x1000);
+
+       /* The data segment */
+       .data : {
+               *(.data)
+       }
+
+       PROVIDE(edata = .);
+
+       .bss : {
+               *(.bss)
+       }
+
+       PROVIDE(end = .);
+
+       /DISCARD/ : {
+               *(.eh_frame .note.GNU-stack)
+       }
+}
diff --git a/arch/i386/src/Makefrag b/arch/i386/src/Makefrag
new file mode 100644 (file)
index 0000000..538dbf5
--- /dev/null
@@ -0,0 +1,25 @@
+# Makefile fragment for ROS kernel.
+# This is NOT a complete makefile;
+# you must run GNU make in the top-level directory
+# where the GNUmakefile is located.
+#
+
+KERN_ARCH_SRC_DIR = $(KERN_DIR)/src/arch
+
+# entry.S must be first, so that it's the first code in the text segment!!!
+#
+# We also snatch the use of a couple handy source files
+# from the lib directory, to avoid gratuitous code duplication.
+KERN_ARCH_SRCFILES := $(KERN_ARCH_SRC_DIR)/entry.S \
+                      $(KERN_ARCH_SRC_DIR)/smp_entry.S \
+                      $(KERN_ARCH_SRC_DIR)/cpuinfo.c \
+                      $(KERN_ARCH_SRC_DIR)/console.c \
+                      $(KERN_ARCH_SRC_DIR)/smp_boot.c \
+                      $(KERN_ARCH_SRC_DIR)/pmap.c \
+                      $(KERN_ARCH_SRC_DIR)/trapentry.S \
+                      $(KERN_ARCH_SRC_DIR)/trap.c \
+                      $(KERN_ARCH_SRC_DIR)/kclock.c \
+                      $(KERN_ARCH_SRC_DIR)/smp.c \
+                      $(KERN_ARCH_SRC_DIR)/apic.c \
+                      $(KERN_ARCH_SRC_DIR)/kdebug.c \
+                      $(KERN_ARCH_SRC_DIR)/env.c
diff --git a/arch/i386/src/apic.c b/arch/i386/src/apic.c
new file mode 100644 (file)
index 0000000..ee4817e
--- /dev/null
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2009 The Regents of the University of California
+ * See LICENSE for details.
+ */
+
+#include <arch/mmu.h>
+#include <arch/x86.h>
+#include <arch/arch.h>
+#include <arch/apic.h>
+#include <ros/timer.h>
+#include <assert.h>
+
+system_timing_t system_timing = {0, 0, 0xffff, 0};
+
+/*
+ * Remaps the Programmable Interrupt Controller so IRQs 0-15 arrive on
+ * interrupt vectors 32-47.
+ * http://wiki.osdev.org/PIC
+ * Not 100% certain of the details here: judging from
+ * http://bochs.sourceforge.net/techspec/PORTS.LST, the cascading setup might
+ * need to go out as a single command sequence, and after that all we are doing
+ * is toggling masks.
+ */
+void pic_remap() 
+{
+       // start initialization
+       outb(PIC1_CMD, 0x11);
+       outb(PIC2_CMD, 0x11);
+       // set new offsets
+       outb(PIC1_DATA, PIC1_OFFSET);
+       outb(PIC2_DATA, PIC2_OFFSET);
+       // set up cascading
+       outb(PIC1_DATA, 0x04);
+       outb(PIC2_DATA, 0x02);
+       // other stuff (put in 8086/88 mode, or whatever)
+       outb(PIC1_DATA, 0x01);
+       outb(PIC2_DATA, 0x01);
+       // set masks, defaulting to all masked for now
+       outb(PIC1_DATA, 0xff);
+       outb(PIC2_DATA, 0xff);
+}
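+
+// Illustrative usage at boot (IRQ 1, the keyboard, is just an example):
+//
+//   pic_remap();         // move IRQs 0-15 up to vectors 32-47
+//   pic_unmask_irq(1);   // then let selected devices interrupt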
+
+void pic_mask_irq(uint8_t irq)
+{
+       if (irq > 7)
+               outb(PIC2_DATA, inb(PIC2_DATA) | (1 << (irq - 8)));
+       else
+               outb(PIC1_DATA, inb(PIC1_DATA) | (1 << irq));
+}
+
+void pic_unmask_irq(uint8_t irq)
+{
+       if (irq > 7) {
+               outb(PIC2_DATA, inb(PIC2_DATA) & ~(1 << (irq - 8)));
+               outb(PIC1_DATA, inb(PIC1_DATA) & 0xfb); // make sure irq2 is unmasked
+       } else
+               outb(PIC1_DATA, inb(PIC1_DATA) & ~(1 << irq));
+}
+
+
+/*
+ * Sets the LAPIC timer to go off after a certain number of ticks.  The primary
+ * clock freq is actually the bus clock, which we figure out during timer_init.
+ * Unmasking is implied.  Ref SDM, 3A, 9.6.4
+ */
+void __lapic_set_timer(uint32_t ticks, uint8_t vec, bool periodic, uint8_t div)
+{
+       // clears bottom bit and then set divider
+       write_mmreg32(LAPIC_TIMER_DIVIDE, (read_mmreg32(LAPIC_TIMER_DIVIDE) &~0xf) |
+                     (div & 0xf));
+       // set LVT with interrupt handling information
+       write_mmreg32(LAPIC_LVT_TIMER, vec | (periodic << 17));
+       write_mmreg32(LAPIC_TIMER_INIT, ticks);
+       // For debugging when we expand this
+       //cprintf("LAPIC LVT Timer: 0x%08x\n", read_mmreg32(LAPIC_LVT_TIMER));
+       //cprintf("LAPIC Init Count: 0x%08x\n", read_mmreg32(LAPIC_TIMER_INIT));
+       //cprintf("LAPIC Current Count: 0x%08x\n", read_mmreg32(LAPIC_TIMER_CURRENT));
+}
+
+void lapic_set_timer(uint32_t usec, bool periodic)
+{
+       // divide the bus clock by 128, which is the max.
+       uint32_t ticks = (usec * system_timing.bus_freq / 128) / 1000000;
+       __lapic_set_timer(ticks, LAPIC_TIMER_DEFAULT_VECTOR, periodic,
+                         LAPIC_TIMER_DEFAULT_DIVISOR);
+}
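+
+// Illustrative call: a periodic tick roughly once per second, delivered on
+// LAPIC_TIMER_DEFAULT_VECTOR (defined in apic.h):
+//
+//   lapic_set_timer(1000000, TRUE);    // 1,000,000 us == 1 s, periodic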
+
+uint32_t lapic_get_default_id(void)
+{
+       uint32_t ebx;
+       cpuid(1, 0, &ebx, 0, 0);
+       // p6 family only uses 4 bits here, and 0xf is reserved for the IOAPIC
+       return (ebx & 0xFF000000) >> 24;
+}
+
+// timer init calibrates both tsc timer and lapic timer using PIT
+void timer_init(void){
+       uint64_t tscval[2];
+       long timercount[2];
+       pit_set_timer(0xffff, TIMER_RATEGEN);
+       // assume tsc exist
+       tscval[0] = read_tsc();
+       udelay_pit(1000000);
+       tscval[1] = read_tsc();
+       system_timing.tsc_freq = tscval[1] - tscval[0];
+       
+       cprintf("TSC Frequency: %llu\n", system_timing.tsc_freq);
+
+       __lapic_set_timer(0xffffffff, LAPIC_TIMER_DEFAULT_VECTOR, FALSE,
+                         LAPIC_TIMER_DEFAULT_DIVISOR);
+       // Mask the LAPIC Timer, so we never receive this interrupt (minor race)
+       mask_lapic_lvt(LAPIC_LVT_TIMER);
+       timercount[0] = read_mmreg32(LAPIC_TIMER_CURRENT);
+       udelay_pit(1000000);
+       timercount[1] = read_mmreg32(LAPIC_TIMER_CURRENT);
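+	// The LAPIC has been counting down with the divider
+	// (LAPIC_TIMER_DEFAULT_DIVISOR) at 128 for one PIT-timed second, so
+	// ticks consumed times 128 approximates the bus clock in Hz.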
+       system_timing.bus_freq = (timercount[0] - timercount[1])*128;
+               
+       cprintf("Bus Frequency: %llu\n", system_timing.bus_freq);
+}
+
+void pit_set_timer(uint32_t divisor, uint32_t mode)
+{
+       if (divisor & 0xffff0000)
+               warn("Divisor too large!");
+       mode = TIMER_SEL0|TIMER_16BIT|mode;
+       outb(TIMER_MODE, mode); 
+       outb(TIMER_CNTR0, divisor & 0xff);
+       outb(TIMER_CNTR0, (divisor >> 8) );
+       system_timing.pit_mode = mode;
+       system_timing.pit_divisor = divisor;
+       // cprintf("timer mode set to %d, divisor %d\n",mode, divisor);
+}
+
+static int getpit(void)
+{
+	int high, low;
+	// TODO: need a lock to protect access to PIT
+
+	/* Select timer0 and latch counter value. */
+	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+
+	low = inb(TIMER_CNTR0);
+	high = inb(TIMER_CNTR0);
+
+	return ((high << 8) | low);
+}
+
+// forces the cpu to relax for usec microseconds
+void udelay(uint64_t usec)
+{
+       #if !defined(__BOCHS__)
+	if (system_timing.tsc_freq != 0)
+	{
+		uint64_t start, end, now;
+
+		start = read_tsc();
+		end = start + (system_timing.tsc_freq * usec) / 1000000;
+		//cprintf("start %llu, end %llu\n", start, end);
+		if (end == 0)
+			cprintf("This is terribly wrong\n");
+		do {
+			cpu_relax();
+			now = read_tsc();
+			//cprintf("now %llu\n", now);
+		} while (now < end || (now > start && end < start));
+		return;
+	} else
+       #endif
+       {
+               udelay_pit(usec);
+       }
+}
+
+void udelay_pit(uint64_t usec)
+{
+       
+       int64_t delta, prev_tick, tick, ticks_left;
+       prev_tick = getpit();
+       /*
+        * Calculate (n * (i8254_freq / 1e6)) without using floating point
+        * and without any avoidable overflows.
+        */
+	if (usec == 0)
+               ticks_left = 0;
+       // some optimization from bsd code
+       else if (usec < 256)
+               /*
+                * Use fixed point to avoid a slow division by 1000000.
+                * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
+                * 2^15 is the first power of 2 that gives exact results
+                * for n between 0 and 256.
+                */
+               ticks_left = ((uint64_t)usec * 39099 + (1 << 15) - 1) >> 15;
+       else
+               // round up the ticks left
+               ticks_left = ((uint64_t)usec * (long long)PIT_FREQ+ 999999)
+		ticks_left = ((uint64_t)usec * (long long)PIT_FREQ + 999999)
+		             / 1000000;
+               tick = getpit();
+               delta = prev_tick - tick;
+               prev_tick = tick;
+               if (delta < 0) {
+                       // counter looped around during the delta time period
+                       delta += system_timing.pit_divisor; // maximum count 
+                       if (delta < 0)
+                               delta = 0;
+               }
+               ticks_left -= delta;
+       }
+}
+
+uint64_t gettimer(void)
+{
+       return read_tsc();      
+}
+
+uint64_t getfreq(void)
+{
+       return system_timing.tsc_freq;
+}
+
diff --git a/arch/i386/src/console.c b/arch/i386/src/console.c
new file mode 100644 (file)
index 0000000..e7fa79e
--- /dev/null
@@ -0,0 +1,585 @@
+/* See COPYRIGHT for copyright information. */
+
+#include <arch/x86.h>
+#include <arch/arch.h>
+#include <arch/console.h>
+#include <arch/kbdreg.h>
+#include <atomic.h>
+#include <string.h>
+#include <assert.h>
+
+#include <ros/memlayout.h>
+
+void cons_intr(int (*proc)(void));
+void scroll_screen(void);
+
+
+/***** Serial I/O code *****/
+
+#define COM1           0x3F8
+
+#define        COM_RX                  0               // In:  Receive buffer (DLAB=0)
+#define COM_DLL                        0               // Out: Divisor Latch Low (DLAB=1)
+#define COM_DLM                        1               // Out: Divisor Latch High (DLAB=1)
+#define COM_IER                        1               // Out: Interrupt Enable Register
+#define        COM_IER_RDI             0x01    //   Enable receiver data interrupt
+#define COM_IIR                        2               // In:  Interrupt ID Register
+#define COM_FCR                        2               // Out: FIFO Control Register
+#define COM_LCR                        3               // Out: Line Control Register
+#define        COM_LCR_DLAB    0x80    //   Divisor latch access bit
+#define        COM_LCR_WLEN8   0x03    //   Wordlength: 8 bits
+#define COM_MCR                        4               // Out: Modem Control Register
+#define        COM_MCR_RTS             0x02    // RTS complement
+#define        COM_MCR_DTR             0x01    // DTR complement
+#define        COM_MCR_OUT2    0x08    // Out2 complement
+#define COM_LSR                        5               // In:  Line Status Register
+#define COM_LSR_DATA   0x01    //   Data available
+#define COM_LSR_READY  0x20    //   Ready to send
+
+static bool serial_exists;
+
+int
+serial_proc_data(void)
+{
+       if (!(inb(COM1+COM_LSR) & COM_LSR_DATA))
+               return -1;
+       return inb(COM1+COM_RX);
+}
+
+int serial_read_byte() {
+       return serial_proc_data();
+}
+
+void
+serial_intr(void)
+{
+       if (serial_exists)
+               cons_intr(serial_proc_data);
+}
+
+void
+serial_init(void)
+{
+       // Turn off the FIFO
+       outb(COM1+COM_FCR, 0);
+       
+       // Set speed; requires DLAB latch
+       outb(COM1+COM_LCR, COM_LCR_DLAB);
+       // Setting speed to 115200 (setting the divider to 1)
+       outb(COM1+COM_DLL, 1);
+       outb(COM1+COM_DLM, 0);
+
+       // 8 data bits, 1 stop bit, parity off; turn off DLAB latch
+       outb(COM1+COM_LCR, COM_LCR_WLEN8 & ~COM_LCR_DLAB);
+
+       // This should turn on hardware flow control
+       outb(COM1+COM_MCR, COM_MCR_RTS | COM_MCR_DTR);
+       // Enable rcv interrupts
+       outb(COM1+COM_IER, COM_IER_RDI);
+
+       // Clear any preexisting overrun indications and interrupts
+       // Serial port doesn't exist if COM_LSR returns 0xFF
+       serial_exists = (inb(COM1+COM_LSR) != 0xFF);
+       (void) inb(COM1+COM_IIR);
+       (void) inb(COM1+COM_RX);
+
+}
+
+void serial_send_byte(uint8_t b)
+{
+       while (!(inb(COM1+COM_LSR) & COM_LSR_READY));
+       outb(COM1, b);
+}
+
+static void
+serial_putc(int c)
+{
+       switch (c & 0xff) {
+       case '\b':
+               serial_send_byte('\b');
+               serial_send_byte((uint8_t)(' '));
+               serial_send_byte('\b');
+               break;
+       case '\n':
+       case '\r':
+               serial_send_byte((uint8_t)('\n'));
+               serial_send_byte((uint8_t)('\r'));
+               break;
+       default:
+               serial_send_byte((uint8_t)(c & 0xff));
+               break;
+       }
+       return;
+}
+
+
+
+/***** Parallel port output code *****/
+// For information on PC parallel port programming, see the class References
+// page.
+
+// Stupid I/O delay routine necessitated by historical PC design flaws
+static void
+delay(void)
+{
+       inb(0x84);
+       inb(0x84);
+       inb(0x84);
+       inb(0x84);
+}
+
+static void
+lpt_putc(int c)
+{
+       int i;
+
+       for (i = 0; !(inb(0x378+1) & 0x80) && i < 12800; i++)
+               delay();
+       outb(0x378+0, c);
+       outb(0x378+2, 0x08|0x04|0x01);
+       outb(0x378+2, 0x08);
+}
+
+
+
+
+/***** Text-mode CGA/VGA display output with scrolling *****/
+#define MAX_SCROLL_LENGTH      20
+#define SCROLLING_CRT_SIZE     (MAX_SCROLL_LENGTH * CRT_SIZE)
+
+static unsigned addr_6845;
+static uint16_t *COUNT(CRT_SIZE) crt_buf;
+static uint16_t crt_pos;
+
+static uint16_t scrolling_crt_buf[SCROLLING_CRT_SIZE];
+static uint16_t scrolling_crt_pos;
+static uint8_t current_crt_buf;
+
+void
+cga_init(void)
+{
+       volatile uint16_t *COUNT(CRT_SIZE) cp;
+       uint16_t was;
+       unsigned pos;
+
+       cp = (uint16_t *COUNT(CRT_SIZE)) TC(KERNBASE + CGA_BUF);
+       was = *cp;
+       *cp = (uint16_t) 0xA55A;
+       if (*cp != 0xA55A) {
+               cp = (uint16_t *COUNT(CRT_SIZE)) TC(KERNBASE + MONO_BUF);
+               addr_6845 = MONO_BASE;
+       } else {
+               *cp = was;
+               addr_6845 = CGA_BASE;
+       }
+       
+       /* Extract cursor location */
+       outb(addr_6845, 14);
+       pos = inb(addr_6845 + 1) << 8;
+       outb(addr_6845, 15);
+       pos |= inb(addr_6845 + 1);
+
+       crt_buf = (uint16_t *COUNT(CRT_SIZE)) cp;
+       crt_pos = pos;
+       scrolling_crt_pos = 0;
+       current_crt_buf = 0;
+}
+
+static void set_screen(uint8_t screen_num) {
+       uint16_t leftovers = (scrolling_crt_pos % CRT_COLS);
+       leftovers = (leftovers) ? CRT_COLS - leftovers : 0;
+       
+       int offset = scrolling_crt_pos + leftovers - (screen_num + 1)*CRT_SIZE;
+       offset = (offset > 0) ? offset : 0;
+
+       memcpy(crt_buf, scrolling_crt_buf + offset, CRT_SIZE * sizeof(uint16_t));
+}
+
+static void scroll_screen_up(void) {
+       if(current_crt_buf <  (scrolling_crt_pos / CRT_SIZE))
+               current_crt_buf++;
+       set_screen(current_crt_buf);
+}
+
+static void scroll_screen_down(void) {
+       if(current_crt_buf > 0) 
+               current_crt_buf--;
+       set_screen(current_crt_buf);
+}
+
+static void reset_screen(void) {
+       current_crt_buf = 0;
+       set_screen(current_crt_buf);
+}
+
+void
+cga_putc(int c)
+{
+	// if no attribute given, use light gray on black (attribute 0x07)
+       if (!(c & ~0xFF))
+               c |= 0x0700;
+
+       switch (c & 0xff) {
+       case '\b':
+               if (crt_pos > 0) {
+                       crt_pos--;
+                       scrolling_crt_pos--;
+
+                       crt_buf[crt_pos] = (c & ~0xff) | ' ';
+                       scrolling_crt_buf[scrolling_crt_pos] = crt_buf[crt_pos];
+               }
+               break;
+       case '\n':
+               crt_pos += CRT_COLS;
+               scrolling_crt_pos += CRT_COLS;
+               /* fallthru */
+       case '\r':
+               crt_pos -= (crt_pos % CRT_COLS);
+               scrolling_crt_pos -= (scrolling_crt_pos % CRT_COLS);
+               break;
+       case '\t':
+               cons_putc(' ');
+               cons_putc(' ');
+               cons_putc(' ');
+               cons_putc(' ');
+               cons_putc(' ');
+               break;
+       default:
+               crt_buf[crt_pos++] = c;         /* write the character */
+               scrolling_crt_buf[scrolling_crt_pos++] = c;
+               break;
+       }
+
+	// Make the screen appear to scroll as more lines are written past the
+	// bottom of the monitor: the top line is dropped, everything shifts up
+	// one row, and the newly exposed bottom line is blanked.
+       if (crt_pos >= CRT_SIZE) {
+               int i;
+
+               memcpy(crt_buf, crt_buf + CRT_COLS, (CRT_SIZE - CRT_COLS) * sizeof(uint16_t));
+               for (i = CRT_SIZE - CRT_COLS; i < CRT_SIZE; i++)
+                       crt_buf[i] = 0x0700 | ' ';
+               crt_pos -= CRT_COLS;
+       }
+       // Do the same for the scrolling crt buffer when it hits its capacity
+       if (scrolling_crt_pos >= SCROLLING_CRT_SIZE) {
+               int i;
+
+               memcpy(scrolling_crt_buf, scrolling_crt_buf + CRT_COLS, 
+                      (SCROLLING_CRT_SIZE - CRT_COLS) * sizeof(uint16_t));
+               for (i = SCROLLING_CRT_SIZE - CRT_COLS; i < SCROLLING_CRT_SIZE; i++)
+                       scrolling_crt_buf[i] = 0x0700 | ' ';
+               scrolling_crt_pos -= CRT_COLS;
+       }
+
+
+       /* move that little blinky thing */
+       outb(addr_6845, 14);
+       outb(addr_6845 + 1, crt_pos >> 8);
+       outb(addr_6845, 15);
+       outb(addr_6845 + 1, crt_pos);
+}
+
+
+/***** Keyboard input code *****/
+
+#define NO             0
+
+#define SHIFT  (1<<0)
+#define CTL            (1<<1)
+#define ALT            (1<<2)
+
+#define CAPSLOCK       (1<<3)
+#define NUMLOCK                (1<<4)
+#define SCROLLLOCK     (1<<5)
+
+#define E0ESC          (1<<6)
+
+static uint8_t shiftcode[256] = 
+{
+       [0x1D] CTL,
+       [0x2A] SHIFT,
+       [0x36] SHIFT,
+       [0x38] ALT,
+       [0x9D] CTL,
+       [0xB8] ALT
+};
+
+static uint8_t togglecode[256] = 
+{
+       [0x3A] CAPSLOCK,
+       [0x45] NUMLOCK,
+       [0x46] SCROLLLOCK
+};
+
+static uint8_t normalmap[256] =
+{
+       NO,   0x1B, '1',  '2',  '3',  '4',  '5',  '6',  // 0x00
+       '7',  '8',  '9',  '0',  '-',  '=',  '\b', '\t',
+       'q',  'w',  'e',  'r',  't',  'y',  'u',  'i',  // 0x10
+       'o',  'p',  '[',  ']',  '\n', NO,   'a',  's',
+       'd',  'f',  'g',  'h',  'j',  'k',  'l',  ';',  // 0x20
+       '\'', '`',  NO,   '\\', 'z',  'x',  'c',  'v',
+       'b',  'n',  'm',  ',',  '.',  '/',  NO,   '*',  // 0x30
+       NO,   ' ',  NO,   NO,   NO,   NO,   NO,   NO,
+       NO,   NO,   NO,   NO,   NO,   NO,   NO,   '7',  // 0x40
+       '8',  '9',  '-',  '4',  '5',  '6',  '+',  '1',
+       '2',  '3',  '0',  '.',  NO,   NO,   NO,   NO,   // 0x50
+       [0xC7] KEY_HOME,        [0x9C] '\n' /*KP_Enter*/,
+       [0xB5] '/' /*KP_Div*/,  [0xC8] KEY_UP,
+       [0xC9] KEY_PGUP,        [0xCB] KEY_LF,
+       [0xCD] KEY_RT,          [0xCF] KEY_END,
+       [0xD0] KEY_DN,          [0xD1] KEY_PGDN,
+       [0xD2] KEY_INS,         [0xD3] KEY_DEL
+};
+
+static uint8_t shiftmap[256] = 
+{
+       NO,   033,  '!',  '@',  '#',  '$',  '%',  '^',  // 0x00
+       '&',  '*',  '(',  ')',  '_',  '+',  '\b', '\t',
+       'Q',  'W',  'E',  'R',  'T',  'Y',  'U',  'I',  // 0x10
+       'O',  'P',  '{',  '}',  '\n', NO,   'A',  'S',
+       'D',  'F',  'G',  'H',  'J',  'K',  'L',  ':',  // 0x20
+       '"',  '~',  NO,   '|',  'Z',  'X',  'C',  'V',
+       'B',  'N',  'M',  '<',  '>',  '?',  NO,   '*',  // 0x30
+       NO,   ' ',  NO,   NO,   NO,   NO,   NO,   NO,
+       NO,   NO,   NO,   NO,   NO,   NO,   NO,   '7',  // 0x40
+       '8',  '9',  '-',  '4',  '5',  '6',  '+',  '1',
+       '2',  '3',  '0',  '.',  NO,   NO,   NO,   NO,   // 0x50
+       [0xC7] KEY_HOME,        [0x9C] '\n' /*KP_Enter*/,
+       [0xB5] '/' /*KP_Div*/,  [0xC8] KEY_UP,
+       [0xC9] KEY_PGUP,        [0xCB] KEY_LF,
+       [0xCD] KEY_RT,          [0xCF] KEY_END,
+       [0xD0] KEY_DN,          [0xD1] KEY_PGDN,
+       [0xD2] KEY_INS,         [0xD3] KEY_DEL
+};
+
+#define C(x) (x - '@')
+
+static uint8_t ctlmap[256] = 
+{
+       NO,      NO,      NO,      NO,      NO,      NO,      NO,      NO, 
+       NO,      NO,      NO,      NO,      NO,      NO,      NO,      NO, 
+       C('Q'),  C('W'),  C('E'),  C('R'),  C('T'),  C('Y'),  C('U'),  C('I'),
+       C('O'),  C('P'),  NO,      NO,      '\r',    NO,      C('A'),  C('S'),
+       C('D'),  C('F'),  C('G'),  C('H'),  C('J'),  C('K'),  C('L'),  NO, 
+       NO,      NO,      NO,      C('\\'), C('Z'),  C('X'),  C('C'),  C('V'),
+       C('B'),  C('N'),  C('M'),  NO,      NO,      C('/'),  NO,      NO,
+       [0x97] KEY_HOME,
+       [0xB5] C('/'),          [0xC8] KEY_UP,
+       [0xC9] KEY_PGUP,        [0xCB] KEY_LF,
+       [0xCD] KEY_RT,          [0xCF] KEY_END,
+       [0xD0] KEY_DN,          [0xD1] KEY_PGDN,
+       [0xD2] KEY_INS,         [0xD3] KEY_DEL
+};
+
+static uint8_t * COUNT(256) charcode[4] = {
+       normalmap,
+       shiftmap,
+       ctlmap,
+       ctlmap
+};
+
+/*
+ * Get data from the keyboard.  If we finish a character, return it.  Else 0.
+ * Return -1 if no data.
+ */
+static int
+kbd_proc_data(void)
+{
+       int c;
+       uint8_t data;
+       static uint32_t shift;
+       static bool crt_scrolled = FALSE;
+
+       if ((inb(KBSTATP) & KBS_DIB) == 0)
+               return -1;
+
+       data = inb(KBDATAP);
+
+       if (data == 0xE0) {
+               // E0 escape character
+               shift |= E0ESC;
+               return 0;
+       } else if (data & 0x80) {
+               // Key released
+               data = (shift & E0ESC ? data : data & 0x7F);
+               shift &= ~(shiftcode[data] | E0ESC);
+               return 0;
+       } else if (shift & E0ESC) {
+               // Last character was an E0 escape; or with 0x80
+               data |= 0x80;
+               shift &= ~E0ESC;
+       }
+
+       shift |= shiftcode[data];
+       shift ^= togglecode[data];
+
+       c = charcode[shift & (CTL | SHIFT)][data];
+
+       //Scrolling screen functionality
+       if((shift & SHIFT) && ((c == KEY_UP) || (c == KEY_PGUP))) {
+               crt_scrolled = TRUE;
+               scroll_screen_up();
+               return 0;
+       }
+       else if((shift & SHIFT) && ((c == KEY_DN) || (c == KEY_PGDN))) {
+               crt_scrolled = TRUE;
+               scroll_screen_down();
+               return 0;
+       }
+       else if((shift & SHIFT) && c == KEY_RT) {
+               crt_scrolled = FALSE;
+               reset_screen();
+               return 0;
+       }
+
+       // On keypress other than SHIFT, reset if we were scrolled
+       if(crt_scrolled && (!(shift & SHIFT))) {
+               crt_scrolled = FALSE;
+               reset_screen();
+       }
+
+       //Force character to capital if caps lock on
+       if (shift & CAPSLOCK) {
+               if ('a' <= c && c <= 'z')
+                       c += 'A' - 'a';
+               else if ('A' <= c && c <= 'Z')
+                       c += 'a' - 'A';
+       }
+
+       // Process special keys
+       // Ctrl-Alt-Del: reboot
+       if (!(~shift & (CTL | ALT)) && c == KEY_DEL) {
+               cprintf("Rebooting!\n");
+               outb(0x92, 0x3); // courtesy of Chris Frost
+       }
+
+       return c;
+}
+
+void
+kbd_intr(void)
+{
+       cons_intr(kbd_proc_data);
+}
+
+void
+kbd_init(void)
+{
+}
+
+
+
+/***** General device-independent console code *****/
+// Here we manage the console input buffer,
+// where we stash characters received from the keyboard or serial port
+// whenever the corresponding interrupt occurs.
+
+#define CONSBUFSIZE    512
+
+static struct {
+       uint8_t buf[CONSBUFSIZE];
+       uint32_t rpos;
+       uint32_t wpos;
+} cons;
+
+// called by device interrupt routines to feed input characters
+// into the circular console input buffer.
+void
+cons_intr(int (*proc)(void))
+{
+       int c;
+
+       while ((c = (*proc)()) != -1) {
+               if (c == 0)
+                       continue;
+               cons.buf[cons.wpos++] = c;
+               if (cons.wpos == CONSBUFSIZE)
+                       cons.wpos = 0;
+       }
+}
+
+// return the next input character from the console, or 0 if none waiting
+int
+cons_getc(void)
+{
+       int c;
+
+       // poll for any pending input characters,
+       // so that this function works even when interrupts are disabled
+       // (e.g., when called from the kernel monitor).
+       #ifndef SERIAL_IO
+               serial_intr();
+       #endif
+       kbd_intr();
+
+       // grab the next character from the input buffer.
+       if (cons.rpos != cons.wpos) {
+               c = cons.buf[cons.rpos++];
+               if (cons.rpos == CONSBUFSIZE)
+                       cons.rpos = 0;
+               return c;
+       }
+       return 0;
+}
+
+// output a character to the console
+void
+cons_putc(int c)
+{
+       static uint32_t lock;
+       spin_lock_irqsave(&lock);
+       #ifndef SERIAL_IO
+               serial_putc(c);
+       #endif
+       //lpt_putc(c);
+       cga_putc(c);
+       spin_unlock_irqsave(&lock);
+}
+
+// initialize the console devices
+void
+cons_init(void)
+{
+       cga_init();
+       kbd_init();
+       serial_init();
+
+       if (!serial_exists)
+               cprintf("Serial port does not exist!\n");
+}
+
+
+// `High'-level console I/O.  Used by readline and cprintf.
+
+void
+cputchar(int c)
+{
+       cons_putc(c);
+}
+
+void
+cputbuf(const char*COUNT(len) buf, int len)
+{
+       int i;
+       for(i = 0; i < len; i++)
+               cons_putc(buf[i]);
+}
+
+int
+getchar(void)
+{
+       int c;
+
+       while ((c = cons_getc()) == 0)
+               /* do nothing */;
+       return c;
+}
+
+int
+iscons(int fdnum)
+{
+       // used by readline
+       return 1;
+}
diff --git a/arch/i386/src/cpuinfo.c b/arch/i386/src/cpuinfo.c
new file mode 100644 (file)
index 0000000..077f0fb
--- /dev/null
@@ -0,0 +1,143 @@
+/* See COPYRIGHT for copyright information. */
+
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+#include <arch/arch.h>
+#include <arch/x86.h>
+#include <arch/mmu.h>
+#include <stdio.h>
+#include <assert.h>
+#include <ros/memlayout.h>
+#include <pmap.h>
+#include <kdebug.h>
+#include <string.h>
+
+void
+print_cpuinfo(void) {
+       uint32_t eax, ebx, ecx, edx;
+       uint32_t model, family;
+       uint64_t msr_val;
+       char vendor_id[13];
+       extern char (SNT _start)[];
+
+       asm volatile ("cpuid;"
+                 "movl    %%ebx, (%2);"
+                 "movl    %%edx, 4(%2);"
+                 "movl    %%ecx, 8(%2);"
+                : "=a"(eax)
+                : "a"(0), "D"(vendor_id)
+                : "%ebx", "%ecx", "%edx");
+
+       vendor_id[12] = '\0';
+       cprintf("Vendor ID: %s\n", vendor_id);
+       cprintf("Largest Standard Function Number Supported: %d\n", eax);
+       cpuid(0x80000000, &eax, 0, 0, 0);
+       cprintf("Largest Extended Function Number Supported: 0x%08x\n", eax);
+       cpuid(1, &eax, &ebx, &ecx, &edx);
+       family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
+       model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
+       cprintf("Family: %d\n", family);
+       cprintf("Model: %d\n", model);
+       cprintf("Stepping: %d\n", eax & 0x0000000F);
+       // eventually can fill this out with SDM Vol3B App B info, or
+       // better yet with stepping info.  or cpuid 8000_000{2,3,4}
+       switch ( family << 8 | model ) {
+               case(0x061a):
+                       cprintf("Processor: Core i7\n");
+                       break;
+               case(0x060f):
+                       cprintf("Processor: Core 2 Duo or Similar\n");
+                       break;
+               default:
+                       cprintf("Unknown or non-Intel CPU\n");
+       }
+       if (!(edx & 0x00000020))
+               panic("MSRs not supported!");
+       if (!(edx & 0x00001000))
+               panic("MTRRs not supported!");
+       if (!(edx & 0x00002000))
+               panic("Global Pages not supported!");
+       if (!(edx & 0x00000200))
+               panic("Local APIC Not Detected!");
+       if (ecx & 0x00200000)
+               cprintf("x2APIC Detected\n");
+       else
+               cprintf("x2APIC Not Detected\n");
+       cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+       cprintf("Physical Address Bits: %d\n", eax & 0x000000FF);
+       cprintf("Cores per Die: %d\n", (ecx & 0x000000FF) + 1);
+	cprintf("This core's Default APIC ID: 0x%08x\n", lapic_get_default_id());
+       msr_val = read_msr(IA32_APIC_BASE);
+       if (msr_val & MSR_APIC_ENABLE)
+               cprintf("Local APIC Enabled\n");
+       else
+               cprintf("Local APIC Disabled\n");
+       if (msr_val & 0x00000100)
+               cprintf("I am the Boot Strap Processor\n");
+       else
+               cprintf("I am an Application Processor\n");
+       cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+       if (edx & 0x00000100)
+               printk("Invariant TSC present\n");
+       else
+               printk("Invariant TSC not present\n");
+}
+
+void show_mapping(uintptr_t start, size_t size)
+{
+       pde_t* pgdir = (pde_t*)vpd;
+       pte_t *pte, *pde;
+       page_t* page;
+       uintptr_t i;
+
+       cprintf("   Virtual    Physical  Ps Dr Ac CD WT U W\n");
+       cprintf("------------------------------------------\n");
+       for(i = 0; i < size; i += PGSIZE, start += PGSIZE) {
+               page = page_lookup(pgdir, (void*)start, &pte);
+               cprintf("%08p  ", start);
+               if (page) {
+                       pde = &pgdir[PDX(start)];
+                       // for a jumbo, pde = pte and PTE_PS (better be) = 1
+                       cprintf("%08p  %1d  %1d  %1d  %1d  %1d  %1d %1d\n", page2pa(page),
+                              (*pte & PTE_PS) >> 7, (*pte & PTE_D) >> 6, (*pte & PTE_A) >> 5,
+                              (*pte & PTE_PCD) >> 4, (*pte & PTE_PWT) >> 3,
+                              (*pte & *pde & PTE_U) >> 2, (*pte & *pde & PTE_W) >> 1);
+               } else
+                       cprintf("%08p\n", 0);
+       }
+}
+
+void
+backtrace(void)
+{
+       uint32_t* ebp, eip;
+       eipdebuginfo_t debuginfo;
+       char buf[256];
+       int j, i = 1;
+       ebp = (uint32_t*)read_ebp();    
+	// eip is stepped back one byte so it lands within the call instruction's
+	// bytes; eagle-eyed readers should be able to explain why this is good
+	// enough, and why the raw retaddr (just *(ebp + 1)) is not
+       eip = *(ebp + 1) - 1;
+       // jump back a frame (out of backtrace)
+       ebp = (uint32_t*)(*ebp);
+       cprintf("Stack Backtrace:\n");
+       // on each iteration, ebp holds the stack frame and eip an addr in that func
+       while (ebp != 0) {
+               debuginfo_eip(eip, &debuginfo);
+               memset(buf, 0, 256);
+               strncpy(buf, debuginfo.eip_fn_name, MIN(debuginfo.eip_fn_namelen, 256));
+               buf[MIN(debuginfo.eip_fn_namelen, 255)] = 0;
+               cprintf("#%02d [<%x>] in %s+%x(%p) from %s:%d\n", i++,  eip, buf, 
+                       debuginfo.eip_fn_addr - (uint32_t)_start, debuginfo.eip_fn_addr, 
+                       debuginfo.eip_file, debuginfo.eip_line);
+               cprintf("    ebp: %x   Args:", ebp);
+               for (j = 0; j < MIN(debuginfo.eip_fn_narg, 5); j++)
+                       cprintf(" %08x", *(ebp + 2 + j));
+               cprintf("\n");
+               eip = *(ebp + 1) - 1;
+               ebp = (uint32_t*)(*ebp);
+       }
+}
diff --git a/arch/i386/src/entry.S b/arch/i386/src/entry.S
new file mode 100644 (file)
index 0000000..145ca5f
--- /dev/null
@@ -0,0 +1,112 @@
+/* See COPYRIGHT for copyright information. */
+
+#include <arch/mmu.h>
+#include <arch/trap.h>
+#include <ros/memlayout.h>
+
+# Shift Right Logical 
+#define SRL(val, shamt)                (((val) >> (shamt)) & ~(-1 << (32 - (shamt))))
+
+
+###################################################################
+# The kernel (this code) is linked at address (KERNBASE + 0x00100000 (1MB)), 
+# but we tell the bootloader to load it at physical address 
+# 0x00100000 (1MB), which is the start of extended memory.
+# (See kernel.ld)
+###################################################################
+
+
+###################################################################
+# RELOC(x) maps a symbol x from its link address to its actual
+# location in physical memory (its load address).       
+###################################################################
+#define        RELOC(x) ((x) - KERNBASE)
+
+
+.set CODE_SEL,0x8              # index of code seg within mygdt
+.set DATA_SEL,0x10             # index of data seg within mygdt
+
+#define MULTIBOOT_PAGE_ALIGN  (1<<0)
+#define MULTIBOOT_MEMORY_INFO (1<<1)
+#define MULTIBOOT_HEADER_MAGIC (0x1BADB002)
+#define MULTIBOOT_HEADER_FLAGS (MULTIBOOT_MEMORY_INFO | MULTIBOOT_PAGE_ALIGN)
+#define CHECKSUM (-(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS))
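+# (The Multiboot spec requires magic + flags + checksum to sum to zero
+# mod 2^32, hence the negated sum above.)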
+
+###################################################################
+# entry point
+###################################################################
+
+.text
+
+# The Multiboot header
+.align 4
+.long MULTIBOOT_HEADER_MAGIC
+.long MULTIBOOT_HEADER_FLAGS
+.long CHECKSUM
+
+.globl         _start
+_start:
+       movw    $0x1234,0x472                   # warm boot
+
+       # Establish our own GDT in place of the boot loader's temporary GDT.
+       lgdt    RELOC(mygdtdesc)                # load descriptor table
+
+       # Immediately reload all segment registers (including CS!)
+       # with segment selectors from the new GDT.
+       movl    $DATA_SEL, %eax                 # Data segment selector
+       movw    %ax,%ds                         # -> DS: Data Segment
+       movw    %ax,%es                         # -> ES: Extra Segment
+       movw    %ax,%ss                         # -> SS: Stack Segment
+       ljmp    $CODE_SEL,$relocated            # reload CS by jumping
+relocated:
+
+       # Clear the frame pointer register (EBP)
+       # so that once we get into debugging C code,
+       # stack backtraces will be terminated properly.
+       movl    $0x0,%ebp                       # nuke frame pointer
+
+    # Set the stack pointer
+       movl    $(bootstacktop),%esp
+
+       # Save multiboot info
+       push    %ebx
+
+       # now to C code
+       movl    $0x1,num_cpus           # init global var, for now
+       call    kernel_init
+
+       # Should never get here, but in case we do, just spin.
+spin:  jmp     spin
+
+###################################################################    
+# See <inc/memlayout.h> for a complete description of these two symbols.
+###################################################################
+.data
+       .globl  vpt
+       .set    vpt, VPT
+       .globl  vpd
+       .set    vpd, (VPT + SRL(VPT, 10))
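+	# vpd is the VA of the page directory itself: with the directory mapped
+	# recursively at VPT, its own virtual address is VPT + (VPT >> 10).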
+
+
+###################################################################
+# boot stack
+###################################################################
+       .p2align        PGSHIFT         # force page alignment
+       .globl          bootstack
+bootstack:
+       .space          KSTKSIZE
+       .globl          bootstacktop   
+bootstacktop:
+
+###################################################################
+# setup the GDT        
+###################################################################
+       .p2align        2               # force 4 byte alignment
+mygdt:
+       SEG_NULL                                # null seg
+       SEG(STA_X|STA_R, -KERNBASE, 0xffffffff) # code seg
+       SEG(STA_W, -KERNBASE, 0xffffffff)       # data seg
+mygdtdesc:
+       .word   0x17                    # sizeof(mygdt) - 1
+       .long   RELOC(mygdt)            # address mygdt
+
diff --git a/arch/i386/src/env.c b/arch/i386/src/env.c
new file mode 100644 (file)
index 0000000..4f4f57f
--- /dev/null
@@ -0,0 +1,98 @@
+/* See COPYRIGHT for copyright information. */
+#ifdef __DEPUTY__
+#pragma noasync
+#endif
+
+#include <arch/trap.h>
+#include <env.h>
+#include <assert.h>
+#include <pmap.h>
+
+//
+// This exits the kernel and starts executing some environment's code.
+// This function does not return.
+// Uses 'iret' or 'sysexit' depending on CS.
+//
+void env_pop_tf(trapframe_t *tf)
+{
+       if(tf->tf_cs)
+       {
+               asm volatile ("movl %0,%%esp;           "
+                             "popal;                   "
+                             "popl %%es;               "
+                             "popl %%ds;               "
+                             "addl $0x8,%%esp;         "
+                             "iret                     "
+                             : : "g" (tf) : "memory");
+               panic("iret failed");  /* mostly to placate the compiler */
+       }
+       else
+       {
+               asm volatile ("movl %0,%%esp;           "
+                             "popal;                   "
+                             "popl %%es;               "
+                             "popl %%ds;               "
+                             "addl $0x10, %%esp;       "
+                             "popfl;                   "
+                             "movl %%ebp, %%ecx;       "
+                             "movl %%esi, %%edx;       "
+                             "sysexit                  "
+                             : : "g" (tf) : "memory");
+               panic("sysexit failed");  /* mostly to placate the compiler */
+       }
+}
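+
+/* Editor's note, not part of the original patch: both paths above assume the
+ * trapframe_t layout matches the pop sequence: the pushal block first, then
+ * es and ds, then 8 bytes of trapno/err, then eip/cs/eflags(/esp/ss).  The
+ * sysexit path additionally relies on sysexit's convention of taking the
+ * return EIP in %edx and the user ESP in %ecx, which are loaded here from
+ * the frame's saved esi and ebp slots, respectively.
+ */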
+
+void
+env_set_program_counter(env_t* e, uintptr_t pc)
+{
+       e->env_tf.tf_eip = pc;
+}
+
+void
+env_init_trapframe(env_t* e)
+{
+       // Set up appropriate initial values for the segment registers.
+       // GD_UD is the user data segment selector in the GDT, and
+       // GD_UT is the user text segment selector (see inc/memlayout.h).
+       // The low 2 bits of each segment register contain the
+       // Requestor Privilege Level (RPL); 3 means user mode.
+       e->env_tf.tf_ds = GD_UD | 3;
+       e->env_tf.tf_es = GD_UD | 3;
+       e->env_tf.tf_ss = GD_UD | 3;
+       e->env_tf.tf_esp = USTACKTOP;
+       e->env_tf.tf_cs = GD_UT | 3;
+       // You will set e->env_tf.tf_eip later.
+       // set the env's EFLAGS to have interrupts enabled
+       e->env_tf.tf_eflags |= 0x00000200; // bit 9 is the interrupt-enable flag (IF)
+}
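+
+/* Editor's sketch, not part of the original patch: the expected calling order
+ * for the functions above.  env_alloc() and elf_entry are assumptions standing
+ * in for whatever allocates the env and loads its binary:
+ *
+ *     env_t *e;
+ *     env_alloc(&e, 0);                     // hypothetical allocator
+ *     env_init_trapframe(e);                // segments, stack, EFLAGS.IF
+ *     env_set_program_counter(e, elf_entry);
+ *     env_pop_tf(&e->env_tf);               // does not return
+ */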
+
+// Flush all mapped pages in the user portion of the address space
+void
+env_user_mem_free(env_t* e)
+{
+       pte_t *pt;
+       uint32_t pdeno, pteno;
+       physaddr_t pa;
+
+       static_assert(UTOP % PTSIZE == 0);
+       for (pdeno = 0; pdeno < PDX(UTOP); pdeno++) {
+
+               // only look at mapped page tables
+               if (!(e->env_pgdir[pdeno] & PTE_P))
+                       continue;
+
+               // find the pa and va of the page table
+               pa = PTE_ADDR(e->env_pgdir[pdeno]);
+               pt = (pte_t*COUNT(NPTENTRIES)) KADDR(pa);
+
+               // unmap all PTEs in this page table 
+               for (pteno = 0; pteno <= PTX(~0); pteno++) {
+                       if (pt[pteno] & PTE_P)
+                               page_remove(e->env_pgdir, PGADDR(pdeno, pteno, 0));
+               }
+
+               // free the page table itself
+               e->env_pgdir[pdeno] = 0;
+               page_decref(pa2page(pa));
+       }
+}
diff --git a/arch/i386/src/kclock.c b/arch/i386/src/kclock.c
new file mode 100644 (file)
index 0000000..6f42dce
--- /dev/null
@@ -0,0 +1,28 @@
+/* See COPYRIGHT for copyright information. */
+
+/* Support for two time-related hardware gadgets: 1) the real-time
+ * clock with its NVRAM access functions; 2) the 8253 timer, which
+ * generates interrupts on IRQ 0.
+ */
+
+#include <arch/x86.h>
+
+#include <kclock.h>
+
+
+unsigned
+mc146818_read(unsigned reg)
+{
+       outb(IO_RTC, reg);
+       return inb(IO_RTC+1);
+}
+
+void
+mc146818_write(unsigned reg, unsigned datum)
+{
+       outb(IO_RTC, reg);
+       outb(IO_RTC+1, datum);
+}
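+
+/* Editor's sketch, not part of the original patch: a 16-bit NVRAM read built
+ * from the accessor above, as pmap.c's nvram_read() does.  0x15/0x16 are the
+ * conventional CMOS offsets for base memory size in KB (assumed here; see
+ * kclock.h for the real NVRAM_* constants):
+ *
+ *     unsigned basemem_kb = mc146818_read(0x15) | (mc146818_read(0x16) << 8);
+ */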
+
+
+
diff --git a/arch/i386/src/kdebug.c b/arch/i386/src/kdebug.c
new file mode 100644 (file)
index 0000000..43371f9
--- /dev/null
@@ -0,0 +1,241 @@
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+#include <stab.h>
+#include <string.h>
+#include <assert.h>
+#include <kdebug.h>
+#include <pmap.h>
+#include <env.h>
+
+#include <ros/memlayout.h>
+
+extern const stab_t __STAB_BEGIN__[];  // Beginning of stabs table
+extern const stab_t __STAB_END__[];    // End of stabs table
+extern const char __STABSTR_BEGIN__[];         // Beginning of string table
+extern const char __STABSTR_END__[];           // End of string table
+
+typedef struct UserStabData {
+       const stab_t *stabs;
+       const stab_t *stab_end;
+       const char *stabstr;
+       const char *stabstr_end;
+} user_stab_data_t;
+
+
+// stab_binsearch(stabs, region_left, region_right, type, addr)
+//
+//     Some stab types are arranged in increasing order by instruction
+//     address.  For example, N_FUN stabs (stab entries with n_type ==
+//     N_FUN), which mark functions, and N_SO stabs, which mark source files.
+//
+//     Given an instruction address, this function finds the single stab
+//     entry of type 'type' that contains that address.
+//
+//     The search takes place within the range [*region_left, *region_right].
+//     Thus, to search an entire set of N stabs, you might do:
+//
+//             left = 0;
+//             right = N - 1;     /* rightmost stab */
+//             stab_binsearch(stabs, &left, &right, type, addr);
+//
+//     The search modifies *region_left and *region_right to bracket the
+//     'addr'.  *region_left points to the matching stab that contains
+//     'addr', and *region_right points just before the next stab.  If
+//     *region_left > *region_right, then 'addr' is not contained in any
+//     matching stab.
+//
+//     For example, given these N_SO stabs:
+//             Index  Type   Address
+//             0      SO     f0100000
+//             13     SO     f0100040
+//             117    SO     f0100176
+//             118    SO     f0100178
+//             555    SO     f0100652
+//             556    SO     f0100654
+//             657    SO     f0100849
+//     this code:
+//             left = 0, right = 657;
+//             stab_binsearch(stabs, &left, &right, N_SO, 0xf0100184);
+//     will exit setting left = 118, right = 554.
+//
+static void
+stab_binsearch(const stab_t *stabs, int *region_left, int *region_right,
+              int type, uintptr_t addr)
+{
+       int l = *region_left, r = *region_right, any_matches = 0;
+       
+       while (l <= r) {
+               int true_m = (l + r) / 2, m = true_m;
+               
+               // search for earliest stab with right type
+               while (m >= l && stabs[m].n_type != type)
+                       m--;
+               if (m < l) {    // no match in [l, m]
+                       l = true_m + 1;
+                       continue;
+               }
+
+               // actual binary search
+               any_matches = 1;
+               if (stabs[m].n_value < addr) {
+                       *region_left = m;
+                       l = true_m + 1;
+               } else if (stabs[m].n_value > addr) {
+                       *region_right = m - 1;
+                       r = m - 1;
+               } else {
+                       // exact match for 'addr', but continue loop to find
+                       // *region_right
+                       *region_left = m;
+                       l = m;
+                       addr++;
+               }
+       }
+
+       if (!any_matches)
+               *region_right = *region_left - 1;
+       else {
+               // find rightmost region containing 'addr'
+               for (l = *region_right;
+                    l > *region_left && stabs[l].n_type != type;
+                    l--)
+                       /* do nothing */;
+               *region_left = l;
+       }
+}
+
+
+// debuginfo_eip(addr, info)
+//
+//     Fill in the 'info' structure with information about the specified
+//     instruction address, 'addr'.  Returns 0 if information was found, and
+//     negative if not.  But even if it returns negative it has stored some
+//     information into '*info'.
+//
+int
+debuginfo_eip(uintptr_t addr, eipdebuginfo_t *info)
+{
+       const stab_t *stabs, *stab_end;
+       const char *stabstr, *stabstr_end;
+       int lfile, rfile, lfun, rfun, lline, rline;
+
+       // Initialize *info
+       info->eip_file = "<unknown>";
+       info->eip_line = 0;
+       info->eip_fn_name = "<unknown>";
+       info->eip_fn_namelen = 9;
+       info->eip_fn_addr = addr;
+       info->eip_fn_narg = 0;
+
+       // Find the relevant set of stabs
+       if (addr >= ULIM) {
+               stabs = __STAB_BEGIN__;
+               stab_end = __STAB_END__;
+               stabstr = __STABSTR_BEGIN__;
+               stabstr_end = __STABSTR_END__;
+       } else {
+               // The user-application linker script, user/user.ld,
+               // puts information about the application's stabs (equivalent
+               // to __STAB_BEGIN__, __STAB_END__, __STABSTR_BEGIN__, and
+               // __STABSTR_END__) in a structure located at virtual address
+               // USTABDATA.
+               const user_stab_data_t *usd = (const user_stab_data_t *) USTABDATA;
+
+               // Make sure this memory is valid.
+               // Return -1 if it is not.  Hint: Call user_mem_check.
+               // LAB 3: Your code here.
+               
+               stabs = usd->stabs;
+               stab_end = usd->stab_end;
+               stabstr = usd->stabstr;
+               stabstr_end = usd->stabstr_end;
+
+               // Make sure the STABS and string table memory is valid.
+               // LAB 3: Your code here.
+       }
+
+       // String table validity checks
+       if (stabstr_end <= stabstr || stabstr_end[-1] != 0)
+               return -1;
+
+       // Now we find the right stabs that define the function containing
+       // 'eip'.  First, we find the basic source file containing 'eip'.
+       // Then, we look in that source file for the function.  Then we look
+       // for the line number.
+       
+       // Search the entire set of stabs for the source file (type N_SO).
+       lfile = 0;
+       rfile = (stab_end - stabs) - 1;
+       stab_binsearch(stabs, &lfile, &rfile, N_SO, addr);
+       if (lfile == 0)
+               return -1;
+
+       // Search within that file's stabs for the function definition
+       // (N_FUN).
+       lfun = lfile;
+       rfun = rfile;
+       stab_binsearch(stabs, &lfun, &rfun, N_FUN, addr);
+
+       if (lfun <= rfun) {
+               // stabs[lfun] points to the function name
+               // in the string table, but check bounds just in case.
+               if (stabs[lfun].n_strx < stabstr_end - stabstr)
+                       info->eip_fn_name = stabstr + stabs[lfun].n_strx;
+               info->eip_fn_addr = stabs[lfun].n_value;
+               addr -= info->eip_fn_addr;
+               // Search within the function definition for the line number.
+               lline = lfun;
+               rline = rfun;
+       } else {
+               // Couldn't find function stab!  Maybe we're in an assembly
+               // file.  Search the whole file for the line number.
+               info->eip_fn_addr = addr;
+               lline = lfile;
+               rline = rfile;
+       }
+       // Ignore stuff after the colon.
+       info->eip_fn_namelen = strfind(info->eip_fn_name, ':') - info->eip_fn_name;
+       
+       // Search within [lline, rline] for the line number stab.
+       // If found, set info->eip_line to the right line number.
+       // If not found, return -1.
+       //
+       // Hint:
+       //      There's a particular stabs type used for line numbers.
+       //      Look at the STABS documentation and <inc/stab.h> to find
+       //      which one.
+       // Your code here.
+
+       stab_binsearch(stabs, &lline, &rline, N_SLINE, addr);
+       if (lline <= rline) 
+               // stabs[lline] points to the line number
+               info->eip_line = stabs[lline].n_value;
+       else
+               return -1;
+       
+       // Search backwards from the line number for the relevant filename
+       // stab.
+       // We can't just use the "lfile" stab because inlined functions
+       // can interpolate code from a different file!
+       // Such included source files use the N_SOL stab type.
+       while (lline >= lfile
+              && stabs[lline].n_type != N_SOL
+              && (stabs[lline].n_type != N_SO || !stabs[lline].n_value))
+               lline--;
+       if (lline >= lfile && stabs[lline].n_strx < stabstr_end - stabstr)
+               info->eip_file = stabstr + stabs[lline].n_strx;
+
+       // Set eip_fn_narg to the number of arguments taken by the function,
+       // or 0 if there was no containing function.
+       // Your code here.
+       info->eip_fn_narg = 0;
+       if (lfun <= rfun) {
+               lfun++;
+               while (stabs[lfun++].n_type == N_PSYM)
+                       info->eip_fn_narg++;
+       }
+       
+       return 0;
+}
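+
+/* Editor's sketch, not part of the original patch: typical use from a
+ * backtrace printer.  The field names follow eipdebuginfo_t as used above;
+ * the surrounding loop over saved %ebp frames is assumed:
+ *
+ *     eipdebuginfo_t info;
+ *     if (debuginfo_eip(eip, &info) == 0)
+ *             cprintf("%s:%d: %.*s+%x\n", info.eip_file, info.eip_line,
+ *                     info.eip_fn_namelen, info.eip_fn_name,
+ *                     eip - info.eip_fn_addr);
+ */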
diff --git a/arch/i386/src/pmap.c b/arch/i386/src/pmap.c
new file mode 100644 (file)
index 0000000..fe1f651
--- /dev/null
@@ -0,0 +1,942 @@
+/* See COPYRIGHT for copyright information. */
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+#include <arch/x86.h>
+#include <arch/arch.h>
+#include <arch/mmu.h>
+#include <arch/apic.h>
+
+#include <ros/error.h>
+
+#include <atomic.h>
+#include <string.h>
+#include <assert.h>
+#include <pmap.h>
+#include <kclock.h>
+#include <env.h>
+
+// These variables are set in vm_init()
+pde_t* boot_pgdir;             // Virtual address of boot time page directory
+physaddr_t boot_cr3;           // Physical address of boot time page directory
+char* boot_freemem;            // Pointer to next byte of free mem
+
+page_t *pages;         // Virtual address of physical page array
+page_list_t page_free_list;    // Free list of physical pages
+
+extern env_t *envs;
+
+// Global descriptor table.
+//
+// The kernel and user segments are identical (except for the DPL).
+// To load the SS register, the CPL must equal the DPL.  Thus,
+// we must duplicate the segments for the user and the kernel.
+//
+segdesc_t gdt[] =
+{
+       // 0x0 - unused (always faults -- for trapping NULL far pointers)
+       SEG_NULL,
+
+       // 0x8 - kernel code segment
+       [GD_KT >> 3] = SEG(STA_X | STA_R, 0x0, 0xffffffff, 0),
+
+       // 0x10 - kernel data segment
+       [GD_KD >> 3] = SEG(STA_W, 0x0, 0xffffffff, 0),
+
+       // 0x18 - user code segment
+       [GD_UT >> 3] = SEG(STA_X | STA_R, 0x0, 0xffffffff, 3),
+
+       // 0x20 - user data segment
+       [GD_UD >> 3] = SEG(STA_W, 0x0, 0xffffffff, 3),
+
+       // 0x28 - tss, initialized in idt_init()
+       [GD_TSS >> 3] = SEG_NULL
+};
+
+pseudodesc_t gdt_pd = {
+       sizeof(gdt) - 1, (unsigned long) gdt
+};
+
+static int
+nvram_read(int r)
+{
+       return mc146818_read(r) | (mc146818_read(r + 1) << 8);
+}
+
+bool enable_pse(void)
+{
+       uint32_t edx, cr4;
+       cpuid(1, 0, 0, 0, &edx);
+       if (edx & CPUID_PSE_SUPPORT) {
+               cr4 = rcr4();
+               cr4 |= CR4_PSE;
+               lcr4(cr4);
+               return 1;
+       } else
+               return 0;
+}
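+
+/* Editor's note, not part of the original patch: CPUID leaf 1 reports PSE in
+ * EDX bit 3, so CPUID_PSE_SUPPORT is presumably (1 << 3).  With CR4.PSE set,
+ * a PDE with PTE_PS maps a 4MB "jumbo" page directly, skipping the second
+ * level of the page table walk.
+ */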
+
+// --------------------------------------------------------------
+// Set up initial memory mappings and turn on MMU.
+// --------------------------------------------------------------
+
+static void check_boot_pgdir(bool pse);
+
+//
+// Given pgdir, a pointer to a page directory,
+// walk the 2-level page table structure to find
+// the page table entry (PTE) for linear address la.
+// Return a pointer to this PTE.
+//
+// If the relevant page table doesn't exist in the page directory:
+//     - If create == 0, return 0.
+//     - Otherwise allocate a new page table, install it into pgdir,
+//       and return a pointer into it.
+//        (Questions: What data should the new page table contain?
+//       And what permissions should the new pgdir entry have?
+//       Note that we use the 486-only "WP" feature of %cr0, which
+//       affects the way supervisor-mode writes are checked.)
+//
+// This function abstracts away the 2-level nature of
+// the page directory by allocating new page tables
+// as needed.
+// 
+// boot_pgdir_walk may ONLY be used during initialization,
+// before the page_free_list has been set up.
+// It should panic on failure.  (Note that boot_alloc already panics
+// on failure.)
+//
+// Supports returning jumbo (4MB PSE) PTEs.  To create with a jumbo, pass in 2.
+// 
+// Maps non-PSE PDEs as U/W.  W so the kernel can write, U so the user can read
+// via UVPT.  UVPT security comes from the UVPT mapping (U/R).  All other kernel
+// pages are protected at the second level.
+static pte_t*
+boot_pgdir_walk(pde_t *pgdir, uintptr_t la, int create)
+{
+       pde_t* the_pde = &pgdir[PDX(la)];
+       void* new_table;
+
+       if (*the_pde & PTE_P) {
+               if (*the_pde & PTE_PS)
+                       return (pte_t*)the_pde;
+               return &((pde_t*)KADDR(PTE_ADDR(*the_pde)))[PTX(la)];
+       }
+       if (!create)
+               return NULL;
+       if (create == 2) {
+               if (JPGOFF(la))
+                       panic("Attempting to find a Jumbo PTE at an unaligned VA!");
+               *the_pde = PTE_PS | PTE_P;
+               return (pte_t*)the_pde;
+       }
+       new_table = boot_alloc(PGSIZE, PGSIZE);
+       memset(new_table, 0, PGSIZE);
+       *the_pde = (pde_t)PADDR(new_table) | PTE_P | PTE_W | PTE_U | PTE_G;
+       return &((pde_t*)KADDR(PTE_ADDR(*the_pde)))[PTX(la)];
+}
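+
+/* Editor's sketch, not part of the original patch: the three create modes of
+ * boot_pgdir_walk(), as exercised by boot_map_segment() below:
+ *
+ *     pte = boot_pgdir_walk(pgdir, la, 0);  // look up only; NULL if absent
+ *     pte = boot_pgdir_walk(pgdir, la, 1);  // boot_alloc a page table if needed
+ *     pte = boot_pgdir_walk(pgdir, la, 2);  // make a 4MB jumbo PDE (PTE_PS)
+ */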
+
+//
+// Map [la, la+size) of linear address space to physical [pa, pa+size)
+// in the page table rooted at pgdir.  Size is a multiple of PGSIZE.
+// Use permission bits perm|PTE_P for the entries.
+//
+// This function may ONLY be used during initialization,
+// before the page_free_list has been set up.
+//
+// To map with Jumbos, set PTE_PS in perm
+static void
+boot_map_segment(pde_t *pgdir, uintptr_t la, size_t size, physaddr_t pa, int perm)
+{
+       uintptr_t i;
+       pte_t *pte;
+       // la can be page-unaligned, but weird things will happen
+       // unless pa has the same offset.  PTE_ADDR() truncates any
+       // offset from pa.  We warn on unaligned la.  size can be unaligned too.
+       if (PGOFF(la)) {
+               warn("la not page aligned in boot_map_segment!");
+               size += PGOFF(la);
+       }
+       if (perm & PTE_PS) {
+               if (JPGOFF(la) || JPGOFF(pa))
+                       panic("Tried to map a Jumbo page at an unaligned address!");
+               // need to index with i instead of la + size, in case of wrap-around
+               for (i = 0; i < size; i += JPGSIZE, la += JPGSIZE, pa += JPGSIZE) {
+                       pte = boot_pgdir_walk(pgdir, la, 2);
+                       *pte = PTE_ADDR(pa) | PTE_P | perm;
+               }
+       } else {
+               for (i = 0; i < size; i += PGSIZE, la += PGSIZE, pa += PGSIZE) {
+                       pte = boot_pgdir_walk(pgdir, la, 1);
+                       if (*pte & PTE_PS)
+                               // if we start using the extra flag for PAT, which we aren't,
+                               // this will warn, since PTE_PS and PTE_PAT are the same....
+                               warn("Possibly attempting to map a regular page into a Jumbo PDE");
+                       *pte = PTE_ADDR(pa) | PTE_P | perm;
+               }
+       }
+}
+
+// could consider having an API to allow these to dynamically change
+// MTRRs are for physical, static ranges.  PAT is linear, more granular, and
+// more dynamic.
+void setup_default_mtrrs(barrier_t* smp_barrier)
+{
+       // disable interrupts
+       int8_t state = 0;
+       disable_irqsave(&state);
+       // barrier - if we're meant to do this for all cores, we'll be 
+       // passed a pointer to an initialized barrier
+       if (smp_barrier)
+               waiton_barrier(smp_barrier);
+       
+       // disable caching      cr0: set CD and clear NW
+       lcr0((rcr0() | CR0_CD) & ~CR0_NW);
+       // flush caches
+       cache_flush();
+       // flush tlb
+       tlb_flush_global();
+       // disable MTRRs and set the default type to WB (06)
+       write_msr(IA32_MTRR_DEF_TYPE, 0x00000006);
+
+       // Now we can actually safely adjust the MTRRs
+       // MTRR for IO Holes (note these are 64 bit values we are writing)
+       // 0x000a0000 - 0x000c0000 : VGA - WC 0x01
+       write_msr(IA32_MTRR_PHYSBASE0, PTE_ADDR(VGAPHYSMEM) | 0x01);
+       // if we need to have a full 64bit val, use the UINT64 macro
+       write_msr(IA32_MTRR_PHYSMASK0, 0x0000000ffffe0800);
+       // 0x000c0000 - 0x00100000 : IO devices (and ROM BIOS) - UC 0x00
+       write_msr(IA32_MTRR_PHYSBASE1, PTE_ADDR(DEVPHYSMEM) | 0x00);
+       write_msr(IA32_MTRR_PHYSMASK1, 0x0000000ffffc0800);
+       // APIC/IOAPIC holes
+       /* Going to skip them, since we set their mode using PAT when we 
+        * map them in 
+        */
+       // make sure all other MTRR ranges are disabled (should be unnecessary)
+       write_msr(IA32_MTRR_PHYSMASK2, 0);
+       write_msr(IA32_MTRR_PHYSMASK3, 0);
+       write_msr(IA32_MTRR_PHYSMASK4, 0);
+       write_msr(IA32_MTRR_PHYSMASK5, 0);
+       write_msr(IA32_MTRR_PHYSMASK6, 0);
+       write_msr(IA32_MTRR_PHYSMASK7, 0);
+
+       // keep the default type WB (06), turn MTRRs on, and turn off fixed ranges
+       write_msr(IA32_MTRR_DEF_TYPE, 0x00000806);
+       // reflush caches and TLB
+       cache_flush();
+       tlb_flush_global();
+       // turn on caching
+       lcr0(rcr0() & ~(CR0_CD | CR0_NW));
+       // barrier
+       if (smp_barrier)
+               waiton_barrier(smp_barrier);
+       // enable interrupts
+       enable_irqsave(&state);
+}
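+
+/* Editor's note, not part of the original patch: how the IA32_MTRR_PHYSMASK
+ * values above encode a range.  For a size-aligned range, the mask selects
+ * the physical address bits that must match PHYSBASE, and bit 11 is the
+ * "valid" bit:
+ *
+ *     mask = (~(uint64_t)(size - 1) & 0xffffff000ULL) | (1 << 11);
+ *
+ * e.g. the 128KB VGA hole (base 0xa0000, size 0x20000) gives
+ * 0xffffe0000 | 0x800 = 0x0000000ffffe0800, the PHYSMASK0 value above.
+ */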
+
+
+// Set up a two-level page table:
+//    boot_pgdir is the linear (virtual) address of the root
+//    boot_cr3 is the physical address of the root
+// Then turn on paging.  Then effectively turn off segmentation.
+// (i.e., the segment base addrs are set to zero).
+// 
+// This function only sets up the kernel part of the address space
+// (ie. addresses >= UTOP).  The user part of the address space
+// will be setup later.
+//
+// From UTOP to ULIM, the user is allowed to read but not write.
+// Above ULIM the user cannot read (or write). 
+void
+vm_init(void)
+{
+       pde_t* pgdir;
+       uint32_t cr0, edx;
+       size_t n;
+       bool pse;
+
+       pse = enable_pse();
+       if (pse)
+               cprintf("PSE capability detected.\n");
+
+       // we panicked earlier if we don't support PGE.  turn it on now.
+       // it's used in boot_map_segment, which covers all of the mappings that are
+       // the same for all address spaces.  and also for the VPT mapping below.
+       lcr4(rcr4() | CR4_PGE);
+
+       // set up mtrr's for core0.  other cores will do the same later
+       setup_default_mtrrs(0);
+
+       /*
+        * PSE status: 
+        * - can walk and set up boot_map_segments with jumbos but can't
+        *   insert yet.  need to look at the page_dir and friends.
+        * - anything related to a single struct Page still can't handle 
+        *   jumbos.  will need to think about and adjust Page functions
+        * - do we want to store info like this in the struct Page?  or just check
+        *   by walking the PTE
+        * - when we alloc a page, and we want it to be 4MB, we'll need
+        *   to have contiguous memory, etc
+        * - there's a difference between having 4MB page table entries
+        *   and having 4MB Page tracking structs.  changing the latter will
+        *   break a lot of things
+        * - showmapping and friends work on a 4KB granularity, but map to the
+        *   correct entries
+        * - need to not insert / boot_map a single page into an area that is 
+        *   already holding a jumbo page.  will need to break the jumbo up so that
+        *   we can then insert the lone page.  currently warns.
+        * - some inherent issues with the pgdir_walks returning a PTE, and we
+        *   don't know whether it is a jumbo (PDE) or a regular PTE.
+        */
+
+       //////////////////////////////////////////////////////////////////////
+       // create initial page directory.
+       pgdir = boot_alloc(PGSIZE, PGSIZE);
+       memset(pgdir, 0, PGSIZE);
+       boot_pgdir = pgdir;
+       boot_cr3 = PADDR(pgdir);
+       // helpful if you want to manually walk with kvm / bochs
+       //printk("pgdir va = %08p, pgdir pa = %08p\n\n", pgdir, PADDR(pgdir));
+
+       //////////////////////////////////////////////////////////////////////
+       // Recursively insert PD in itself as a page table, to form
+       // a virtual page table at virtual address VPT.
+       // (For now, you don't have to understand the greater purpose of the
+       // following two lines.  Unless you are eagle-eyed, in which case you
+       // should already know.)
+
+       // Permissions: kernel RW, user NONE, Global Page
+       pgdir[PDX(VPT)] = PADDR(pgdir) | PTE_W | PTE_P | PTE_G;
+
+       // same for UVPT
+       // Permissions: kernel R, user R, Global Page
+       pgdir[PDX(UVPT)] = PADDR(pgdir) | PTE_U | PTE_P | PTE_G;
+
+       //////////////////////////////////////////////////////////////////////
+       // Map the kernel stack (symbol name "bootstack").  The complete VA
+       // range of the stack, [KSTACKTOP-PTSIZE, KSTACKTOP), breaks into two
+       // pieces:
+       //     * [KSTACKTOP-KSTKSIZE, KSTACKTOP) -- backed by physical memory
+       //     * [KSTACKTOP-PTSIZE, KSTACKTOP-KSTKSIZE) -- not backed => faults
+       //     Permissions: kernel RW, user NONE
+       // Your code goes here:
+
+       // remember that the space for the kernel stack is allocated in the binary.
+       // bootstack and bootstacktop point to symbols in the data section, which 
+       // at this point are like 0xc010b000.  KSTACKTOP is the desired loc in VM
+       boot_map_segment(pgdir, (uintptr_t)KSTACKTOP - KSTKSIZE, 
+                        KSTKSIZE, PADDR(bootstack), PTE_W | PTE_G);
+
+       //////////////////////////////////////////////////////////////////////
+       // Map all of physical memory at KERNBASE. 
+       // Ie.  the VA range [KERNBASE, 2^32) should map to
+       //      the PA range [0, 2^32 - KERNBASE)
+       // We might not have 2^32 - KERNBASE bytes of physical memory, but
+       // we just set up the mapping anyway.
+       // Permissions: kernel RW, user NONE
+       // Your code goes here: 
+       
+       // this maps all of the possible phys memory
+       // note the use of unsigned underflow to get size = 0x40000000
+       //boot_map_segment(pgdir, KERNBASE, -KERNBASE, 0, PTE_W);
+       // but this only maps what is available, and saves memory.  every 4MB of
+       // mapped memory requires a 2nd level page: 2^10 entries, each covering 2^12
+       // need to modify tests below to account for this
+       if (pse) {
+               // map the first 4MB as regular entries, to support different MTRRs
+               boot_map_segment(pgdir, KERNBASE, JPGSIZE, 0, PTE_W | PTE_G);
+               boot_map_segment(pgdir, KERNBASE + JPGSIZE, maxaddrpa - JPGSIZE, JPGSIZE,
+                                PTE_W | PTE_G | PTE_PS);
+       } else
+               boot_map_segment(pgdir, KERNBASE, maxaddrpa, 0, PTE_W | PTE_G);
+
+       // APIC mapping: using PAT (but not *the* PAT flag) to make these type UC
+       // IOAPIC
+       boot_map_segment(pgdir, (uintptr_t)IOAPIC_BASE, PGSIZE, IOAPIC_BASE, 
+                        PTE_PCD | PTE_PWT | PTE_W | PTE_G);
+       // Local APIC
+       boot_map_segment(pgdir, (uintptr_t)LAPIC_BASE, PGSIZE, LAPIC_BASE,
+                        PTE_PCD | PTE_PWT | PTE_W | PTE_G);
+
+       //////////////////////////////////////////////////////////////////////
+       // Make 'pages' point to an array of size 'npage' of 'struct Page'.
+       // The kernel uses this structure to keep track of physical pages;
+       // 'npage' equals the number of physical pages in memory.  User-level
+       // programs get read-only access to the array as well.
+       // You must allocate the array yourself.
+       // Map this array read-only by the user at linear address UPAGES
+       // (ie. perm = PTE_U | PTE_P)
+       // Permissions:
+       //    - pages -- kernel RW, user NONE
+       //    - the read-only version mapped at UPAGES -- kernel R, user R
+       // Your code goes here: 
+       
+       // round up to the nearest page
+       size_t page_array_size = ROUNDUP(npage*sizeof(page_t), PGSIZE);
+       pages = (page_t *)boot_alloc(page_array_size, PGSIZE);
+       memset(pages, 0, page_array_size);
+       if (page_array_size > PTSIZE) {
+               warn("page_array_size bigger than PTSIZE, userland will not see all pages");
+               page_array_size = PTSIZE;
+       }
+       boot_map_segment(pgdir, UPAGES, page_array_size, PADDR(pages), PTE_U | PTE_G);
+
+       //////////////////////////////////////////////////////////////////////
+       // Make 'envs' point to an array of size 'NENV' of 'env_t'.
+       // No longer mapping ENVS into the address space
+       
+       // round up to the nearest page
+       size_t env_array_size = ROUNDUP(NENV*sizeof(env_t), PGSIZE);
+       envs = (env_t *)boot_alloc(env_array_size, PGSIZE);
+       memset(envs, 0, env_array_size);
+
+       // Check that the initial page directory has been set up correctly.
+       check_boot_pgdir(pse);
+
+       //////////////////////////////////////////////////////////////////////
+       // On x86, segmentation maps a VA to a LA (linear addr) and
+       // paging maps the LA to a PA.  I.e. VA => LA => PA.  If paging is
+       // turned off the LA is used as the PA.  Note: there is no way to
+       // turn off segmentation.  The closest thing is to set the base
+       // address to 0, so the VA => LA mapping is the identity.
+
+       // Current mapping: VA KERNBASE+x => PA x.
+       //     (segmentation base=-KERNBASE and paging is off)
+
+       // From here on down we must maintain this VA KERNBASE + x => PA x
+       // mapping, even though we are turning on paging and reconfiguring
+       // segmentation.
+
+       // Map VA 0:4MB same as VA KERNBASE, i.e. to PA 0:4MB.
+       // (Limits our kernel to <4MB)
+       /* They mean linear address 0:4MB, and the kernel < 4MB limit only
+        * applies until segmentation is turned off.
+        * Once we turn on paging, segmentation is still on, so references to
+        * KERNBASE+x get mapped to linear address x, which we need to make
+        * sure can still map to phys addr x.  Once we turn off segmentation,
+        * KERNBASE+x maps to LA KERNBASE+x, which maps to PA x via paging.
+        */
+       pgdir[0] = pgdir[PDX(KERNBASE)];
+
+       // Install page table.
+       lcr3(boot_cr3);
+
+       // Turn on paging.
+       cr0 = rcr0();
+       // CD and NW should already be clear by now; clear them again just in case,
+       // to make sure caching stays on
+       cr0 |= CR0_PE|CR0_PG|CR0_AM|CR0_WP|CR0_NE|CR0_MP;
+       cr0 &= ~(CR0_TS|CR0_EM|CR0_CD|CR0_NW);
+       lcr0(cr0);
+
+       // Current mapping: KERNBASE+x => x => x.
+       // (x < 4MB so uses paging pgdir[0])
+
+       // Reload all segment registers.
+       asm volatile("lgdt gdt_pd");
+       asm volatile("movw %%ax,%%gs" :: "a" (GD_UD|3));
+       asm volatile("movw %%ax,%%fs" :: "a" (GD_UD|3));
+       asm volatile("movw %%ax,%%es" :: "a" (GD_KD));
+       asm volatile("movw %%ax,%%ds" :: "a" (GD_KD));
+       asm volatile("movw %%ax,%%ss" :: "a" (GD_KD));
+       asm volatile("ljmp %0,$1f\n 1:\n" :: "i" (GD_KT));  // reload cs
+       asm volatile("lldt %%ax" :: "a" (0));
+
+       // Final mapping: KERNBASE+x => KERNBASE+x => x.
+
+       // This mapping was only used after paging was turned on but
+       // before the segment registers were reloaded.
+       pgdir[0] = 0;
+
+       // Flush the TLB for good measure, to kill the pgdir[0] mapping.
+       lcr3(boot_cr3);
+}
+
+//
+// Checks that the kernel part of virtual address space
+// has been set up roughly correctly (by vm_init()).
+//
+// This function doesn't test every corner case,
+// in fact it doesn't test the permission bits at all,
+// but it is a pretty good sanity check. 
+//
+static physaddr_t check_va2pa(pde_t *pgdir, uintptr_t va);
+static pte_t get_vaperms(pde_t *pgdir, uintptr_t va);
+
+static void
+check_boot_pgdir(bool pse)
+{
+       uint32_t i, n;
+       pde_t *pgdir, pte;
+
+       pgdir = boot_pgdir;
+
+       // check pages array
+       n = ROUNDUP(naddrpage*sizeof(page_t), PGSIZE);
+       for (i = 0; i < n; i += PGSIZE)
+               assert(check_va2pa(pgdir, UPAGES + i) == PADDR(pages) + i);
+
+       // check phys mem
+       //for (i = 0; KERNBASE + i != 0; i += PGSIZE)
+       // adjusted check to account for only mapping avail mem
+       if (pse)
+               for (i = 0; i < maxaddrpa; i += JPGSIZE)
+                       assert(check_va2pa(pgdir, KERNBASE + i) == i);
+       else
+               for (i = 0; i < maxaddrpa; i += PGSIZE)
+                       assert(check_va2pa(pgdir, KERNBASE + i) == i);
+
+       // check kernel stack
+       for (i = 0; i < KSTKSIZE; i += PGSIZE)
+               assert(check_va2pa(pgdir, KSTACKTOP - KSTKSIZE + i) == PADDR(bootstack) + i);
+
+       // check for zero/non-zero in PDEs
+       for (i = 0; i < NPDENTRIES; i++) {
+               switch (i) {
+               case PDX(VPT):
+               case PDX(UVPT):
+               case PDX(KSTACKTOP-1):
+               case PDX(UPAGES):
+               case PDX(LAPIC_BASE): // LAPIC mapping.  TODO: remove when MTRRs are up
+                       assert(pgdir[i]);
+                       break;
+               default:
+                       //if (i >= PDX(KERNBASE))
+                       // adjusted check to account for only mapping avail mem
+                       // and you can't KADDR maxpa (just above legal range)
+                       // maxaddrpa can be up to maxpa, so assume the worst
+                       if (i >= PDX(KERNBASE) && i <= PDX(KADDR(maxaddrpa-1)))
+                               assert(pgdir[i]);
+                       else
+                               assert(pgdir[i] == 0);
+                       break;
+               }
+       }
+
+       // check permissions
+       // user read-only.  check for user and write, should be only user
+       // eagle-eyed viewers should be able to explain the extra cases
+       for (i = UTOP; i < ULIM; i+=PGSIZE) {
+               pte = get_vaperms(pgdir, i);
+               if ((pte & PTE_P) && (i != UVPT+(VPT>>10))) {
+                       if (pte & PTE_PS) {
+                               assert((pte & PTE_U) != PTE_U);
+                               assert((pte & PTE_W) != PTE_W);
+                       } else {
+                               assert((pte & PTE_U) == PTE_U);
+                               assert((pte & PTE_W) != PTE_W);
+                       }
+               }
+       }
+       // kernel read-write.
+       for (i = ULIM; i <= KERNBASE + maxaddrpa - PGSIZE; i+=PGSIZE) {
+               pte = get_vaperms(pgdir, i);
+               if ((pte & PTE_P) && (i != VPT+(UVPT>>10))) {
+                       assert((pte & PTE_U) != PTE_U);
+                       assert((pte & PTE_W) == PTE_W);
+               }
+       }
+       // special mappings
+       pte = get_vaperms(pgdir, UVPT+(VPT>>10));
+       assert((pte & PTE_U) != PTE_U);
+       assert((pte & PTE_W) != PTE_W);
+
+       // note this means the kernel cannot directly manipulate this virtual address
+       // convince yourself this isn't a big deal, eagle-eyes!
+       pte = get_vaperms(pgdir, VPT+(UVPT>>10));
+       assert((pte & PTE_U) != PTE_U);
+       assert((pte & PTE_W) != PTE_W);
+
+       cprintf("check_boot_pgdir() succeeded!\n");
+}
+
+// This function returns the physical address of the page containing 'va',
+// defined by the page directory 'pgdir'.  The hardware normally performs
+// this functionality for us!  We define our own version to help check
+// the check_boot_pgdir() function; it shouldn't be used elsewhere.
+
+static physaddr_t
+check_va2pa(pde_t *pgdir, uintptr_t va)
+{
+       pte_t *p;
+
+       pgdir = &pgdir[PDX(va)];
+       if (!(*pgdir & PTE_P))
+               return ~0;
+       if (*pgdir & PTE_PS)
+               return PTE_ADDR(*pgdir);
+       p = (pte_t*) KADDR(PTE_ADDR(*pgdir));
+       if (!(p[PTX(va)] & PTE_P))
+               return ~0;
+       return PTE_ADDR(p[PTX(va)]);
+}
+
+/* 
+ * This function returns a PTE with the aggregate permissions equivalent
+ * to walking the two levels of paging.  PPN = 0.  Somewhat fragile, in that
+ * it returns PTE_PS if either entry has PTE_PS (which should only happen
+ * for some of the recursive walks)
+ */
+
+static pte_t
+get_vaperms(pde_t *pgdir, uintptr_t va)
+{
+       pde_t* pde = &pgdir[PDX(va)];
+       pte_t* pte = pgdir_walk(pgdir, (void*)va, 0);
+       if (!pte || !(*pte & PTE_P))
+               return 0;
+       // note: needs | and explicit parens; '+' binds tighter than '&' in C
+       return PGOFF(*pde & *pte) | (PTE_PS & (*pde | *pte));
+}
+               
+// --------------------------------------------------------------
+// Tracking of physical pages.
+// The 'pages' array has one 'page_t' entry per physical page.
+// Pages are reference counted, and free pages are kept on a linked list.
+// --------------------------------------------------------------
+
+//  
+// Initialize page structure and memory free list.
+// After this point, ONLY use the functions below
+// to allocate and deallocate physical memory via the page_free_list,
+// and NEVER use boot_alloc() or the related boot-time functions above.
+//
+void
+page_init(void)
+{
+       // The example code here marks all pages as free.
+       // However this is not truly the case.  What memory is free?
+       //  1) Mark page 0 as in use.
+       //     This way we preserve the real-mode IDT and BIOS structures
+       //     in case we ever need them.  (Currently we don't, but...)
+       //  2) Mark the rest of base memory as free.
+       //  3) Then comes the IO hole [IOPHYSMEM, EXTPHYSMEM).
+       //     Mark it as in use so that it can never be allocated.      
+       //  4) Then extended memory [EXTPHYSMEM, ...).
+       //     Some of it is in use, some is free. Where is the kernel?
+       //     Which pages are used for page tables and other data structures?
+       //
+       // Change the code to reflect this.
+       int i;
+       physaddr_t physaddr_after_kernel = PADDR(ROUNDUP(boot_freemem, PGSIZE));
+       LIST_INIT(&page_free_list);
+
+       pages[0].pp_ref = 1;
+       // alloc the second page, since we will need it later to init the other cores
+       // probably need to be smarter about what page we use (make this dynamic) TODO
+       pages[1].pp_ref = 1;
+       for (i = 2; i < PPN(IOPHYSMEM); i++) {
+               pages[i].pp_ref = 0;
+               LIST_INSERT_HEAD(&page_free_list, &pages[i], pp_link);
+       }
+       for (i = PPN(IOPHYSMEM); i < PPN(EXTPHYSMEM); i++) {
+               pages[i].pp_ref = 1;
+       }
+       for (i = PPN(EXTPHYSMEM); i < PPN(physaddr_after_kernel); i++) {
+               pages[i].pp_ref = 1;
+       }
+       for (i = PPN(physaddr_after_kernel); i < PPN(maxaddrpa); i++) {
+               pages[i].pp_ref = 0;
+               LIST_INSERT_HEAD(&page_free_list, &pages[i], pp_link);
+       }
+       // this blocks out all memory above maxaddrpa.  we will need another
+       // mechanism to allocate and map these pages into the kernel address space
+       for (i = PPN(maxaddrpa); i < npage; i++) {
+               pages[i].pp_ref = 1;
+       }
+}
+
+/* 
+ * Remove the second level page table associated with virtual address va.
+ * Will 0 out the PDE for that page table.
+ * Panics if the page table has any present entries.
+ * This should be called rarely and with good cause.
+ * Currently errors if the PDE is jumbo or not present.
+ */
+error_t        pagetable_remove(pde_t *pgdir, void *va)
+{
+       pde_t* the_pde = &pgdir[PDX(va)];
+
+       if (!(*the_pde & PTE_P) || (*the_pde & PTE_PS))
+               return -EFAULT;
+       pte_t* page_table = (pde_t*)KADDR(PTE_ADDR(*the_pde));
+       for (int i = 0; i < NPTENTRIES; i++) 
+               if (page_table[i] & PTE_P)
+                       panic("Page table not empty during attempted removal!");
+       *the_pde = 0;
+       page_decref(pa2page(PADDR(page_table)));
+       return 0;
+}
+
+// Given 'pgdir', a pointer to a page directory, pgdir_walk returns
+// a pointer to the page table entry (PTE) for linear address 'va'.
+// This requires walking the two-level page table structure.
+//
+// If the relevant page table doesn't exist in the page directory, then:
+//    - If create == 0, pgdir_walk returns NULL.
+//    - Otherwise, pgdir_walk tries to allocate a new page table
+//     with page_alloc.  If this fails, pgdir_walk returns NULL.
+//    - Otherwise, pgdir_walk returns a pointer into the new page table.
+//
+// This is boot_pgdir_walk, but using page_alloc() instead of boot_alloc().
+// Unlike boot_pgdir_walk, pgdir_walk can fail.
+//
+// Hint: you can turn a Page * into the physical address of the
+// page it refers to with page2pa() from kern/pmap.h.
+//
+// Supports returning jumbo (4MB PSE) PTEs.  To create with a jumbo, pass in 2.
+pte_t*
+pgdir_walk(pde_t *pgdir, const void *SNT va, int create)
+{
+       pde_t* the_pde = &pgdir[PDX(va)];
+       page_t *new_table;
+
+       if (*the_pde & PTE_P) {
+               if (*the_pde & PTE_PS)
+                       return (pte_t*)the_pde;
+               return &((pde_t*)KADDR(PTE_ADDR(*the_pde)))[PTX(va)];
+       }
+       if (!create)
+               return NULL;
+       if (create == 2) {
+               if (JPGOFF(va))
+                       panic("Attempting to find a Jumbo PTE at an unaligned VA!");
+               *the_pde = PTE_PS | PTE_P;
+               return (pte_t*)the_pde;
+       }
+       if (page_alloc(&new_table))
+               return NULL;
+       new_table->pp_ref = 1;
+       memset(page2kva(new_table), 0, PGSIZE);
+       *the_pde = (pde_t)page2pa(new_table) | PTE_P | PTE_W | PTE_U;
+       return &((pde_t*)KADDR(PTE_ADDR(*the_pde)))[PTX(va)];
+}
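+
+/* Editor's sketch, not part of the original patch: pgdir_walk() is the
+ * building block for page_insert-style mappings.  A minimal manual mapping,
+ * assuming pp is an allocated page_t:
+ *
+ *     pte_t *pte = pgdir_walk(pgdir, va, 1);
+ *     if (!pte)
+ *             return -ENOMEM;               // no page for a page table
+ *     pp->pp_ref++;
+ *     *pte = page2pa(pp) | perm | PTE_P;
+ *     tlb_invalidate(pgdir, va);
+ */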
+
+/* Flushes a TLB, including global pages.  We should always have the CR4_PGE
+ * flag set, but just in case, we'll check.  Toggling this bit flushes the TLB.
+ */
+void tlb_flush_global(void)
+{
+       uint32_t cr4 = rcr4();
+       if (cr4 & CR4_PGE) {
+               lcr4(cr4 & ~CR4_PGE);
+               lcr4(cr4);
+       } else 
+               lcr3(rcr3());
+}
+
+void
+page_check(void)
+{
+       page_t *pp, *pp0, *pp1, *pp2;
+       page_list_t fl;
+       pte_t *ptep;
+
+       // should be able to allocate three pages
+       pp0 = pp1 = pp2 = 0;
+       assert(page_alloc(&pp0) == 0);
+       assert(page_alloc(&pp1) == 0);
+       assert(page_alloc(&pp2) == 0);
+
+       assert(pp0);
+       assert(pp1 && pp1 != pp0);
+       assert(pp2 && pp2 != pp1 && pp2 != pp0);
+
+       // temporarily steal the rest of the free pages
+       fl = page_free_list;
+       LIST_INIT(&page_free_list);
+
+       // should be no free memory
+       assert(page_alloc(&pp) == -ENOMEM);
+
+       // Fill pp1 with bogus data and check for invalid tlb entries
+       // (memset writes bytes, so 0xFF per byte reads back as 0xFFFFFFFF)
+       memset(page2kva(pp1), 0xFF, PGSIZE);
+
+       // there is no page allocated at address 0
+       assert(page_lookup(boot_pgdir, (void *) 0x0, &ptep) == NULL);
+
+       // there is no free memory, so we can't allocate a page table 
+       assert(page_insert(boot_pgdir, pp1, 0x0, 0) < 0);
+
+       // free pp0 and try again: pp0 should be used for page table
+       page_free(pp0);
+       assert(page_insert(boot_pgdir, pp1, 0x0, 0) == 0);
+       tlb_invalidate(boot_pgdir, 0x0);
+       // DEP Should have shot down invalid TLB entry - let's check
+       {
+         int *x = 0x0;
+         assert(*x == 0xFFFFFFFF);
+       }
+       assert(PTE_ADDR(boot_pgdir[0]) == page2pa(pp0));
+       assert(check_va2pa(boot_pgdir, 0x0) == page2pa(pp1));
+       assert(pp1->pp_ref == 1);
+       assert(pp0->pp_ref == 1);
+
+       // should be able to map pp2 at PGSIZE because pp0 is already allocated for page table
+       assert(page_insert(boot_pgdir, pp2, (void*) PGSIZE, 0) == 0);
+       assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp2));
+       assert(pp2->pp_ref == 1);
+
+       // Make sure that pgdir_walk returns a pointer to the pte and
+       // not the table or some other garbage
+       {
+         pte_t *p = KADDR(PTE_ADDR(boot_pgdir[PDX(PGSIZE)]));
+         assert(pgdir_walk(boot_pgdir, (void *)PGSIZE, 0) == &p[PTX(PGSIZE)]);
+       }
+
+       // should be no free memory
+       assert(page_alloc(&pp) == -ENOMEM);
+
+       // should be able to map pp2 at PGSIZE because it's already there
+       assert(page_insert(boot_pgdir, pp2, (void*) PGSIZE, PTE_U) == 0);
+       assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp2));
+       assert(pp2->pp_ref == 1);
+
+       // Make sure that we actually changed the permission on pp2 when we re-mapped it
+       {
+         pte_t *p = pgdir_walk(boot_pgdir, (void*)PGSIZE, 0);
+         assert(((*p) & PTE_U) == PTE_U);
+       }
+
+       // pp2 should NOT be on the free list
+       // could happen if ref counts are handled sloppily in page_insert
+       assert(page_alloc(&pp) == -ENOMEM);
+
+       // should not be able to map at PTSIZE because we need a free page for a page table
+       assert(page_insert(boot_pgdir, pp0, (void*) PTSIZE, 0) < 0);
+
+       // insert pp1 at PGSIZE (replacing pp2)
+       assert(page_insert(boot_pgdir, pp1, (void*) PGSIZE, 0) == 0);
+
+       // should have pp1 at both 0 and PGSIZE, pp2 nowhere, ...
+       assert(check_va2pa(boot_pgdir, 0) == page2pa(pp1));
+       assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp1));
+       // ... and ref counts should reflect this
+       assert(pp1->pp_ref == 2);
+       assert(pp2->pp_ref == 0);
+
+       // pp2 should be returned by page_alloc
+       assert(page_alloc(&pp) == 0 && pp == pp2);
+
+       // unmapping pp1 at 0 should keep pp1 at PGSIZE
+       page_remove(boot_pgdir, 0x0);
+       assert(check_va2pa(boot_pgdir, 0x0) == ~0);
+       assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp1));
+       assert(pp1->pp_ref == 1);
+       assert(pp2->pp_ref == 0);
+
+       // unmapping pp1 at PGSIZE should free it
+       page_remove(boot_pgdir, (void*) PGSIZE);
+       assert(check_va2pa(boot_pgdir, 0x0) == ~0);
+       assert(check_va2pa(boot_pgdir, PGSIZE) == ~0);
+       assert(pp1->pp_ref == 0);
+       assert(pp2->pp_ref == 0);
+
+       // so it should be returned by page_alloc
+       assert(page_alloc(&pp) == 0 && pp == pp1);
+
+       // should be no free memory
+       assert(page_alloc(&pp) == -ENOMEM);
+
+       // forcibly take pp0 back
+       assert(PTE_ADDR(boot_pgdir[0]) == page2pa(pp0));
+       boot_pgdir[0] = 0;
+       assert(pp0->pp_ref == 1);
+       pp0->pp_ref = 0;
+
+       // Catch invalid pointer addition in pgdir_walk - i.e. pgdir + PDX(va)
+       {
+         // Give back pp0 for a bit
+         page_free(pp0);
+
+         void * va = (void *)((PGSIZE * NPDENTRIES) + PGSIZE);
+         pte_t *p2 = pgdir_walk(boot_pgdir, va, 1);
+         pte_t *p = KADDR(PTE_ADDR(boot_pgdir[PDX(va)]));
+         assert(p2 == &p[PTX(va)]);
+
+         // Clean up again
+         boot_pgdir[PDX(va)] = 0;
+         pp0->pp_ref = 0;
+       }
+
+       // give free list back
+       page_free_list = fl;
+
+       // free the pages we took
+       page_free(pp0);
+       page_free(pp1);
+       page_free(pp2);
+
+       cprintf("page_check() succeeded!\n");
+}
+
+/* 
+
+    // testing code for boot_pgdir_walk 
+       pte_t* temp;
+       temp = boot_pgdir_walk(pgdir, VPT + (VPT >> 10), 1);
+       cprintf("pgdir = %p\n", pgdir);
+       cprintf("test recursive walking pte_t* = %p\n", temp);
+       cprintf("test recursive walking entry = %p\n", PTE_ADDR(temp));
+       temp = boot_pgdir_walk(pgdir, 0xc0400000, 1);
+       cprintf("LA = 0xc0400000 = %p\n", temp);
+       temp = boot_pgdir_walk(pgdir, 0xc0400070, 1);
+       cprintf("LA = 0xc0400070 = %p\n", temp);
+       temp = boot_pgdir_walk(pgdir, 0xc0800000, 0);
+       cprintf("LA = 0xc0800000, no create = %p\n", temp);
+       temp = boot_pgdir_walk(pgdir, 0xc0600070, 1);
+       cprintf("LA = 0xc0600070 = %p\n", temp);
+       temp = boot_pgdir_walk(pgdir, 0xc0600090, 0);
+       cprintf("LA = 0xc0600090, nc = %p\n", temp);
+       temp = boot_pgdir_walk(pgdir, 0xc0608070, 0);
+       cprintf("LA = 0xc0608070, nc = %p\n", temp);
+       temp = boot_pgdir_walk(pgdir, 0xc0800070, 1);
+       cprintf("LA = 0xc0800070 = %p\n", temp);
+       temp = boot_pgdir_walk(pgdir, 0xc0b00070, 0);
+       cprintf("LA = 0xc0b00070, nc = %p\n", temp);
+       temp = boot_pgdir_walk(pgdir, 0xc0c00000, 0);
+       cprintf("LA = 0xc0c00000, nc = %p\n", temp);
+
+       // testing for boot_map_seg
+       cprintf("\n");
+       cprintf("before mapping 1 page to 0x00350000\n");
+       cprintf("0xc4000000's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xc4000000, 1));
+       cprintf("0xc4000000's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xc4000000, 1)));
+       boot_map_segment(pgdir, 0xc4000000, 4096, 0x00350000, PTE_W);
+       cprintf("after mapping\n");
+       cprintf("0xc4000000's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xc4000000, 1));
+       cprintf("0xc4000000's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xc4000000, 1)));
+
+       cprintf("\n");
+       cprintf("before mapping 3 pages to 0x00700000\n");
+       cprintf("0xd0000000's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xd0000000, 1));
+       cprintf("0xd0000000's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xd0000000, 1)));
+       cprintf("0xd0001000's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xd0001000, 1));
+       cprintf("0xd0001000's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xd0001000, 1)));
+       cprintf("0xd0002000's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xd0002000, 1));
+       cprintf("0xd0002000's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xd0002000, 1)));
+       boot_map_segment(pgdir, 0xd0000000, 4096*3, 0x00700000, 0);
+       cprintf("after mapping\n");
+       cprintf("0xd0000000's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xd0000000, 1));
+       cprintf("0xd0000000's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xd0000000, 1)));
+       cprintf("0xd0001000's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xd0001000, 1));
+       cprintf("0xd0001000's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xd0001000, 1)));
+       cprintf("0xd0002000's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xd0002000, 1));
+       cprintf("0xd0002000's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xd0002000, 1)));
+
+       cprintf("\n");
+       cprintf("before mapping 1 unaligned to 0x00500010\n");
+       cprintf("0xc8000010's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xc8000010, 1));
+       cprintf("0xc8000010's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xc8000010, 1)));
+       cprintf("0xc8001010's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xc8001010, 1));
+       cprintf("0xc8001010's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xc8001010, 1)));
+       boot_map_segment(pgdir, 0xc8000010, 4096, 0x00500010, PTE_W);
+       cprintf("after mapping\n");
+       cprintf("0xc8000010's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xc8000010, 1));
+       cprintf("0xc8000010's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xc8000010, 1)));
+       cprintf("0xc8001010's &pte: %08x\n",boot_pgdir_walk(pgdir, 0xc8001010, 1));
+       cprintf("0xc8001010's pte: %08x\n",*(boot_pgdir_walk(pgdir, 0xc8001010, 1)));
+
+       cprintf("\n");
+       boot_map_segment(pgdir, 0xe0000000, 4096, 0x10000000, PTE_W);
+
+*/
diff --git a/arch/i386/src/smp.c b/arch/i386/src/smp.c
new file mode 100644 (file)
index 0000000..dd3cf0d
--- /dev/null
@@ -0,0 +1,170 @@
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+#include <arch/arch.h>
+#include <smp.h>
+
+#include <atomic.h>
+#include <ros/error.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <pmap.h>
+#include <env.h>
+#include <trap.h>
+
+/*************************** IPI Wrapper Stuff ********************************/
+// checklists to protect the global interrupt_handlers for vectors 0xf0 - 0xf4.
+// they need to be global, since there is no single function whose lifetime
+// spans all of their users
+handler_wrapper_t             handler_wrappers[NUM_HANDLER_WRAPPERS];
+
+static int smp_call_function(uint8_t type, uint8_t dest, isr_t handler, void* data,
+                              handler_wrapper_t** wait_wrapper)
+{
+       extern handler_t interrupt_handlers[];
+       int8_t state = 0;
+       uint32_t wrapper_num;
+       handler_wrapper_t* wrapper;
+       extern atomic_t outstanding_calls;
+
+       // prevents us from ever having more than NUM_HANDLER_WRAPPERS callers in
+       // the process of competing for vectors.  not decremented until after both
+       // the while(1) loop and any waiting are done.
+       atomic_inc(&outstanding_calls);
+       if (atomic_read(&outstanding_calls) > NUM_HANDLER_WRAPPERS) {
+               atomic_dec(&outstanding_calls);
+               return -EBUSY;
+       }
+       
+       // assumes our cores are numbered in order
+       if ((type == 4) && (dest >= num_cpus))
+               panic("Destination CPU does not exist!");
+
+       // build the mask based on the type and destination
+       INIT_CHECKLIST_MASK(cpu_mask, MAX_NUM_CPUS);
+       // set checklist mask's size dynamically to the num cpus actually present
+       cpu_mask.size = num_cpus;
+       switch (type) {
+               case 1: // self
+                       SET_BITMASK_BIT(cpu_mask.bits, core_id());
+                       break;
+               case 2: // all
+                       FILL_BITMASK(cpu_mask.bits, num_cpus);
+                       break;
+               case 3: // all but self
+                       FILL_BITMASK(cpu_mask.bits, num_cpus);
+                       CLR_BITMASK_BIT(cpu_mask.bits, core_id());
+                       break;
+               case 4: // physical mode
+                       // note this only supports sending to one specific physical id
+                       // (only sets one bit, so if multiple cores have the same phys id
+                       // the first one through will set this).
+                       SET_BITMASK_BIT(cpu_mask.bits, dest);
+                       break;
+               case 5: // logical mode
+                       // TODO
+                       warn("Logical mode bitmask handler protection not implemented!");
+                       break;
+               default:
+                       panic("Invalid type for cross-core function call!");
+       }
+
+       // Find an available vector/wrapper.  Starts with this core's id (mod the
+       // number of wrappers).  Walk through on conflict.
+       // Commit returns an error if it wanted to give up for some reason,
+       // like taking too long to acquire the lock or clear the mask, at which
+       // point, we try the next one.
+       // When we are done, wrapper points to the one we finally got.
+       // this wrapper_num trick doesn't work as well if you send a bunch in a row
+       // and wait, since you always check your main one (which is currently busy).
+       wrapper_num = core_id() % NUM_HANDLER_WRAPPERS;
+       while(1) {
+               wrapper = &handler_wrappers[wrapper_num];
+               if (!commit_checklist_wait(wrapper->cpu_list, &cpu_mask))
+                       break;
+               wrapper_num = (wrapper_num + 1) % NUM_HANDLER_WRAPPERS;
+               /*
+               uint32_t count = 0;
+               // instead of deadlock, smp_call can fail with this.  makes it harder
+               // to use (have to check your return value).  consider putting a delay
+               // here too (like if wrapper_num == initial_wrapper_num)
+               if (count++ > NUM_HANDLER_WRAPPERS * 1000) // note 1000 isn't enough...
+                       return -EBUSY;
+               */
+       }
+
+       // Wanting to wait is expressed by having a non-NULL handler_wrapper_t**
+       // passed in.  Pass out our reference to wrapper, to wait later.
+       // If we don't want to wait, release the checklist (though it is still not
+       // clear, so it can't be used til everyone checks in).
+       if (wait_wrapper)
+               *wait_wrapper = wrapper;
+       else {
+               release_checklist(wrapper->cpu_list);
+               atomic_dec(&outstanding_calls);
+       }
+
+       // now register our handler to run
+       register_interrupt_handler(interrupt_handlers, wrapper->vector, handler, data);
+       // WRITE MEMORY BARRIER HERE
+       enable_irqsave(&state);
+       // Send the proper type of IPI.  I made up these numbers.
+       switch (type) {
+               case 1:
+                       send_self_ipi(wrapper->vector);
+                       break;
+               case 2:
+                       send_broadcast_ipi(wrapper->vector);
+                       break;
+               case 3:
+                       send_all_others_ipi(wrapper->vector);
+                       break;
+               case 4: // physical mode
+                       send_ipi(dest, 0, wrapper->vector);
+                       break;
+               case 5: // logical mode
+                       send_ipi(dest, 1, wrapper->vector);
+                       break;
+               default:
+                       panic("Invalid type for cross-core function call!");
+       }
+       // wait long enough to receive our own broadcast (PROBABLY WORKS) TODO
+       lapic_wait_to_send();
+       disable_irqsave(&state);
+       return 0;
+}
+
+// Wrapper functions.  Add more as they are needed.
+int smp_call_function_self(isr_t handler, void* data,
+                           handler_wrapper_t** wait_wrapper)
+{
+       return smp_call_function(1, 0, handler, data, wait_wrapper);
+}
+
+int smp_call_function_all(isr_t handler, void* data,
+                          handler_wrapper_t** wait_wrapper)
+{
+       return smp_call_function(2, 0, handler, data, wait_wrapper);
+}
+
+int smp_call_function_single(uint8_t dest, isr_t handler, void* data,
+                             handler_wrapper_t** wait_wrapper)
+{
+       return smp_call_function(4, dest, handler, data, wait_wrapper);
+}
+
+// If you want to wait, pass the address of a pointer up above, then call
+// this to do the actual waiting.  Be somewhat careful about uninitialized 
+// or old wrapper pointers.
+int smp_call_wait(handler_wrapper_t* wrapper)
+{
+       if (wrapper) {
+               waiton_checklist(wrapper->cpu_list);
+               return 0;
+       } else {
+               warn("Attempting to wait on null wrapper!  Check your return values!");
+               return -EFAIL;
+       }
+}
+
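+// A minimal usage sketch (print_id is hypothetical, not part of this commit):
+// run a handler on every core, then block until all of them have checked in.
+//
+//        static void print_id(trapframe_t *tf, void *data)
+//        {
+//                cprintf("Hello from core %d\n", core_id());
+//        }
+//
+//        handler_wrapper_t *w;
+//        smp_call_function_all(print_id, 0, &w);
+//        smp_call_wait(w); // returns once every core has run the handler
+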
diff --git a/arch/i386/src/smp_boot.c b/arch/i386/src/smp_boot.c
new file mode 100644 (file)
index 0000000..1c27a44
--- /dev/null
@@ -0,0 +1,210 @@
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+#include <arch/x86.h>
+#include <arch/arch.h>
+#include <smp.h>
+#include <arch/console.h>
+#include <arch/apic.h>
+
+#include <atomic.h>
+#include <ros/error.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <pmap.h>
+#include <env.h>
+#include <trap.h>
+
+extern handler_wrapper_t handler_wrappers[NUM_HANDLER_WRAPPERS];
+volatile uint8_t num_cpus = 0xee;
+uintptr_t smp_stack_top;
+
+#define DECLARE_HANDLER_CHECKLISTS(vector)                          \
+       INIT_CHECKLIST(f##vector##_cpu_list, MAX_NUM_CPUS);
+
+#define INIT_HANDLER_WRAPPER(v)                                     \
+{                                                                   \
+       handler_wrappers[(v)].vector = 0xf##v;                          \
+       handler_wrappers[(v)].cpu_list = &f##v##_cpu_list;              \
+       handler_wrappers[(v)].cpu_list->mask.size = num_cpus;           \
+}
+
+DECLARE_HANDLER_CHECKLISTS(0);
+DECLARE_HANDLER_CHECKLISTS(1);
+DECLARE_HANDLER_CHECKLISTS(2);
+DECLARE_HANDLER_CHECKLISTS(3);
+DECLARE_HANDLER_CHECKLISTS(4);
+
+static void init_smp_call_function(void)
+{
+       INIT_HANDLER_WRAPPER(0);
+       INIT_HANDLER_WRAPPER(1);
+       INIT_HANDLER_WRAPPER(2);
+       INIT_HANDLER_WRAPPER(3);
+       INIT_HANDLER_WRAPPER(4);
+}
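+
+// For reference, INIT_HANDLER_WRAPPER(0) expands (roughly) to:
+//        handler_wrappers[0].vector = 0xf0;
+//        handler_wrappers[0].cpu_list = &f0_cpu_list;
+//        handler_wrappers[0].cpu_list->mask.size = num_cpus;
+// i.e. wrapper i services IPI vector 0xf0 + i and tracks participating cores
+// in its own checklist.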
+
+/******************************************************************************/
+
+static void smp_mtrr_handler(trapframe_t *tf, void* data)
+{
+       setup_default_mtrrs((barrier_t*)data);
+}
+
+void smp_boot(void)
+{
+       // this needs to be set in smp_entry too...
+       #define trampoline_pg 0x00001000
+       page_t *smp_stack;
+       // NEED TO GRAB A LOWMEM FREE PAGE FOR AP BOOTUP CODE
+       // page1 (2nd page) is reserved, hardcoded in pmap.c
+       // these are asm labels, declared as functions only so their addresses
+       // can be taken (an implicit-int declaration hack)
+       extern smp_entry(), smp_entry_end(), smp_boot_lock(), smp_semaphore();
+       memset(KADDR(trampoline_pg), 0, PGSIZE);
+       memcpy(KADDR(trampoline_pg), &smp_entry, &smp_entry_end - &smp_entry);
+
+       // This mapping allows access to the trampoline with paging on and off
+       // via trampoline_pg
+       page_insert(boot_pgdir, pa2page(trampoline_pg), (void*)trampoline_pg, PTE_W);
+
+       // Allocate a stack for the cores starting up.  One for all, must share
+       if (page_alloc(&smp_stack))
+               panic("No memory for SMP boot stack!");
+       smp_stack->pp_ref++;
+       smp_stack_top = (uintptr_t)(page2kva(smp_stack) + PGSIZE);
+
+       // Start the IPI process (INIT, wait, SIPI, wait, SIPI, wait)
+       send_init_ipi();
+       // SDM 3A is a little wonky wrt the proper delays.  These are my best guess.
+       udelay(10000);
+       // first SIPI
+       send_startup_ipi(0x01);
+       /* BOCHS does not like this second SIPI.
+       // second SIPI
+       udelay(200);
+       send_startup_ipi(0x01);
+       */
+       udelay(100000);
+
+       // Each core will also increment smp_semaphore, and decrement when it is done,
+       // all in smp_entry.  Its purpose is to keep Core0 from competing for the
+       // smp_boot_lock.  So long as one AP increments the sem before the final
+       // LAPIC timer goes off, all available cores will be initialized.
+       while(*(volatile uint32_t*)(&smp_semaphore - &smp_entry + trampoline_pg));
+
+       // From here on, no other cores are coming up.  Grab the lock to ensure it.
+       // Another core could be in its prelock phase and be trying to grab the lock
+       // forever....
+       // The lock exists on the trampoline, so it can be grabbed right away in
+       // real mode.  If core0 wins the race, it blocks the other CPUs from coming
+       // up; letting them proceed with booting after that can crash the machine,
+       // specifically when they turn on paging and have that temp mapping pulled
+       // out from under them.  Now, if a core loses, it will spin on the
+       // trampoline (which we must be careful to not deallocate)
+       spin_lock((uint32_t*)(&smp_boot_lock - &smp_entry + trampoline_pg));
+       cprintf("Num_Cpus Detected: %d\n", num_cpus);
+
+       // Remove the mapping of the page used by the trampoline
+       page_remove(boot_pgdir, (void*)trampoline_pg);
+       // It had a refcount of 2 earlier, so we need to dec once more to free it
+       // but only if all cores are in (or we reset / reinit those that failed)
+       // TODO after we parse ACPI tables
+       if (num_cpus == 8) // TODO - ghetto coded for our 8 way SMPs
+               page_decref(pa2page(trampoline_pg));
+       // Remove the page table used for that mapping
+       pagetable_remove(boot_pgdir, (void*)trampoline_pg);
+       // Dealloc the temp shared stack
+       page_decref(smp_stack);
+
+       // Set up the generic remote function call facility
+       init_smp_call_function();
+
+       // Set up all cores to use the proper MTRRs
+       barrier_t generic_barrier;
+       init_barrier(&generic_barrier, num_cpus); // barrier used by smp_mtrr_handler
+       smp_call_function_all(smp_mtrr_handler, &generic_barrier, 0);
+
+       // Should probably flush everyone's TLB at this point, to get rid of
+       // temp mappings that were removed.  TODO
+}
+
+/*
+ * This is called from smp_entry by each core to finish the core bootstrapping.
+ * There is a spinlock around this entire function in smp_entry, for a few reasons,
+ * the most important being that all cores use the same stack when entering here.
+ */
+uint32_t smp_main(void)
+{
+       /*
+       // Print some diagnostics.  Uncomment if there're issues.
+       cprintf("Good morning Vietnam!\n");
+       cprintf("This core's Default APIC ID: 0x%08x\n", lapic_get_default_id());
+       cprintf("This core's Current APIC ID: 0x%08x\n", lapic_get_id());
+       if (read_msr(IA32_APIC_BASE) & 0x00000100)
+               cprintf("I am the Boot Strap Processor\n");
+       else
+               cprintf("I am an Application Processor\n");
+       cprintf("Num_Cpus: %d\n\n", num_cpus);
+       */
+
+       // Get a per-core kernel stack
+       page_t *my_stack;
+       if (page_alloc(&my_stack))
+               panic("Unable to alloc a per-core stack!");
+       my_stack->pp_ref++;
+       memset(page2kva(my_stack), 0, PGSIZE);
+
+       // Set up a gdt / gdt_pd for this core, stored at the top of the stack
+       // This is necessary, eagle-eyed readers know why
+       // GDT should be 4-byte aligned.  TS isn't aligned.  Not sure if it matters.
+       pseudodesc_t *my_gdt_pd = page2kva(my_stack) + PGSIZE -
+               sizeof(pseudodesc_t) - sizeof(segdesc_t)*SEG_COUNT;
+       segdesc_t *my_gdt = page2kva(my_stack) + PGSIZE -
+               sizeof(segdesc_t)*SEG_COUNT;
+       // TS also needs to be permanent
+       taskstate_t *my_ts = page2kva(my_stack) + PGSIZE -
+               sizeof(pseudodesc_t) - sizeof(segdesc_t)*SEG_COUNT -
+               sizeof(taskstate_t);
+       // Usable portion of the KSTACK grows down from here
+       // Won't actually start using this stack until our first interrupt
+       // (issues with changing the stack pointer and then trying to "return")
+       uintptr_t my_stack_top = (uintptr_t)my_ts;
+       
+       // Set up MSR for SYSENTER 
+       write_msr(MSR_IA32_SYSENTER_CS, GD_KT);
+       write_msr(MSR_IA32_SYSENTER_ESP, my_stack_top);
+       write_msr(MSR_IA32_SYSENTER_EIP, (uint32_t) &sysenter_handler);
+
+       // Build and load the gdt / gdt_pd
+       memcpy(my_gdt, gdt, sizeof(segdesc_t)*SEG_COUNT);
+       *my_gdt_pd = (pseudodesc_t) {
+               sizeof(segdesc_t)*SEG_COUNT - 1, (uintptr_t) my_gdt };
+       asm volatile("lgdt %0" : : "m"(*my_gdt_pd));
+
+       // Need to set the TSS so we know where to trap on this core
+       my_ts->ts_esp0 = my_stack_top;
+       my_ts->ts_ss0 = GD_KD;
+       // Initialize the TSS field of my_gdt.
+       my_gdt[GD_TSS >> 3] = SEG16(STS_T32A, (uint32_t) (my_ts), sizeof(taskstate_t), 0);
+       my_gdt[GD_TSS >> 3].sd_s = 0;
+       // Load the TSS
+       ltr(GD_TSS);
+
+       // Loads the same IDT used by the other cores
+       asm volatile("lidt idt_pd");
+
+       // APIC setup
+       // set LINT0 to receive ExtINTs (KVM's default).  At reset they are 0x1000.
+       write_mmreg32(LAPIC_LVT_LINT0, 0x700);
+       // mask it to shut it up for now.  Doesn't seem to matter yet, since both
+       // KVM and Bochs seem to only route the PIC to core0.
+       mask_lapic_lvt(LAPIC_LVT_LINT0);
+       // and then turn it on
+       lapic_enable();
+
+       // set a default logical id for now
+       lapic_set_logid(lapic_get_id());
+
+       return my_stack_top; // will be loaded in smp_entry.S
+}
diff --git a/arch/i386/src/smp_entry.S b/arch/i386/src/smp_entry.S
new file mode 100644 (file)
index 0000000..85557aa
--- /dev/null
@@ -0,0 +1,107 @@
+#include <arch/mmu.h>
+#include <ros/memlayout.h>
+#include <arch/trap.h>
+
+#define        RELOC(x) ((x) - KERNBASE)
+#define        CPUID_PSE_SUPPORT       0x00000008
+
+.globl                 smp_entry
+smp_entry:             .code16
+       cli
+       cld
+       lock incw       smp_semaphore - smp_entry + 0x1000  # announce our presence
+spin_start:                                            # grab lock in real mode
+       movw    $1, %ax
+       xchgw   %ax, smp_boot_lock - smp_entry + 0x1000
+       test    %ax, %ax
+       jne             spin_start
+
+       # Set up rudimentary segmentation
+       xorw    %ax, %ax                        # Segment number zero
+       movw    %ax, %ds                        # -> Data Segment
+       movw    %ax, %es                        # -> Extra Segment
+       movw    %ax, %ss                        # -> Stack Segment
+       # Would like to patch all of these 0x1000's at trampoline relocation time.
+       # There are three of them, so we could patch the trampoline code when we
+       # load, once we're sure the entry code will not change anymore.
+       # Note that this GDT is straight through, with no KERNBASE translation
+       lgdt    gdtdesc - smp_entry + 0x1000
+
+       # Turn on protected mode
+       movl    %cr0, %eax
+       orl             $CR0_PE, %eax
+       movl    %eax, %cr0
+       ljmp    $GD_KT, $(protcseg - smp_entry + 0x1000)
+       
+protcseg:      .code32
+       # Set up the protected-mode data segment registers
+       movw    $GD_KD, %ax             # Kernel segment selector
+       movw    %ax, %ds                # -> DS: Data Segment
+       movw    %ax, %es                # -> ES: Extra Segment
+       movw    %ax, %ss                # -> SS: Stack Segment
+       movw    %ax, %fs                # -> FS
+       movw    %ax, %gs                # -> GS
+
+       # Turn on Paging
+       movl    RELOC(boot_cr3), %eax
+       movl    %eax, %cr3
+       # Enable PSE, if available
+       movl    $1, %eax
+       cpuid
+       test    $CPUID_PSE_SUPPORT, %edx
+       jz              past_pse
+       movl    %cr4, %eax
+       orl             $CR4_PSE, %eax
+       movl    %eax, %cr4
+past_pse:
+       # Turn on PGE, no matter what.  Ghetto, but we panic if it's not supported.
+       movl    %cr4, %eax
+       orl             $CR4_PGE, %eax
+       movl    %eax, %cr4
+       movl    %cr0, %eax      
+       # These cr0 flags are the same as in pmap.c.  Keep them in sync
+       orl             $(CR0_PE|CR0_PG|CR0_AM|CR0_WP|CR0_NE|CR0_MP), %eax  
+       andl    $(~(CR0_TS|CR0_EM|CR0_CD|CR0_NW)), %eax  
+       movl    %eax, %cr0
+
+       # Reload Segments, using the same gdt_pd as Core 0
+       lgdt    gdt_pd
+       movw    $GD_KD, %ax             # Kernel segment selector
+       movw    %ax, %ds                # -> DS: Data Segment
+       movw    %ax, %es                # -> ES: Extra Segment
+       movw    %ax, %ss                # -> SS: Stack Segment
+       movw    $GD_UD|3, %ax   # User segment selector, with RPL=3
+       movw    %ax, %fs                # -> FS
+       movw    %ax, %gs                # -> GS
+       ljmp    $GD_KT, $here   # jumping to original location of trampoline!
+here:
+       xorl    %eax, %eax
+       lldt    %ax
+       incl    num_cpus
+       movl    (smp_stack_top), %esp
+       movl    $0, %ebp                # so backtrace works
+       call    smp_main
+       movl    %eax, %esp              # use our new stack, value returned from smp_main
+       # note the next two lines are using the direct mapping from smp_boot()
+       movw    $0, smp_boot_lock - smp_entry + 0x1000  # release lock
+       lock decw       smp_semaphore - smp_entry + 0x1000  # show we are done
+       call    smp_idle                # idle loop, will have interrupts turned on
+
+       # Below here is just data, stored with the code text
+       .p2align        2                                               # force 4 byte alignment
+gdt:
+       SEG_NULL                                                        # null seg
+       SEG(STA_X|STA_R, 0, 0xffffffff)         # code seg
+       SEG(STA_W, 0, 0xffffffff)                       # data seg
+gdtdesc:
+       .word   gdtdesc - gdt - 1                       # sizeof(gdt) - 1
+       .long   gdt - smp_entry + 0x1000        # address gdt
+       .p2align        2                                               # force 4 byte alignment
+.globl                 smp_boot_lock
+smp_boot_lock:                                                 # this lock word will only be used from
+       .word   0                                                       # its spot in the trampoline (0x1000)
+.globl                 smp_semaphore
+smp_semaphore:                                                 # poor man's polling semaphore
+       .word   0                                                       
+.globl                 smp_entry_end
+smp_entry_end:
diff --git a/arch/i386/src/trap.c b/arch/i386/src/trap.c
new file mode 100644 (file)
index 0000000..498022f
--- /dev/null
@@ -0,0 +1,361 @@
+#ifdef __DEPUTY__
+#pragma noasync
+#endif
+
+#include <arch/mmu.h>
+#include <arch/x86.h>
+#include <arch/arch.h>
+#include <arch/console.h>
+#include <arch/apic.h>
+#include <smp.h>
+#include <assert.h>
+#include <pmap.h>
+#include <trap.h>
+#include <monitor.h>
+#include <env.h>
+
+#include <syscall.h>
+
+taskstate_t ts;
+
+/* Interrupt descriptor table.  (Must be built at run time because
+ * shifted function addresses can't be represented in relocation records.)
+ */
+// Aligned on an 8 byte boundary (SDM V3A 5-13)
+gatedesc_t __attribute__ ((aligned (8))) idt[256] = { { 0 } };
+pseudodesc_t idt_pd = {
+       sizeof(idt) - 1, (uint32_t) idt
+};
+
+/* global handler table, used by core0 (for now).  allows the registration
+ * of functions to be called when servicing an interrupt.  other cores
+ * can set up their own later.
+ */
+handler_t interrupt_handlers[256];
+
+static const char *NTS (IN_HANDLER trapname)(int trapno)
+{
+    // zra: excnames is NORACE because Ivy doesn't trust const
+       static const char *NT const (NORACE excnames)[] = {
+               "Divide error",
+               "Debug",
+               "Non-Maskable Interrupt",
+               "Breakpoint",
+               "Overflow",
+               "BOUND Range Exceeded",
+               "Invalid Opcode",
+               "Device Not Available",
+               "Double Fault",
+               "Coprocessor Segment Overrun",
+               "Invalid TSS",
+               "Segment Not Present",
+               "Stack Fault",
+               "General Protection",
+               "Page Fault",
+               "(unknown trap)",
+               "x87 FPU Floating-Point Error",
+               "Alignment Check",
+               "Machine-Check",
+               "SIMD Floating-Point Exception"
+       };
+
+       if (trapno < sizeof(excnames)/sizeof(excnames[0]))
+               return excnames[trapno];
+       if (trapno == T_SYSCALL)
+               return "System call";
+       return "(unknown trap)";
+}
+
+
+void
+idt_init(void)
+{
+       extern segdesc_t gdt[];
+
+       // This table is made in trapentry.S by each macro in that file.
+       // It is laid out such that the ith entry holds the ith trap handler's
+       // (uint32_t) trap addr, followed by its (uint32_t) trap number
+       struct trapinfo { uint32_t trapaddr; uint32_t trapnumber; };
+       extern struct trapinfo (BND(__this,trap_tbl_end) trap_tbl)[];
+       extern struct trapinfo (SNT trap_tbl_end)[];
+       int i, trap_tbl_size = trap_tbl_end - trap_tbl;
+       extern void ISR_default(void);
+
+       // set all to default, to catch everything
+       for(i = 0; i < 256; i++)
+               SETGATE(idt[i], 0, GD_KT, &ISR_default, 0);
+
+       // set all entries that have real trap handlers
+       // we need to stop short of the last one, since the last is the default
+       // handler with a fake interrupt number (500) that is out of bounds of
+       // the idt[]
+       // if we set these to trap gates, be sure to handle the IRQs separately
+       // and we might need to break our pretty tables
+       for(i = 0; i < trap_tbl_size - 1; i++)
+               SETGATE(idt[trap_tbl[i].trapnumber], 0, GD_KT, trap_tbl[i].trapaddr, 0);
+
+       // turn on syscall handling and other user-accessible ints
+       // DPL 3 means this can be triggered by the int instruction
+       // STS_TG32 sets the IDT type to a Trap Gate (interrupts enabled)
+       idt[T_SYSCALL].gd_dpl = 3;
+       idt[T_SYSCALL].gd_type = STS_TG32;
+       idt[T_BRKPT].gd_dpl = 3;
+
+       // Setup a TSS so that we get the right stack
+       // when we trap to the kernel.
+       ts.ts_esp0 = KSTACKTOP;
+       ts.ts_ss0 = GD_KD;
+
+       // Initialize the TSS field of the gdt.
+       gdt[GD_TSS >> 3] = SEG16(STS_T32A, (uint32_t) (&ts),
+                                       sizeof(taskstate_t), 0);
+       gdt[GD_TSS >> 3].sd_s = 0;
+
+       // Load the TSS
+       ltr(GD_TSS);
+
+       // Load the IDT
+       asm volatile("lidt idt_pd");
+
+       // This will go away when we start using the IOAPIC properly
+       pic_remap();
+       // set LINT0 to receive ExtINTs (KVM's default).  At reset they are 0x1000.
+       write_mmreg32(LAPIC_LVT_LINT0, 0x700);
+       // mask it to shut it up for now
+       mask_lapic_lvt(LAPIC_LVT_LINT0);
+       // and turn it on
+       lapic_enable();
+}
+
+void
+(IN_HANDLER print_regs)(push_regs_t *regs)
+{
+       cprintf("  edi  0x%08x\n", regs->reg_edi);
+       cprintf("  esi  0x%08x\n", regs->reg_esi);
+       cprintf("  ebp  0x%08x\n", regs->reg_ebp);
+       cprintf("  oesp 0x%08x\n", regs->reg_oesp);
+       cprintf("  ebx  0x%08x\n", regs->reg_ebx);
+       cprintf("  edx  0x%08x\n", regs->reg_edx);
+       cprintf("  ecx  0x%08x\n", regs->reg_ecx);
+       cprintf("  eax  0x%08x\n", regs->reg_eax);
+}
+
+void
+(IN_HANDLER print_trapframe)(trapframe_t *tf)
+{
+       cprintf("TRAP frame at %p on core %d\n", tf, lapic_get_id());
+       print_regs(&tf->tf_regs);
+       cprintf("  es   0x----%04x\n", tf->tf_es);
+       cprintf("  ds   0x----%04x\n", tf->tf_ds);
+       cprintf("  trap 0x%08x %s\n", tf->tf_trapno, trapname(tf->tf_trapno));
+       cprintf("  err  0x%08x\n", tf->tf_err);
+       cprintf("  eip  0x%08x\n", tf->tf_eip);
+       cprintf("  cs   0x----%04x\n", tf->tf_cs);
+       cprintf("  flag 0x%08x\n", tf->tf_eflags);
+       cprintf("  esp  0x%08x\n", tf->tf_esp);
+       cprintf("  ss   0x----%04x\n", tf->tf_ss);
+}
+
+static void
+(IN_HANDLER trap_dispatch)(trapframe_t *tf)
+{
+       env_t* curenv = curenvs[core_id()];
+
+       // Handle processor exceptions.
+       switch(tf->tf_trapno) {
+               case T_BRKPT:
+                       while (1)
+                               monitor(tf);
+                       // never get to this
+                       assert(0);
+               case T_PGFLT:
+                       page_fault_handler(tf);
+                       break;
+               case T_SYSCALL:
+                       // check for userspace, for now
+                       assert(tf->tf_cs != GD_KT);
+                       tf->tf_regs.reg_eax =
+                               syscall(curenv, tf->tf_regs.reg_eax, tf->tf_regs.reg_edx,
+                                       tf->tf_regs.reg_ecx, tf->tf_regs.reg_ebx,
+                                       tf->tf_regs.reg_edi, tf->tf_regs.reg_esi);
+                       env_run(curenv);
+                       break;
+               default:
+                       // Unexpected trap: The user process or the kernel has a bug.
+                       print_trapframe(tf);
+                       if (tf->tf_cs == GD_KT)
+                               panic("Damn Damn!  Unhandled trap in the kernel!");
+                       else {
+                               warn("Unexpected trap from userspace");
+                               env_destroy(curenv);
+                               return;
+                       }
+       }
+       return;
+}
+
+void
+(IN_HANDLER env_push_ancillary_state)(env_t* e)
+{
+       // Here's where you'll save FP/MMX/XMM regs
+}
+
+void
+(IN_HANDLER env_pop_ancillary_state)(env_t* e)
+{
+       // Here's where you'll restore FP/MMX/XMM regs
+}
+
+void
+(IN_HANDLER trap)(trapframe_t *tf)
+{
+       //cprintf("Incoming TRAP frame at %p\n", tf);
+
+       env_t* curenv = curenvs[lapic_get_id()];
+
+       env_push_ancillary_state(curenv);
+
+       if ((tf->tf_cs & ~3) != GD_UT && (tf->tf_cs & ~3) != GD_KT) {
+               print_trapframe(tf);
+               panic("Trapframe with invalid CS!");
+       }
+
+       if ((tf->tf_cs & 3) == 3) {
+               // Trapped from user mode.
+               // TODO: this will change when an env has more than one context
+               // Copy trap frame (which is currently on the stack)
+               // into 'curenv->env_tf', so that running the environment
+               // will restart at the trap point.
+               assert(curenv);
+               curenv->env_tf = *tf;
+               // The trapframe on the stack should be ignored from here on.
+               tf = &curenv->env_tf;
+       }
+
+       // Dispatch based on what type of trap occurred
+       trap_dispatch(tf);
+
+       // should this be if == 3?  Sort out later when we handle traps.
+       // so far we never get here
+       assert(0);
+        // Return to the current environment, which should be runnable.
+        assert(curenv && curenv->env_status == ENV_RUNNABLE);
+        env_run(curenv);
+}
+
+void
+(IN_HANDLER irq_handler)(trapframe_t *tf)
+{
+       //if (lapic_get_id())
+       //      cprintf("Incoming IRQ, ISR: %d on core %d\n", tf->tf_trapno, lapic_get_id());
+       // merge this with alltraps?  other than the EOI... or do the same in all traps
+
+       extern handler_wrapper_t handler_wrappers[NUM_HANDLER_WRAPPERS];
+
+       // determine the interrupt handler table to use.  for now, pick the global
+       handler_t* handler_tbl = interrupt_handlers;
+
+       if (handler_tbl[tf->tf_trapno].isr != 0)
+               handler_tbl[tf->tf_trapno].isr(tf, handler_tbl[tf->tf_trapno].data);
+       // if we're a general purpose IPI function call, down the cpu_list
+       if ((0xf0 <= tf->tf_trapno) && (tf->tf_trapno < 0xf0 + NUM_HANDLER_WRAPPERS))
+               down_checklist(handler_wrappers[tf->tf_trapno & 0x0f].cpu_list);
+
+       // Send EOI.  might want to do this in assembly, and possibly earlier
+       // This is set up to work with an old PIC for now
+       // Convention is that all IRQs between 32 and 47 are for the PIC.
+       // All others are LAPIC (timer, IPIs, perf, non-ExtINT LINTS, etc)
+       // For now, only 235-255 are available
+       assert(tf->tf_trapno >= 32); // slows us down, but we should never have this
+       if (tf->tf_trapno < 48)
+               pic_send_eoi(tf->tf_trapno - PIC1_OFFSET);
+       else
+               lapic_send_eoi();
+}
+
+void
+register_interrupt_handler(handler_t table[], uint8_t int_num, isr_t handler,
+                           void* data)
+{
+       table[int_num].isr = handler;
+       table[int_num].data = data;
+}
+
+void
+page_fault_handler(trapframe_t *tf)
+{
+       uint32_t fault_va;
+
+       // Read processor's CR2 register to find the faulting address
+       fault_va = rcr2();
+
+       // Handle kernel-mode page faults.
+
+       // TODO - one day, we'll want to handle this.
+       if ((tf->tf_cs & 3) == 0) {
+               print_trapframe(tf);
+               panic("Page Fault in the Kernel at 0x%08x!", fault_va);
+       }
+
+       // We've already handled kernel-mode exceptions, so if we get here,
+       // the page fault happened in user mode.
+
+       // Call the environment's page fault upcall, if one exists.  Set up a
+       // page fault stack frame on the user exception stack (below
+       // UXSTACKTOP), then branch to curenv->env_pgfault_upcall.
+       //
+       // The page fault upcall might cause another page fault, in which case
+       // we branch to the page fault upcall recursively, pushing another
+       // page fault stack frame on top of the user exception stack.
+       //
+       // The trap handler needs one word of scratch space at the top of the
+       // trap-time stack in order to return.  In the non-recursive case, we
+       // don't have to worry about this because the top of the regular user
+       // stack is free.  In the recursive case, this means we have to leave
+       // an extra word between the current top of the exception stack and
+       // the new stack frame because the exception stack _is_ the trap-time
+       // stack.
+       //
+       // If there's no page fault upcall, the environment didn't allocate a
+       // page for its exception stack, or the exception stack overflows,
+       // then destroy the environment that caused the fault.
+       //
+       // Hints:
+       //   user_mem_assert() and env_run() are useful here.
+       //   To change what the user environment runs, modify 'curenv->env_tf'
+       //   (the 'tf' variable points at 'curenv->env_tf').
+
+       // LAB 4: Your code here.
+
+       // Destroy the environment that caused the fault.
+       env_t* curenv = curenvs[lapic_get_id()];
+       cprintf("[%08x] user fault va %08x ip %08x from core %d\n",
+               curenv->env_id, fault_va, tf->tf_eip, lapic_get_id());
+       print_trapframe(tf);
+       env_destroy(curenv);
+}
+
+void sysenter_init(void)
+{
+       write_msr(MSR_IA32_SYSENTER_CS, GD_KT);
+       write_msr(MSR_IA32_SYSENTER_ESP, ts.ts_esp0);
+       write_msr(MSR_IA32_SYSENTER_EIP, (uint32_t) &sysenter_handler);
+}
+
+/* This is called from sysenter's asm, with the tf on the kernel stack. */
+void sysenter_callwrapper(struct Trapframe *tf)
+{
+       env_t* curenv = curenvs[lapic_get_id()];
+       curenv->env_tf = *tf;
+       
+       // The trapframe on the stack should be ignored from here on.
+       tf = &curenv->env_tf;
+       tf->tf_regs.reg_eax = (intreg_t) syscall(curenv,
+                                                tf->tf_regs.reg_eax,
+                                                tf->tf_regs.reg_edx,
+                                                tf->tf_regs.reg_ecx,
+                                                tf->tf_regs.reg_ebx,
+                                                tf->tf_regs.reg_edi,
+                                                0);
+       env_run(curenv);
+}
diff --git a/arch/i386/src/trapentry.S b/arch/i386/src/trapentry.S
new file mode 100644 (file)
index 0000000..95c341a
--- /dev/null
@@ -0,0 +1,210 @@
+/* See COPYRIGHT for copyright information. */
+
+#include <arch/mmu.h>
+#include <arch/trap.h>
+#include <ros/memlayout.h>
+
+
+
+###################################################################
+# exceptions/interrupts
+###################################################################
+
+/* The TRAPHANDLER macro defines a globally-visible function for handling
+ * a trap.  It pushes a trap number onto the stack, then jumps to _alltraps.
+ * It also builds this trap's portion of the trap_tbl.
+ * Use TRAPHANDLER for traps where the CPU automatically pushes an error code.
+ */ 
+#define TRAPHANDLER(name, num)                                                                 \
+       .text;                                                                                                          \
+       .globl name;            /* define global symbol for 'name' */   \
+       .type name, @function;  /* symbol type is function */           \
+       .align 2;               /* align function definition */                         \
+       name:                   /* function starts here */                                      \
+       pushl $(num);                                                                                           \
+       jmp _alltraps;                                                                                          \
+       .data;                                                                                                          \
+       .long name;                                                                                                     \
+       .long num
+
+/* Use TRAPHANDLER_NOEC for traps where the CPU doesn't push an error code.
+ * It pushes a 0 in place of the error code, so the trap frame has the same
+ * format in either case.
+ */
+#define TRAPHANDLER_NOEC(name, num)            \
+       .text;                                                          \
+       .globl name;                                            \
+       .type name, @function;                          \
+       .align 2;                                                       \
+       name:                                                           \
+       pushl $0;                                                       \
+       pushl $(num);                                           \
+       jmp _alltraps;                                          \
+       .data;                                                          \
+       .long name;                                                     \
+       .long num
+
+/* Same as NOEC, but for IRQs instead.  num is the ISR number it is mapped to */
+#define IRQ_HANDLER(name, num)                 \
+       .text;                                                          \
+       .globl name;                                            \
+       .type name, @function;                          \
+       .align 2;                                                       \
+       name:                                                           \
+       pushl $0;                                                       \
+       pushl $(num);                                           \
+       jmp _allirqs;                                           \
+       .data;                                                          \
+       .long name;                                                     \
+       .long num
+
+.data
+.globl trap_tbl
+trap_tbl:
+
+/*
+ * Lab 3: Your code here for generating entry points for the different traps.
+ */
+TRAPHANDLER_NOEC(ISR_divide_error, T_DIVIDE)
+TRAPHANDLER_NOEC(ISR_debug_exceptions, T_DEBUG)
+TRAPHANDLER_NOEC(ISR_NMI, T_NMI)
+TRAPHANDLER_NOEC(ISR_breakpoint, T_BRKPT)
+TRAPHANDLER_NOEC(ISR_overflow, T_OFLOW)
+TRAPHANDLER_NOEC(ISR_bounds_check, T_BOUND)
+TRAPHANDLER_NOEC(ISR_invalid_opcode, T_ILLOP)
+TRAPHANDLER_NOEC(ISR_device_not_available, T_DEVICE)
+/* supposedly, DF generates an error code, but the one time we've had a DF so
+ * far, it didn't.  Eventually, this should probably be handled with a task
+ * gate.  It might have pushed a 0, but just the rest of the stack was corrupt.
+ */
+TRAPHANDLER_NOEC(ISR_double_fault, T_DBLFLT)
+/* 9 reserved */
+TRAPHANDLER(ISR_invalid_TSS, T_TSS)
+TRAPHANDLER(ISR_segment_not_present, T_SEGNP)
+TRAPHANDLER(ISR_stack_exception, T_STACK)
+TRAPHANDLER(ISR_general_protection_fault, T_GPFLT)
+TRAPHANDLER(ISR_page_fault, T_PGFLT)
+/* 15 reserved */
+TRAPHANDLER_NOEC(ISR_floating_point_error, T_FPERR)
+TRAPHANDLER(ISR_alignment_check, T_ALIGN)
+TRAPHANDLER_NOEC(ISR_machine_check, T_MCHK)
+TRAPHANDLER_NOEC(ISR_simd_error, T_SIMDERR)
+/* 20 - 31 reserved */
+IRQ_HANDLER(IRQ0, 32)
+IRQ_HANDLER(IRQ1, 33)
+IRQ_HANDLER(IRQ2, 34)
+IRQ_HANDLER(IRQ3, 35)
+IRQ_HANDLER(IRQ4, 36)
+IRQ_HANDLER(IRQ5, 37)
+IRQ_HANDLER(IRQ6, 38)
+IRQ_HANDLER(IRQ7, 39)
+IRQ_HANDLER(IRQ8, 40)
+IRQ_HANDLER(IRQ9, 41)
+IRQ_HANDLER(IRQ10, 42)
+IRQ_HANDLER(IRQ11, 43)
+IRQ_HANDLER(IRQ12, 44)
+IRQ_HANDLER(IRQ13, 45)
+IRQ_HANDLER(IRQ14, 46)
+IRQ_HANDLER(IRQ15, 47)
+/* 20 general purpose vectors, for use by the LAPIC.  Can expand later. */
+IRQ_HANDLER(IRQ203, 235)
+IRQ_HANDLER(IRQ204, 236)
+IRQ_HANDLER(IRQ205, 237)
+IRQ_HANDLER(IRQ206, 238)
+IRQ_HANDLER(IRQ207, 239)
+IRQ_HANDLER(IRQ208, 240)
+IRQ_HANDLER(IRQ209, 241)
+IRQ_HANDLER(IRQ210, 242)
+IRQ_HANDLER(IRQ211, 243)
+IRQ_HANDLER(IRQ212, 244)
+IRQ_HANDLER(IRQ213, 245)
+IRQ_HANDLER(IRQ214, 246)
+IRQ_HANDLER(IRQ215, 247)
+IRQ_HANDLER(IRQ216, 248)
+IRQ_HANDLER(IRQ217, 249)
+IRQ_HANDLER(IRQ218, 250)
+IRQ_HANDLER(IRQ219, 251)
+IRQ_HANDLER(IRQ220, 252)
+IRQ_HANDLER(IRQ221, 253)
+IRQ_HANDLER(IRQ222, 254)
+IRQ_HANDLER(IRQ223, 255)
+
+TRAPHANDLER_NOEC(ISR_syscall, T_SYSCALL)
+/* Make sure default is last!! */
+TRAPHANDLER_NOEC(ISR_default, T_DEFAULT)
+
+.data
+.globl trap_tbl_end
+trap_tbl_end:
+
+/* Keep the exit paths of _alltraps, _allirqs, and sysenter_handler in sync
+ * with the corresponding pop_tf's.
+ */
+.text
+_alltraps:
+       cld
+       pushl %ds
+       pushl %es
+       pushal
+       movw $GD_KD, %ax                # data segments aren't accessible by default
+       movw %ax, %ds
+       movw %ax, %es
+       pushl %esp
+       movl $0, %ebp                   # so we can backtrace to this point
+       call trap
+       popl %esp
+       popal
+       popl %es
+       popl %ds
+       addl $0x8, %esp                 # skip trapno and err
+       iret
+
+/* will need to think about when we reenable interrupts.  right now, iret does it,
+ * if the previous EFLAGS had interrupts enabled 
+ */
+_allirqs:
+       cld
+       pushl %ds
+       pushl %es
+       pushal
+       movw $GD_KD, %ax                # data segments aren't accessible by default
+       movw %ax, %ds
+       movw %ax, %es
+       pushl %esp
+       movl $0, %ebp                   # so we can backtrace to this point
+       call irq_handler
+       popl %esp
+       popal
+       popl %es
+       popl %ds
+       addl $0x8, %esp                 # skip IRQ number and err (which is 0)
+       iret
+
+.globl sysenter_handler;
+.type sysenter_handler, @function;
+sysenter_handler:
+       sti                                             # enable interrupts (things are sane here)
+       cld
+       pushfl                                  # save the eflags
+       pushl $0                                # these zeros keep the trapframe looking the same 
+       pushl $0                                # as when we receive a trap or interrupt
+       pushl $0                                # and CS == 0 lets the kernel know it was a sysenter
+       pushl $T_SYSCALL                # helps with print_trapframe
+       pushl %ds
+       pushl %es
+       pushal
+       movw $GD_KD, %ax
+       movw %ax, %ds
+       movw %ax, %es
+       pushl %esp
+       movl $0, %ebp                   # so we can backtrace to this point
+       call sysenter_callwrapper
+       popl %esp
+       popal
+       popl %es
+       popl %ds
+       addl $0x10, %esp                # pop T_SYSCALL and the three zeros
+       popfl                                   # restore EFLAGS
+       movl %ebp, %ecx
+       movl %esi, %edx
+       sysexit
diff --git a/arch/sparc/boot/Makefrag b/arch/sparc/boot/Makefrag
new file mode 100644 (file)
index 0000000..cfcc922
--- /dev/null
@@ -0,0 +1,6 @@
+#
+# Makefile fragment for the ROS kernel.
+# This is NOT a complete makefile;
+# you must run GNU make in the top-level directory
+# where the GNUmakefile is located.
+#
diff --git a/arch/sparc/include/arch.h b/arch/sparc/include/arch.h
new file mode 100644 (file)
index 0000000..b9d0d5b
--- /dev/null
@@ -0,0 +1,175 @@
+#ifndef ROS_INC_ARCH_H
+#define ROS_INC_ARCH_H
+
+/* Arch Constants */
+#define MAX_NUM_CPUS           64
+#define IOAPIC_BASE            0xFEC00000 // max virtual address
+
+#include <arch/mmu.h>
+#include <arch/sparc.h>
+
+#ifndef __ASSEMBLER__
+
+#include <arch/timer.h>
+#include <arch/types.h>
+#include <arch/frontend.h>
+
+static __inline void breakpoint(void) __attribute__((always_inline));
+static __inline void invlpg(void *addr) __attribute__((always_inline));
+static __inline uint64_t read_tsc(void) __attribute__((always_inline));
+static __inline uint64_t read_tsc_serialized(void) __attribute__((always_inline));
+static __inline void enable_irq(void) __attribute__((always_inline));
+static __inline void disable_irq(void) __attribute__((always_inline));
+static __inline void enable_irqsave(int8_t* state) __attribute__((always_inline));
+static __inline void disable_irqsave(int8_t* state) __attribute__((always_inline));
+static __inline void cpu_relax(void) __attribute__((always_inline));
+static __inline void cpu_halt(void) __attribute__((always_inline));
+static __inline void clflush(uintptr_t* addr) __attribute__((always_inline));
+static __inline int irq_is_enabled(void) __attribute__((always_inline));
+static __inline uint32_t core_id(void) __attribute__((always_inline));
+static __inline void cache_flush(void) __attribute__((always_inline));
+static __inline void reboot(void) __attribute__((always_inline)) __attribute__((noreturn));
+static __inline void lcr3(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t rcr3(void) __attribute__((always_inline));
+
+void print_cpuinfo(void);
+void show_mapping(uintptr_t start, size_t size);
+void backtrace(void);
+
+static __inline void
+breakpoint(void)
+{
+       __asm __volatile("ta 0x7f");
+}
+
+static __inline void 
+invlpg(void *addr)
+{ 
+       store_alternate(((intptr_t)addr) & ~0xFFF,3,0);
+}  
+
+static __inline void
+tlbflush(void)
+{
+       // unsure if we'll support this yet...
+       // may have to just do invlpg() in a loop
+       store_alternate(0x400,3,0);
+}
+
+static __inline uint64_t
+read_tsc(void)
+{
+       return read_perfctr(0);
+}
+
+static __inline uint64_t 
+read_tsc_serialized(void)
+{
+       return read_tsc();
+}
+
+static __inline void
+enable_irq(void)
+{
+       write_psr(read_psr() & ~0xF00);
+}
+
+static __inline void
+disable_irq(void)
+{
+       write_psr(read_psr() | 0xF00);
+}
+
+static __inline void
+enable_irqsave(int8_t* state)
+{
+       // *state tracks the number of nested enables and disables
+       // initial value of state: 0 = first run / no favorite
+       // > 0 means more enable calls have been made
+       // < 0 means more disable calls have been made
+       // Mostly doing this so we can call disable_irqsave first if we want
+
+       // one side or another "gets a point" if interrupts were already the
+       // way it wanted to go.  Otherwise, state stays at 0.  If the state was not
+       // 0, then enabling/disabling isn't even an option; just increment/decrement
+
+       // if enabling is winning or tied, make sure it's enabled
+       if ((*state == 0) && !irq_is_enabled())
+               enable_irq();
+       else
+               (*state)++;
+}
+
+static __inline void
+disable_irqsave(int8_t* state)
+{
+       if ((*state == 0) && irq_is_enabled())
+               disable_irq();
+       else 
+               (*state)--;
+}
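+
+// Example of the nesting discipline (a sketch, not from this commit), assuming
+// interrupts start enabled:
+//
+//        int8_t state = 0;
+//        disable_irqsave(&state); // actually disables; state stays 0
+//        disable_irqsave(&state); // nested: state becomes -1
+//        enable_irqsave(&state);  // state back to 0; still disabled
+//        enable_irqsave(&state);  // actually re-enables interrupts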
+
+static __inline void
+cpu_relax(void)
+{
+       int ctr = 8;
+       asm volatile("1: deccc %0; bne 1b; nop" :
+                    "=r"(ctr) : "0"(ctr) : "cc","memory");
+}
+
+static __inline void
+cpu_halt(void)
+{
+       asm volatile("1: ba 1b; nop" : : : "memory");
+}
+
+static __inline void
+clflush(uintptr_t* addr)
+{
+       asm volatile("flush %0" : : "r"(addr));
+}
+
+static __inline int
+irq_is_enabled(void)
+{
+       return (read_psr() & 0xF00) == 0;
+}
+
+static __inline uint32_t
+core_id(void)
+{
+       uint32_t reg;
+       __asm__ __volatile__("mov %" XSTR(CORE_ID_REG) ",%0" : "=r"(reg));
+       return reg;
+}
+
+static __inline void
+cache_flush(void)
+{
+}
+
+static __inline void
+reboot(void)
+{
+       frontend_syscall(RAMP_SYSCALL_exit,0,0,0);
+       while(1);
+}
+
+static __inline void
+lcr3(uint32_t val)
+{
+       extern uintptr_t mmu_context_table[NCONTEXTS];
+       *mmu_context_table = val >> 4 | PTE_PTD;
+       tlbflush();
+}
+
+static __inline uint32_t
+rcr3(void)
+{
+       extern uintptr_t mmu_context_table[NCONTEXTS];
+       return (*mmu_context_table & ~0x3) << 4;
+}
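+
+// Worked example of the PTD encoding above: lcr3(0x40000000) stores
+// (0x40000000 >> 4) | PTE_PTD = 0x04000001 in the context table, and rcr3()
+// recovers (0x04000001 & ~0x3) << 4 = 0x40000000.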
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* !ROS_INC_ARCH_H */
diff --git a/arch/sparc/include/atomic.h b/arch/sparc/include/atomic.h
new file mode 100644 (file)
index 0000000..33ed9cd
--- /dev/null
@@ -0,0 +1,89 @@
+#ifndef ROS_INCLUDE_ATOMIC_H
+#define ROS_INCLUDE_ATOMIC_H
+
+#include <arch/types.h>
+
+#define mb() {rmb(); wmb();}
+#define rmb()
+#define wmb() ({ asm volatile("stbar"); })
+
+typedef volatile uint32_t spinlock_t;
+
+//linux style atomic ops
+typedef struct {volatile int32_t real_num;} atomic_t;
+#define atomic_read(atom) ((atom)->real_num >> 8)
+#define atomic_init(i) {(i) << 8}
+//and the atomic incs, etc take an atomic_t ptr, deref inside
+
+static inline void atomic_set(atomic_t*SAFE number, int32_t val);
+static inline void atomic_add(atomic_t*SAFE number, int32_t inc);
+static inline void atomic_inc(atomic_t*SAFE number);
+static inline void atomic_dec(atomic_t*SAFE number);
+static inline uint32_t spin_trylock(spinlock_t*SAFE lock);
+static inline void spin_lock(spinlock_t*SAFE lock);
+static inline void spin_unlock(spinlock_t*SAFE lock);
+
+/* Inlined functions declared above */
+
+static inline void atomic_add(atomic_t*SAFE number, int32_t inc)
+{
+       // this is pretty clever.  the lower 8 bits (i.e., byte 3, since SPARC
+       // is big-endian) of the atomic_t serve as a spinlock.  let's acquire it.
+       spin_lock((spinlock_t*SAFE)number);
+
+       // compute new counter value.
+       // must shift the old counter right by 8
+       inc += number->real_num >> 8;
+
+       // set the new counter value.
+       // since the lower 8 bits will be cleared by the shift,
+       // we also release the lock (for free!)
+       number->real_num = inc << 8;
+}
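+
+// Worked example of the encoding: a counter value of 5 is stored as
+// 5 << 8 = 0x500.  ldstub writes 0xFF to the lock byte, so while atomic_add
+// holds the lock real_num reads as 0x5FF; storing (5+1) << 8 = 0x600 both
+// publishes the new count and zeroes the lock byte in a single store.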
+
+static inline void atomic_set(atomic_t*SAFE number, int32_t val)
+{
+       // this works basically the same as atomic_add
+       spin_lock((spinlock_t*SAFE)number);
+       number->real_num = val << 8;
+}
+
+static inline void atomic_inc(atomic_t*SAFE number)
+{
+       atomic_add(number,1);
+}
+
+static inline void atomic_dec(atomic_t*SAFE number)
+{
+       atomic_add(number,-1);
+}
+
+static inline uint32_t spin_trylock(spinlock_t*SAFE lock)
+{
+       // we don't need to initialize reg, but it quiets the compiler
+       uint32_t reg;
+       asm volatile("ldstub [%1+3],%0"
+                    : "=r"(reg)
+                    : "r"(lock)
+                    : "memory");
+       return reg;
+}
+
+static inline uint8_t spin_locked(spinlock_t*SAFE lock)
+{
+       return *((volatile uint8_t*COUNT(sizeof(spinlock_t)))lock+3);
+}
+
+static inline void spin_lock(spinlock_t*SAFE lock)
+{
+       while(spin_trylock(lock))
+               while(spin_locked(lock));
+}
+
+static inline void spin_unlock(spinlock_t*SAFE lock)
+{
+       wmb();
+       *((volatile uint8_t*COUNT(sizeof(spinlock_t)))lock+3) = 0;
+}
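+
+// Typical use (a sketch): a statically initialized lock guarding a critical
+// section.  A spinlock_t starts unlocked when zeroed.
+//
+//        static spinlock_t lock = 0;
+//        spin_lock(&lock);
+//        /* ... critical section ... */
+//        spin_unlock(&lock);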
+
+#endif /* !ROS_INCLUDE_ATOMIC_H */
diff --git a/arch/sparc/include/console.h b/arch/sparc/include/console.h
new file mode 100644 (file)
index 0000000..856f146
--- /dev/null
@@ -0,0 +1,19 @@
+/* See COPYRIGHT for copyright information. */
+
+#ifndef _CONSOLE_H_
+#define _CONSOLE_H_
+#ifndef ROS_KERNEL
+# error "This is a ROS kernel header; user programs should not #include it"
+#endif
+
+#include <arch/types.h>
+
+#define CRT_ROWS       25
+#define CRT_COLS       80
+#define CRT_SIZE       (CRT_ROWS * CRT_COLS)
+
+void cons_init(void);
+void cons_putc(int c);
+int cons_getc(void);
+
+#endif /* _CONSOLE_H_ */
diff --git a/arch/sparc/include/frontend.h b/arch/sparc/include/frontend.h
new file mode 100644 (file)
index 0000000..fbabfab
--- /dev/null
@@ -0,0 +1,23 @@
+#ifndef ROS_ARCH_FRONTEND_H
+#define ROS_ARCH_FRONTEND_H
+
+#include <arch/types.h>
+
+int32_t frontend_syscall(int32_t syscall_num, uint32_t arg0, uint32_t arg1, uint32_t arg2);
+
+#define RAMP_SYSCALL_exit              1
+#define RAMP_SYSCALL_read              3
+#define RAMP_SYSCALL_write             4
+#define RAMP_SYSCALL_open              5
+#define RAMP_SYSCALL_close             6
+#define RAMP_SYSCALL_unlink            10
+#define RAMP_SYSCALL_chdir             12
+#define RAMP_SYSCALL_brk               17
+#define RAMP_SYSCALL_stat              18
+#define RAMP_SYSCALL_lseek             19
+#define RAMP_SYSCALL_fstat             28
+#define RAMP_SYSCALL_lstat             88
+#define RAMP_SYSCALL_getch             98
+#define RAMP_SYSCALL_gettimeofday      156
+
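+// As a concrete use, reboot() in arch.h exits the simulation with
+// frontend_syscall(RAMP_SYSCALL_exit,0,0,0).  A console write presumably takes
+// the familiar POSIX shape (an assumption; the frontend implementation is not
+// in this header):
+//        frontend_syscall(RAMP_SYSCALL_write, 1 /* stdout */, (uint32_t)buf, len);
+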
+#endif /* !ROS_ARCH_FRONTEND_H */
diff --git a/arch/sparc/include/kbdreg.h b/arch/sparc/include/kbdreg.h
new file mode 100644 (file)
index 0000000..0c7ffea
--- /dev/null
@@ -0,0 +1,83 @@
+#ifndef ROS_KBDREG_H
+#define ROS_KBDREG_H
+
+// Special keycodes
+#define KEY_HOME       0xE0
+#define KEY_END                0xE1
+#define KEY_UP         0xE2
+#define KEY_DN         0xE3
+#define KEY_LF         0xE4
+#define KEY_RT         0xE5
+#define KEY_PGUP       0xE6
+#define KEY_PGDN       0xE7
+#define KEY_INS                0xE8
+#define KEY_DEL                0xE9
+
+
+/* This is i8042reg.h + kbdreg.h from NetBSD. */
+
+#define        KBSTATP         0x64    /* kbd controller status port(I) */
+#define         KBS_DIB        0x01    /* kbd data in buffer */
+#define         KBS_IBF        0x02    /* kbd input buffer low */
+#define         KBS_WARM       0x04    /* kbd system flag (warm boot) */
+#define         KBS_OCMD       0x08    /* kbd output buffer has command */
+#define         KBS_NOSEC      0x10    /* kbd security lock not engaged */
+#define         KBS_TERR       0x20    /* kbd transmission error */
+#define         KBS_RERR       0x40    /* kbd receive error */
+#define         KBS_PERR       0x80    /* kbd parity error */
+
+#define        KBCMDP          0x64    /* kbd controller port(O) */
+#define         KBC_RAMREAD    0x20    /* read from RAM */
+#define         KBC_RAMWRITE   0x60    /* write to RAM */
+#define         KBC_AUXDISABLE 0xa7    /* disable auxiliary port */
+#define         KBC_AUXENABLE  0xa8    /* enable auxiliary port */
+#define         KBC_AUXTEST    0xa9    /* test auxiliary port */
+#define         KBC_KBDECHO    0xd2    /* echo to keyboard port */
+#define         KBC_AUXECHO    0xd3    /* echo to auxiliary port */
+#define         KBC_AUXWRITE   0xd4    /* write to auxiliary port */
+#define         KBC_SELFTEST   0xaa    /* start self-test */
+#define         KBC_KBDTEST    0xab    /* test keyboard port */
+#define         KBC_KBDDISABLE 0xad    /* disable keyboard port */
+#define         KBC_KBDENABLE  0xae    /* enable keyboard port */
+#define         KBC_PULSE0     0xfe    /* pulse output bit 0 */
+#define         KBC_PULSE1     0xfd    /* pulse output bit 1 */
+#define         KBC_PULSE2     0xfb    /* pulse output bit 2 */
+#define         KBC_PULSE3     0xf7    /* pulse output bit 3 */
+
+#define        KBDATAP         0x60    /* kbd data port(I) */
+#define        KBOUTP          0x60    /* kbd data port(O) */
+
+#define        K_RDCMDBYTE     0x20
+#define        K_LDCMDBYTE     0x60
+
+#define        KC8_TRANS       0x40    /* convert to old scan codes */
+#define        KC8_MDISABLE    0x20    /* disable mouse */
+#define        KC8_KDISABLE    0x10    /* disable keyboard */
+#define        KC8_IGNSEC      0x08    /* ignore security lock */
+#define        KC8_CPU         0x04    /* exit from protected mode reset */
+#define        KC8_MENABLE     0x02    /* enable mouse interrupt */
+#define        KC8_KENABLE     0x01    /* enable keyboard interrupt */
+#define        CMDBYTE         (KC8_TRANS|KC8_CPU|KC8_MENABLE|KC8_KENABLE)
+
+/* keyboard commands */
+#define        KBC_RESET       0xFF    /* reset the keyboard */
+#define        KBC_RESEND      0xFE    /* request the keyboard resend the last byte */
+#define        KBC_SETDEFAULT  0xF6    /* resets keyboard to its power-on defaults */
+#define        KBC_DISABLE     0xF5    /* as per KBC_SETDEFAULT, but also disable key scanning */
+#define        KBC_ENABLE      0xF4    /* enable key scanning */
+#define        KBC_TYPEMATIC   0xF3    /* set typematic rate and delay */
+#define        KBC_SETTABLE    0xF0    /* set scancode translation table */
+#define        KBC_MODEIND     0xED    /* set mode indicators(i.e. LEDs) */
+#define        KBC_ECHO        0xEE    /* request an echo from the keyboard */
+
+/* keyboard responses */
+#define        KBR_EXTENDED    0xE0    /* extended key sequence */
+#define        KBR_RESEND      0xFE    /* needs resend of command */
+#define        KBR_ACK         0xFA    /* received a valid command */
+#define        KBR_OVERRUN     0x00    /* flooded */
+#define        KBR_FAILURE     0xFD    /* diagnostic failure */
+#define        KBR_BREAK       0xF0    /* break code prefix - sent on key release */
+#define        KBR_RSTDONE     0xAA    /* reset complete */
+#define        KBR_ECHO        0xEE    /* echo response */
+
+#endif /* !ROS_KBDREG_H */
diff --git a/arch/sparc/include/mmu.h b/arch/sparc/include/mmu.h
new file mode 100644 (file)
index 0000000..d172c6a
--- /dev/null
@@ -0,0 +1,139 @@
+#ifndef ROS_INC_MMU_H
+#define ROS_INC_MMU_H
+
+/*
+ * This file contains definitions for the SPARC reference memory management
+ * unit (MMU), including paging-related data structures and constants and
+ * the MMU control, context, and fault registers.
+ */
+
+/*
+ *
+ *     Part 1.  Paging data structures and constants.
+ *
+ */
+
+// A linear address 'la' has a four-part structure as follows:
+//
+// +--------8--------+------6------+------6------+-----------12----------+
+// |  L1 Page Table  |    L2 PT    |    L3 PT    |  Offset within Page   |
+// |      Index      |    Index    |    Index    |                       |
+// +-----------------+-------------+-------------+-----------------------+
+//  \--- L1X(la) --/  \- L2X(la) -/ \- L3X(la) -/ \----- PGOFF(la) -----/
+//  \----------- PPN(la) -----------------------/
+//
+// The L1X, L2X, L3X, PGOFF, and PPN macros decompose linear addresses
+// as shown.  To construct a linear address la from L1X(la), L2X(la),
+// L3X(la), and PGOFF(la), use PGADDR(L1X(la), L2X(la), L3X(la), PGOFF(la)).
+
+// page number field of address
+#define PPN(la)                (((uintptr_t) (la)) >> L3PGSHIFT)
+
+// index into L1 PT
+#define L1X(la)                ((((uintptr_t) (la)) >> L1PGSHIFT) & 0xFF)
+
+// index into L2 PT
+#define L2X(la)                ((((uintptr_t) (la)) >> L2PGSHIFT) & 0x3F)
+
+// index into L3 PT
+#define L3X(la)                ((((uintptr_t) (la)) >> L3PGSHIFT) & 0x3F)
+
+// offset in page
+#define PGOFF(la)      (((uintptr_t) (la)) & 0xFFF)
+
+// construct linear address from indexes and offset
+#define PGADDR(l1, l2, l3, o) ((void*SNT) ((l1) << L1PGSHIFT | (l2) << L2PGSHIFT | (l3) << L3PGSHIFT | (o)))
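+
+// Worked example: for la = 0x12345678,
+//        L1X(la) = 0x12, L2X(la) = 0x0D, L3X(la) = 0x05, PGOFF(la) = 0x678,
+// and PGADDR(0x12, 0x0D, 0x05, 0x678) reassembles 0x12345678.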
+
+// construct PTE from PPN and flags
+#define PTE(ppn, flags) ((ppn) << 8 | (flags))
+
+// construct PTD from physical address
+#define PTD(pa) ((pa) >> 4 | PTE_PTD)
+
+// Number of L1 page tables (contexts) the MMU can store at any time
+#define NCONTEXTS      8
+
+// Page directory and page table constants.
+#define NL3ENTRIES     64              // # entries in an L3 page table
+#define NL2ENTRIES     64              // # entries in an L2 page table
+#define NL1ENTRIES     256             // # entries in an L1 page table
+
+#define L3PGSIZE       4096            // bytes mapped by an L3 page
+#define L3PGSHIFT      12              // log2(L3PGSIZE)
+
+#define L2PGSIZE       (4096*64)       // bytes mapped by an L2 page
+#define L2PGSHIFT      (12+6)          // log2(L2PGSIZE)
+
+#define L1PGSIZE       (4096*64*64)    // bytes mapped by an L1 page
+#define L1PGSHIFT      (12+6+6)        // log2(L1PGSIZE)
+
+// The only page size we actually support for now is L3
+#define PGSIZE         L3PGSIZE
+#define        PGSHIFT         L3PGSHIFT
+
+// Page table/directory entry flags.
+#define PTE_PTD                0x001   // Entry is a Page Table Descriptor
+#define PTE_PTE                0x002   // Entry is a Page Table Entry
+#define PTE_R          0x020   // Referenced
+#define PTE_M          0x040   // Modified
+#define PTE_C          0x080   // Cacheable
+
+// commonly used access modes
+#define PTE_KERN_RW    (7 << 2)                // Kernel Read/Write
+#define PTE_KERN_RO    (6 << 2)                // Kernel Read-Only
+#define PTE_USER_RW    (3 << 2)                // Kernel/User Read/Write
+#define PTE_USER_RO    (2 << 2)                // Kernel/User Read-Only
+
+// x86 equivalencies
+#define PTE_P          PTE_PTE                 // present <=> PTE
+#define PTSIZE         L1PGSIZE                // dunno yet
+#define NPDENTRIES     NL1ENTRIES              // this either
+#define PDX(la)                L1X(la)                 // for env stuff
+#define PTX(la)                L3X(la)                 // same
+
+#define PTE_ACC(PTE)   (((PTE) & 0x1C) >> 2)   // Access bits
+
+// based upon PTE and Supervisor bit, can I read/write/execute this page?
+#define PTE_RD(PTE,S)  (PTE_ACC(PTE) != 4 && (PTE_ACC(PTE) < 6 || (S)))
+#define PTE_WR(PTE,S)  ((PTE_ACC(PTE) & 0x1) && (PTE_ACC(PTE) < 4 || (S)))
+#define PTE_EX(PTE,S)  (PTE_ACC(PTE) == 4 || ((PTE_ACC(PTE) & 0x2) && (PTE_ACC(PTE) < 4 || (S))))
+
+// +-----+-------------------+
+// |     |   Allowed Access  |
+// | ACC +------+------------+
+// |     | User | Supervisor |
+// +-----+------+------------+
+// |  0  |  R-- |  R--       |
+// |  1  |  RW- |  RW-       |
+// |  2  |  R-X |  R-X       |
+// |  3  |  RWX |  RWX       |
+// |  4  |  --X |  --X       |
+// |  5  |  R-- |  RW-       |
+// |  6  |  --- |  R-X       |
+// |  7  |  --- |  RWX       |
+// +-----+------+------------+
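+//
+// For instance, ACC = 5 (user read-only, supervisor read/write) gives
+// PTE_RD(pte,0) = 1, PTE_WR(pte,0) = 0, and PTE_WR(pte,1) = 1.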
+
+// address in page table entry
+#define PTE_ADDR(pte)  (((physaddr_t) (pte) & ~0xFF) << 4)
+
+// address in page table descriptor
+#define PTD_ADDR(ptd)  (((physaddr_t) (ptd) & ~0x3) << 4)
+
+// MMU Control Register flags
+#define MMU_CR_E       0x00000001      // Protection Enable
+#define MMU_CR_NF      0x00000002      // No Fault mode
+#define MMU_CR_PSO     0x00000080      // Partial Store Order (TSO disabled)
+
+// MMU Fault Status Register flags
+#define MMU_FSR_USER   0x00000020      // Fault caused by user-space access
+#define MMU_FSR_EX     0x00000040      // Fault occurred in instruction-space
+#define MMU_FSR_WR     0x00000080      // Fault caused by a store
+
+// MMU Register Addresses
+#define MMU_REG_CTRL   0x00000000      // MMU Control Register
+#define MMU_REG_CTXTBL 0x00000100      // MMU Context Table Pointer Register
+#define MMU_REG_CTX    0x00000200      // MMU Context Register
+#define MMU_REG_FSR    0x00000300      // MMU Fault Status Register
+#define MMU_REG_FAR    0x00000400      // MMU Fault Address Register
+
+#endif /* !ROS_INC_MMU_H */
diff --git a/arch/sparc/include/smp.h b/arch/sparc/include/smp.h
new file mode 100644 (file)
index 0000000..604c7e9
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef ROS_ARCH_SMP_H
+#define ROS_ARCH_SMP_H
+
+#include <arch/types.h>
+#include <arch/arch.h>
+#include <atomic.h>
+
+typedef volatile uint8_t wait_list_t[MAX_NUM_CPUS];
+
+typedef struct
+{
+       wait_list_t wait_list;
+       spinlock_t lock;
+} handler_wrapper_t;
+
+#endif
diff --git a/arch/sparc/include/sparc.h b/arch/sparc/include/sparc.h
new file mode 100644 (file)
index 0000000..4f95bba
--- /dev/null
@@ -0,0 +1,166 @@
+#ifndef ROS_INC_SPARC_H
+#define ROS_INC_SPARC_H
+
+#define CORE_ID_REG    %asr15
+#define NUM_CORES_REG  %asr14
+#define MEMSIZE_MB_REG %asr13
+
+#define PSR_CWP                0x0000001F
+#define PSR_ET         0x00000020
+#define PSR_PS         0x00000040
+#define PSR_S          0x00000080
+#define PSR_PIL                0x00000F00
+#define PSR_EF         0x00001000
+#define PSR_EC         0x00002000
+#define PSR_RESERVED   0x000FC000
+#define PSR_ICC                0x00F00000
+#define PSR_VER                0x0F000000
+#define PSR_IMPL       0xF0000000
+
+#ifndef __ASSEMBLER__
+
+#define STR(arg) #arg
+#define XSTR(arg) STR(arg)
+
+#include <arch/types.h>
+#include <arch/trap.h>
+
+static __inline uint32_t read_psr(void) __attribute__((always_inline));
+static __inline uint32_t read_wim(void) __attribute__((always_inline));
+static __inline uint32_t read_tbr(void) __attribute__((always_inline));
+static __inline uint32_t read_mmu_reg(uint32_t which) __attribute__((always_inline));
+static __inline uint32_t read_y(void) __attribute__((always_inline));
+static __inline uint32_t read_fsr(void) __attribute__((always_inline));
+static __inline uint64_t read_perfctr(uint32_t which) __attribute__((always_inline));
+static __inline void write_psr(uint32_t val) __attribute__((always_inline));
+static __inline void write_wim(uint32_t val) __attribute__((always_inline));
+static __inline void write_tbr(uint32_t val) __attribute__((always_inline));
+static __inline void write_mmu_reg(uint32_t which, uint32_t val) __attribute__((always_inline));
+static __inline void write_y(uint32_t val) __attribute__((always_inline));
+static __inline void write_fsr(uint32_t val) __attribute__((always_inline));
+static __inline uint32_t memsize_mb(void) __attribute__((always_inline));
+static __inline uint32_t mmu_probe(uint32_t va) __attribute__((always_inline));
+
+uint32_t send_active_message(uint32_t dst, amr_t pc, uint32_t arg0, uint32_t arg1, uint32_t arg2);
+void flush_windows(void);
+
+#define store_alternate(addr,asi,data) ({ uint32_t __my_addr = (addr); uint32_t __my_data = (data); __asm__ __volatile__ ("sta %0,[%1] %2" : : "r"(__my_data),"r"(__my_addr),"i"(asi)); })
+#define load_alternate(addr,asi) ({ uint32_t __my_addr = (addr); uint32_t __my_data; __asm__ __volatile__ ("lda [%1] %2,%0" : "=r"(__my_data) : "r"(__my_addr),"i"(asi)); __my_data; })
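+
+// The two macros above wrap SPARC's sta/lda alternate-space instructions.
+// The usage below assumes ASI 0x4 selects the SRMMU registers, ASI 0x3
+// the MMU probe/flush space, and ASI 0x2 the performance counters on
+// this target.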
+
+static __inline uint32_t
+read_psr(void)
+{
+       uint32_t reg;
+       asm volatile ("mov %%psr,%0" : "=r"(reg));
+       return reg;
+}
+
+static __inline uint32_t
+read_wim(void)
+{
+       uint32_t reg;
+       asm volatile ("mov %%wim,%0" : "=r"(reg));
+       return reg;
+}
+
+static __inline uint32_t
+read_tbr(void)
+{
+       uint32_t reg;
+       asm volatile ("mov %%tbr,%0" : "=r"(reg));
+       return reg;
+}
+
+static __inline uint32_t
+read_mmu_reg(uint32_t which)
+{
+       return load_alternate(which,4);
+}
+
+static __inline uint32_t
+read_y(void)
+{
+       uint32_t reg;
+       asm volatile ("mov %%y,%0" : "=r"(reg));
+       return reg;
+}
+
+static __inline uint32_t
+read_fsr(void)
+{
+       uint32_t reg;
+       asm volatile ("st %%fsr,%0" : "=m"(reg));
+       return reg;
+}
+
+static __inline uint64_t
+read_perfctr(uint32_t which)
+{
+       uint32_t hi,lo;
+       intptr_t addr = which<<3;
+       hi = load_alternate(addr,2);
+       lo = load_alternate(addr+4,2);
+       return (((uint64_t)hi) << 32) | lo;
+}
+
+static __inline void
+write_psr(uint32_t val)
+{
+       asm volatile ("mov %0,%%psr; nop;nop;nop" : : "r"(val) : "memory");
+}
+
+static __inline void
+write_wim(uint32_t val)
+{
+       asm volatile ("mov %0,%%wim; nop;nop;nop" : : "r"(val) : "memory");
+}
+
+static __inline void
+write_tbr(uint32_t val)
+{
+       asm volatile ("mov %0,%%tbr; nop;nop;nop" : : "r"(val) : "memory");
+}
+
+static __inline void
+write_mmu_reg(uint32_t which, uint32_t val)
+{
+       store_alternate(which,4,val);
+}
+
+static __inline void
+write_y(uint32_t val)
+{
+       asm volatile ("mov %0,%%y; nop;nop;nop" : : "r"(val) : "memory");
+}
+
+static __inline void
+write_fsr(uint32_t val)
+{
+       asm volatile ("ld %0,%%fsr; nop;nop;nop" : : "m"(val) : "memory");
+}
+
+static __inline uint32_t
+memsize_mb(void)
+{
+       uint32_t reg;
+       __asm__ __volatile__("mov %" XSTR(MEMSIZE_MB_REG) ",%0" : "=r"(reg));
+       return reg;
+}
+
+static __inline uint32_t
+num_cores(void)
+{
+       uint32_t reg;
+       __asm__ __volatile__("mov %" XSTR(NUM_CORES_REG) ",%0" : "=r"(reg));
+       return reg;
+}
+
+static __inline uint32_t
+mmu_probe(uint32_t va)
+{
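+	// probe the SRMMU for the PTE mapping va: bits [11:8] of the probe
+	// address select the probe type, and 0x400 (type 4) requests an
+	// "entire" probe, which returns the PTE (or 0 on a miss)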
+	return load_alternate((va & ~0xFFF) | 0x400, 3);
+}
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* !ROS_INC_SPARC_H */
diff --git a/arch/sparc/include/timer.h b/arch/sparc/include/timer.h
new file mode 100644 (file)
index 0000000..2e1a221
--- /dev/null
@@ -0,0 +1,16 @@
+#ifndef ROS_ARCH_TIMER_H
+#define ROS_ARCH_TIMER_H
+
+#define INTERRUPT_TIMER_HZ     100
+
+#include <arch/types.h>
+
+typedef struct system_timing {
+       uint64_t tsc_freq;
+} system_timing_t;
+
+extern system_timing_t system_timing;
+
+void timer_init(void);
+
+#endif /* !ROS_ARCH_TIMER_H */
diff --git a/arch/sparc/include/trap.h b/arch/sparc/include/trap.h
new file mode 100644 (file)
index 0000000..1e6b700
--- /dev/null
@@ -0,0 +1,45 @@
+#ifndef ROS_INC_ARCH_TRAP_H
+#define ROS_INC_ARCH_TRAP_H
+
+#define SIZEOF_TRAPFRAME_T     0xA8
+#define SIZEOF_ACTIVE_MESSAGE_T        0x18
+
+#ifndef __ASSEMBLER__
+
+#include <arch/types.h>
+
+typedef struct
+{
+       uint32_t gpr[32];
+       uint32_t psr;
+       uint32_t pc;
+       uint32_t npc;
+       uint32_t wim;
+       uint32_t tbr;
+       uint32_t y;
+       uint32_t fault_status;
+       uint32_t fault_addr;
+       uint64_t timestamp;
+} trapframe_t;
+
+typedef void (*amr_t)(trapframe_t* tf, uint32_t srcid, uint32_t a0, uint32_t a1, uint32_t a2);
+
+typedef struct
+{
+       uint32_t srcid;
+       amr_t pc;
+       uint32_t arg0;
+       uint32_t arg1;
+       uint32_t arg2;
+       uint32_t pad;
+} active_message_t;
+
+typedef struct
+{
+       uint32_t fpr[32];
+       uint32_t fsr;
+} ancillary_state_t;
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* !ROS_INC_ARCH_TRAP_H */
diff --git a/arch/sparc/include/trap_table.h b/arch/sparc/include/trap_table.h
new file mode 100644 (file)
index 0000000..80c88f4
--- /dev/null
@@ -0,0 +1,11 @@
+#ifndef ROS_ARCH_TRAP_TABLE_H
+#define ROS_ARCH_TRAP_TABLE_H
+
+#define TRAP_TABLE_ENTRY(label) sethi %hi(handle_trap),%l6; sethi %hi(label),%l5; jmp %lo(handle_trap)+%l6; or %lo(label),%l5,%l5
+#define JMP(target) sethi %hi(target),%l4; jmp %lo(target)+%l4; mov %psr,%l0; nop
+
+#define ENTER_ERROR_MODE unimp; unimp; unimp; unimp
+#define UNHANDLED_TRAP TRAP_TABLE_ENTRY(unhandled_trap)
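+
+// Each slot in the SPARC trap table is 4 instructions (16 bytes), and
+// TRAP_TABLE_ENTRY expands to exactly 4: it builds handle_trap's address
+// in %l6, jumps there, and uses the delay slot to finish forming the
+// specific handler's address in %l5 for handle_trap to dispatch on.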
+
+
+#endif
diff --git a/arch/sparc/include/types.h b/arch/sparc/include/types.h
new file mode 100644 (file)
index 0000000..beddd75
--- /dev/null
@@ -0,0 +1,144 @@
+#ifndef ROS_INC_TYPES_H
+#define ROS_INC_TYPES_H
+
+#define BIG_ENDIAN
+
+#ifndef NULL
+#define NULL ((void*) 0)
+#endif
+
+#ifndef TRUE
+#define TRUE   1
+#endif
+
+#ifndef FALSE
+#define FALSE  0
+#endif
+
+#define CHECK_FLAG(flags,bit)   ((flags) & (1 << (bit)))
+
+// Represents true-or-false values
+typedef int bool;
+
+// Explicitly-sized versions of integer types
+typedef __signed char int8_t;
+typedef unsigned char uint8_t;
+typedef short int16_t;
+typedef unsigned short uint16_t;
+typedef int int32_t;
+typedef unsigned int uint32_t;
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+
+// Pointers and addresses are 32 bits long.
+// We use pointer types to represent virtual addresses,
+// uintptr_t to represent the numerical values of virtual addresses,
+// and physaddr_t to represent physical addresses.
+typedef int32_t intptr_t;
+typedef uint32_t uintptr_t;
+typedef uint32_t physaddr_t;
+
+// Registers are 32 bits long
+typedef int32_t intreg_t;
+typedef uint32_t uintreg_t;
+
+// Page numbers are 32 bits long.
+typedef uint32_t ppn_t;
+
+// size_t is used for memory object sizes.
+typedef uint32_t size_t;
+// ssize_t is a signed version of size_t, used in case there might be an
+// error return.
+typedef int32_t ssize_t;
+
+// off_t is used for file offsets and lengths.
+typedef int32_t off_t;
+
+// Efficient min and max operations
+#define MIN(_a, _b)                                            \
+({                                                             \
+       typeof(_a) __a = (_a);                                  \
+       typeof(_b) __b = (_b);                                  \
+       __a <= __b ? __a : __b;                                 \
+})
+#define MAX(_a, _b)                                            \
+({                                                             \
+       typeof(_a) __a = (_a);                                  \
+       typeof(_b) __b = (_b);                                  \
+       __a >= __b ? __a : __b;                                 \
+})
+
+// Rounding operations (efficient when n is a power of 2)
+// Round down to the nearest multiple of n
+#define ROUNDDOWN(a, n)                                                \
+({                                                             \
+       uint32_t __a = (uint32_t) (a);                          \
+       (typeof(a)) (__a - __a % (n));                          \
+})
+// Round up to the nearest multiple of n
+#define ROUNDUP(a, n)                                          \
+({                                                             \
+       uint32_t __n = (uint32_t) (n);                          \
+       (typeof(a)) (ROUNDDOWN((uint32_t) (a) + __n - 1, __n)); \
+})
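+// For example, with n = 0x1000: ROUNDDOWN(0x1234, 0x1000) == 0x1000 and
+// ROUNDUP(0x1234, 0x1000) == 0x2000; values already on a boundary are
+// unchanged by both.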
+
+// Return the offset of 'member' relative to the beginning of a struct type
+#ifndef offsetof
+#define offsetof(type, member)  ((size_t) (&((type*)0)->member))
+#endif
+
+// Ivy currently can only handle 63 bits (OCaml thing), so use this to make
+// a uint64_t programmatically
+#define UINT64(upper, lower) ( (((uint64_t)(upper)) << 32) | (lower) )
+
+/*********************** Bitmask stuff **********************/
+#define BYTES_FOR_BITMASK(size) (size)
+#define BYTES_FOR_BITMASK_WITH_CHECK(size) (size)
+#define DECL_BITMASK(name, size) uint8_t (name)[BYTES_FOR_BITMASK((size))]
+
+#define GET_BITMASK_BIT(name, bit) ((name)[(bit)])
+#define SET_BITMASK_BIT(name, bit) ((name)[(bit)] = 1)
+#define CLR_BITMASK_BIT(name, bit) ((name)[(bit)] = 0)
+#define SET_BITMASK_BIT_ATOMIC(name, bit) SET_BITMASK_BIT(name,bit)
+#define CLR_BITMASK_BIT_ATOMIC(name, bit) CLR_BITMASK_BIT(name,bit)
+
+#define CLR_BITMASK(name, size) \
+({ \
+       {TRUSTEDBLOCK \
+       memset((void*)((uintptr_t)(name)), 0, BYTES_FOR_BITMASK((size))); \
+       } \
+})
+
+#define FILL_BITMASK(name, size) \
+({ \
+       {TRUSTEDBLOCK \
+       memset((void*)((uintptr_t)(name)), 1, BYTES_FOR_BITMASK((size))); \
+       } \
+}) 
+
+#define COPY_BITMASK(newmask, oldmask, size) \
+({ \
+       {TRUSTEDBLOCK \
+       memcpy((void*)((uintptr_t)(newmask)), \
+           (void*)((uintptr_t)(oldmask)), \
+           BYTES_FOR_BITMASK((size))); \
+       } \
+})
+
+// this checks the entire last byte, so keep it 0 in the other macros
+#define BITMASK_IS_CLEAR(name, size) ({ \
+       uint32_t __i; \
+       uint8_t __notclear = 0; \
+       for(__i = 0; __i < BYTES_FOR_BITMASK(size); __i++) \
+               __notclear |= (name)[__i]; \
+	!__notclear; })
+
+#define PRINT_BITMASK(name, size) { \
+       int __i; \
+       for (__i = 0; __i < BYTES_FOR_BITMASK(size); __i++) \
+               printk("%x", (name)[__i]); \
+       printk("\n"); \
+}
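+
+// Minimal usage sketch (illustrative; this scheme spends one byte per bit):
+//   DECL_BITMASK(mask, 8);       // uint8_t mask[8]
+//   CLR_BITMASK(mask, 8);
+//   SET_BITMASK_BIT(mask, 3);
+//   assert(!BITMASK_IS_CLEAR(mask, 8));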
+/**************************************************************/
+
+#endif /* !ROS_INC_TYPES_H */
diff --git a/arch/sparc/kernel.ld b/arch/sparc/kernel.ld
new file mode 100644 (file)
index 0000000..1da0116
--- /dev/null
@@ -0,0 +1,62 @@
+/* Simple linker script for the ROS kernel.
+   See the GNU ld 'info' manual ("info ld") to learn the syntax. */
+
+OUTPUT_FORMAT("elf32-sparc", "elf32-sparc", "elf32-sparc")
+OUTPUT_ARCH(sparc)
+ENTRY(_start)
+
+SECTIONS
+{
+	/* Link the kernel at virtual address 0xC0000000, but load it at physical address 0x00000000 */
+
+       .text 0xC0000000 : AT(0x00000000) {
+               *(.text .stub .text.* .gnu.linkonce.t.*)
+       }
+
+       PROVIDE(etext = .);     /* Define the 'etext' symbol to this value */
+
+       .rodata : {
+               *(.rodata .rodata.* .gnu.linkonce.r.*)
+       }
+
+       /* Include debugging information in kernel memory */
+       .stab : {
+               PROVIDE(stab = .);
+               PROVIDE(__STAB_BEGIN__ = .);
+               *(.stab);
+               PROVIDE(estab = .);
+               PROVIDE(__STAB_END__ = .);
+               BYTE(0)         /* Force the linker to allocate space
+                                  for this section */
+       }
+
+       .stabstr : {
+               PROVIDE(stabstr = .);
+               PROVIDE(__STABSTR_BEGIN__ = .);
+               *(.stabstr);
+               PROVIDE(estabstr = .);
+               PROVIDE(__STABSTR_END__ = .);
+               BYTE(0)         /* Force the linker to allocate space
+                                  for this section */
+       }
+
+       /* Adjust the address for the data segment to the next page */
+       . = ALIGN(0x1000);
+
+       /* The data segment */
+       .data : {
+               *(.data)
+       }
+
+       PROVIDE(edata = .);
+
+       .bss : {
+               *(.bss)
+       }
+
+       PROVIDE(end = .);
+
+       /DISCARD/ : {
+               *(.eh_frame .note.GNU-stack)
+       }
+}
diff --git a/arch/sparc/src/Makefrag b/arch/sparc/src/Makefrag
new file mode 100644 (file)
index 0000000..519181e
--- /dev/null
@@ -0,0 +1,25 @@
+# Makefile fragment for ROS kernel.
+# This is NOT a complete makefile;
+# you must run GNU make in the top-level directory
+# where the GNUmakefile is located.
+#
+
+KERN_ARCH_SRC_DIR = $(KERN_DIR)/src/arch
+
+# entry.S must be first, so that it's the first code in the text segment!!!
+#
+# We also snatch the use of a couple handy source files
+# from the lib directory, to avoid gratuitous code duplication.
+KERN_ARCH_SRCFILES := $(KERN_ARCH_SRC_DIR)/entry.S \
+                      $(KERN_ARCH_SRC_DIR)/trap_table.S \
+                      $(KERN_ARCH_SRC_DIR)/trap_entry.S \
+                      $(KERN_ARCH_SRC_DIR)/spillfill.S \
+                      $(KERN_ARCH_SRC_DIR)/trap.c \
+                      $(KERN_ARCH_SRC_DIR)/boot.c \
+                      $(KERN_ARCH_SRC_DIR)/cpuinfo.c \
+                      $(KERN_ARCH_SRC_DIR)/console.c \
+                      $(KERN_ARCH_SRC_DIR)/frontend.c \
+                      $(KERN_ARCH_SRC_DIR)/pmap.c \
+                      $(KERN_ARCH_SRC_DIR)/timer.c \
+                      $(KERN_ARCH_SRC_DIR)/env.c \
+                      $(KERN_ARCH_SRC_DIR)/smp.c
diff --git a/arch/sparc/src/boot.c b/arch/sparc/src/boot.c
new file mode 100644 (file)
index 0000000..61f0258
--- /dev/null
@@ -0,0 +1,129 @@
+#include <multiboot.h>
+#include <arch/mmu.h>
+#include <arch/arch.h>
+#include <arch/types.h>
+#include <ros/memlayout.h>
+#include <string.h>
+
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+void
+build_multiboot_info(multiboot_info_t* mbi)
+{
+       uint32_t memsize_kb = memsize_mb()*1024;
+       uint32_t basemem_kb = EXTPHYSMEM/1024;
+
+	memset(mbi,0,sizeof(*mbi));
+
+       mbi->flags = 0x00000001;
+       mbi->mem_lower = basemem_kb;
+       mbi->mem_upper = memsize_kb-basemem_kb;
+}
+
+// set up a basic virtual -> physical mapping so we can boot the kernel
+void
+build_boot_pgdir(void)
+{
+       extern uintptr_t mmu_context_table[NCONTEXTS];
+       extern uintptr_t l1_page_table[NL1ENTRIES];
+
+       // relocate symbols
+       uintptr_t* mmuctxtbl = (uintptr_t*)((uint8_t*)mmu_context_table-KERNBASE);
+       uintptr_t* l1 = (uintptr_t*)((uint8_t*)l1_page_table-KERNBASE);
+
+	uintptr_t kernsize = -KERNBASE;	// 4GB - KERNBASE, via unsigned wraparound
+
+       // make all context table entries invalid
+       int i;
+       for(i = 0; i < NCONTEXTS; i++)
+               mmuctxtbl[i] = 0;
+
+       // except for the zeroth one, which points to our L1 PT
+       *mmuctxtbl = PTD((uintptr_t)l1);
+
+       // make all L1 PTEs invalid by default
+       for(i = 0; i < NL1ENTRIES; i++)
+               l1[i] = 0;
+
+       // Retain the identity mapping
+       // [0,4GB-KERNBASE] -> [0,4GB-KERNBASE]
+	// so we don't nuke ourselves when we turn on protection!
+       for(i = 0; i < kernsize/L1PGSIZE; i++)
+               l1[i] = (i << 20) | PTE_KERN_RW | PTE_PTE;
+
+       // make the relocated mapping
+       // [KERNBASE,4GB] -> [0,4GB-KERNBASE]
+       for(i = 0; i < kernsize/L1PGSIZE; i++)
+               l1[i+KERNBASE/L1PGSIZE] = (i << 20) | PTE_KERN_RW | PTE_PTE;
+}
+
+void
+mmu_init(void)
+{
+	int zero = 0;
+	extern uintptr_t mmu_context_table[NCONTEXTS];
+	uintptr_t* mmuctxtbl = (uintptr_t*)((uint8_t*)mmu_context_table-KERNBASE);
+
+       // set physical address of context table
+	store_alternate(MMU_REG_CTXTBL,4,(uintptr_t)mmuctxtbl>>4);
+
+       // set current context (== 0)
+	store_alternate(MMU_REG_CTX,4,zero);
+
+       // turn on MMU
+	store_alternate(MMU_REG_CTRL,4,MMU_CR_E);
+
+       tlbflush();
+}
+
+// delete temporary mappings used by the entry code
+void
+mmu_boot_cleanup(void)
+{
+       extern uintptr_t l1_page_table[NL1ENTRIES];
+       uintptr_t kernsize = -KERNBASE;
+
+       // make the temporary mapping invalid
+       int i;
+       for(i = 0; i < kernsize/L1PGSIZE; i++)
+               l1_page_table[i] = 0;
+}
+
+void
+mmu_boot(void)
+{
+       int id = core_id(), i, ncores = num_cores();
+
+       static volatile int barrier[MAX_NUM_CPUS] = {0};
+       static volatile int done_0 = 0, done1 = 0;
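+	// until relocate() runs, each core executes at PA = VA - KERNBASE, so
+	// done_0 must be touched through its physical alias (done0 below);
+	// barrier[] and done1 are only used after relocation, so they can be
+	// accessed through their virtual addresses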
+       volatile int* done0 = (int*)((uintptr_t)&done_0 - KERNBASE); 
+
+       if(id == 0)
+       {
+               build_boot_pgdir();
+               *done0 = 1;
+       }
+       else
+               while(!*done0);
+
+       mmu_init();
+
+       extern void relocate(void);
+       relocate();
+
+       if(id == 0)
+       {
+               for(i = 1; i < ncores; i++)
+                       while(!barrier[i]);
+               mmu_boot_cleanup();
+               done1 = 1;
+       }
+       else
+       {
+               barrier[id] = 1;
+               while(!done1);
+       }
+
+       tlbflush();
+}
diff --git a/arch/sparc/src/console.c b/arch/sparc/src/console.c
new file mode 100644 (file)
index 0000000..37c1ff7
--- /dev/null
@@ -0,0 +1,61 @@
+#include <arch/frontend.h>
+
+void
+cons_init(void)
+{
+}
+
+// `High'-level console I/O.  Used by readline and cprintf.
+
+void
+cputbuf(const char*COUNT(len) buf, int len)
+{
+       frontend_syscall(RAMP_SYSCALL_write,1,buf,len);
+}
+
+// Low-level console I/O
+
+inline void
+cons_putc(int c)
+{
+       if(c == '\b')
+       {
+               char buf[3] = {'\b', ' ', '\b'};
+               cputbuf(buf,3);
+       }
+       else
+       {
+               char ch = c;
+               cputbuf(&ch,1);
+       }
+}
+
+
+void
+cputchar(int c)
+{
+        cons_putc(c);
+}
+
+int
+cons_getc()
+{
+       return frontend_syscall(RAMP_SYSCALL_getch,0,0,0);
+}
+
+int
+getchar(void)
+{
+        int c;
+
+        while ((c = cons_getc()) == 0)
+                /* do nothing */;
+        return c;
+}
+
+int
+iscons(int fdnum)
+{
+        // used by readline
+        return 1;
+}
diff --git a/arch/sparc/src/cpuinfo.c b/arch/sparc/src/cpuinfo.c
new file mode 100644 (file)
index 0000000..3609f51
--- /dev/null
@@ -0,0 +1,135 @@
+#include <arch/sparc.h>
+#include <arch/arch.h>
+#include <arch/mmu.h>
+#include <stdio.h>
+#include <assert.h>
+#include <smp.h>
+#include <pmap.h>
+
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+void
+static_asserts_can_go_here()
+{
+       static_assert(SIZEOF_TRAPFRAME_T == sizeof(trapframe_t));
+       static_assert(SIZEOF_TRAPFRAME_T % 8 == 0);
+       static_assert(SIZEOF_ACTIVE_MESSAGE_T == sizeof(active_message_t));
+       static_assert(SIZEOF_ACTIVE_MESSAGE_T % 8 == 0);
+       static_assert(offsetof(env_t,env_tf) % 8 == 0);
+       static_assert(offsetof(env_t,env_ancillary_state) % 8 == 0);
+}
+
+void
+print_cpuinfo(void)
+{
+       uint32_t psr = read_psr();
+       uint32_t wim = read_wim();
+       uint32_t tbr = read_tbr();
+
+       uint32_t mmucr  = read_mmu_reg(MMU_REG_CTRL);
+       uint32_t mmuctp = read_mmu_reg(MMU_REG_CTXTBL);
+       uint32_t mmuctx = read_mmu_reg(MMU_REG_CTX);
+       uint32_t mmufsr = read_mmu_reg(MMU_REG_FSR);
+       uint32_t mmufar = read_mmu_reg(MMU_REG_FAR);
+
+       cprintf("CPU Info:\n");
+       cprintf("ISA:             SPARC V8\n");
+       cprintf("Implementation:  0x%x\n",(psr >> 28) & 0xF);
+       cprintf("Version:         0x%x\n",(psr >> 24) & 0xF);
+       cprintf("Number of Cores: %d\n",num_cpus);
+       cprintf("Current PSR:     0x%08x\n",psr);
+       cprintf("Current WIM:     0x%08x\n",wim);
+       cprintf("Current TBR:     0x%08x\n",tbr);
+
+       cprintf("SRMMU Info:\n");
+       cprintf("Implementation:  0x%x\n",(mmucr >> 28) & 0xF);
+       cprintf("Version:         0x%x\n",(mmucr >> 24) & 0xF);
+       cprintf("Current CR:      0x%08x\n",mmucr);
+       cprintf("Current CTP:     0x%08x\n",mmuctp);
+       cprintf("Current CTX:     0x%08x\n",mmuctx);
+       cprintf("Current FSR:     0x%08x\n",mmufsr);
+       cprintf("Current FAR:     0x%08x\n",mmufar);
+}
+
+void show_mapping(uintptr_t start, size_t size)
+{
+       extern pde_t l1_page_table[NL1ENTRIES];
+       pte_t* pte;
+       uintptr_t i;
+       page_t* page;
+
+       cprintf("   Virtual    Physical  C M R ACC P\n");
+       cprintf("------------------------------------------\n");
+       for(i = 0; i < size; i += PGSIZE, start += PGSIZE)
+       {
+               page = page_lookup(l1_page_table,(void*)start,&pte);
+               cprintf("%08p  ",start);
+               if(page)
+               {
+                       cprintf("%08p  %1d %1d %1d  %1x  %1d\n",page2pa(page),
+                               !!(*pte & PTE_C),!!(*pte & PTE_M),
+                               !!(*pte & PTE_R),PTE_ACC(*pte),
+                               !!(*pte & PTE_PTE));
+               }
+               else
+                       cprintf("%08p\n",0);
+       }
+}
+
+void
+backtrace(void)
+{
+       int i = 0, j;
+       env_t* curenv = curenvs[core_id()];
+
+       flush_windows();
+
+       cprintf("Backtrace:\n");
+
+       // hack: assumes (correctly) we aren't a leaf routine
+       void *sp, *pc, *newsp;
+       __asm__ __volatile__ ("mov %%sp,%0; mov %%i7,%1" : "=r"(sp),"=r"(pc));
+
+       assert(sp >= (void*)KERNBASE);
+
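+	// a register-window save area is 16 words: %l0-%l7 at words 0-7 and
+	// %i0-%i7 at words 8-15, so word 14 is the caller's %i6 (frame/stack
+	// pointer) and word 15 is %i7 (the return address)
+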
+       newsp = *((void**)sp+14);
+       pc = *((void**)sp+15);
+
+       cprintf("initial sp = %x, newsp = %x, pc = %x\n",sp,newsp,pc);
+       assert(newsp >= (void*)KERNBASE);
+
+       while(newsp)
+       {
+               cprintf("#%02d [<%x>]:\n",++i,pc);
+               cprintf("    %%sp: %x   Args:",newsp);
+               for(j = 8; j < 14; j++)
+                       cprintf(" %x",*((void**)sp+j));
+               cprintf("\n");
+
+               sp = newsp;
+
+               if(sp >= (void*)KERNBASE && (void**)sp+16 > ((void**)0+16))
+               {
+                       newsp = *((void**)sp+14);
+                       pc = *((void**)sp+15);
+               }
+               else if(curenv)
+               {
+                       error_t ret;
+                       ret  = memcpy_from_user(curenv,&newsp,(void**)sp+14,sizeof(void*));
+                       ret |= memcpy_from_user(curenv,&pc,(void**)sp+15,sizeof(void*));
+                       if(ret)
+                       {
+                               warn("Backtrace would have caused access exception; corrupt user stack?");
+                               break;
+                       }
+               }
+               else
+               {
+                       warn("Can't backtrace from user with curenv == NULL!");
+                       break;
+               }
+       }
+}
diff --git a/arch/sparc/src/entry.S b/arch/sparc/src/entry.S
new file mode 100644 (file)
index 0000000..56f9eeb
--- /dev/null
@@ -0,0 +1,197 @@
+/* See COPYRIGHT for copyright information. */
+
+#include <arch/mmu.h>
+#include <arch/sparc.h>
+#include <arch/arch.h>
+#include <ros/memlayout.h>
+
+###################################################################
+# The kernel (this code) is linked at address (KERNBASE + 0x00000000),
+# but we tell the bootloader to load it at physical address 
+# 0x00000000, which is the start of extended memory.
+# (See kernel.ld)
+###################################################################
+
+
+###################################################################
+# RELOC(x) maps a symbol x from its link address to its actual
+# location in physical memory (its load address).       
+###################################################################
+#define        RELOC(x) ((x) - KERNBASE)
+
+###################################################################
+# entry point
+###################################################################
+
+.text
+
+.global                _start
+_start:
+       # This is the first code that ever executes.  It executes on all
+       # cores (RAMP Gold-specific).  All we know is that PSR.S (supervisor)
+       # and PSR.ET (enable traps) are both 0.  Before we can enable traps,
+       # we must determine how many register windows we have, set up the
+       # trap table, and set up a stack frame.
+
+       # compute NWINDOWS
+
+       mov     -1,%wim                 ! mark all windows invalid.
+       mov     (PSR_S|PSR_PS),%psr
+       nop                             ! 3 insns between wrwim/rdwim
+       mov     0,%g2           ! g2 will contain NWINDOWS-1
+       mov     %wim,%g1        ! get wim. nonexistent windows set to 0
+
+1:     srl     %g1,1,%g1
+       tst     %g1
+       bne,a   1b
+        inc    %g2
+
+       # now g2 = NWINDOWS - 1.  Patch the window spill trap handler.
+       set     RELOC(spill_patchme),%g1
+       ld      [%g1],%g3
+       or      %g2,%g3,%g3
+       st      %g3,[%g1]
+       flush   %g1
+
+       # and patch the window fill trap handler.
+       set     RELOC(fill_patchme),%g1
+       ld      [%g1],%g3
+       or      %g2,%g3,%g3
+       st      %g3,[%g1]
+       flush   %g1
+
+       # store NWINDOWS away for safekeeping
+       set     RELOC(NWINDOWS),%g1
+       inc     %g2
+       st      %g2,[%g1]
+
+       # PSR.CWP (current window pointer) == 0.
+       # Set WIM so we'll trap on the next save instruction.
+       mov     1 << 1,%wim
+
+       # set up the TBR (trap base register)
+       set     RELOC(trap_table),%g1
+       mov     %g1,%tbr
+
+       # clear frame pointer for backtrace termination
+       mov     0,%fp
+
+       # set stack pointer (-64 is space for window spill)
+       # sp = bootstacktop - core_id*KSTKSIZE - 64
+       set     RELOC(bootstacktop)-64,%sp
+       mov     CORE_ID_REG,%g1
+       sll     %g1,KSTKSHIFT,%g1
+       sub     %sp,%g1,%sp
+
+       # set up a virtual->physical mapping and relocate
+       call    mmu_boot
+        nop
+
+       # now we're relocated, so set %sp and TBR again
+       set     KERNBASE,%g1
+       add     %sp,%g1,%sp
+       set     trap_table,%g1
+       mov     %g1,%tbr
+
+       # now it's safe to enable traps
+       mov     %psr,%g1
+       wr      %g1,PSR_ET,%psr
+       nop; nop; nop
+
+       # am i core 0?  (do i run BSD?!?)
+       mov     CORE_ID_REG,%g1
+       tst     %g1
+       bne     4f
+        nop
+
+       # only core 0 gets here
+       # set num_cpus
+       set     num_cpus,%l0
+       mov     NUM_CORES_REG,%l1
+       st      %l1,[%l0]
+
+	sub	%sp,64,%sp		! 64 >= sizeof(multiboot_info_t)
+       call    build_multiboot_info
+        add    %sp,64,%o0
+
+       # kernel_init time!
+       # first arg is pointer to multiboot_info_t, but kernel_init
+       # expects it to be a pre-relocation address, so lop off KERNBASE
+       set     KERNBASE,%l0
+       add     %sp,64,%o0
+       call    kernel_init
+        sub    %o0,%l0,%o0
+
+       # shouldn't get here
+3:     ba      3b
+        nop
+
+       # i'm not core 0, so i'll call smp_init when the time is nigh
+4:     set     time_for_smp_init,%l1
+       ld      [%l1],%l0
+       tst     %l0
+       be      4b
+        nop
+
+       call    smp_init
+        nop
+
+       # shouldn't get here
+5:     ba      5b
+        nop
+
+
+
+# this function (against the ABI!) relocates its caller's stack pointer
+# and return address, then returns to the caller, relocated
+
+.global                relocate
+relocate:
+       set     KERNBASE,%o0
+       inc     8,%o7
+       add     %i7,%o0,%i7
+       jmp     %o7+%o0
+       add     %sp,%o0,%sp
+
+.data
+
+###################################################################
+# various data
+###################################################################
+
+       .global         time_for_smp_init
+time_for_smp_init:
+       .word           0
+
+       .global         NWINDOWS
+NWINDOWS:
+       .word           0
+
+       .global         num_cpus
+num_cpus:
+       .word           0
+
+###################################################################
+# boot stack
+###################################################################
+
+       .align          PGSIZE          ! force page alignment
+       .global         bootstack
+bootstack:
+       .space          KSTKSIZE*MAX_NUM_CPUS
+       .global         bootstacktop   
+bootstacktop:
+
+###################################################################
+# page tables
+###################################################################
+       .align          64
+       .align          NCONTEXTS*4
+       .global         mmu_context_table
+mmu_context_table:
+       .skip           NCONTEXTS*4
+
+       .align          1024
+       .global         l1_page_table
+l1_page_table:
+       .skip           1024
diff --git a/arch/sparc/src/env.c b/arch/sparc/src/env.c
new file mode 100644 (file)
index 0000000..c98510f
--- /dev/null
@@ -0,0 +1,154 @@
+/* See COPYRIGHT for copyright information. */
+#ifdef __DEPUTY__
+#pragma noasync
+#endif
+
+#include <arch/trap.h>
+#include <env.h>
+#include <assert.h>
+#include <arch/arch.h>
+#include <pmap.h>
+
+void
+(IN_HANDLER env_push_ancillary_state)(env_t* e)
+{
+       static_assert(offsetof(ancillary_state_t,fpr) % 8 == 0);
+
+       #define push_two_fp_regs(pdest,n) \
+           __asm__ __volatile__ ("std  %%f" XSTR(n) ",[%0+4*" XSTR(n) "]" \
+                             : : "r"(pdest) : "memory");
+
+       if(e->env_tf.psr & PSR_EF)
+       {
+               write_psr(read_psr() | PSR_EF);
+
+               e->env_ancillary_state.fsr = read_fsr();
+
+               push_two_fp_regs(e->env_ancillary_state.fpr,0);
+               push_two_fp_regs(e->env_ancillary_state.fpr,2);
+               push_two_fp_regs(e->env_ancillary_state.fpr,4);
+               push_two_fp_regs(e->env_ancillary_state.fpr,6);
+               push_two_fp_regs(e->env_ancillary_state.fpr,8);
+               push_two_fp_regs(e->env_ancillary_state.fpr,10);
+               push_two_fp_regs(e->env_ancillary_state.fpr,12);
+               push_two_fp_regs(e->env_ancillary_state.fpr,14);
+               push_two_fp_regs(e->env_ancillary_state.fpr,16);
+               push_two_fp_regs(e->env_ancillary_state.fpr,18);
+               push_two_fp_regs(e->env_ancillary_state.fpr,20);
+               push_two_fp_regs(e->env_ancillary_state.fpr,22);
+               push_two_fp_regs(e->env_ancillary_state.fpr,24);
+               push_two_fp_regs(e->env_ancillary_state.fpr,26);
+               push_two_fp_regs(e->env_ancillary_state.fpr,28);
+               push_two_fp_regs(e->env_ancillary_state.fpr,30);
+
+               write_psr(read_psr() & ~PSR_EF);
+       }
+}
+
+void
+(IN_HANDLER env_pop_ancillary_state)(env_t* e)
+{ 
+
+       #define pop_two_fp_regs(pdest,n) \
+           __asm__ __volatile__ ("ldd  [%0+4*" XSTR(n) "], %%f" XSTR(n) \
+                             : : "r"(pdest) : "memory");
+
+       if(e->env_tf.psr & PSR_EF)
+       {
+               write_psr(read_psr() | PSR_EF);
+
+               pop_two_fp_regs(e->env_ancillary_state.fpr,0);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,2);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,4);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,6);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,8);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,10);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,12);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,14);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,16);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,18);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,20);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,22);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,24);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,26);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,28);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,30);
+
+               write_fsr(e->env_ancillary_state.fsr);
+
+               write_psr(read_psr() & ~PSR_EF);
+       }
+}
+
+void
+env_set_program_counter(env_t* e, uintptr_t pc)
+{
+       e->env_tf.pc = pc;
+       e->env_tf.npc = pc+4;
+}
+
+void
+env_init_trapframe(env_t* e)
+{
+       extern char trap_table;
+
+       e->env_tf.gpr[14] = USTACKTOP-64;
+       e->env_tf.psr = PSR_S; // but PS = 0
+       e->env_tf.wim = 0;
+       e->env_tf.tbr = (uint32_t)&trap_table;
+}
+
+// Flush all mapped pages in the user portion of the address space
+// TODO: only supports L3 user pages
+void
+env_user_mem_free(env_t* e)
+{
+       pte_t *l1pt = e->env_pgdir, *l2pt, *l3pt;
+       uint32_t l1x,l2x,l3x;
+       physaddr_t l2ptpa,l3ptpa,page_pa;
+       uint32_t l2_tables_per_page,l3_tables_per_page;
+
+       l2_tables_per_page = PGSIZE/(sizeof(pte_t)*NL2ENTRIES);
+       l3_tables_per_page = PGSIZE/(sizeof(pte_t)*NL3ENTRIES);
+
+       static_assert(L2X(UTOP) == 0 && L3X(UTOP) == 0);
+       for(l1x = 0; l1x < L1X(UTOP); l1x++)
+       {
+               if(!(l1pt[l1x] & PTE_PTD))
+                       continue;
+
+               l2ptpa = PTD_ADDR(l1pt[l1x]);
+               l2pt = (pte_t*COUNT(NL2ENTRIES)) KADDR(l2ptpa);
+
+               for(l2x = 0; l2x < NL2ENTRIES; l2x++)
+               {
+                       if(!(l2pt[l2x] & PTE_PTD))
+                               continue;
+
+                       l3ptpa = PTD_ADDR(l2pt[l2x]);
+                       l3pt = (pte_t*COUNT(NL3ENTRIES)) KADDR(l3ptpa);
+
+                       for(l3x = 0; l3x < NL3ENTRIES; l3x++)
+                       {
+                               if(l3pt[l3x] & PTE_PTE)
+                               {
+                                       page_pa = PTE_ADDR(l3pt[l3x]);
+                                       l3pt[l3x] = 0;
+                                       page_decref(pa2page(page_pa));
+                               }
+                       }
+
+                       l2pt[l2x] = 0;
+
+                       // free the L3 PT itself
+                       page_decref(pa2page(l2ptpa));
+               }
+
+               l1pt[l1x] = 0;
+
+               // free the L2 PT itself
+               page_decref(pa2page(l2ptpa));
+       }
+
+       tlbflush();
+}
diff --git a/arch/sparc/src/env.c.save b/arch/sparc/src/env.c.save
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/arch/sparc/src/env.c.save.1 b/arch/sparc/src/env.c.save.1
new file mode 100644 (file)
index 0000000..62228f1
--- /dev/null
@@ -0,0 +1,156 @@
+/* See COPYRIGHT for copyright information. */
+#ifdef __DEPUTY__
+#pragma noasync
+#endif
+
+#include <arch/trap.h>
+#include <env.h>
+#include <assert.h>
+#include <arch/arch.h>
+#include <pmap.h>
+
+void
+(IN_HANDLER env_push_ancillary_state)(env_t* e)
+{
+       static_assert(offsetof(ancillary_state_t,fpr) % 8 == 0);
+
+       #define push_two_fp_regs(pdest,n) \
+           __asm__ __volatile__ ("std  %%f" XSTR(n) ",[%0+4*" XSTR(n) "]" \
+                             : : "r"(pdest) : "memory");
+
+       // do I need to save FP regs?
+       if(e->env_tf.psr & PSR_EF)
+       {
+               // temporarily turn on FP in the kernel
+               write_psr(read_psr() | PSR_EF);
+
+               e->env_ancillary_state.fsr = read_fsr();
+
+               push_two_fp_regs(e->env_ancillary_state.fpr,0);
+               push_two_fp_regs(e->env_ancillary_state.fpr,2);
+               push_two_fp_regs(e->env_ancillary_state.fpr,4);
+               push_two_fp_regs(e->env_ancillary_state.fpr,6);
+               push_two_fp_regs(e->env_ancillary_state.fpr,8);
+               push_two_fp_regs(e->env_ancillary_state.fpr,10);
+               push_two_fp_regs(e->env_ancillary_state.fpr,12);
+               push_two_fp_regs(e->env_ancillary_state.fpr,14);
+               push_two_fp_regs(e->env_ancillary_state.fpr,16);
+               push_two_fp_regs(e->env_ancillary_state.fpr,18);
+               push_two_fp_regs(e->env_ancillary_state.fpr,20);
+               push_two_fp_regs(e->env_ancillary_state.fpr,22);
+               push_two_fp_regs(e->env_ancillary_state.fpr,24);
+               push_two_fp_regs(e->env_ancillary_state.fpr,26);
+               push_two_fp_regs(e->env_ancillary_state.fpr,28);
+               push_two_fp_regs(e->env_ancillary_state.fpr,30);
+
+               write_psr(read_psr() & ~PSR_EF);
+       }
+}
+
+void
+(IN_HANDLER env_pop_ancillary_state)(env_t* e)
+{ 
+
+       #define pop_two_fp_regs(pdest,n) \
+           __asm__ __volatile__ ("ldd  [%0+4*" XSTR(n) "], %%f" XSTR(n) \
+                             : : "r"(pdest) : "memory");
+
+       if(e->env_tf.psr & PSR_EF)
+       {
+               write_psr(read_psr() | PSR_EF);
+
+               pop_two_fp_regs(e->env_ancillary_state.fpr,0);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,2);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,4);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,6);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,8);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,10);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,12);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,14);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,16);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,18);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,20);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,22);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,24);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,26);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,28);
+               pop_two_fp_regs(e->env_ancillary_state.fpr,30);
+
+               write_fsr(e->env_ancillary_state.fsr);
+
+               write_psr(read_psr() & ~PSR_EF);
+       }
+}
+
+void
+env_set_program_counter(env_t* e, uintptr_t pc)
+{
+       e->env_tf.pc = pc;
+       e->env_tf.npc = pc+4;
+}
+
+void
+env_init_trapframe(env_t* e)
+{
+       extern char trap_table;
+
+       e->env_tf.gpr[14] = USTACKTOP-64;
+       e->env_tf.psr = PSR_S; // but PS = 0
+       e->env_tf.wim = 0;
+       e->env_tf.tbr = (uint32_t)&trap_table;
+}
+
+// Flush all mapped pages in the user portion of the address space
+// TODO: only supports L3 user pages
+void
+env_user_mem_free(env_t* e)
+{
+       pte_t *l1pt = e->env_pgdir, *l2pt, *l3pt;
+       uint32_t l1x,l2x,l3x;
+       physaddr_t l2ptpa,l3ptpa,page_pa;
+       uint32_t l2_tables_per_page,l3_tables_per_page;
+
+       l2_tables_per_page = PGSIZE/(sizeof(pte_t)*NL2ENTRIES);
+       l3_tables_per_page = PGSIZE/(sizeof(pte_t)*NL3ENTRIES);
+
+       static_assert(L2X(UTOP) == 0 && L3X(UTOP) == 0);
+       for(l1x = 0; l1x < L1X(UTOP); l1x++)
+       {
+               if(!(l1pt[l1x] & PTE_PTD))
+                       continue;
+
+               l2ptpa = PTD_ADDR(l1pt[l1x]);
+               l2pt = (pte_t*COUNT(NL2ENTRIES)) KADDR(l2ptpa);
+
+               for(l2x = 0; l2x < NL2ENTRIES; l2x++)
+               {
+                       if(!(l2pt[l2x] & PTE_PTD))
+                               continue;
+
+                       l3ptpa = PTD_ADDR(l2pt[l2x]);
+                       l3pt = (pte_t*COUNT(NL3ENTRIES)) KADDR(l3ptpa);
+
+                       for(l3x = 0; l3x < NL3ENTRIES; l3x++)
+                       {
+                               if(l3pt[l3x] & PTE_PTE)
+                               {
+                                       page_pa = PTE_ADDR(l3pt[l3x]);
+                                       l3pt[l3x] = 0;
+                                       page_decref(pa2page(page_pa));
+                               }
+                       }
+
+                       l2pt[l2x] = 0;
+
+                       // free the L3 PT itself
+                       page_decref(pa2page(l2ptpa));
+               }
+
+               l1pt[l1x] = 0;
+
+               // free the L2 PT itself
+               page_decref(pa2page(l2ptpa));
+       }
+
+       tlbflush();
+}
diff --git a/arch/sparc/src/frontend.c b/arch/sparc/src/frontend.c
new file mode 100644 (file)
index 0000000..f0fe759
--- /dev/null
@@ -0,0 +1,43 @@
+#include <atomic.h>
+
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+volatile int magic_mem[16];
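+
+// Request layout (inferred from frontend_syscall below): word 0 is the
+// doorbell (0x80 = request pending), word 1 is the frontend's status
+// word, word 2 points back to this block, word 3 holds the syscall
+// number, words 4-6 the arguments, and word 7 the return value.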
+
+int32_t frontend_syscall(int32_t syscall_num, uint32_t arg0, uint32_t arg1, uint32_t arg2)
+{
+       static spinlock_t lock = 0;
+       int32_t ret;
+
+       // only one frontend request at a time.
+       // interrupts could try to do frontend requests,
+       // which would deadlock, so disable them
+       spin_lock_irqsave(&lock);
+
+       // write syscall into magic memory
+       magic_mem[1] = 0;
+       magic_mem[2] = (uintptr_t)magic_mem;
+       magic_mem[3] = syscall_num;
+       magic_mem[4] = arg0;
+       magic_mem[5] = arg1;
+       magic_mem[6] = arg2;
+       magic_mem[0] = 0x80;
+
+       // wait for front-end response
+       while(magic_mem[1] == 0)
+               ;
+
+       magic_mem[0] = 0;
+
+       // wait for front-end ack
+       while(magic_mem[1] == 1)
+               ;
+
+       ret = magic_mem[7];
+
+       spin_unlock_irqsave(&lock);
+
+       return ret;
+}
diff --git a/arch/sparc/src/pmap.c b/arch/sparc/src/pmap.c
new file mode 100644 (file)
index 0000000..a5d5dbb
--- /dev/null
@@ -0,0 +1,289 @@
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+#include <arch/mmu.h>
+#include <ros/memlayout.h>
+#include <multiboot.h>
+#include <pmap.h>
+#include <string.h>
+
+pde_t* boot_pgdir;
+char* boot_freemem;
+page_t* pages;
+page_list_t page_free_list;
+
+void
+vm_init(void)
+{
+       // we already set up our page tables before jumping
+       // into the kernel, so there's not much going on here
+
+       extern pde_t l1_page_table[NL1ENTRIES];
+       boot_pgdir = l1_page_table;
+
+       size_t page_array_size = ROUNDUP(npage*sizeof(page_t),PGSIZE);
+       pages = (page_t*)boot_alloc(page_array_size,PGSIZE);
+       memset(pages,0,page_array_size);
+
+       size_t env_array_size = ROUNDUP(NENV*sizeof(env_t), PGSIZE);
+       envs = (env_t *)boot_alloc(env_array_size, PGSIZE);
+       memset(envs, 0, env_array_size);
+}
+
+void
+page_init(void)
+{
+       uintptr_t i;
+       physaddr_t physaddr_after_kernel = PADDR(ROUNDUP(boot_freemem,PGSIZE));
+
+       LIST_INIT(&page_free_list);
+
+       // mark [0, physaddr_after_kernel) as in-use
+       for(i = 0; i < PPN(physaddr_after_kernel); i++)
+               pages[i].pp_ref = 1;
+
+       // mark [physaddr_after_kernel, maxaddrpa) as free
+       for(i = PPN(physaddr_after_kernel); i < PPN(maxaddrpa); i++)
+       {
+               pages[i].pp_ref = 0;
+               LIST_INSERT_HEAD(&page_free_list,&pages[i],pp_link);
+       }
+
+       // mark [maxaddrpa, ...) as in-use (as they are invalid)
+       for(i = PPN(maxaddrpa); i < npage; i++)
+               pages[i].pp_ref = 1;
+}
+
+error_t
+pagetable_remove(pde_t* l1pt, void* va)
+{
+       panic("pagetable_remove doesn't work yet... -asw");
+       return 0;
+}
+
+pte_t*
+pgdir_walk(pde_t* l1pt, const void*SNT va, int create)
+{
+       pte_t *l1pte, *l2pt, *l2pte, *l3pt, *l3pte;
+       page_t* new_table;
+
+       l1pte = &l1pt[L1X(va)];
+       if(*l1pte & PTE_PTE)
+               return l1pte;
+       if(!(*l1pte & PTE_PTD))
+       {
+               int i, l1x_start, l2_tables_per_page;
+               physaddr_t pa;
+
+               if(!create)
+                       return NULL;
+
+               // create a new L2 PT.  we actually allocated way more
+               // space than needed, so also use it for the adjacent
+               // l2_tables_per_page-1 pages (if they're unmapped)
+
+               if(page_alloc(&new_table))
+                       return NULL;
+               new_table->pp_ref = 1;
+               memset(page2kva(new_table),0,PGSIZE);
+
+               l2_tables_per_page = PGSIZE/(sizeof(pte_t)*NL2ENTRIES);
+               l1x_start = L1X(va)/l2_tables_per_page*l2_tables_per_page;
+
+               for(i = 0; i < l2_tables_per_page; i++)
+               {
+                       if(l1pt[l1x_start+i] != 0)
+                               continue;
+
+                       new_table->pp_ref++;
+                       pa = page2pa(new_table) + i*sizeof(pte_t)*NL2ENTRIES;
+                       l1pt[l1x_start+i] = PTD(pa);
+               }
+
+               l1pte = &l1pt[L1X(va)];
+       }
+
+       l2pt = (pte_t*)KADDR(PTD_ADDR(*l1pte));
+       l2pte = &l2pt[L2X(va)];
+       if(*l2pte & PTE_PTE)
+               return l2pte;
+       if(!(*l2pte & PTE_PTD))
+       {
+               int i, l2x_start, l3_tables_per_page;
+               physaddr_t pa;
+
+               if(!create)
+                       return NULL;
+
+               if(page_alloc(&new_table))
+                       return NULL;
+               new_table->pp_ref = 1;
+               memset(page2kva(new_table),0,PGSIZE);
+
+               l3_tables_per_page = PGSIZE/(sizeof(pte_t)*NL3ENTRIES);
+               l2x_start = L2X(va)/l3_tables_per_page*l3_tables_per_page;
+
+               for(i = 0; i < l3_tables_per_page; i++)
+               {
+                       if(l2pt[l2x_start+i] != 0)
+                               continue;
+
+                       new_table->pp_ref++;
+                       pa = page2pa(new_table) + i*sizeof(pte_t)*NL3ENTRIES;
+                       l2pt[l2x_start+i] = PTD(pa);
+               }
+
+               l2pte = &l2pt[L2X(va)];
+       }
+
+       l3pt = (pte_t*)KADDR(PTD_ADDR(*l2pte));
+       l3pte = &l3pt[L3X(va)];
+       return l3pte;
+}
+void
+page_check(void)
+{
+/*
+       page_t *pp, *pp0, *pp1, *pp2;
+       page_list_t fl;
+       pte_t *ptep;
+
+       // should be able to allocate three pages
+       pp0 = pp1 = pp2 = 0;
+       assert(page_alloc(&pp0) == 0);
+       assert(page_alloc(&pp1) == 0);
+       assert(page_alloc(&pp2) == 0);
+
+       assert(pp0);
+       assert(pp1 && pp1 != pp0);
+       assert(pp2 && pp2 != pp1 && pp2 != pp0);
+
+       // temporarily steal the rest of the free pages
+       fl = page_free_list;
+       LIST_INIT(&page_free_list);
+
+       // should be no free memory
+       assert(page_alloc(&pp) == -ENOMEM);
+
+       // Fill pp1 with bogus data and check for invalid tlb entries
+       memset(page2kva(pp1), 0xFFFFFFFF, PGSIZE);
+
+       // there is no page allocated at address 0
+       assert(page_lookup(boot_pgdir, (void *) 0x0, &ptep) == NULL);
+
+       // there is no free memory, so we can't allocate a page table 
+       assert(page_insert(boot_pgdir, pp1, 0x0, 0) < 0);
+
+       // free pp0 and try again: pp0 should be used for page table
+       page_free(pp0);
+       assert(page_insert(boot_pgdir, pp1, 0x0, 0) == 0);
+       tlb_invalidate(boot_pgdir, 0x0);
+       // DEP Should have shot down invalid TLB entry - let's check
+       {
+         int *x = 0x0;
+         assert(*x == 0xFFFFFFFF);
+       }
+       assert(PTD_ADDR(boot_pgdir[0]) == page2pa(pp0));
+       assert(check_va2pa(boot_pgdir, 0x0) == page2pa(pp1));
+       assert(pp1->pp_ref == 1);
+       assert(pp0->pp_ref == 1);
+
+       // should be able to map pp2 at PGSIZE because pp0 is already allocated for page table
+       assert(page_insert(boot_pgdir, pp2, (void*) PGSIZE, 0) == 0);
+       assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp2));
+       assert(pp2->pp_ref == 1);
+
+       // Make sure that pgdir_walk returns a pointer to the pte and
+       // not the table or some other garbage
+       {
+         pte_t *p = KADDR(PTD_ADDR(boot_pgdir[PDX(PGSIZE)]));
+         assert(pgdir_walk(boot_pgdir, (void *)PGSIZE, 0) == &p[PTX(PGSIZE)]);
+       }
+
+       // should be no free memory
+       assert(page_alloc(&pp) == -ENOMEM);
+
+       // should be able to map pp2 at PGSIZE because it's already there
+       assert(page_insert(boot_pgdir, pp2, (void*) PGSIZE, PTE_U) == 0);
+       assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp2));
+       assert(pp2->pp_ref == 1);
+
+       // Make sure that we actually changed the permission on pp2 when we re-mapped it
+       {
+         pte_t *p = pgdir_walk(boot_pgdir, (void*)PGSIZE, 0);
+         assert(((*p) & PTE_U) == PTE_U);
+       }
+
+       // pp2 should NOT be on the free list
+       // could happen in ref counts are handled sloppily in page_insert
+	// could happen if ref counts are handled sloppily in page_insert
+
+       // should not be able to map at PTSIZE because need free page for page table
+       assert(page_insert(boot_pgdir, pp0, (void*) PTSIZE, 0) < 0);
+
+       // insert pp1 at PGSIZE (replacing pp2)
+       assert(page_insert(boot_pgdir, pp1, (void*) PGSIZE, 0) == 0);
+
+       // should have pp1 at both 0 and PGSIZE, pp2 nowhere, ...
+       assert(check_va2pa(boot_pgdir, 0) == page2pa(pp1));
+       assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp1));
+       // ... and ref counts should reflect this
+       assert(pp1->pp_ref == 2);
+       assert(pp2->pp_ref == 0);
+
+       // pp2 should be returned by page_alloc
+       assert(page_alloc(&pp) == 0 && pp == pp2);
+
+       // unmapping pp1 at 0 should keep pp1 at PGSIZE
+       page_remove(boot_pgdir, 0x0);
+       assert(check_va2pa(boot_pgdir, 0x0) == ~0);
+       assert(check_va2pa(boot_pgdir, PGSIZE) == page2pa(pp1));
+       assert(pp1->pp_ref == 1);
+       assert(pp2->pp_ref == 0);
+
+       // unmapping pp1 at PGSIZE should free it
+       page_remove(boot_pgdir, (void*) PGSIZE);
+       assert(check_va2pa(boot_pgdir, 0x0) == ~0);
+       assert(check_va2pa(boot_pgdir, PGSIZE) == ~0);
+       assert(pp1->pp_ref == 0);
+       assert(pp2->pp_ref == 0);
+
+       // so it should be returned by page_alloc
+       assert(page_alloc(&pp) == 0 && pp == pp1);
+
+       // should be no free memory
+       assert(page_alloc(&pp) == -ENOMEM);
+
+       // forcibly take pp0 back
+       assert(PTD_ADDR(boot_pgdir[0]) == page2pa(pp0));
+       boot_pgdir[0] = 0;
+       assert(pp0->pp_ref == 1);
+       pp0->pp_ref = 0;
+
+       // Catch invalid pointer addition in pgdir_walk - i.e. pgdir + PDX(va)
+       {
+         // Give back pp0 for a bit
+         page_free(pp0);
+
+         void * va = (void *)((PGSIZE * NPDENTRIES) + PGSIZE);
+         pte_t *p2 = pgdir_walk(boot_pgdir, va, 1);
+         pte_t *p = KADDR(PTD_ADDR(boot_pgdir[PDX(va)]));
+         assert(p2 == &p[PTX(va)]);
+
+         // Clean up again
+         boot_pgdir[PDX(va)] = 0;
+         pp0->pp_ref = 0;
+       }
+
+       // give free list back
+       page_free_list = fl;
+
+       // free the pages we took
+       page_free(pp0);
+       page_free(pp1);
+       page_free(pp2);
+
+       cprintf("page_check() succeeded!\n");
+*/
+}
diff --git a/arch/sparc/src/smp.c b/arch/sparc/src/smp.c
new file mode 100644 (file)
index 0000000..942bc40
--- /dev/null
@@ -0,0 +1,144 @@
+#include <smp.h>
+#include <arch/arch.h>
+#include <arch/smp.h>
+#include <stdio.h>
+#include <string.h>
+#include <ros/error.h>
+#include <assert.h>
+#include <atomic.h>
+
+#ifdef __DEPUTY__
+#pragma nodeputy
+#endif
+
+void
+smp_boot(void)
+{
+       extern int time_for_smp_init;
+       num_cpus = 1;
+       cprintf("Cores, report in!\n");
+       time_for_smp_init = 1;
+
+       while(*(volatile uint8_t*)&num_cpus < num_cores());
+
+       cprintf("All cores reporting!\n");
+}
+
+void
+smp_init(void)
+{
+       static spinlock_t report_in_lock = 0;
+
+       cprintf("Good morning, Vietnam! (core id = %d)\n",core_id());
+
+       spin_lock(&report_in_lock);
+       num_cpus++;
+       spin_unlock(&report_in_lock);
+
+       smp_idle();
+}
+
+handler_wrapper_t
+wrapper_pool[MAX_NUM_CPUS*8] = {{0},0};
+
+handler_wrapper_t*
+smp_make_wrapper()
+{
+       int i;
+       for(i = 0; i < sizeof(wrapper_pool)/sizeof(wrapper_pool[0]); i++)
+               if(spin_trylock(&wrapper_pool[i].lock) == 0)
+                       return &wrapper_pool[i];
+       return NULL;
+}
+
+void
+smp_call_wrapper(trapframe_t* tf, uint32_t src, isr_t handler,
+                 handler_wrapper_t* wrapper,void* data)
+{
+       if(wrapper)
+               wrapper->wait_list[core_id()] = 0;
+       handler(tf,data);
+}
+
+int smp_call_function_self(isr_t handler, void* data,
+                           handler_wrapper_t** wait_wrapper)
+{
+       return smp_call_function_single(core_id(),handler,data,wait_wrapper);
+}
+
+int smp_call_function_all(isr_t handler, void* data,
+                          handler_wrapper_t** wait_wrapper)
+{
+       int8_t state = 0;
+       int i;
+       handler_wrapper_t* wrapper = 0;
+       if(wait_wrapper)
+       {
+               wrapper = *wait_wrapper = smp_make_wrapper();
+               if(!wrapper)
+                       return -ENOMEM;
+
+               for(i = 0; i < num_cores(); i++)
+                       wrapper->wait_list[i] = 1;
+       }
+
+       enable_irqsave(&state);
+
+       // send to others
+       for(i = 0; i < num_cores(); i++)
+       {
+               if(i == core_id())
+                       continue;
+
+               while(send_active_message(i,(amr_t)smp_call_wrapper,
+                                         (uint32_t)handler,(uint32_t)wrapper,
+                                         (uint32_t)data) != 0);
+       }
+
+       // send to me
+       while(send_active_message(core_id(),(amr_t)smp_call_wrapper,
+                                 (uint32_t)handler,(uint32_t)wrapper,
+                                 (uint32_t)data) != 0);
+
+       cpu_relax(); // wait to get the interrupt
+
+       disable_irqsave(&state);
+
+       return 0;
+}
+
+int smp_call_function_single(uint8_t dest, isr_t handler, void* data,
+                             handler_wrapper_t** wait_wrapper)
+{
+       int8_t state = 0;
+       handler_wrapper_t* wrapper = 0;
+       if(wait_wrapper)
+       {
+               wrapper = *wait_wrapper = smp_make_wrapper();
+               if(!wrapper)
+                       return -ENOMEM;
+               wrapper->wait_list[dest] = 1;
+       }
+
+       enable_irqsave(&state);
+
+       while(send_active_message(dest,(amr_t)smp_call_wrapper,
+                                 (uint32_t)handler,(uint32_t)wrapper,
+                                 (uint32_t)data) != 0);
+
+       cpu_relax(); // wait to get the interrupt, if it's to this core
+
+       disable_irqsave(&state);
+
+       return 0;
+}
+
+int smp_call_wait(handler_wrapper_t* wrapper)
+{
+       int i;
+       for(i = 0; i < num_cores(); i++)
+               while(wrapper->wait_list[i]);
+
+       spin_unlock(&wrapper->lock);
+       return 0;
+}
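+
+// Typical caller pattern (illustrative):
+//   handler_wrapper_t* w;
+//   if(smp_call_function_all(my_handler, NULL, &w) == 0)
+//       smp_call_wait(w);   // returns once every core has run my_handler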
diff --git a/arch/sparc/src/spillfill.S b/arch/sparc/src/spillfill.S
new file mode 100644 (file)
index 0000000..e705592
--- /dev/null
@@ -0,0 +1,175 @@
+#include <arch/mmu.h>
+#include <arch/sparc.h>
+#include <arch/trap.h>
+#include <ros/memlayout.h>
+
+# before spilling a window, we must be certain
+# that %sp is 8-byte aligned and the range [%sp,%sp+64)
+# is validly mapped in
+#define VALIDATE_STACK(reg1,reg2,misaligned,pagefault) \
+       mov     %psr,reg1               ;\
+       btst    7,%sp                   ;\
+       bne     misaligned              ;\
+        mov    reg1,%psr               ;\
+       andn    %sp,0xFFF,reg1          ;\
+       or      reg1,0x400,reg1         ;\
+       lda     [reg1] 3,reg2           ;\
+       add     %sp,56,reg1             ;\
+       andn    reg1,0xFFF,reg1         ;\
+       or      reg1,0x400,reg1         ;\
+       lda     [reg1] 3,reg1           ;\
+       and     reg2,reg1,reg2          ;\
+       mov     %psr,reg1               ;\
+       btst    PTE_PTE,reg2            ;\
+       be      pagefault               ;\
+        mov    reg1,%psr
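+
+# (The lda [reg1] 3 probes above fetch the PTEs mapping %sp and %sp+56,
+# AND them together, and require the PTE type bit, so that both ends of
+# the 64-byte save area are known-mapped before we spill into it.)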
+
+#define RETHROW_TRAP(func)             \
+       mov     %psr,%l7                ;\
+       and     %l7,PSR_CWP,%l4         ;\
+       set     NWINDOWS,%l3            ;\
+       ld      [%l3],%l3               ;\
+       dec     %l3                     ;\
+       cmp     %l3,%l4                 ;\
+       inc     %l4                     ;\
+       be,a    7f                      ;\
+        mov    0,%l4                   ;\
+7:     mov     1,%l3                   ;\
+       sll     %l3,%l4,%l4             ;\
+       mov     %g0,%wim                ;\
+       set     bootstacktop-64-SIZEOF_TRAPFRAME_T,%sp  ;\
+       mov     CORE_ID_REG,%l5         ;\
+       sll     %l5,KSTKSHIFT,%l5       ;\
+       sub     %sp,%l5,%sp             ;\
+       btst    PSR_PS,%l7              ;\
+       bne,a   8f                      ;\
+        sub    %fp,64+SIZEOF_TRAPFRAME_T,%sp ;\
+8:     mov     %l7,%psr                ;\
+       mov     %l1,%o1                 ;\
+       mov     %l2,%o2                 ;\