Fixed lots of bugs in the SPARC port wrt multithreading
author Andrew Waterman <waterman@r53.millennium.berkeley.edu>
Mon, 26 Oct 2009 07:21:46 +0000 (00:21 -0700)
committer Kevin Klues <klueska@cs.berkeley.edu>
Mon, 26 Oct 2009 07:32:50 +0000 (08:32 +0100)
Also added the Harts threading interface and added
a getvcoreid system call.  Added gettimeofday to SPARC.

Added an arch-specific subfolder to include/ros.  Moved
a few key things that must be exposed to userspace there,
e.g. PGSIZE and relax()

44 files changed:
kern/arch/i386/mmu.h
kern/arch/i386/ros/hart.h [new file with mode: 0644]
kern/arch/i386/ros/mmu.h [new file with mode: 0644]
kern/arch/i386/ros/syscall.h [new file with mode: 0644]
kern/arch/i386/trap.h
kern/arch/sparc/arch.h
kern/arch/sparc/atomic.h
kern/arch/sparc/boot.c
kern/arch/sparc/console.c
kern/arch/sparc/entry.S
kern/arch/sparc/frontend.c
kern/arch/sparc/frontend.h
kern/arch/sparc/mmu.h
kern/arch/sparc/process.c
kern/arch/sparc/ros/hart.h [new file with mode: 0644]
kern/arch/sparc/ros/mmu.h [new file with mode: 0644]
kern/arch/sparc/smp.c
kern/arch/sparc/sparc.h
kern/arch/sparc/trap.c
kern/arch/sparc/trap_entry.S
kern/arch/sparc/trap_table.S
kern/include/ros/arch [new symlink]
kern/include/ros/memlayout.h
kern/include/ros/procdata.h
kern/include/ros/syscall.h
kern/src/Makefrag
kern/src/env.c
kern/src/kfs.c
kern/src/manager.c
kern/src/syscall.c
user/apps/parlib/manycore_test.c [new file with mode: 0644]
user/parlib/inc/hart.h [new file with mode: 0644]
user/parlib/inc/newlib_backend.h
user/parlib/inc/parlib.h
user/parlib/src/Makefrag
user/parlib/src/channel.c
user/parlib/src/hart.c [new file with mode: 0644]
user/parlib/src/i386/entry.S
user/parlib/src/sparc/Makefrag
user/parlib/src/sparc/entry.S
user/parlib/src/sparc/newlib_backend.c
user/parlib/src/syscall.c
user/roslib/inc/lib.h
user/roslib/src/sparc/entry.S

index 9b3a67c..aca9c44 100644 (file)
@@ -5,6 +5,8 @@
 #include <ros/common.h>
 #endif
 
+#include <ros/arch/mmu.h>
+
 /*
  * This file contains definitions for the x86 memory management unit (MMU),
  * including paging- and segmentation-related data structures and constants,
 #define NPDENTRIES     1024            // page directory entries per page directory
 #define NPTENTRIES     1024            // page table entries per page table
 
-#define PGSIZE         4096            // bytes mapped by a page
-#define JPGSIZE                4096*1024       // bytes mapped by a jumbo page (4MB)
-#define PGSHIFT                12              // log2(PGSIZE)
-
-#define PTSIZE         (PGSIZE*NPTENTRIES) // bytes mapped by a page directory entry
-#define PTSHIFT                22              // log2(PTSIZE)
-
 #define PTXSHIFT       12              // offset of PTX in a linear address
 #define PDXSHIFT       22              // offset of PDX in a linear address
 
diff --git a/kern/arch/i386/ros/hart.h b/kern/arch/i386/ros/hart.h
new file mode 100644 (file)
index 0000000..5e6cf6e
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef _ROS_ARCH_HART_H
+#define _ROS_ARCH_HART_H
+
+#include <parlib.h>
+
+// The actual hart_self() function is a global symbol that invokes this routine.
+// Returns the value produced by the SYS_getvcoreid system call, cast to
+// size_t.  Every call issues a full system call (see the TODO below).
+static inline size_t
+__hart_self()
+{
+       // TODO: use some kind of thread-local storage to speed this up!
+       return (size_t)syscall(SYS_getvcoreid,0,0,0,0,0);
+}
+
+// Spin-wait hint for busy loops: executes the x86 "pause" instruction.
+// The "memory" clobber also stops the compiler from keeping memory values
+// cached in registers across the call, so a polled variable is re-read.
+static inline void
+hart_relax()
+{
+       asm volatile ("pause" : : : "memory");
+}
+
+// Exchanges val with the word at *addr using a single xchg instruction
+// (xchg with a memory operand is implicitly locked on x86) and returns
+// the value that was previously stored at *addr.
+static inline size_t
+hart_swap(size_t* addr, size_t val)
+{
+       // %0 is both the input (new value) and the output (old value)
+       asm volatile ("xchg %0, (%2)" : "=r"(val) : "0"(val),"r"(addr) : "memory");
+       return val;
+}
+
+#endif
diff --git a/kern/arch/i386/ros/mmu.h b/kern/arch/i386/ros/mmu.h
new file mode 100644 (file)
index 0000000..f361a7c
--- /dev/null
@@ -0,0 +1,12 @@
+#ifndef _ROS_ARCH_MMU_H
+#define _ROS_ARCH_MMU_H
+
+#define PTSHIFT 22
+#define PTSIZE (1 << PTSHIFT)
+
+#define PGSHIFT 12
+#define PGSIZE (1 << PGSHIFT)
+
+#define JPGSIZE PTSIZE
+
+#endif
diff --git a/kern/arch/i386/ros/syscall.h b/kern/arch/i386/ros/syscall.h
new file mode 100644 (file)
index 0000000..5107059
--- /dev/null
@@ -0,0 +1,6 @@
+#ifndef _ROS_ARCH_SYSCALL_H
+#define _ROS_ARCH_SYSCALL_H
+
+#define T_SYSCALL      0x80
+
+#endif
index 049172f..6fb1e61 100644 (file)
 
 // These are arbitrarily chosen, but with care not to overlap
 // processor defined exceptions or interrupt vectors.
-#define T_SYSCALL   0x80                       // system call
+
+// T_SYSCALL is defined by the following include:
+#include <ros/arch/syscall.h>
+
 #define T_DEFAULT   0xdeadbeef         // catchall
 
 /* IPIs */
index b23bd0a..962b5e5 100644 (file)
@@ -35,6 +35,8 @@ void print_cpuinfo(void);
 void show_mapping(uintptr_t start, size_t size);
 void backtrace(void);
 
+extern uintptr_t mmu_context_tables[MAX_NUM_CPUS][NCONTEXTS+CONTEXT_TABLE_PAD];
+
 static __inline void
 breakpoint(void)
 {
@@ -58,7 +60,7 @@ tlbflush(void)
 static __inline uint64_t
 read_tsc(void)
 {
-       return read_perfctr(0);
+       return read_perfctr(0,0);
 }
 
 static __inline uint64_t 
@@ -156,16 +158,14 @@ reboot(void)
 static __inline void
 lcr3(uint32_t val)
 {
-       extern uintptr_t mmu_context_table[NCONTEXTS];
-       *mmu_context_table = val >> 4 | PTE_PTD;
+       mmu_context_tables[core_id()][0] = val >> 4 | PTE_PTD;
        tlbflush();
 }
 
 static __inline uint32_t
 rcr3(void)
 {
-       extern uintptr_t mmu_context_table[NCONTEXTS];
-       return (*mmu_context_table & ~0x3) << 4;
+       return (mmu_context_tables[core_id()][0] & ~0x3) << 4;
 }
 
 #endif /* !__ASSEMBLER__ */
index 90be16c..d0daba2 100644 (file)
@@ -21,6 +21,7 @@ static inline void atomic_set(atomic_t* number, int32_t val);
 static inline void atomic_add(atomic_t* number, int32_t inc);
 static inline void atomic_inc(atomic_t* number);
 static inline void atomic_dec(atomic_t* number);
+static inline uint32_t atomic_swap(uint32_t* addr, uint32_t val);
 static inline uint32_t spin_trylock(spinlock_t*SAFE lock);
 static inline void spin_lock(spinlock_t*SAFE lock);
 static inline void spin_unlock(spinlock_t*SAFE lock);
@@ -70,6 +71,12 @@ static inline void atomic_dec(atomic_t* number)
        atomic_add(number,-1);
 }
 
+// Exchanges val with the 32-bit word at *addr using the SPARC "swap"
+// instruction and returns the value previously stored at *addr.
+static inline uint32_t atomic_swap(uint32_t* addr, uint32_t val)
+{
+       // %0 is both the input (new value) and the output (old value)
+       asm volatile ("swap [%2],%0" : "=r"(val) : "0"(val),"r"(addr) : "memory");
+       return val;
+}
+
 static inline uint32_t spin_trylock(spinlock_t*SAFE lock)
 {
        uint32_t reg;
index d5f4a1b..bc66a66 100644 (file)
@@ -13,8 +13,6 @@
 #pragma nodeputy
 #endif
 
-extern uintptr_t mmu_context_table[NCONTEXTS];
-
 void
 build_multiboot_info(multiboot_info_t* mbi)
 {
@@ -35,20 +33,12 @@ build_boot_pgdir(void)
        extern uintptr_t l1_page_table[NL1ENTRIES];
 
        // relocate symbols
-       uintptr_t* mmuctxtbl = (uintptr_t*)((uint8_t*)mmu_context_table-KERNBASE);
        uintptr_t* l1 = (uintptr_t*)((uint8_t*)l1_page_table-KERNBASE);
 
        uintptr_t kernsize = /* 4GB */ - KERNBASE;
 
-       // make all context table entries invalid
-       int i;
-       for(i = 0; i < NCONTEXTS; i++)
-               mmuctxtbl[i] = 0;
-
-       // except for the zeroth one, which points to our L1 PT
-       *mmuctxtbl = PTD((uintptr_t)l1);
-
        // make all L1 PTEs invalid by default
+       int i;
        for(i = 0; i < NL1ENTRIES; i++)
                l1[i] = 0;
 
@@ -67,19 +57,29 @@ build_boot_pgdir(void)
 void
 mmu_init(void)
 {
+       extern uintptr_t l1_page_table[NL1ENTRIES];
+       uintptr_t* l1 = (uintptr_t*)((uint8_t*)l1_page_table-KERNBASE);
+
        int zero = 0;
-       uintptr_t* mmuctxtbl = (uintptr_t*)((uint8_t*)mmu_context_table-KERNBASE);
+       uintptr_t* mmuctxtbl = (uintptr_t*)((uint8_t*)(mmu_context_tables[core_id()])-KERNBASE);
 
-       // set physical address of context table
-       store_alternate(0x100,4,(uintptr_t)mmuctxtbl>>4);
+       // make all context table entries invalid
+       int i;
+       for(i = 0; i < NCONTEXTS; i++)
+               mmuctxtbl[i] = 0;
+
+       // except for the zeroth one, which points to our L1 PT
+       *mmuctxtbl = PTD((uintptr_t)l1);
 
        // set current context (== 0)
        store_alternate(0x200,4,zero);
 
-       // turn on MMU
-       store_alternate(0x000,4,1);
+       // set physical address of context table
+       store_alternate(0x100,4,(uintptr_t)mmuctxtbl>>4);
 
+       // turn on MMU
        tlbflush();
+       store_alternate(0x000,4,1);
 }
 
 // delete temporary mappings used by the entry code
index 6c06532..e483a0b 100644 (file)
@@ -11,7 +11,9 @@ cons_init(void)
 void
 cputbuf(const char*COUNT(len) buf, int len)
 {
-       frontend_syscall(RAMP_SYSCALL_write,1,(uint32_t)PADDR(buf),len);
+       int i;
+       for(i = 0; i < len; i++)
+               cputchar(buf[i]);
 }
 
 // Low-level console I/O
@@ -35,13 +37,14 @@ cons_putc(int c)
 void
 cputchar(int c)
 {
-        cons_putc(c);
+       while(sys_nbputch(c));
 }
 
 int
 cons_getc()
 {
-       return frontend_syscall(RAMP_SYSCALL_getch,0,0,0);
+       int ret = sys_nbgetch();
+       return ret < 0 ? 0 : ret;
 }
 
 int
index 56f9eeb..3bed26e 100644 (file)
@@ -110,6 +110,9 @@ _start:
        mov     NUM_CORES_REG,%l1
        st      %l1,[%l0]
 
+       cmp     %l1,MAX_NUM_CPUS
+       tg      0x7f
+
        sub     %sp,64,%sp              ! 64 >= sizeof(multiboot_header_t)
        call    build_multiboot_info
         add    %sp,64,%o0
@@ -185,11 +188,10 @@ bootstacktop:
 ###################################################################
 # page tables
 ###################################################################
-       .align          64
-       .align          NCONTEXTS*4
-       .global         mmu_context_table
-mmu_context_table:
-       .skip           NCONTEXTS*4
+       .align          (NCONTEXTS+CONTEXT_TABLE_PAD)*4
+       .global         mmu_context_tables
+mmu_context_tables:
+       .skip           MAX_NUM_CPUS*(NCONTEXTS+CONTEXT_TABLE_PAD)*4
 
        .align          1024
        .global         l1_page_table
index 0c45043..e00834e 100644 (file)
@@ -10,7 +10,7 @@
 #include <pmap.h>
 #include <arch/frontend.h>
 
-volatile int magic_mem[8] __attribute__((aligned(32)));
+volatile int magic_mem[10];
 
 int32_t frontend_syscall_from_user(env_t* p, int32_t syscall_num, uint32_t arg0, uint32_t arg1, uint32_t arg2)
 {
@@ -25,7 +25,14 @@ int32_t frontend_syscall_from_user(env_t* p, int32_t syscall_num, uint32_t arg0,
                        if(memcpy_from_user(p,buf,(void*)arg1,arg2))
                                return -1;
                        arg1 = PADDR((uint32_t)buf);
+
+                       extern spinlock_t output_lock;
+                       spin_lock(&output_lock);
+
                        ret = frontend_syscall(syscall_num,arg0,arg1,arg2);
+
+                       spin_unlock(&output_lock);
+
                        break;
 
                case RAMP_SYSCALL_open:
@@ -51,6 +58,39 @@ int32_t frontend_syscall_from_user(env_t* p, int32_t syscall_num, uint32_t arg0,
                case RAMP_SYSCALL_getch:
                        return frontend_syscall(RAMP_SYSCALL_getch,0,0,0);
 
+               case RAMP_SYSCALL_gettimeofday:
+               {
+                       // gettimeofday emulation.  The front-end is asked for
+                       // the wall-clock time only until one request succeeds;
+                       // its seconds field is cached in t0 and every call
+                       // derives the result from t0 plus the TSC reading.
+                       // arg0 is the user-space address that receives the
+                       // resulting struct timeval.
+                       struct timeval
+                       {
+                               size_t tv_sec;
+                               size_t tv_usec;
+                       };
+
+                       static spinlock_t gettimeofday_lock = 0;
+                       // cached base time in seconds; 0 means "not fetched yet"
+                       static size_t t0 = 0;
+                       spin_lock(&gettimeofday_lock);
+
+                       if(!t0)
+                       {
+                               // the front-end writes the result through a
+                               // physical address, hence PADDR and volatile
+                               volatile struct timeval tp;
+                               ret = frontend_syscall(RAMP_SYSCALL_gettimeofday,(int)PADDR((uint32_t)&tp),0,0);
+                               if(ret == 0)
+                                       t0 = tp.tv_sec;
+                       }
+                       else ret = 0;
+
+                       spin_unlock(&gettimeofday_lock);
+
+                       if(ret == 0)
+                       {
+                               // NOTE(review): dt is the raw TSC value, not the
+                               // ticks elapsed since t0 was sampled, so results
+                               // look offset by the uptime at the first call.
+                               // Differences between calls are unaffected.
+                               // Presumably tsc_freq is in ticks/second -- confirm.
+                               struct timeval tp;
+                               long long dt = read_tsc();
+                               tp.tv_sec = t0 + dt/system_timing.tsc_freq;
+                               tp.tv_usec = (dt % system_timing.tsc_freq)*1000000/system_timing.tsc_freq;
+
+                               ret = memcpy_to_user(p,(void*)arg0,&tp,sizeof(tp));
+                       }
+
+                       // Without this break control fell through into the
+                       // default case, which overwrote ret with -1 even when
+                       // the call had succeeded.
+                       break;
+               }
                default:
                        ret = -1;
                        break;
@@ -87,3 +127,35 @@ int32_t frontend_syscall(int32_t syscall_num, uint32_t arg0, uint32_t arg1, uint
 
        return ret;
 }
+
+// Non-blocking character output through slot 8 of magic_mem.
+// Under putch_lock: if magic_mem[8] is 0 the character is stored there and
+// 0 is returned; otherwise nothing is written and -1 is returned so the
+// caller can retry.
+// NOTE(review): presumably the front-end consumes the character and resets
+// the slot to 0 -- nothing in this file clears it; confirm.
+int32_t sys_nbputch(char ch)
+{
+       static spinlock_t putch_lock = 0;
+       spin_lock(&putch_lock);
+
+       int ret = -1;
+       if(magic_mem[8] == 0)
+       {
+               // go through unsigned char so the value is not sign-extended
+               magic_mem[8] = (unsigned int)(unsigned char)ch;
+               ret = 0;
+       }
+
+       spin_unlock(&putch_lock);
+       return ret;
+}
+
+// Non-blocking character input through slot 9 of magic_mem.
+// Under getch_lock: if magic_mem[9] is non-zero its value is returned and
+// the slot is reset to 0; otherwise -1 is returned.
+// NOTE(review): presumably the front-end deposits input characters in
+// slot 9 -- nothing in this file writes a non-zero value; confirm.
+int32_t sys_nbgetch()
+{
+       static spinlock_t getch_lock = 0;
+       spin_lock(&getch_lock);
+
+       int result = -1;
+       if(magic_mem[9]) 
+       {
+               result = magic_mem[9];
+               magic_mem[9] = 0;
+       }
+
+       spin_unlock(&getch_lock);
+       return result;
+}
index 5b0e0fe..0db5f75 100644 (file)
@@ -9,6 +9,9 @@
 int32_t frontend_syscall_from_user(env_t* p, int32_t syscall_num, uint32_t arg0, uint32_t arg1, uint32_t arg2);
 int32_t frontend_syscall(int32_t syscall_num, uint32_t arg0, uint32_t arg1, uint32_t arg2);
 
+int32_t sys_nbgetch();
+int32_t sys_nbputch(char ch);
+
 #endif
 
 #define RAMP_SYSCALL_exit              1
index 3ba0c84..38b06fb 100644 (file)
@@ -2,11 +2,11 @@
 #define ROS_INC_MMU_H
 
 /*
- * This file contains definitions for the x86 memory management unit (MMU),
- * including paging- and segmentation-related data structures and constants,
- * the %cr0, %cr4, and %eflags registers, and traps.
+ * This file contains definitions for the SRMMU.
  */
 
+#include <ros/arch/mmu.h>
+
 /*
  *
  *     Part 1.  Paging data structures and constants.
@@ -52,6 +52,7 @@
 
 // Number of L1 page tables (contexts) the MMU can store at any time
 #define NCONTEXTS      8
+#define CONTEXT_TABLE_PAD 8 // we require (NCONTEXTS+CONTEXT_TABLE_PAD) % 16 == 0
 
 // Page directory and page table constants.
 #define NL3ENTRIES     64              // # entries in an L3 page table
 #define L1PGSIZE       (4096*64*64)    // bytes mapped by an L1 page
 #define L1PGSHIFT      (12+6+6)        // log2(L1PGSIZE)
 
-// The only page size we actually support for now is L3
-#define PGSIZE         L3PGSIZE
-#define        PGSHIFT         L3PGSHIFT
-
 // Page table/directory entry flags.
 #define PTE_PTD                0x001   // Entry is a Page Table Descriptor
 #define PTE_PTE                0x002   // Entry is a Page Table Entry
index 18fcc15..1ac589f 100644 (file)
@@ -33,7 +33,7 @@ proc_init_trapframe(trapframe_t *tf)
 
 void proc_set_tfcoreid(trapframe_t *tf, uint32_t id)
 {
-       tf->gpr[10] = id;
+       tf->gpr[6] = id;
 }
 
 /* For cases that we won't return from a syscall via the normal path, and need
diff --git a/kern/arch/sparc/ros/hart.h b/kern/arch/sparc/ros/hart.h
new file mode 100644 (file)
index 0000000..83ccd0d
--- /dev/null
@@ -0,0 +1,27 @@
+#ifndef _ROS_ARCH_HART_H
+#define _ROS_ARCH_HART_H
+
+#define HART_ALLOCATE_STACKS
+
+// Returns the current contents of global register %g6.
+// NOTE(review): presumably the kernel keeps the core id in %g6 (this commit
+// changes proc_set_tfcoreid() to write gpr[6]) -- confirm.
+static inline size_t
+__hart_self()
+{
+       size_t id;
+       asm volatile ("mov %%g6,%0" : "=r"(id));
+       return id;
+}
+
+// Spin-wait hint for busy loops.  Currently an empty placeholder on SPARC:
+// it emits no instruction and no compiler barrier.
+static inline void
+hart_relax()
+{
+       // TODO: relax
+}
+
+// Exchanges val with the word at *addr using the SPARC "swap" instruction
+// and returns the value previously stored at *addr.
+static inline size_t
+hart_swap(size_t* addr, size_t val)
+{
+       // %0 is both the input (new value) and the output (old value)
+       asm volatile ("swap [%2],%0" : "=r"(val) : "0"(val),"r"(addr) : "memory");
+       return val;
+}
+
+#endif
diff --git a/kern/arch/sparc/ros/mmu.h b/kern/arch/sparc/ros/mmu.h
new file mode 100644 (file)
index 0000000..26e5edf
--- /dev/null
@@ -0,0 +1,7 @@
+#ifndef _ROS_ARCH_MMU_H
+#define _ROS_ARCH_MMU_H
+
+#define PGSHIFT 12
+#define PGSIZE (1 << PGSHIFT)
+
+#endif
index 95c6183..dbae1d6 100644 (file)
@@ -94,13 +94,13 @@ int smp_call_function_all(isr_t handler, void* data,
                if(i == core_id())
                        continue;
 
-               while(send_active_message(i,(amr_t)smp_call_wrapper,
-                                         handler, wrapper, data) != 0);
+               send_active_msg_sync(i,(amr_t)smp_call_wrapper,
+                                         handler, wrapper, data);
        }
 
        // send to me
-       while(send_active_message(core_id(),(amr_t)smp_call_wrapper,
-                                 handler,wrapper,data) != 0);
+       send_active_msg_sync(core_id(),(amr_t)smp_call_wrapper,
+                                 handler,wrapper,data);
 
        cpu_relax(); // wait to get the interrupt
 
@@ -124,8 +124,8 @@ int smp_call_function_single(uint32_t dest, isr_t handler, void* data,
 
        enable_irqsave(&state);
 
-       while(send_active_message(dest,(amr_t)smp_call_wrapper,
-                                 handler,wrapper,data) != 0);
+       send_active_msg_sync(dest,(amr_t)smp_call_wrapper,
+                                 handler,wrapper,data);
 
        cpu_relax(); // wait to get the interrupt, if it's to this core
 
index a108e2c..fac8912 100644 (file)
@@ -30,7 +30,7 @@ static __inline uint32_t read_tbr(void) __attribute__((always_inline));
 static __inline uint32_t read_mmu_reg(uint32_t which) __attribute__((always_inline));
 static __inline uint32_t read_y(void) __attribute__((always_inline));
 static __inline uint32_t read_fsr(void) __attribute__((always_inline));
-static __inline uint64_t read_perfctr(uint32_t which) __attribute__((always_inline));
+static __inline uint64_t read_perfctr(uint32_t core, uint32_t which) __attribute__((always_inline));
 static __inline void write_psr(uint32_t val) __attribute__((always_inline));
 static __inline void write_wim(uint32_t val) __attribute__((always_inline));
 static __inline void write_tbr(uint32_t val) __attribute__((always_inline));
@@ -39,6 +39,7 @@ static __inline void write_y(uint32_t val) __attribute__((always_inline));
 static __inline void write_fsr(uint32_t val) __attribute__((always_inline));
 static __inline uint32_t memsize_mb(void) __attribute__((always_inline));
 static __inline uint32_t mmu_probe(uint32_t va) __attribute__((always_inline));
+static __inline uint32_t send_ipi(uint32_t dst) __attribute__((always_inline));
 
 void flush_windows();
 
@@ -92,10 +93,10 @@ read_fsr(void)
 }
 
 static __inline uint64_t
-read_perfctr(uint32_t which)
+read_perfctr(uint32_t cpu, uint32_t which)
 {
        uint32_t hi,lo;
-       intptr_t addr = which<<3;
+       intptr_t addr = cpu<<10 | which<<3;
        hi = load_alternate(addr,2);
        lo = load_alternate(addr+4,2);
        return (((uint64_t)hi) << 32) | lo;
@@ -159,6 +160,13 @@ mmu_probe(uint32_t va)
        return load_alternate((va & ~0xFFF) | 0x400, 3);
 }
 
+// Writes 0 to address (2 << 16 | dst << 10) in alternate address space 2.
+// NOTE(review): presumably this store raises an inter-processor interrupt
+// on core dst -- confirm against the platform documentation.
+// Always returns 0; callers treat a non-zero result as failure.
+static __inline uint32_t
+send_ipi(uint32_t dst)
+{
+       store_alternate(2 << 16 | dst << 10, 2, 0);
+       return 0;
+}
+
 #endif /* !__ASSEMBLER__ */
 
 #endif /* !ROS_INC_X86_H */
index 88d0a43..10028af 100644 (file)
 #pragma nodeputy
 #endif
 
-void
-idt_init(void)
+spinlock_t active_message_buf_busy[MAX_NUM_CPUS] = {0};
+active_message_t active_message_buf[MAX_NUM_CPUS];
+
+// Posts an active message to core dst and signals it with send_ipi().
+// Each destination has a single message slot guarded by
+// active_message_buf_busy[dst].  The slot is taken here with spin_trylock()
+// and, on success, stays locked on return; handle_ipi() on the destination
+// core unlocks it after copying the message out.
+// Returns 0 on success.  Returns -1 (as uint32_t) without sending when dst
+// is not a valid core, when the slot is still busy, or when send_ipi()
+// reports failure.
+uint32_t send_active_message(uint32_t dst, amr_t pc,
+                             TV(a0t) arg0, TV(a1t) arg1, TV(a2t) arg2)
 {
+       if(dst >= num_cpus || spin_trylock(&active_message_buf_busy[dst]))
+               return -1;
+
+       active_message_buf[dst].srcid = core_id();
+       active_message_buf[dst].pc = pc;
+       active_message_buf[dst].arg0 = arg0;
+       active_message_buf[dst].arg1 = arg1;
+       active_message_buf[dst].arg2 = arg2;
+
+       if(send_ipi(dst))
+       {
+               // the message was never delivered, so free the slot ourselves
+               spin_unlock(&active_message_buf_busy[dst]);
+               return -1;
+       }
+
+       return 0;
 }
 
 void
-sysenter_init(void)
+idt_init(void)
 {
 }
 
 void
-trap_handled(void)
+sysenter_init(void)
 {
-       if(current)
-               proc_startcore(current,&current->env_tf);
-       else if(core_id() == 0)
-               manager();
-       else
-               smp_idle();
 }
 
 void
@@ -70,10 +83,10 @@ void
 
 #define TRAPNAME_MAX   32
 
-char*
+static char*
 get_trapname(uint8_t tt, char buf[TRAPNAME_MAX])
 {
-       const char* trapnames[] = {
+       static const char* trapnames[] = {
                [0x00] "reset",
                [0x01] "instruction access exception",
                [0x02] "illegal instruction",
@@ -109,27 +122,31 @@ get_trapname(uint8_t tt, char buf[TRAPNAME_MAX])
 }
 
 void
-trap(trapframe_t* state, active_message_t* msg,
-     void (*handler)(trapframe_t*,active_message_t*))
+trap(trapframe_t* state, void (*handler)(trapframe_t*))
 {
-       // TODO: this will change with multicore processes
-       if(current)
+       // TODO: must save other cores' trap frames
+       // if we want them to migrate, block, etc.
+       if(current && current->vcoremap[0] == core_id())
        {
                current->env_tf = *state;
-               handler(&current->env_tf,msg);
+               handler(&current->env_tf);
        }
        else
-               handler(state,msg);
+               handler(state);
 }
 
 void
-handle_active_message(trapframe_t* state, active_message_t* message)
+handle_ipi(trapframe_t* state)
 {
-       uint32_t src = message->srcid;
-       TV(a0t) a0 = message->arg0;
-       TV(a1t) a1 = message->arg1;
-       TV(a2t) a2 = message->arg2;
-       (message->pc)(state,src,a0,a1,a2);
+       active_message_t m;
+       m = active_message_buf[core_id()];
+       spin_unlock(&active_message_buf_busy[core_id()]);
+
+       uint32_t src = m.srcid;
+       TV(a0t) a0 = m.arg0;
+       TV(a1t) a1 = m.arg1;
+       TV(a2t) a2 = m.arg2;
+       (m.pc)(state,src,a0,a1,a2);
        env_pop_tf(state);
 }
 
@@ -140,12 +157,14 @@ unhandled_trap(trapframe_t* state)
        uint32_t trap_type = (state->tbr >> 4) & 0xFF;
        get_trapname(trap_type,buf);
 
-       print_trapframe(state);
-
        if(state->psr & PSR_PS)
+       {
+               print_trapframe(state);
                panic("Unhandled trap in kernel!\nTrap type: %s",buf);
+       }
        else
        {
+               print_trapframe(state);
                warn("Unhandled trap in user!\nTrap type: %s",buf);
                assert(current);
                proc_destroy(current);
@@ -153,7 +172,7 @@ unhandled_trap(trapframe_t* state)
        }
 }
 
-void
+static void
 stack_fucked(trapframe_t* state)
 {
        // see if the problem arose when flushing out
@@ -161,11 +180,13 @@ stack_fucked(trapframe_t* state)
        extern uint32_t tflush;
        if(state->pc == (uint32_t)&tflush)
        {
-               // if so, copy original trap state, except for trap type
-               uint32_t tbr = state->tbr;
-               *state = *(trapframe_t*)(state->gpr[14]+64);
-               state->tbr = tbr;
+               // the trap happened while flushing out windows.
+               // hope this happened in the user, or else we're hosed...
+               extern char bootstacktop;
+               state = (trapframe_t*)(&bootstacktop-SIZEOF_TRAPFRAME_T-core_id()*KSTKSIZE);
        }
+
+       warn("You just got stack fucked!");
        unhandled_trap(state);
 }
 
@@ -225,11 +246,14 @@ handle_syscall(trapframe_t* state)
        state->pc = state->npc;
        state->npc += 4;
 
-       env_push_ancillary_state(current);
+       // TODO: must save other cores' ancillary state
+       // if we want them to migrate, block, etc.
+       if(current->vcoremap[0] == core_id())
+               env_push_ancillary_state(current);
 
        state->gpr[8] = syscall(current,state,num,a1,a2,a3,a4,a5);
 
-       trap_handled();
+       proc_startcore(current,state);
 }
 
 void
@@ -266,7 +290,10 @@ handle_breakpoint(trapframe_t* state)
        state->pc = state->npc;
        state->npc += 4;
 
-       env_push_ancillary_state(current);
+       // TODO: must save other cores' ancillary state
+       // if we want them to migrate, block, etc.
+       if(current->vcoremap[0] == core_id())
+               env_push_ancillary_state(current);
 
        // run the monitor
        monitor(state);
index 80b55c4..5621506 100644 (file)
@@ -14,9 +14,10 @@ handle_trap:
        # At this point, %l1 = pc, %l2 = npc, and %l0/3/4/5
        # might contain an active message.  so we only get %l6/%l7
 
-        # calculate stack pointer (-64 is space for window spill)
-        # sp = bootstacktop - core_id*KSTKSIZE - 64 - sizeof(trapframe_t)
-        set     bootstacktop-64-SIZEOF_TRAPFRAME_T-SIZEOF_ACTIVE_MESSAGE_T,%l6
+        # calculate stack pointer (-64 is space for window spill).
+        # sp = bootstacktop - core_id*KSTKSIZE - 64 - sizeof(trapframe_t).
+       # should you change this, make sure to change stack_fucked()
+        set     bootstacktop-64-SIZEOF_TRAPFRAME_T,%l6
         mov     CORE_ID_REG,%l7
        sll     %l7,KSTKSHIFT,%l7
         sub     %l6,%l7,%l6
@@ -26,16 +27,11 @@ handle_trap:
        mov     %psr,%l7
        btst    PSR_PS,%l7
        bne,a   1f
-        sub    %fp,64+SIZEOF_TRAPFRAME_T+SIZEOF_ACTIVE_MESSAGE_T,%l6
+        sub    %fp,64+SIZEOF_TRAPFRAME_T,%l6
 
-1:     # back up what might be an active message
-       st      %l0,[%l6+64+SIZEOF_TRAPFRAME_T+0]
-       st      %l3,[%l6+64+SIZEOF_TRAPFRAME_T+4]
-       st      %l4,[%l6+64+SIZEOF_TRAPFRAME_T+8]
-       st      %l5,[%l6+64+SIZEOF_TRAPFRAME_T+12]
-       st      %g0,[%l6+64+SIZEOF_TRAPFRAME_T+16]
+       # here is where we might do something with an active message
 
-       mov     %l7,%l0
+1:     mov     %l7,%l0
        # At this point we may use %l3/4/5/7 as temporary regs
 
        # is CWP valid?
@@ -86,9 +82,8 @@ tflush:        save   %sp,0,%sp
 
        # call the handler and pass in the tf, message, and handler addr
        add     %sp,64,%o0
-       add     %sp,64+SIZEOF_TRAPFRAME_T,%o1
        call    trap
-        mov    %l5,%o2
+        mov    %l5,%o1
 
        # should never get here
        unimp
@@ -230,7 +225,7 @@ env_pop_tf:
        jmp     %l1
        rett    %l2
 
-       .global send_active_message
-send_active_message:
+       .global send_active_message_raw
+send_active_message_raw:
        retl
-        .word  0x81f00000      ! this is the opcode for SENDAM
+        .word  0x81f00000
index 76c5b33..770712e 100644 (file)
@@ -32,13 +32,13 @@ trap_table:
        UNHANDLED_TRAP                          ! 0x15
        UNHANDLED_TRAP                          ! 0x16
        UNHANDLED_TRAP                          ! 0x17
-       TRAP_TABLE_ENTRY(handle_active_message) ! 0x18
+       UNHANDLED_TRAP                          ! 0x18
        UNHANDLED_TRAP                          ! 0x19
        JMP(handle_timer_interrupt)             ! 0x1A
        UNHANDLED_TRAP                          ! 0x1B
        UNHANDLED_TRAP                          ! 0x1C
        UNHANDLED_TRAP                          ! 0x1D
-       UNHANDLED_TRAP                          ! 0x1E
+       TRAP_TABLE_ENTRY(handle_ipi)            ! 0x1E
        UNHANDLED_TRAP                          ! 0x1F
        UNHANDLED_TRAP                          ! 0x20
        UNHANDLED_TRAP                          ! 0x21
diff --git a/kern/include/ros/arch b/kern/include/ros/arch
new file mode 120000 (symlink)
index 0000000..e510ad0
--- /dev/null
@@ -0,0 +1 @@
+../arch/ros
\ No newline at end of file
index 90d84df..1f57c3d 100644 (file)
@@ -3,9 +3,10 @@
 
 #ifndef __ASSEMBLER__
 #include <ros/common.h>
-#include <arch/mmu.h>
 #endif /* not __ASSEMBLER__ */
 
+#include <ros/arch/mmu.h>
+
 /*
  * This file contains definitions for memory management in our OS,
  * which are relevant to both the kernel and user-mode software.
index ee7e22f..b48feb0 100644 (file)
@@ -11,6 +11,7 @@
 
 typedef struct procinfo {
        pid_t id;
+       size_t max_harts;
 } procinfo_t;
 #define PROCINFO_NUM_PAGES  ((sizeof(procinfo_t)-1)/PGSIZE + 1)        
 
index d20a195..3fffe91 100644 (file)
@@ -38,8 +38,9 @@
 #define SYS_run_binary                         21
 // forward a syscall to front-end machine
 #define SYS_frontend                           22
+#define SYS_getvcoreid                         23
 // Keep this in sync with the last syscall number
-#define NSYSCALLS                                      22
+#define NSYSCALLS                                      23
 // syscall number starts at 1 and goes up to NSYSCALLS, without holes.
 #define INVALID_SYSCALL(syscallno) ((syscallno) > NSYSCALLS)
 
index 5352eaf..5177c42 100644 (file)
@@ -60,6 +60,7 @@ KERN_APPFILES := \
                  $(USER_APPS_PARLIB_DIR)/channel_test_client \
                  $(USER_APPS_PARLIB_DIR)/channel_test_server \
                  $(USER_APPS_PARLIB_DIR)/hello \
+                 $(USER_APPS_PARLIB_DIR)/manycore_test \
                  $(USER_APPS_PARLIB_DIR)/matrix
 #                 $(USER_APPS_PARLIB_DIR)/open_read
 
index d319916..1da01c5 100644 (file)
@@ -233,6 +233,15 @@ env_setup_vm_error:
        return -ENOMEM;
 }
 
+// Fills in the procinfo structure of process p: id is set to the low
+// 10 bits of env_id and max_harts to num_cpus-1.
+// NOTE(review): presumably one core is held back for the kernel/manager;
+// on a single-core system this makes max_harts 0 -- confirm that is intended.
+static void
+proc_init_procinfo(struct proc* p)
+{
+       p->env_procinfo->id = (p->env_id & 0x3FF);
+
+       // TODO: maybe do something smarter here
+       p->env_procinfo->max_harts = num_cpus-1;
+}
+
 //
 // Allocates and initializes a new environment.
 // On success, the new environment is stored in *newenv_store.
@@ -275,7 +284,7 @@ env_alloc(env_t **newenv_store, envid_t parent_id)
        e->env_id = generation | (e - envs);
 
        // Set the basic status variables.
-    e->proc_lock = 0;
+       e->proc_lock = 0;
        e->env_parent_id = parent_id;
        proc_set_state(e, PROC_CREATED);
        e->env_runs = 0;
@@ -293,10 +302,7 @@ env_alloc(env_t **newenv_store, envid_t parent_id)
        memset(&e->env_tf, 0, sizeof(e->env_tf));
        proc_init_trapframe(&e->env_tf);
 
-       /*
-        * Initialize the contents of the e->env_procinfo structure
-        */
-       e->env_procinfo->id = (e->env_id & 0x3FF);
+       proc_init_procinfo(e);
 
        /*
         * Initialize the contents of the e->env_procdata structure
@@ -444,7 +450,7 @@ load_icode(env_t *SAFE e, uint8_t *COUNT(size) binary, size_t size)
                memcpy(&phdr, binary + elfhdr.e_phoff + i*sizeof(phdr), sizeof(phdr));
                if (phdr.p_type != ELF_PROG_LOAD)
                        continue;
-        // TODO: validate elf header fields!
+               // TODO: validate elf header fields!
                // seg alloc creates PTE_U|PTE_W pages.  if you ever want to change
                // this, there will be issues with overlapping sections
                _end = MAX(_end, (void*)(phdr.p_va + phdr.p_memsz));
index 8479545..4c7b30b 100644 (file)
@@ -40,6 +40,7 @@ DECL_PROG(parlib_channel_test_client);
 DECL_PROG(parlib_channel_test_server);
 DECL_PROG(parlib_hello);
 DECL_PROG(parlib_matrix);
+DECL_PROG(parlib_manycore_test);
 
 struct kfs_entry kfs[MAX_KFS_FILES] = {
        KFS_ENTRY(roslib_proctests)
@@ -54,6 +55,7 @@ struct kfs_entry kfs[MAX_KFS_FILES] = {
        KFS_ENTRY(parlib_channel_test_server)
        KFS_ENTRY(parlib_hello)
        KFS_ENTRY(parlib_matrix)
+       KFS_ENTRY(parlib_manycore_test)
 };
 
 ssize_t kfs_lookup_path(char* path)
index d35dbdf..c20ddbe 100644 (file)
  */
 void manager(void)
 {
+       #ifndef DEVELOPER_NAME
+               #define DEVELOPER_NAME brho
+       #endif
+
+       // LoL
+       #define PASTE(s1,s2) s1 ## s2
+       #define MANAGER_FUNC(dev) PASTE(manager_,dev)
+
+       void MANAGER_FUNC(DEVELOPER_NAME)(void);
+       MANAGER_FUNC(DEVELOPER_NAME)();
+}
+
+void manager_brho(void)
+{
        static uint8_t RACY progress = 0;
 
        struct proc *envs[256];
@@ -37,16 +51,6 @@ void manager(void)
        uint32_t corelist[MAX_NUM_CPUS];
        uint32_t num = 3;
 
-       // This is a bypass of the standard manager structure, for network use
-       // If enabled, this spawns parlib_matrix, and allows the execution
-       // of a remote binary to function correctly (schedule() call below)
-       if (progress++ == 0) {
-               envs[0] = kfs_proc_create(kfs_lookup_path("parlib_matrix"));
-               proc_set_state(envs[0], PROC_RUNNABLE_S);
-               proc_run(envs[0]);
-       }
-       schedule();
-
        switch (progress++) {
                case 0:
                        //p = kfs_proc_create(kfs_lookup_path("roslib_proctests"));
@@ -176,3 +180,56 @@ void manager(void)
        */
        return;
 }
+
+// Per-developer manager routine: when its call counter reads zero, create the
+// "parlib_matrix" program from KFS, mark it runnable and run it.  Every other
+// call only advances the counter.
+void manager_klueska()
+{
+       static uint8_t progress = 0;
+
+       if (progress++ != 0)
+               return;
+
+       struct proc *matrix = kfs_proc_create(kfs_lookup_path("parlib_matrix"));
+       proc_set_state(matrix, PROC_RUNNABLE_S);
+       proc_run(matrix);
+}
+
+// Per-developer manager routine.  While the step counter is 0-3, each call
+// creates one program from KFS, marks it runnable, runs it and then calls
+// schedule().  Any call that gets past that step ends in panic().
+void manager_waterman()
+{
+       // Program launched at each step, indexed by the step counter.
+       static char *const programs[] = {
+               "parlib_draw_nanwan_standalone",
+               "parlib_manycore_test",
+               "parlib_draw_nanwan_standalone",
+               "parlib_draw_nanwan_standalone",
+       };
+       static uint8_t progress = 0;
+       const uint8_t step = progress++;
+
+       if(step < sizeof(programs)/sizeof(programs[0]))
+       {
+               // Only the first step announces itself.
+               if(step == 0)
+                       printk("got here\n");
+
+               struct proc *p = kfs_proc_create(kfs_lookup_path(programs[step]));
+               proc_set_state(p, PROC_RUNNABLE_S);
+               proc_run(p);
+               schedule();
+       }
+
+       panic("DON'T PANIC");
+}
index 840d8fa..4e43603 100644 (file)
@@ -335,6 +335,11 @@ static envid_t sys_getcpuid(void)
        return core_id();
 }
 
+// Return the entry of e->vcoremap selected by the current core's id.
+// NOTE(review): this indexes vcoremap by core_id(); confirm the map is keyed
+// by physical core rather than by vcore.
+static size_t sys_getvcoreid(env_t* e)
+{
+       const size_t vcoreid = e->vcoremap[core_id()];
+       return vcoreid;
+}
+
 // TODO FIX Me!!!! for processes
 // Destroy a given environment (possibly the currently running environment).
 //
@@ -479,6 +484,8 @@ intreg_t syscall(struct proc *p, trapframe_t *tf, uintreg_t syscallno,
                        return sys_cgetc(p); // this will need to block
                case SYS_getcpuid:
                        return sys_getcpuid();
+               case SYS_getvcoreid:
+                       return sys_getvcoreid(p);
                case SYS_getpid:
                        return sys_getenvid(p);
                case SYS_proc_destroy:
diff --git a/user/apps/parlib/manycore_test.c b/user/apps/parlib/manycore_test.c
new file mode 100644 (file)
index 0000000..04de64c
--- /dev/null
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <assert.h>
+#include <hart.h>
+#include <parlib.h>
+
+hart_barrier_t b;
+
+// Print this hart's process ID, the vcoreid it was handed and the CPU id the
+// kernel reports, then wait at the global barrier b.
+void do_work_son(int vcoreid)
+{
+       const int my_cpu = sys_getcpuid();
+       const int my_pid = sys_getpid();
+
+       printf("Hello! My Process ID: %d My VCoreID: %d My CPU: %d\n", my_pid, vcoreid, my_cpu);
+       hart_barrier_wait(&b, vcoreid);
+}
+
+// Entry point for the additional harts: they must not be hart 0, and each
+// one runs the shared work routine with its own hart id.
+void hart_entry()
+{
+       size_t self = hart_self();
+
+       assert(self > 0);
+       do_work_son(self);
+}
+
+// Runs on hart 0: set up a barrier sized for every hart, request all the
+// remaining harts, then do the same work the other harts do.
+// Returns 0 on success and 1 if the barrier or the harts could not be set up.
+int main(int argc, char** argv)
+{
+       assert(hart_self() == 0);
+
+       if(hart_barrier_init(&b,hart_max_harts()) != 0)
+       {
+               printf("manycore_test: could not initialize the barrier\n");
+               return 1;
+       }
+
+       // The barrier expects hart_max_harts() participants.  If the request
+       // fails they never arrive, and waiting at the barrier would spin forever.
+       if(hart_request(hart_max_harts()-1) != 0)
+       {
+               printf("manycore_test: could not obtain the requested harts\n");
+               return 1;
+       }
+
+       do_work_son(0);
+       return 0;
+}
diff --git a/user/parlib/inc/hart.h b/user/parlib/inc/hart.h
new file mode 100644 (file)
index 0000000..a80c089
--- /dev/null
@@ -0,0 +1,65 @@
+#ifndef _HART_H
+#define _HART_H
+
+// size_t is used throughout this header; include its definition directly
+// instead of relying on another header to provide it.
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <ros/error.h>
+#include <ros/arch/hart.h>
+
+// Upper bound on the number of harts the library supports: 2^6 = 64.
+#define HART_LOG2_MAX_MAX_HARTS 6
+#define HART_MAX_MAX_HARTS (1 << HART_LOG2_MAX_MAX_HARTS)
+// Padding granularity for the structures below.
+// NOTE(review): presumably a cache-line size, to avoid false sharing -- confirm.
+#define HART_CL_SIZE 128
+
+// One waiter's node in the queue lock, padded out to HART_CL_SIZE bytes.
+typedef struct hart_lock_qnode
+{
+       // Node queued behind this one, or 0 if none has linked itself yet.
+       volatile struct hart_lock_qnode* volatile next;
+       // Nonzero while the owner of this node must keep spinning.
+       volatile int locked;
+       char pad[HART_CL_SIZE-sizeof(void*)-sizeof(int)];
+} hart_lock_qnode_t;
+
+// Queue lock: a tail pointer plus one preallocated node per possible hart.
+typedef struct
+{
+       // Most recently enqueued node, or 0 when nobody holds or awaits the lock.
+       hart_lock_qnode_t* lock;
+       char pad[HART_CL_SIZE-sizeof(hart_lock_qnode_t*)];
+       // Node used by each hart, indexed by hart_self().
+       hart_lock_qnode_t qnode[HART_MAX_MAX_HARTS] __attribute__((aligned(8)));
+} hart_lock_t;
+
+// Static initializer for a hart_lock_t (everything zero).
+#define HART_LOCK_INIT {0}
+
+// Per-participant state of the dissemination barrier.
+typedef struct
+{
+       // Flags written by partners, indexed [parity][round].
+       volatile int myflags[2][HART_LOG2_MAX_MAX_HARTS];
+       // For each [parity][round], the partner's myflags entry this
+       // participant signals.
+       volatile int* partnerflags[2][HART_LOG2_MAX_MAX_HARTS];
+       // Which of the two flag sets the next barrier episode uses (0 or 1).
+       int parity;
+       // Value that means "arrived" for the current episode.
+       int sense;
+       char pad[HART_CL_SIZE];
+} hart_dissem_flags_t;
+
+// Dissemination barrier shared by nprocs participants.
+typedef struct
+{
+       // Number of participants.
+       size_t nprocs;
+       // One hart_dissem_flags_t per participant, allocated by hart_barrier_init().
+       hart_dissem_flags_t* allnodes;
+       // Number of signalling rounds per episode: ceil(log2(nprocs)).
+       size_t logp;
+} hart_barrier_t;
+
+// Entry point run by every hart other than hart 0; applications supply it.
+extern void hart_entry();
+
+// Prepare b for nprocs participants.  Returns 0 on success, nonzero otherwise.
+error_t hart_barrier_init(hart_barrier_t* b, size_t nprocs);
+// Block until every participant of b has called this; vcoreid selects the
+// caller's slot in b.
+void hart_barrier_wait(hart_barrier_t* b, size_t vcoreid);
+
+// Queue-lock operations.  Lock and unlock use the calling hart's own node.
+void hart_lock_init(hart_lock_t* lock);
+void hart_lock_unlock(hart_lock_t* lock);
+void hart_lock_lock(hart_lock_t* lock);
+
+// Id of the calling hart.
+size_t hart_self();
+// Ask for k more harts.  Returns 0 on success, nonzero otherwise.
+error_t hart_request(size_t k);
+// Issue the yield system call for the calling hart.
+void hart_yield();
+// Largest number of harts this process may use, capped at HART_MAX_MAX_HARTS.
+size_t hart_max_harts();
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index 269e5db..0669836 100644 (file)
@@ -11,6 +11,7 @@
 
 #define debug_in_out(...) //debug(__VA_ARGS__)  
 #define debug_write_check(fmt, ...)  //debug(fmt, __VA_ARGS__)
+uint32_t newcore(void);
 
 typedef uint32_t syscall_id_t;
 
index b3a5f11..050135d 100644 (file)
@@ -19,8 +19,8 @@ enum {
        PG_RDWR   = 6,
 };
 
-extern procinfo_t* procinfo;
-extern procdata_t* procdata;
+extern procinfo_t procinfo;
+extern procdata_t procdata;
 
 intreg_t syscall(uint16_t num, intreg_t a1,
                 intreg_t a2, intreg_t a3,
@@ -40,5 +40,7 @@ error_t     sys_proc_destroy(int pid);
 ssize_t     sys_shared_page_alloc(void *COUNT(PGSIZE) *addr, pid_t p2, 
                                   int p1_flags, int p2_flags);
 ssize_t     sys_shared_page_free(void *COUNT(PGSIZE) addr, pid_t p2);
+ssize_t     sys_resource_req(int type, size_t amount, uint32_t flags);
+
 
 #endif // !ROS_INC_PARLIB_H
index f5bdbca..1016696 100644 (file)
@@ -14,6 +14,7 @@ USER_PARLIB_SRC_SRCFILES := $(USER_PARLIB_SRC_DIR)/debug.c \
                             $(USER_PARLIB_SRC_DIR)/syscall.c \
                             $(USER_PARLIB_SRC_DIR)/parlibmain.c \
                             $(USER_PARLIB_SRC_DIR)/channel.c \
+                            $(USER_PARLIB_SRC_DIR)/hart.c \
                             $(USER_PARLIB_ARCH_SRCFILES)
 
 USER_PARLIB_SRC_OBJFILES := $(patsubst $(USER_PARLIB_SRC_DIR)/%.c, \
index ba576f2..b02ad1a 100644 (file)
@@ -10,7 +10,7 @@
 #include <string.h>
 #include <channel.h>
 #include <ros/syscall.h>
-#include <arch/arch.h>
+#include <hart.h>
 
 void simulate_rsp(channel_t* ch) {
        channel_t ch_server;
@@ -121,7 +121,7 @@ error_t channel_sendmsg(channel_t* ch, channel_msg_t* msg) {
        RING_PUSH_REQUESTS(&(ch->ring_side.front));
        
        while (!(RING_HAS_UNCONSUMED_RESPONSES(&(ch->ring_side.front))))
-               cpu_relax();
+               hart_relax();
        RING_GET_RESPONSE(&(ch->ring_side.front), ch->ring_side.front.rsp_cons++);
        
        return ESUCCESS;
diff --git a/user/parlib/src/hart.c b/user/parlib/src/hart.c
new file mode 100644 (file)
index 0000000..f288235
--- /dev/null
@@ -0,0 +1,196 @@
+#include <hart.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <parlib.h>
+
+// TODO: HART_ALLOCATE_STACKS should be disposed of by means of a better ABI.
+
+static size_t _hart_current_harts = 1;
+static hart_lock_t _hart_lock = HART_LOCK_INIT;
+
+// Write str to file descriptor 2, then abort the process.
+static void hart_abort(const char* str)
+{
+       size_t len = strlen(str);
+
+       write(2,str,len);
+       abort();
+}
+
+// Default hart entry point.  It is declared weak, so an application's own
+// hart_entry() takes its place; if this one ever runs, it aborts.
+#pragma weak hart_entry
+void hart_entry()
+{
+       static const char complaint[] = "You should write your own damn hart_entry()!\n";
+
+       hart_abort(complaint);
+}
+
+// One-time library setup; every call after the first does nothing.
+// With HART_ALLOCATE_STACKS defined, this allocates the zeroed table of
+// per-hart stack pointers and aborts if that allocation fails.
+static void _hart_init()
+{
+       static int initialized = 0;
+
+       if(!initialized)
+       {
+               initialized = 1;
+
+               #ifdef HART_ALLOCATE_STACKS
+               extern void** stack_ptr_array;
+               stack_ptr_array = (void**)calloc(hart_max_harts(),sizeof(void*));
+               if(stack_ptr_array == NULL)
+                       hart_abort("Harts initialization ran out of memory!\n");
+               #endif
+       }
+}
+
+// Ask the kernel for k harts beyond the ones this process already has.
+//
+// Returns 0 on success, after adding k to the current hart count.
+// Returns -1 if the request would exceed hart_max_harts().
+// Returns -ENOMEM if a stack cannot be allocated (HART_ALLOCATE_STACKS only).
+// Otherwise returns the nonzero result of sys_resource_req().
+// On any failure the hart count is unchanged, stacks allocated by this call
+// are freed, and the library lock is released.
+error_t hart_request(size_t k)
+{
+       size_t i,j;
+       const int user_stack_size = 1024*1024;
+
+       #ifdef HART_ALLOCATE_STACKS
+       extern void** stack_ptr_array;
+       #endif
+
+       _hart_init();
+
+       hart_lock_lock(&_hart_lock);
+
+       // k is unsigned, so only the upper bound needs checking.  Compare against
+       // the remaining headroom so the sum cannot wrap around, and release the
+       // lock on this path too: returning with it held would hang the next caller.
+       size_t max_harts = hart_max_harts();
+       if(_hart_current_harts > max_harts || k > max_harts - _hart_current_harts)
+       {
+               hart_lock_unlock(&_hart_lock);
+               return -1;
+       }
+
+       #ifdef HART_ALLOCATE_STACKS
+       for(i = _hart_current_harts; i < _hart_current_harts+k; i++)
+       {
+               char* stack = (char*)malloc(user_stack_size);
+               if(stack == NULL)
+               {
+                       for(j = _hart_current_harts; j < i; j++)
+                       {
+                               // The table holds each stack's top; free() needs the
+                               // base address that malloc() returned.
+                               free((char*)stack_ptr_array[j] - user_stack_size);
+                               stack_ptr_array[j] = 0;
+                       }
+                       hart_lock_unlock(&_hart_lock);
+                       return -ENOMEM;
+               }
+               // Record the top of the stack.
+               stack_ptr_array[i] = stack + user_stack_size;
+       }
+       #endif
+
+       error_t ret;
+       if((ret = sys_resource_req(0,_hart_current_harts+k,0)) == 0)
+       {
+               _hart_current_harts += k;
+               hart_lock_unlock(&_hart_lock);
+               return 0;
+       }
+
+       // The kernel refused: give back the stacks set aside above.
+       #ifdef HART_ALLOCATE_STACKS
+       for(i = _hart_current_harts; i < _hart_current_harts+k; i++)
+       {
+               free((char*)stack_ptr_array[i] - user_stack_size);
+               stack_ptr_array[i] = 0;
+       }
+       #endif
+
+       hart_lock_unlock(&_hart_lock);
+       return ret;
+}
+
+// Issue the SYS_yield system call with all five arguments zero; its result
+// is ignored.
+void hart_yield()
+{
+       (void)syscall(SYS_yield, 0, 0, 0, 0, 0);
+}
+
+// Number of harts this process may use: procinfo.max_harts, capped at
+// HART_MAX_MAX_HARTS.
+size_t hart_max_harts()
+{
+       if(procinfo.max_harts < HART_MAX_MAX_HARTS)
+               return procinfo.max_harts;
+
+       return HART_MAX_MAX_HARTS;
+}
+
+// MCS locks!!
+// Reset a lock to its initial state by zeroing the whole structure.
+void hart_lock_init(hart_lock_t* lock)
+{
+       memset(lock, 0, sizeof *lock);
+}
+
+// Typed wrapper around hart_swap(): store val into *addr and return the
+// pointer that was stored there before.
+static inline hart_lock_qnode_t* hart_qnode_swap(hart_lock_qnode_t** addr, hart_lock_qnode_t* val)
+{
+       size_t previous = hart_swap((size_t*)addr, (size_t)val);
+
+       return (hart_lock_qnode_t*)previous;
+}
+
+// Acquire the queue lock.  The caller uses the qnode slot indexed by
+// hart_self(); if another node is already queued, it spins on its own
+// `locked` flag until that flag is cleared.
+void hart_lock_lock(hart_lock_t* lock)
+{
+       hart_lock_qnode_t* qnode = &lock->qnode[hart_self()];
+       qnode->next = 0;
+       // Swap our node into lock->lock and learn which node was there before.
+       // NOTE(review): hart_swap() is presumably atomic -- confirm in ros/arch/hart.h.
+       hart_lock_qnode_t* predecessor = hart_qnode_swap(&lock->lock,qnode);
+       if(predecessor)
+       {
+               // Set the flag before linking in: the predecessor can only find
+               // this node (and clear the flag) once next points at it.
+               qnode->locked = 1;
+               predecessor->next = qnode;
+               while(qnode->locked);
+       }
+}
+
+// Release the queue lock held through this hart's qnode (the slot indexed by
+// hart_self()), passing it on to a waiter if there is one.  This variant
+// uses only swap operations, no compare-and-swap.
+void hart_lock_unlock(hart_lock_t* lock)
+{
+       hart_lock_qnode_t* qnode = &lock->qnode[hart_self()];
+       if(qnode->next == 0)
+       {
+               // No successor is linked yet: swap the tail out for 0.  If the old
+               // tail was our own node, nobody was waiting and we are done.
+               hart_lock_qnode_t* old_tail = hart_qnode_swap(&lock->lock,0);
+               if(old_tail == qnode)
+                       return;
+
+               // Somebody enqueued behind us.  Put the old tail back; `usurper` is
+               // whatever node was installed in lock->lock while it read as 0.
+               hart_lock_qnode_t* usurper = hart_qnode_swap(&lock->lock,old_tail);
+               // Wait for our successor to finish linking itself to us.
+               while(qnode->next == 0);
+               if(usurper)
+                       // Chain our waiters behind the usurper rather than waking them.
+                       usurper->next = qnode->next;
+               else
+                       qnode->next->locked = 0;
+       }
+       else
+               // Successor already linked: clear its flag to hand over the lock.
+               qnode->next->locked = 0;
+}
+
+// MCS dissemination barrier!
+// Prepare barrier b for np participants: allocate one zeroed flag record per
+// participant and, for each round k, point participant i at the flags of
+// participant (i + 2^k) mod np.
+//
+// Returns 0 on success, -1 if np exceeds hart_max_harts(), and -ENOMEM if
+// the flag records cannot be allocated.
+error_t hart_barrier_init(hart_barrier_t* b, size_t np)
+{
+       if(np > hart_max_harts())
+               return -1;
+       b->allnodes = (hart_dissem_flags_t*)calloc(np,sizeof(hart_dissem_flags_t));
+       // Without this check the wiring loop below would write through NULL.
+       // calloc(0, ...) may legitimately return NULL, so np == 0 is not an error.
+       if(b->allnodes == NULL && np != 0)
+               return -ENOMEM;
+       b->nprocs = np;
+
+       // logp = ceil(log2(np)): start at 1 when np is not a power of two, then
+       // add floor(log2(np)).
+       b->logp = (np & (np-1)) != 0;
+       while(np >>= 1)
+               b->logp++;
+
+       size_t i,k;
+       for(i = 0; i < b->nprocs; i++)
+       {
+               b->allnodes[i].parity = 0;
+               b->allnodes[i].sense = 1;
+
+               for(k = 0; k < b->logp; k++)
+               {
+                       // In round k, participant i signals participant j.
+                       size_t j = (i+(1<<k)) % b->nprocs;
+                       b->allnodes[i].partnerflags[0][k] = &b->allnodes[j].myflags[0][k];
+                       b->allnodes[i].partnerflags[1][k] = &b->allnodes[j].myflags[1][k];
+               }
+       }
+
+       return 0;
+}
+
+// Wait at barrier b as participant pid.  In each of the b->logp rounds the
+// caller sets its partner's flag to the current sense and spins until its
+// own flag for that round carries the same value.  Afterwards the parity
+// flips, and the sense flips whenever the parity just used was 1.
+void hart_barrier_wait(hart_barrier_t* b, size_t pid)
+{
+       hart_dissem_flags_t* me = &b->allnodes[pid];
+       const int parity = me->parity;
+       const int sense = me->sense;
+       size_t round = 0;
+
+       while(round < b->logp)
+       {
+               *me->partnerflags[parity][round] = sense;
+               while(me->myflags[parity][round] != sense)
+                       ;
+               round++;
+       }
+
+       if(parity)
+               me->sense = 1-sense;
+       me->parity = 1-parity;
+}
+
+// Id of the calling hart, as reported by the architecture-specific
+// __hart_self() (defined in ros/arch/hart.h).
+size_t
+hart_self()
+{
+       size_t self = __hart_self();
+
+       return self;
+}
index 4734b02..5edc303 100644 (file)
@@ -1,4 +1,3 @@
-#include <arch/mmu.h>
 #include <ros/memlayout.h>
 
 .data
        .set procinfo, UINFO
        .globl procdata
        .set procdata, UDATA
-       .globl pages
-       .set pages, UPAGES
-       .globl vpt
-       .set vpt, UVPT
-       .globl vpd
-       .set vpd, (UVPT+(UVPT>>12)*4)
+
+// TODO: We're not exposing these yet.  Think about how to do so judiciously.
+//     .globl vpt
+//     .set vpt, UVPT
+//     .globl vpd
+//     .set vpd, (UVPT+(UVPT>>12)*4)
 
 
 // Entrypoint - this is where the kernel (or our parent environment)
index 93f663a..cad10d6 100644 (file)
@@ -12,4 +12,3 @@ OBJDIRS += $(USER_PARLIB_ARCH_SRC_DIR)
 USER_PARLIB_ARCH_SRCFILES := $(USER_PARLIB_ARCH_SRC_DIR)/syscall.c \
                              $(USER_PARLIB_ARCH_SRC_DIR)/entry.S \
                              $(USER_PARLIB_ARCH_SRC_DIR)/newlib_backend.c
-
index 2a61b56..4eb7cc9 100644 (file)
        .globl vpd
        .set vpd, (UVPT+(UVPT>>12)*4)
 
+.align 4
+.globl stack_ptr_array
+stack_ptr_array:
+   .word 0
+
+
 // Entrypoint - this is where the kernel (or our parent environment)
 // starts us running when we are initially loaded into a new environment.
 .text
 .globl _start
 _start:
+       tst     %g6
+       bne     notcore0
+        nop
+
+       // Save top 256 bytes of stack for other threads to allocate their stacks
+       //sub   %sp,256,%sp
+
        // See if we were started with arguments on the stack
-       tst     %o0
-       bne     args_exist
+       tst     %o0
+       bne     args_exist
         nop
 
        // If not, push dummy argc/argv arguments.
@@ -38,12 +51,22 @@ args_exist:
        call    parlibmain
         nop
 
-1:     ba      1b
-        nop
+       unimp
 
+notcore0:
 
-.globl hart_self
-hart_self:
-       retl
-       mov     CORE_ID_REG,%o0
+       // Get a stack, set %sp
+
+       set     stack_ptr_array, %l0
+       ld      [%l0], %l0
+       sll     %g6, 2, %l1
+       ld      [%l0+%l1], %l1
+       sub     %l1, 64, %sp
+
+       call    hart_entry
+        nop
+
+       call    hart_yield
+        nop
 
+       unimp
index 2e931dd..0e884d0 100644 (file)
@@ -8,12 +8,22 @@
 #include <sys/times.h>
 #include <sys/time.h>
 #include <debug.h>
+#include <hart.h>
 
 char *__env[1] = { 0 };
 char **environ = __env;
 
 #define IS_CONSOLE(fd) ((uint32_t)(fd) < 3)
 
+/* Return the value of hart_self(), converted to uint32_t.
+ * NOTE(review): presumably this is the calling core's vcoreid, kept under
+ * this name for code that still calls newcore() -- confirm. */
+uint32_t newcore(void)
+{
+       return hart_self();
+}
+
+
 int
 getpid(void)
 {
@@ -126,7 +136,7 @@ times(struct tms* buf)
 int
 gettimeofday(struct timeval* tp, void* tzp)
 {
-       return -1;
+       return syscall(SYS_frontend,RAMP_SYSCALL_gettimeofday,(int)tp,(int)tzp,0,0);
 }
 
 /* sbrk()
index 2fbe086..2e12367 100644 (file)
@@ -80,3 +80,10 @@ ssize_t sys_eth_read(void* buf, size_t len)
                
        return syscall(SYS_eth_read, (intreg_t)buf, len, 0, 0, 0);
 }
+
+/* Ask the kernel for `amount` of resource `type` via SYS_resource_req and
+ * return the system call's result.  Flags in ros/resource.h. */
+ssize_t sys_resource_req(int type, size_t amount, uint32_t flags)
+{
+        ssize_t result = syscall(SYS_resource_req, type, amount, flags, 0, 0);
+
+        return result;
+}
+
index 6bf33ca..a1c1a98 100644 (file)
@@ -24,6 +24,7 @@
 // libos.c or entry.S
 extern char *NTS binaryname;
 extern procinfo_t* procinfo;
+extern void** stack_ptr_array;
 extern procdata_t* procdata;
 extern syscall_front_ring_t syscallfrontring;
 extern sysevent_back_ring_t syseventbackring;
index 6ce88ec..3cf3571 100644 (file)
@@ -1,10 +1,11 @@
 #include <arch/mmu.h>
+#include <arch/arch.h>
 #include <ros/memlayout.h>
 
 .data
 
 
-// Define the global symbols 'procinfo', 'procdata', 'pages', 'vpt', and 'vpd'
+// Define the global symbols 'procinfo', 'procdata', 'pages', 'vpt', and 'vpd'
 // so that they can be used in C as if they were ordinary global arrays.
        .globl procinfo
        .set procinfo, UINFO
        .globl vpd
        .set vpd, (UVPT+(UVPT>>12)*4)
 
+.align 4
+.globl stack_ptr_array
+stack_ptr_array:
+   .word 0
 
-// Entrypoint - this is where the kernel (or our parent process)
-// starts us running when we are initially created and run
+
+// Entrypoint - this is where the kernel (or our parent environment)
+// starts us running when we are initially loaded into a new environment.
 .text
 .globl _start
 _start:
-       // See if we were started with arguments on the stack
-       tst     %o0
-       bne     args_exist
-        nop
+        tst     %g6
+        bne     notcore0
+         nop
+
+        // Save top 256 bytes of stack for other threads to allocate their stacks
+        //sub   %sp,256,%sp
+
+        // See if we were started with arguments on the stack
+        tst     %o0
+        bne     args_exist
+         nop
 
-       // If not, push dummy argc/argv arguments.
-       // This happens when we are loaded by the kernel,
-       // because the kernel does not know about passing arguments.
-       mov     0,%o0
-       mov     0,%o1
+        // If not, push dummy argc/argv arguments.
+        // This happens when we are loaded by the kernel,
+        // because the kernel does not know about passing arguments.
+        mov     0,%o0
+        mov     0,%o1
 
 args_exist:
        call    libmain
         nop
 
-1:     ba      1b
+       call    exit
         nop
 
+       unimp
+
+notcore0:
+
+        // Get a stack, set %sp
+
+        set stack_ptr_array, %l0
+        ld [%l0], %l0
+        sll %g6, 2, %l1
+        ld  [%l0+%l1], %l1
+        sub %l1, 64, %sp
+
+        call    main
+         nop
+
+       call    exit
+        nop
+
+       unimp
+
+
+.globl hart_self
+hart_self:
+       retl
+        mov    %g6,%o0