Xen Ring Buffers
author Barret Rhoden <brho@cs.berkeley.edu>
Sat, 2 May 2009 03:48:14 +0000 (20:48 -0700)
committer Barret Rhoden <brho@cs.berkeley.edu>
Sat, 2 May 2009 03:48:14 +0000 (20:48 -0700)
Adds Xen's ring buffers and uses them in a rudimentary manner to do the
cprintf_async syscalls.  The lib/syscall currently prints the string
twice.  Done just for testing, will be gone next commit.

inc/atomic.h [new file with mode: 0644]
inc/lib.h
inc/ring_buffer.h [new file with mode: 0644]
inc/syscall.h
kern/atomic.h
kern/env.c
kern/init.c
kern/syscall.c
kern/syscall.h
lib/libmain.c
lib/syscall.c

diff --git a/inc/atomic.h b/inc/atomic.h
new file mode 100644 (file)
index 0000000..3a26328
--- /dev/null
@@ -0,0 +1,9 @@
+#ifndef ROS_INC_ATOMIC_H
+#define ROS_INC_ATOMIC_H
+
+// Memory barriers for x86.  Each one carries a "memory" clobber so the
+// compiler cannot move loads or stores across it, in addition to whatever
+// ordering the fence instruction enforces in hardware.
+//   mb()  - full barrier  (mfence)
+//   rmb() - read barrier  (lfence)
+//   wmb() - write barrier (sfence)
+// All three are statement expressions, so "mb();" is safe inside if/else.
+#define mb()  ({ asm volatile("mfence" ::: "memory"); })
+#define rmb() ({ asm volatile("lfence" ::: "memory"); })
+#define wmb() ({ asm volatile("sfence" ::: "memory"); })
+
+#endif /* !ROS_INC_ATOMIC_H */
index fb49328..a47c61d 100644 (file)
--- a/inc/lib.h
+++ b/inc/lib.h
@@ -26,6 +26,7 @@ extern volatile env_t *env;
 // seems like they need to be either arrays [] or functions () for it to work
 extern volatile uint8_t (COUNT(PGSIZE * UINFO_PAGES) procinfo)[];
 extern volatile uint8_t (COUNT(PGSIZE * UDATA_PAGES) procdata)[];
+extern syscall_front_ring_t sysfrontring;
 extern volatile page_t pages[];
 void   exit(void);
 
diff --git a/inc/ring_buffer.h b/inc/ring_buffer.h
new file mode 100644 (file)
index 0000000..619c308
--- /dev/null
@@ -0,0 +1,306 @@
+/******************************************************************************
+ * ring.h
+ * 
+ * Shared producer-consumer ring macros.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef ROS_INC_RING_BUFFER_H
+#define ROS_INC_RING_BUFFER_H
+
+#include <inc/atomic.h>
+
+#define xen_mb()  mb()   /* Xen's name for mb() from <inc/atomic.h> */
+#define xen_rmb() rmb()  /* Xen's name for rmb() */
+#define xen_wmb() wmb()  /* Xen's name for wmb() */
+
+typedef unsigned int RING_IDX; /* ring index; RING_GET_* mask it with (RING_SIZE - 1) */
+
+/*
+ * Round a 32-bit unsigned constant down to the nearest power of two.
+ *
+ * Each __RDn looks at the low n bits of its argument: if any bit in the upper
+ * half of those n bits is set, it rounds the upper half (shifted down) and
+ * shifts the result back up; otherwise it rounds the lower half.  __RD2 is
+ * the base case and yields 2, 1 or 0, so an input of 0 rounds to 0.
+ */
+#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
+#define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
+#define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and a pointer to the shared ring structure (_s).
+ * A ring contains as many entries as will fit, rounded down to the nearest
+ * power of two (so we can mask with (size-1) to loop around).
+ * This tells us how many elements the ring _s can contain, given _sz space.
+ *
+ * (long)(_s)->ring - (long)(_s) is the byte offset of ring[] inside the
+ * shared structure (the space taken by the indexes and padding); it is
+ * subtracted from _sz before dividing by the size of one ring entry.
+ */
+#define __RING_SIZE(_s, _sz) \
+    (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ * 
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ *     DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ * 
+ *     mytag_sring_t      - The shared ring.
+ *     mytag_front_ring_t - The 'front' half of the ring.
+ *     mytag_back_ring_t  - The 'back' half of the ring.
+ *
+ * To initialize a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ *     mytag_front_ring_t front_ring;
+ *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initializing the back follows similarly (note that only the front
+ * initializes the shared ring):
+ *
+ *     mytag_back_ring_t back_ring;
+ *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                     \
+                                                                        \
+/* Shared ring entry: each slot holds either a request or a response */ \
+union __name##_sring_entry {                                            \
+    __req_t req;                                                        \
+    __rsp_t rsp;                                                        \
+};                                                                      \
+                                                                        \
+/* Shared ring page: shared indexes, padding, then the slots */         \
+struct __name##_sring {                                                 \
+    RING_IDX req_prod, req_event; /* set by RING_PUSH_REQUESTS / RING_FINAL_CHECK_FOR_REQUESTS */ \
+    RING_IDX rsp_prod, rsp_event; /* set by RING_PUSH_RESPONSES / RING_FINAL_CHECK_FOR_RESPONSES */ \
+    uint8_t  pad[48];             /* zeroed by SHARED_RING_INIT */      \
+    union __name##_sring_entry ring[1]; /* variable-length */           \
+};                                                                      \
+                                                                        \
+/* "Front" end's private variables */                                   \
+struct __name##_front_ring {                                            \
+    RING_IDX req_prod_pvt;        /* private request producer index; RING_PUSH_REQUESTS publishes it */ \
+    RING_IDX rsp_cons;            /* response consumer index */         \
+    unsigned int nr_ents;         /* number of ring entries, from __RING_SIZE */ \
+    struct __name##_sring *sring; /* the shared ring */                 \
+};                                                                      \
+                                                                        \
+/* "Back" end's private variables */                                    \
+struct __name##_back_ring {                                             \
+    RING_IDX rsp_prod_pvt;        /* private response producer index; RING_PUSH_RESPONSES publishes it */ \
+    RING_IDX req_cons;            /* request consumer index */          \
+    unsigned int nr_ents;         /* number of ring entries, from __RING_SIZE */ \
+    struct __name##_sring *sring; /* the shared ring */                 \
+};                                                                      \
+                                                                        \
+/* Syntactic sugar */                                                   \
+typedef struct __name##_sring __name##_sring_t;                         \
+typedef struct __name##_front_ring __name##_front_ring_t;               \
+typedef struct __name##_back_ring __name##_back_ring_t
+
+/*
+ * Macros for manipulating rings.
+ * 
+ * FRONT_RING_whatever works on the "front end" of a ring: here 
+ * requests are pushed on to the ring and responses taken off it.
+ * 
+ * BACK_RING_whatever works on the "back end" of a ring: here 
+ * requests are taken off the ring and responses put on.
+ * 
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. 
+ * This is OK in 1-for-1 request-response situations where the 
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
+ */
+
+/*
+ * Initialising empty rings
+ *
+ * SHARED_RING_INIT resets the shared structure: both producer indexes to 0,
+ * both event indexes to 1, and the pad bytes to zero.  It calls memset(),
+ * which this header does not declare itself.
+ *
+ * FRONT_RING_INIT / BACK_RING_INIT set one side's private indexes to 0,
+ * compute nr_ents from __size with __RING_SIZE, and record the shared ring
+ * pointer.  They do not write to the shared ring.
+ */
+#define SHARED_RING_INIT(_s) do {                                       \
+    (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
+    (_s)->req_event = (_s)->rsp_event = 1;                              \
+    (void)memset((_s)->pad, 0, sizeof((_s)->pad));                      \
+} while(0)
+
+#define FRONT_RING_INIT(_r, _s, __size) do {                            \
+    (_r)->req_prod_pvt = 0;                                             \
+    (_r)->rsp_cons = 0;                                                 \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) do {                             \
+    (_r)->rsp_prod_pvt = 0;                                             \
+    (_r)->req_cons = 0;                                                 \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
+} while (0)
+
+/*
+ * Initialize to existing shared indexes -- for recovery.
+ * The private producer index is copied from the shared producer index of the
+ * same direction.  The private consumer index is set to the shared producer
+ * index of the opposite direction, so nothing is left marked as unconsumed.
+ */
+#define FRONT_RING_ATTACH(_r, _s, __size) do {                          \
+    (_r)->sring = (_s);                                                 \
+    (_r)->req_prod_pvt = (_s)->req_prod;                                \
+    (_r)->rsp_cons = (_s)->rsp_prod;                                    \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+} while (0)
+
+#define BACK_RING_ATTACH(_r, _s, __size) do {                           \
+    (_r)->sring = (_s);                                                 \
+    (_r)->rsp_prod_pvt = (_s)->rsp_prod;                                \
+    (_r)->req_cons = (_s)->req_prod;                                    \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+} while (0)
+
+/* How big is this ring?  Evaluates to nr_ents, the number of entries. */
+#define RING_SIZE(_r)                                                   \
+    ((_r)->nr_ents)
+
+/*
+ * Number of free requests (for use on front side only): the ring size minus
+ * the distance between the private request producer index and the response
+ * consumer index.
+ */
+#define RING_FREE_REQUESTS(_r)                                          \
+    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
+/* Test if the front ring is full, i.e. RING_FREE_REQUESTS() is zero.
+ * (This is only meaningful from the front. )
+ */
+#define RING_FULL(_r)                                                   \
+    (RING_FREE_REQUESTS(_r) == 0)
+
+/*
+ * Test if there are outstanding messages to be processed on a ring.
+ * Both tests evaluate to a count, so the result is non-zero when work is
+ * pending.  For requests the count is the smaller of
+ *   req = req_prod - req_cons   (published but not yet consumed), and
+ *   rsp = RING_SIZE - (req_cons - rsp_prod_pvt).
+ */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
+    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
+
+#ifdef __GNUC__
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                             \
+    unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
+    unsigned int rsp = RING_SIZE(_r) -                                  \
+        ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
+    req < rsp ? req : rsp;                                              \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax; the operands are
+ * therefore evaluated more than once. */
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
+    ((((_r)->sring->req_prod - (_r)->req_cons) <                        \
+      (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?        \
+     ((_r)->sring->req_prod - (_r)->req_cons) :                         \
+     (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif
+
+/*
+ * Direct access to individual ring elements, by index.  The index is masked
+ * with (RING_SIZE - 1) before it is used, and each macro yields a pointer to
+ * the req / rsp member of the selected slot.
+ */
+#define RING_GET_REQUEST(_r, _idx)                                      \
+    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
+#define RING_GET_RESPONSE(_r, _idx)                                     \
+    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
+/*
+ * Loop termination condition: Would the specified index overflow the ring?
+ * True when _cons is RING_SIZE or more entries ahead of rsp_prod_pvt.
+ */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
+    (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+#define RING_PUSH_REQUESTS(_r) do {                                     \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do {                                    \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ * 
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end. For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ * 
+ * When enqueuing requests or responses:
+ * 
+ *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ *  is a boolean return value. True indicates that the receiver requires an
+ *  asynchronous notification.
+ * 
+ * After dequeuing requests or responses (before sleeping the connection):
+ * 
+ *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ *  The second argument is a boolean return value. True indicates that there
+ *  are pending messages on the ring (i.e., the connection should not be put
+ *  to sleep).
+ * 
+ *  These macros will set the req_event/rsp_event field to trigger a
+ *  notification on the very next message that is enqueued. If you want to
+ *  create batches of work (i.e., only receive a notification after several
+ *  messages have been enqueued) then you will need to create a customised
+ *  version of the FINAL_CHECK macro in your own code, which sets the event
+ *  field appropriately.
+ */
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {           \
+    RING_IDX __old = (_r)->sring->req_prod;  /* index published so far */ \
+    RING_IDX __new = (_r)->req_prod_pvt;     /* index being published */ \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = __new;                                      \
+    xen_mb(); /* back sees new requests /before/ we check req_event */  \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <           \
+                 (RING_IDX)(__new - __old)); /* req_event in (__old, __new] */ \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {          \
+    RING_IDX __old = (_r)->sring->rsp_prod;  /* index published so far */ \
+    RING_IDX __new = (_r)->rsp_prod_pvt;     /* index being published */ \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = __new;                                      \
+    xen_mb(); /* front sees new resps /before/ we check rsp_event */    \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <           \
+                 (RING_IDX)(__new - __old)); /* rsp_event in (__old, __new] */ \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {             \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
+    if (_work_to_do) break;          /* work pending: leave req_event alone */ \
+    (_r)->sring->req_event = (_r)->req_cons + 1; /* notify on next request */ \
+    xen_mb();                        /* store req_event before re-checking */ \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {            \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
+    if (_work_to_do) break;          /* work pending: leave rsp_event alone */ \
+    (_r)->sring->rsp_event = (_r)->rsp_cons + 1; /* notify on next response */ \
+    xen_mb();                        /* store rsp_event before re-checking */ \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
+} while (0)
+
+#endif /* ROS_INC_RING_BUFFER_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
index bdcee2b..5435a6a 100644 (file)
@@ -2,6 +2,7 @@
 #define ROS_INC_SYSCALL_H
 
 #include <inc/types.h>
+#include <inc/ring_buffer.h>
 
 /* system call numbers */
 enum
@@ -14,10 +15,18 @@ enum
 };
 
 #define NUM_SYS_ARGS 6
-typedef struct Syscall {
+typedef struct SyscallRequest {
        uint32_t num;   // system call number; syscall_async passes it to syscall()
        uint32_t flags; // not read by the code shown here -- TODO confirm intended use
        uint32_t args[NUM_SYS_ARGS]; // arguments; syscall_async forwards args[0] through args[4]
-} syscall_t;
+} syscall_req_t;
+
+typedef struct SyscallResponse {
+       uint32_t retval; // presumably the syscall's return value; nothing shown here writes it -- TODO confirm
+} syscall_resp_t;
+
+
+// Generic Syscall Ring Buffer (syscall_sring_t, syscall_front_ring_t, syscall_back_ring_t)
+DEFINE_RING_TYPES(syscall, syscall_req_t, syscall_resp_t);
 
 #endif /* !ROS_INC_SYSCALL_H */
index 36d7ed6..3b93bf4 100644 (file)
@@ -1,9 +1,10 @@
-#ifndef ROS_INC_ATOMIC_H
-#define ROS_INC_ATOMIC_H
+#ifndef ROS_KERN_ATOMIC_H
+#define ROS_KERN_ATOMIC_H
 
 #include <inc/types.h>
 #include <inc/mmu.h>
 #include <inc/x86.h>
+#include <inc/atomic.h>
 
 /* //linux style atomic ops
 typedef struct {uint32_t real_num;} atomic_t;
@@ -133,4 +134,4 @@ static inline void atomic_andb(volatile uint8_t* number, uint8_t mask)
 {
        asm volatile("lock andb %1,%0" : "=m"(*number) : "r"(mask) : "cc");
 }
-#endif /* !ROS_INC_ATOMIC_H */
+#endif /* !ROS_KERN_ATOMIC_H */
index 1b02794..d1aecb6 100644 (file)
@@ -9,6 +9,7 @@
 #include <inc/string.h>
 #include <inc/assert.h>
 #include <inc/elf.h>
+#include <inc/syscall.h>
 
 #include <kern/env.h>
 #include <kern/pmap.h>
@@ -139,6 +140,9 @@ env_setup_vm(env_t *e)
        memset(e->env_procinfo, 0, PGSIZE);
        memset(e->env_procdata, 0, PGSIZE);
 
+       // Initialize the generic syscall ring buffer
+       SHARED_RING_INIT((syscall_sring_t*)e->env_procdata);
+
        // should be able to do this so long as boot_pgdir never has
        // anything put below UTOP
        memcpy(e->env_pgdir, boot_pgdir, PGSIZE);
index bf7243a..6887d7f 100644 (file)
@@ -105,13 +105,24 @@ void kernel_init(multiboot_info_t *mboot_info)
        // need to switch to the right context, so we can handle the user pointer
        // that points to a data payload of the syscall
        lcr3(envs[0].env_cr3);
-       syscall_async((syscall_t*)(envs[0].env_procdata));
-       syscall_async(((syscall_t*)(envs[0].env_procdata)) + 1);
+       syscall_back_ring_t sysbackring;
+       BACK_RING_INIT(&sysbackring, (syscall_sring_t*)envs[0].env_procdata, PGSIZE);
+
+       // not really enough, mostly just testing
+       while (!(RING_HAS_UNCONSUMED_REQUESTS(&sysbackring)))
+               cpu_relax();
+       syscall_async((syscall_req_t*)(RING_GET_REQUEST(&sysbackring, 0)));
+       printk("\n");
+       syscall_async((syscall_req_t*)(RING_GET_REQUEST(&sysbackring, 1)));
+       printk("\n");
+       syscall_async((syscall_req_t*)(RING_GET_REQUEST(&sysbackring, 2)));
+       printk("\n");
+       syscall_async((syscall_req_t*)(RING_GET_REQUEST(&sysbackring, 3)));
        printk("\n");
        /*
        printk("Attempting to run the syscall at the beginning of procdata for env 1:\n\n");
        lcr3(envs[1].env_cr3);
-       syscall_async((syscall_t*)(envs[1].env_procdata));
+       syscall_async((syscall_req_t*)(envs[1].env_procdata));
        printk("\n");
        */
        panic("Don't Panic");
index d741991..255bbff 100644 (file)
@@ -98,7 +98,7 @@ syscall(uint32_t syscallno, uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4,
        return 0xdeadbeef;
 }
 
-uint32_t syscall_async(syscall_t *call)
+uint32_t syscall_async(syscall_req_t *call)
 {
        return syscall(call->num, call->args[0], call->args[1],
                       call->args[2], call->args[3], call->args[4]);
index 6476720..8ace012 100644 (file)
@@ -8,6 +8,6 @@
 
 uint32_t (SYNCHRONOUS syscall)(uint32_t num, uint32_t a1, uint32_t a2,
                                uint32_t a3, uint32_t a4, uint32_t a5);
-uint32_t syscall_async(syscall_t *syscall);
+uint32_t syscall_async(syscall_req_t *syscall);
 
 #endif /* !ROS_KERN_SYSCALL_H */
index 1996c78..3f02cb4 100644 (file)
@@ -2,11 +2,13 @@
 // entry.S already took care of defining envs, pages, vpd, and vpt.
 
 #include <inc/lib.h>
+#include <inc/syscall.h>
 
 extern void umain(int argc, char **argv);
 
 volatile env_t *env;
 char *binaryname = "(PROGRAM NAME UNKNOWN)";
+syscall_front_ring_t sysfrontring;
 
 void
 libmain(int argc, char **argv)
@@ -16,6 +18,9 @@ libmain(int argc, char **argv)
        // procinfo.  When we figure out what we want there, change this.
        env = (env_t*)procinfo;
 
+       // Set up the front ring for the general syscall ring
+       FRONT_RING_INIT(&sysfrontring, (syscall_sring_t*)procdata, PGSIZE);     
+
        // save the name of the program so that panic() can use it
        if (argc > 0)
                binaryname = argv[0];
index 7b9e415..0c18445 100644 (file)
@@ -34,21 +34,23 @@ syscall(int num, uint32_t a1, uint32_t a2, uint32_t a3, uint32_t a4, uint32_t a5
        return ret;
 }
 
-static inline error_t async_syscall(syscall_t *syscall)
+// Copies *syscall into the shared syscall ring (sysfrontring) and publishes
+// the updated producer index with RING_PUSH_REQUESTS.  Always returns 0.
+//
+// TESTING ONLY: the request is written into two consecutive slots so that it
+// is serviced twice.  NOTE(review): req_prod_pvt still advances by only one
+// per call although two slots are filled; the kernel-side test reads slots
+// by fixed index, so this is left as is.
+static inline error_t async_syscall(syscall_req_t *syscall)
 {
-       // testing just two syscalls at a time, and just put it at the beginning of
-       // the shared data page.  This is EXTREMELY GHETTO....
-       if ( ((syscall_t*)procdata)->args[0] ) // something there, presumably the first syscall
-               memcpy(((void*)procdata) + sizeof(syscall_t), syscall, sizeof(syscall_t));
-       else // nothing there, this is the first one
-               memcpy(procdata, syscall, sizeof(syscall_t));
+       static uint8_t next = 0; // should make sure this never goes too high
+       syscall_req_t* req = RING_GET_REQUEST(&sysfrontring, next++);
+       memcpy(req, syscall, sizeof(syscall_req_t));
+       // second copy goes in the following slot; reuse req rather than redeclaring it
+       req = RING_GET_REQUEST(&sysfrontring, next++);
+       memcpy(req, syscall, sizeof(syscall_req_t));
+       // need to actually update our sysfrontring.req_prod_pvt
+       sysfrontring.req_prod_pvt++;
+       RING_PUSH_REQUESTS(&sysfrontring);
        return 0;
 }
 
 // Builds a SYS_cputs request and queues it with async_syscall().
 // args[0] is the address of s and args[1] is len; flags and the remaining
 // args are zero.  The return value of async_syscall() is not checked.
 void sys_cputs_async(const char *s, size_t len)
 {
        // could just hardcode 4 0's, will eventually wrap this marshaller anyway
        // ([2 ... (NUM_SYS_ARGS-1)] is a range designator that zeroes the unused args)
-       syscall_t syscall = {SYS_cputs, 0, {(uint32_t)s, len, [2 ... (NUM_SYS_ARGS-1)] 0} };
+       syscall_req_t syscall = {SYS_cputs, 0, {(uint32_t)s, len, [2 ... (NUM_SYS_ARGS-1)] 0} };
        async_syscall(&syscall);
 }