Adds a microbenchmark test program
author Barret Rhoden <brho@cs.berkeley.edu>
Sat, 26 Jan 2013 01:08:36 +0000 (17:08 -0800)
committer Barret Rhoden <brho@cs.berkeley.edu>
Sat, 26 Jan 2013 01:08:36 +0000 (17:08 -0800)
It can run some basic functions and measure the time elapsed.  Check the
top of microb_test.c for more details.

tests/juan_test.c [deleted file]
tests/microb_test.c [new file with mode: 0644]

diff --git a/tests/juan_test.c b/tests/juan_test.c
deleted file mode 100644 (file)
index 0e38a35..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-#include <stdio.h>
-#include <pthread.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/time.h>
-
-/* OS dependent #incs */
-#include <parlib.h>
-#include <vcore.h>
-#include <timing.h>
-
-static uint32_t __get_pcoreid(void)
-{
-       return __procinfo.vcoremap[vcore_id()].pcoreid;
-}
-
-static __attribute__ ((noinline)) int juan_work(void)
-{
-    const int MAX_ITER = 100000;
-    register int res = 0;
-    for (int i = 0; i < MAX_ITER; ++i) {
-        for (int j = 0; j < i; ++j) {
-            res += (i * 2 - 5 * j) / 3;
-        }
-    }
-       return res;
-}
-
-static void juan_test(void)
-{
-       unsigned long long usec_diff;
-       struct timeval start_tv = {0};
-       struct timeval end_tv = {0};
-       int res;
-
-       printf("We are %sin MCP mode, running on vcore %d, pcore %d\n",
-              (in_multi_mode() ? "" : "not "), vcore_id(),
-              __get_pcoreid());
-
-       if (gettimeofday(&start_tv, 0))
-               perror("Start time error...");
-
-       res = juan_work();
-
-       if (gettimeofday(&end_tv, 0))
-               perror("End time error...");
-
-       usec_diff = (end_tv.tv_sec - start_tv.tv_sec) * 1000000 +
-                   (end_tv.tv_usec - start_tv.tv_usec);
-
-    printf("Result: %d Usec diff: %llu\n", res, usec_diff);
-}
-
-void *juan_thread(void* arg)
-{      
-       juan_test();
-}
-
-int main(int argc, char** argv) 
-{
-       pthread_t child;
-       void *child_ret;
-       juan_test();
-       printf("Spawning thread, etc...\n");
-       pthread_create(&child, NULL, &juan_thread, NULL);
-       pthread_join(child, &child_ret);
-} 
diff --git a/tests/microb_test.c b/tests/microb_test.c
new file mode 100644 (file)
index 0000000..bc9a521
--- /dev/null
@@ -0,0 +1,150 @@
+/* Copyright (c) 2013 The Regents of the University of California
+ * Barret Rhoden <brho@cs.berkeley.edu>
+ * See LICENSE for details.
+ *
+ * Basic perf test for small functions.  Will run them in a loop and give you
+ * the average cost per iteration.  It'll run them both as an SCP and an MCP.
+ *
+ * To use this, define a function of the form:
+ *
+ *             void my_test(unsigned long nr_loops)
+ *
+ * Which does some computation you wish to measure inside a loop that runs
+ * nr_loops times.  Then in microb_test(), add your function in a line such as:
+ *
+ *             test_time_ns(my_test, 100000);
+ *
+ * This macro will run your test and print the results.  Pick a loop amount that
+ * is reasonable for your operation.  You can also use test_time_us() for longer
+ * operations.
+ *
+ * Notes:
+ * - I went with this style so you could do some prep work before and after the
+ *   loop (instead of having a macro build the loop).  It's what I needed.
+ * - Be sure to double check the ASM inside the loop to make sure the compiler
+ *   isn't optimizing out your main work. 
+ * - Make sure your function isn't static.  If it is static (and even if it is
+ *   __attribute__((noinline))), if the function is called only once, the
+ *   compiler will compile it differently (specifically, it will hardcode the
+ *   number of loops into the function, instead of taking a parameter).
+ *   Suddenly, the existence of a second test of the same function could change
+ *   the performance of *both* test runs.  Incidentally, when this happened to
+ *   me, the tests were *better* when this optimization didn't happen.  The way
+ *   to avoid the optimization completely is to have extern functions, since the
+ *   compiler can't assume it is only called once.  Though technically they
+ *   still could do some optimizations, and the only really safe way is to put
+ *   the tests in another .c file. */
+
+#include <stdio.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+/* OS dependent #incs */
+#include <parlib.h>
+#include <vcore.h>
+#include <timing.h>
+
+static uint32_t __get_pcoreid(void)
+{
+       return __procinfo.vcoremap[vcore_id()].pcoreid; /* pcoreid of the vcoremap entry indexed by vcore_id() */
+}
+
+/* Testing functions here */
+
+void set_tlsdesc_test(unsigned long nr_loops)  /* nr_loops x (LDT entry write + %gs load) */
+{
+       extern void** vcore_thread_control_blocks;
+       uint32_t vcoreid = vcore_id();
+       void *mytls = get_tls_desc(vcoreid);    /* saved so it can be put back after the loop */
+       void *vctls = vcore_thread_control_blocks[vcoreid];
+       segdesc_t tmp = SEG(STA_W, (uint32_t)vctls, 0xffffffff, 3);
+       uint32_t gs = (vcoreid << 3) | 0x07;    /* x86 selector: index vcoreid, TI=1 (LDT), RPL=3 */
+       for (unsigned long i = 0; i < nr_loops; i++) {  /* i has nr_loops' type: an int overflows past INT_MAX */
+               __procdata.ldt[vcoreid] = tmp;
+               cmb();  /* presumably a compiler barrier: keeps the store ahead of the %gs load */
+               asm volatile("movl %0,%%gs" : : "r" (gs) : "memory");
+       }
+       set_tls_desc(mytls, vcoreid);   /* reinstall the descriptor read before the loop */
+}
+
+/* Internal test infrastructure */
+
+void loop_overhead(unsigned long nr_loops)     /* empty loop; its time is subtracted from each test */
+{
+       for (unsigned long i = 0; i < nr_loops; i++) {  /* i has nr_loops' type: an int overflows past INT_MAX */
+               cmb();  /* presumably a compiler barrier, so the empty loop is not removed - confirm */
+       }
+}
+
+/* Runs func(loops) once and returns the usec elapsed (gettimeofday based) as an unsigned long long */
+#define __test_time_us(func, loops)                                            \
+({                                                                             \
+       struct timeval start_tv = {0};                                             \
+       struct timeval end_tv = {0};                                               \
+       if (gettimeofday(&start_tv, 0))                                            \
+               perror("Start time error...");                                         \
+       (func)((loops));                                                           \
+       if (gettimeofday(&end_tv, 0))   /* on failure end_tv keeps its zero init */ \
+               perror("End time error...");                                           \
+       ((unsigned long long)(end_tv.tv_sec - start_tv.tv_sec) * 1000000ULL +      \
+        (end_tv.tv_usec - start_tv.tv_usec)); /* widened first: a 32-bit long overflows when scaled */ \
+})
+
+/* Runs func(loops) and returns the nsec elapsed (usec * 1000); widened first, a 32-bit long overflows past ~2.1s */
+#define __test_time_ns(func, loops)                                            \
+({                                                                             \
+       ((unsigned long long)__test_time_us((func), (loops)) * 1000ULL);           \
+})
+
+/* Times func(loops) in usec, subtracts the loop overhead, prints total and per-iteration; reads the caller's nsec_per_loop */
+#define test_time_us(func, loops)                                              \
+({                                                                             \
+       unsigned long long usec_diff;   /* (loops) below: an argument like n + 1 must group as a unit */ \
+       usec_diff = __test_time_us((func), (loops)) - nsec_per_loop * (loops) / 1000; \
+       printf("\"%s\" total: %lluus, per iteration: %lluus\n", #func, usec_diff,  \
+              usec_diff / (loops));                                               \
+})
+
+/* Times func(loops) in nsec, removes the empty-loop cost, prints total and per-iteration; reads the caller's nsec_per_loop */
+#define test_time_ns(func, loops)                                              \
+({                                                                             \
+       unsigned long long net_ns = __test_time_ns((func), (loops));               \
+       net_ns -= nsec_per_loop * (loops);      /* overhead for this many loops */  \
+       printf("\"%s\" total: %lluns, per iteration: %lluns\n", #func, net_ns,     \
+              net_ns / (loops));                                                  \
+})
+
+static void microb_test(void)
+{
+       enum { OVERHEAD_LOOPS = 100000 };       /* iterations used to time the empty loop */
+       unsigned long long nsec_per_loop;       /* read by name inside the test_time_*() macros */
+
+       printf("We are %sin MCP mode, running on vcore %d, pcore %d\n",
+              in_multi_mode() ? "" : "not ", vcore_id(), __get_pcoreid());
+       /* The overhead is kept per loop and multiplied back up by each test.
+        * That gives up a little accuracy, but lets every test pick its own
+        * iteration count without re-measuring the empty loop. */
+       nsec_per_loop = __test_time_ns(loop_overhead, OVERHEAD_LOOPS) / OVERHEAD_LOOPS;
+       printd("Loop overhead per loop: %lluns\n", nsec_per_loop);
+
+       /* Register tests below: function name, then number of loops */
+       test_time_ns(set_tlsdesc_test, 100000);
+}
+
+void *worker_thread(void *arg) /* pthread entry point; arg is unused */
+{
+       microb_test();  /* same benchmark pass that main() runs directly */
+       return NULL;
+}
+
+int main(int argc, char** argv)        /* runs the benchmark here, then again in a worker thread */
+{
+       pthread_t child;        /* only joined when pthread_create() reported success */
+       void *child_ret;        /* worker's return value; not inspected */
+       microb_test();
+       printf("Spawning worker thread, etc...\n");
+       return pthread_create(&child, NULL, &worker_thread, NULL) ||   /* '||' skips the join on failure */
+              pthread_join(child, &child_ret);                        /* exit status: 0 ok, 1 on either error */
+}