Build topology from cpuid in topology.c
authorKevin Klues <klueska@cs.berkeley.edu>
Thu, 3 Sep 2015 23:09:01 +0000 (16:09 -0700)
committerBarret Rhoden <brho@cs.berkeley.edu>
Tue, 15 Sep 2015 18:04:35 +0000 (14:04 -0400)
This is the first commit to introduce the notion of CPU toology in
Akaros. Previously we relied on a flat topology of cores, with no
concept of hierarchy between hyperthreaded cores, sockets, or numa
domains.  With this commit we begin using the x86 cpuid instruction to
build out structures that are aware of the actual topology underneath.
We also use the ACPI tables to detect our numa structure.

Nothing is actually hooked in yet. We just bring the files in.

[brho: minor touchups, foo(void)s and comments ]

kern/arch/riscv/topology.h [new file with mode: 0644]
kern/arch/x86/topology.c [new file with mode: 0644]
kern/arch/x86/topology.h [new file with mode: 0644]

diff --git a/kern/arch/riscv/topology.h b/kern/arch/riscv/topology.h
new file mode 100644 (file)
index 0000000..de3af09
--- /dev/null
@@ -0,0 +1,13 @@
+/* Copyright (c) 2015 The Regents of the University of California
+ * Kevin Klues <klueska@cs.berkeley.edu>
+ * See LICENSE for details. */
+
+#ifndef AKAROS_ARCH_TOPOLOGY_H
+#define AKAROS_ARCH_TOPOLOGY_H
+
+/* Do nothing for now until we have the ability to build up the topology on
+ * riscv. */
+static void topology_init(void) {}
+static void print_cpu_topology(void) {}
+
+#endif /* !AKAROS_ARCH_TOPOLOGY_H */
diff --git a/kern/arch/x86/topology.c b/kern/arch/x86/topology.c
new file mode 100644 (file)
index 0000000..ea8f19b
--- /dev/null
@@ -0,0 +1,339 @@
+/* Copyright (c) 2015 The Regents of the University of California
+ * Kevin Klues <klueska@cs.berkeley.edu>
+ * See LICENSE for details. */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <kmalloc.h>
+#include <string.h>
+#include <acpi.h>
+#include <arch/arch.h>
+#include <arch/apic.h>
+#include <arch/topology.h>
+
+struct topology_info cpu_topology_info;
+int *os_coreid_lookup;
+
+#define num_cpus            (cpu_topology_info.num_cpus)
+#define num_sockets         (cpu_topology_info.num_sockets)
+#define num_numa            (cpu_topology_info.num_numa)
+#define cores_per_numa      (cpu_topology_info.cores_per_numa)
+#define cores_per_socket    (cpu_topology_info.cores_per_socket)
+#define cores_per_cpu       (cpu_topology_info.cores_per_cpu)
+#define cpus_per_socket     (cpu_topology_info.cpus_per_socket)
+#define cpus_per_numa       (cpu_topology_info.cpus_per_numa)
+#define sockets_per_numa    (cpu_topology_info.sockets_per_numa)
+#define max_apic_id         (cpu_topology_info.max_apic_id)
+#define core_list           (cpu_topology_info.core_list)
+
+/* Adjust the ids from any given node type to start at 0 and increase from
+ * there. We use the id_offset in the core_list to index the proper field. */
+static void adjust_ids(int id_offset)
+{
+       int new_id = 0, old_id = -1;
+       for (int i = 0; i < num_cores; i++) {
+               for (int j = 0; j < num_cores; j++) {
+                       int *id_field = ((void*)&core_list[j] + id_offset);
+                       if (*id_field >= new_id) {
+                               if (old_id == -1)
+                                       old_id = *id_field;
+                               if (old_id == *id_field)
+                                       *id_field = new_id;
+                       }
+               }
+               old_id=-1;
+               new_id++;
+       }
+}
+
+/* Set the actual socket id from the raw socket id extracted from cpuid.  This
+ * algorithm is adapted from the algorithm given at
+ * http://wiki.osdev.org/Detecting_CPU_Topology_(80x86) */
+static void set_socket_ids(void)
+{
+       int socket_id, raw_socket_id;
+       for (int numa_id = 0; numa_id < num_numa; numa_id++) {
+               socket_id = 0;
+               for (int i = 0; i < num_cores; i++) {
+                       if (core_list[i].numa_id == numa_id) {
+                               if (core_list[i].socket_id == -1) {
+                                       core_list[i].socket_id = socket_id;
+                                       raw_socket_id = core_list[i].raw_socket_id;
+                                       for (int j = i; j < num_cores; j++) {
+                                               if (core_list[j].numa_id == numa_id) {
+                                                       if (core_list[j].raw_socket_id == raw_socket_id) {
+                                                               core_list[j].socket_id = socket_id;
+                                                       }
+                                               }
+                                       }
+                               }
+                               socket_id++;
+                       }
+               }
+       }
+}
+
+/* Loop through our Srat table to find a matching numa domain for the given
+ * apid_id. */
+static int find_numa_domain(int apic_id)
+{
+       struct Srat *temp = srat;
+       while (temp) {
+               if (temp->type == SRlapic) {
+                       if (temp->lapic.apic == apic_id)
+                               return temp->lapic.dom;
+               }
+               temp = temp->next;
+       }
+       return -1;
+}
+
+/* Figure out the maximum number of cores we actually have and set it in our
+ * cpu_topology_info struct. */
+static void set_num_cores(void)
+{
+       struct Apicst *temp = apics->st;
+       while (temp) {
+               if (temp->type == ASlapic)
+                       num_cores++;
+               temp = temp->next;
+       }
+}
+
+/* Figure out the maximum number of numa domains we actually have and set it in
+ * our cpu_topology_info struct. */
+static void set_num_numa(void)
+{
+       num_numa = -1;
+       struct Srat *temp = srat;
+       while (temp) {
+               if (temp->type == SRlapic)
+                       if (temp->lapic.dom > num_numa)
+                               num_numa++;
+               temp = temp->next;
+       }
+       num_numa++;
+}
+
+/* Figure out what the max apic_id we will ever have is and set it in our
+ * cpu_topology_info struct. */
+static void set_max_apic_id(void)
+{
+       struct Apicst *temp = apics->st;
+       while (temp) {
+               if (temp->type == ASlapic) {
+                       if (temp->lapic.id > max_apic_id)
+                               max_apic_id = temp->lapic.id;
+               }
+               temp = temp->next;
+       }
+}
+
+static void init_os_coreid_lookup(void)
+{
+       /* Allocate (max_apic_id+1) entries in our os_coreid_lookup table.
+        * There may be holes in this table because of the way apic_ids work, but
+        * a little wasted space is OK for a constant time lookup of apic_id ->
+        * logical core id (from the OS's perspective). Memset the array to -1 to
+        * to represent invalid entries (which it's very possible we might have if
+        * the apic_id space has holes in it).  */
+       os_coreid_lookup = kmalloc((max_apic_id + 1) * sizeof(int), 0);
+       memset(os_coreid_lookup, -1, (max_apic_id + 1) * sizeof(int));
+
+       /* Loop through and set all valid entries to 0 to start with (making them
+        * temporarily valid, but not yet set to the correct value). This step is
+        * necessary because there is no ordering to the linked list we are
+        * pulling these ids from. After this, loop back through and set the
+        * mapping appropriately. */
+       struct Apicst *temp = apics->st;
+       while (temp) {
+               if (temp->type == ASlapic)
+                       os_coreid_lookup[temp->lapic.id] = 0;
+               temp = temp->next;
+       }
+       int os_coreid = 0;
+       for (int i = 0; i <= max_apic_id; i++)
+               if (os_coreid_lookup[i] == 0)
+                       os_coreid_lookup[i] = os_coreid++;
+}
+
+static void init_core_list(uint32_t core_bits, uint32_t cpu_bits)
+{
+       /* Assuming num_cores and max_apic_id have been set, we can allocate our
+        * core_list to the proper size. Initialize all entries to 0s to being
+        * with. */
+       core_list = kzmalloc(num_cores * sizeof(struct core_info), 0);
+
+       /* Loop through all possible apic_ids and fill in the core_list array with
+        * *relative* topology info. We will change this relative info to absolute
+        * info in a future step. As part of this step, we update our
+        * os_coreid_lookup array to contain the proper value. */
+       int os_coreid = 0;
+       int max_cpus = (1 << cpu_bits);
+       int max_cores_per_cpu = (1 << core_bits);
+       int max_logical_cores = (1 << (core_bits + cpu_bits));
+       int raw_socket_id = 0, cpu_id = 0, core_id = 0;
+       for (int apic_id = 0; apic_id <= max_apic_id; apic_id++) {
+               if (os_coreid_lookup[apic_id] != -1) {
+                       raw_socket_id = apic_id & ~(max_logical_cores - 1);
+                       cpu_id = (apic_id >> core_bits) & (max_cpus - 1);
+                       core_id = apic_id & (max_cores_per_cpu - 1);
+
+                       core_list[os_coreid].numa_id = find_numa_domain(apic_id);
+                       core_list[os_coreid].raw_socket_id = raw_socket_id;
+                       core_list[os_coreid].socket_id = -1;
+                       core_list[os_coreid].cpu_id = cpu_id;
+                       core_list[os_coreid].core_id = core_id;
+                       core_list[os_coreid].apic_id = apic_id;
+                       os_coreid++;
+               }
+       }
+
+       /* In general, the various id's set in the previous step are all unique in
+        * terms of representing the topology (i.e. all cores under the same socket
+        * have the same socket_id set), but these id's are not necessarily
+        * contiguous, and are only relative to the level of the hierarchy they
+        * exist at (e.g.  cpu_id 4 may exist under *both* socket_id 0 and
+        * socket_id 1). In this step, we squash these id's down so they are
+        * contiguous. In a following step, we will make them all absolute instead
+        * of relative. */
+       adjust_ids(offsetof(struct core_info, numa_id));
+       adjust_ids(offsetof(struct core_info, raw_socket_id));
+       adjust_ids(offsetof(struct core_info, cpu_id));
+       adjust_ids(offsetof(struct core_info, core_id));
+
+       /* We haven't yet set the socket id of each core yet. So far, all we've
+        * extracted is a "raw" socket id from the top bits in our apic id, but we
+        * need to condense these down into something workable for a socket id, per
+        * numa domain. OSDev has an algorithm for doing so
+        * (http://wiki.osdev.org/Detecting_CPU_Topology_%2880x86%29). We adapt it
+        * for our setup. */
+       set_socket_ids();
+}
+
+static void init_core_list_flat(void)
+{
+       /* Assuming num_cores and max_apic_id have been set, we can allocate our
+        * core_list to the proper size. Initialize all entries to 0s to being
+        * with. */
+       core_list = kzmalloc(num_cores * sizeof(struct core_info), 0);
+
+       /* Loop through all possible apic_ids and fill in the core_list array with
+        * flat topology info. */
+       int os_coreid = 0;
+       for (int apic_id = 0; apic_id <= max_apic_id; apic_id++) {
+               if (os_coreid_lookup[apic_id] != -1) {
+                       core_list[os_coreid].numa_id = 0;
+                       core_list[os_coreid].raw_socket_id = 0;
+                       core_list[os_coreid].socket_id = 0;
+                       core_list[os_coreid].cpu_id = 0;
+                       core_list[os_coreid].core_id = os_coreid;
+                       core_list[os_coreid].apic_id = apic_id;
+                       os_coreid++;
+               }
+       }
+}
+
+static void set_remaining_topology_info(void)
+{
+       /* Assuming we have our core_list set up with relative topology info, loop
+        * through our core_list and calculate the other statistics that we hold
+        * in our cpu_topology_info struct. */
+       int last_numa = -1, last_socket = -1, last_cpu = -1, last_core = -1;
+       for (int i = 0; i < num_cores; i++) {
+               if (core_list[i].socket_id > last_socket) {
+                       last_socket = core_list[i].socket_id;
+                       sockets_per_numa++;
+               }
+               if (core_list[i].cpu_id > last_cpu) {
+                       last_cpu = core_list[i].cpu_id;
+                       cpus_per_socket++;
+               }
+               if (core_list[i].core_id > last_core) {
+                       last_core = core_list[i].core_id;
+                       cores_per_cpu++;
+               }
+       }
+       cores_per_socket = cpus_per_socket * cores_per_cpu;
+       cores_per_numa = sockets_per_numa * cores_per_socket;
+       cpus_per_numa = sockets_per_numa * cpus_per_socket;
+       num_sockets = sockets_per_numa * num_numa;
+       num_cpus = cpus_per_socket * num_sockets;
+}
+
+static void update_core_list_with_absolute_ids(void)
+{
+       /* Fix up our core_list to have absolute id's at every level. */
+       for (int i = 0; i < num_cores; i++) {
+               struct core_info *c = &core_list[i];
+               c->socket_id = num_sockets/num_numa * c->numa_id + c->socket_id;
+               c->cpu_id = num_cpus/num_sockets * c->socket_id + c->cpu_id;
+               c->core_id = num_cores/num_cpus * c->cpu_id + c->core_id;
+       }
+}
+
+static void build_topology(uint32_t core_bits, uint32_t cpu_bits)
+{
+       set_num_cores();
+       set_num_numa();
+       set_max_apic_id();
+       init_os_coreid_lookup();
+       init_core_list(core_bits, cpu_bits);
+       set_remaining_topology_info();
+       update_core_list_with_absolute_ids();
+}
+
+static void build_flat_topology(void)
+{
+       set_num_cores();
+       num_numa = 1;
+       set_max_apic_id();
+       init_os_coreid_lookup();
+       init_core_list_flat();
+       set_remaining_topology_info();
+}
+
+void topology_init(void)
+{
+       uint32_t eax, ebx, ecx, edx;
+       int smt_leaf, core_leaf;
+       uint32_t core_bits = 0, cpu_bits = 0;
+
+       eax = 0x0000000b;
+       ecx = 1;
+       cpuid(eax, ecx, &eax, &ebx, &ecx, &edx);
+       core_leaf = (ecx >> 8) & 0x00000002;
+       if (core_leaf == 2) {
+               cpu_bits = eax;
+               eax = 0x0000000b;
+               ecx = 0;
+               cpuid(eax, ecx, &eax, &ebx, &ecx, &edx);
+               smt_leaf = (ecx >> 8) & 0x00000001;
+               if (smt_leaf == 1) {
+                       core_bits = eax;
+                       cpu_bits = cpu_bits - core_bits;
+               }
+       }
+       if (cpu_bits)
+               build_topology(core_bits, cpu_bits);
+       else
+               build_flat_topology();
+}
+
+void print_cpu_topology()
+{
+       printk("num_numa: %d, num_sockets: %d, num_cpus: %d, num_cores: %d\n",
+              num_numa, num_sockets, num_cpus, num_cores);
+       for (int i = 0; i < num_cores; i++) {
+               printk("OScoreid: %3d, HWcoreid: %3d, RawSocketid: %3d, "
+                      "Numa Domain: %3d, Socket: %3d, Cpu: %3d, Core: %3d\n",
+                      i,
+                      core_list[i].apic_id,
+                      core_list[i].numa_id,
+                      core_list[i].raw_socket_id,
+                      core_list[i].socket_id,
+                      core_list[i].cpu_id,
+                      core_list[i].core_id);
+       }
+}
diff --git a/kern/arch/x86/topology.h b/kern/arch/x86/topology.h
new file mode 100644 (file)
index 0000000..7bb0e64
--- /dev/null
@@ -0,0 +1,43 @@
+/* Copyright (c) 2015 The Regents of the University of California
+ * Kevin Klues <klueska@cs.berkeley.edu>
+ * See LICENSE for details. */
+
+#ifndef AKAROS_ARCH_TOPOLOGY_H
+#define AKAROS_ARCH_TOPOLOGY_H
+
+#include <ros/common.h>
+#include <arch/apic.h>
+#include <arch/x86.h>
+
+struct core_info {
+       int numa_id;
+       int socket_id;
+       int cpu_id;
+       int core_id;
+       int raw_socket_id;
+       int apic_id;
+};
+
+struct topology_info {
+       int num_cores;
+       int num_cpus;
+       int num_sockets;
+       int num_numa;
+       int cores_per_cpu;
+       int cores_per_socket;
+       int cores_per_numa;
+       int cpus_per_socket;
+       int cpus_per_numa;
+       int sockets_per_numa;
+       int max_apic_id;
+       struct core_info *core_list;
+};
+
+extern struct topology_info cpu_topology_info;
+extern int *os_coreid_lookup;
+#define num_cores (cpu_topology_info.num_cores)
+
+void topology_init();
+void print_cpu_topology();
+
+#endif /* AKAROS_ARCH_TOPOLOGY_H */