/* akaros/kern/arch/x86/topology.c */
   1/* Copyright (c) 2015 The Regents of the University of California
   2 * Kevin Klues <klueska@cs.berkeley.edu>
   3 * See LICENSE for details. */
   4
   5#include <stdio.h>
   6#include <stdint.h>
   7#include <stddef.h>
   8#include <kmalloc.h>
   9#include <string.h>
  10#include <ns.h>
  11#include <acpi.h>
  12#include <arch/arch.h>
  13#include <arch/apic.h>
  14#include <arch/topology.h>
  15
/* Global topology description, filled in once by topology_init(). */
struct topology_info cpu_topology_info;
/* Constant-time map from hardware apic_id -> OS logical core id.  Entries for
 * holes in the apic_id space are -1 (see init_os_coreid_lookup()). */
int *os_coreid_lookup;

/* Shorthand accessors for the fields of cpu_topology_info, so the code below
 * can read like it is using plain globals. */
#define num_cpus            (cpu_topology_info.num_cpus)
#define num_sockets         (cpu_topology_info.num_sockets)
#define num_numa            (cpu_topology_info.num_numa)
#define cores_per_numa      (cpu_topology_info.cores_per_numa)
#define cores_per_socket    (cpu_topology_info.cores_per_socket)
#define cores_per_cpu       (cpu_topology_info.cores_per_cpu)
#define cpus_per_socket     (cpu_topology_info.cpus_per_socket)
#define cpus_per_numa       (cpu_topology_info.cpus_per_numa)
#define sockets_per_numa    (cpu_topology_info.sockets_per_numa)
#define max_apic_id         (cpu_topology_info.max_apic_id)
#define core_list           (cpu_topology_info.core_list)
  30
  31/* Adjust the ids from any given node type to start at 0 and increase from
  32 * there. We use the id_offset in the core_list to index the proper field. */
  33static void adjust_ids(int id_offset)
  34{
  35        int new_id = 0, old_id = -1;
  36
  37        for (int i = 0; i < num_cores; i++) {
  38                for (int j = 0; j < num_cores; j++) {
  39                        int *id_field = ((void*)&core_list[j] + id_offset);
  40                        if (*id_field >= new_id) {
  41                                if (old_id == -1)
  42                                        old_id = *id_field;
  43                                if (old_id == *id_field)
  44                                        *id_field = new_id;
  45                        }
  46                }
  47                old_id=-1;
  48                new_id++;
  49        }
  50}
  51
/* Set the actual socket id from the raw socket id extracted from cpuid.  This
 * algorithm is adapted from the algorithm given at
 * http://wiki.osdev.org/Detecting_CPU_Topology_(80x86) */
static void set_socket_ids(void)
{
        int socket_id, raw_socket_id;

        /* Socket ids are assigned per numa domain, restarting from 0 in each
         * domain.  (They are made globally unique later, in
         * update_core_list_with_absolute_ids().) */
        for (int numa_id = 0; numa_id < num_numa; numa_id++) {
                socket_id = 0;
                for (int i = 0; i < num_cores; i++) {
                        if (core_list[i].numa_id == numa_id) {
                                /* socket_id == -1 marks a core whose socket
                                 * has not been assigned yet (set that way in
                                 * init_core_list()). */
                                if (core_list[i].socket_id == -1) {
                                        core_list[i].socket_id = socket_id;
                                        raw_socket_id =
                                                core_list[i].raw_socket_id;
                                        /* Give every later core in this numa
                                         * domain that shares the same raw
                                         * socket id the same assigned id. */
                                        for (int j = i; j < num_cores; j++) {
                                                if (core_list[j].numa_id ==
                                                    numa_id) {
                                                        if (core_list[j].raw_socket_id == raw_socket_id) {
                                                                core_list[j].socket_id = socket_id;
                                                        }
                                                }
                                        }
                                }
                                /* NOTE(review): incremented once per core in
                                 * the domain, not once per new socket, so
                                 * assigned ids may be non-contiguous; the
                                 * later counting in
                                 * set_remaining_topology_info() only relies
                                 * on them being increasing. */
                                socket_id++;
                        }
                }
        }
}
  81
  82/* Loop through our Srat table to find a matching numa domain for the given
  83 * apid_id. */
  84static int find_numa_domain(int apic_id)
  85{
  86        if (srat == NULL)
  87                return -1;
  88
  89        for (int i = 0; i < srat->nchildren; i++) {
  90                struct Srat *temp = srat->children[i]->tbl;
  91
  92                if (temp != NULL && temp->type == SRlapic) {
  93                        if (temp->lapic.apic == apic_id)
  94                                return temp->lapic.dom;
  95                }
  96        }
  97        return -1;
  98}
  99
 100/* Figure out the maximum number of cores we actually have and set it in our
 101 * cpu_topology_info struct. */
 102static void set_num_cores(void)
 103{
 104        int old_num_cores = num_cores;
 105
 106        if (apics == NULL)
 107                return;
 108
 109        num_cores = 0;
 110        for (int i = 0; i < apics->nchildren; i++) {
 111                struct Apicst *temp = apics->children[i]->tbl;
 112
 113                if (temp != NULL && temp->type == ASlapic)
 114                        num_cores++;
 115        }
 116        if (num_cores < old_num_cores)
 117                warn("Topology found less cores than early MADT parsing!");
 118        /* Too many cores will be a problem for some data structures. */
 119        if (num_cores > old_num_cores)
 120                panic("Topology found more cores than early MADT parsing!");
 121}
 122
 123/* Determine if srat has a unique numa domain compared to to all of the srat
 124 * records in list_head that are of type SRlapic.
 125 *
 126 * Note that this only finds a unique NUMA domain when we're on the last core in
 127 * the list with that domain.  When we find that one, we'll need to scan the
 128 * O(n) other cores from the other domains that are ahead of us in the list.
 129 * It's a little inefficient, but OK for now. */
 130static bool is_unique_numa(struct Srat *srat, struct Atable **tail,
 131                           size_t begin, size_t end)
 132{
 133        for (int i = begin; i < end; i++) {
 134                struct Srat *st = tail[i]->tbl;
 135
 136                if (st && st->type == SRlapic)
 137                        if (srat->lapic.dom == st->lapic.dom)
 138                                return FALSE;
 139        }
 140        return TRUE;
 141}
 142
 143/* Figure out the maximum number of numa domains we actually have.
 144 * This code should always return >= 0 domains. */
 145static int get_num_numa(void)
 146{
 147        int numa = 0;
 148
 149        if (srat == NULL)
 150                return 0;
 151
 152        for (int i = 0; i < srat->nchildren; i++) {
 153                struct Srat *temp = srat->children[i]->tbl;
 154
 155                if (temp != NULL && temp->type == SRlapic)
 156                        if (is_unique_numa(temp, srat->children, i + 1,
 157                                           srat->nchildren))
 158                                numa++;
 159        }
 160
 161        return numa;
 162}
 163
/* Set num_numa in our topology struct from the count of distinct NUMA
 * domains found in the SRAT (0 when there is no SRAT). */
static void set_num_numa(void)
{
        num_numa = get_num_numa();
}
 169
 170/* Figure out what the max apic_id we will ever have is and set it in our
 171 * cpu_topology_info struct. */
 172static void set_max_apic_id(void)
 173{
 174        for (int i = 0; i < apics->nchildren; i++) {
 175                struct Apicst *temp = apics->children[i]->tbl;
 176
 177                if (temp->type == ASlapic) {
 178                        if (temp->lapic.id > max_apic_id)
 179                                max_apic_id = temp->lapic.id;
 180                }
 181        }
 182}
 183
 184static void init_os_coreid_lookup(void)
 185{
 186        /* Allocate (max_apic_id+1) entries in our os_coreid_lookup table.
 187         * There may be holes in this table because of the way apic_ids work,
 188         * but a little wasted space is OK for a constant time lookup of apic_id
 189         * -> logical core id (from the OS's perspective). Memset the array to
 190         *  -1 to to represent invalid entries (which it's very possible we
 191         *  might have if the apic_id space has holes in it).  */
 192        os_coreid_lookup = kmalloc((max_apic_id + 1) * sizeof(int), 0);
 193        memset(os_coreid_lookup, -1, (max_apic_id + 1) * sizeof(int));
 194
 195        /* Loop through and set all valid entries to 0 to start with (making
 196         * them temporarily valid, but not yet set to the correct value). This
 197         * step is necessary because there is no ordering to the linked list we
 198         * are
 199         * pulling these ids from. After this, loop back through and set the
 200         * mapping appropriately. */
 201        for (int i = 0; i < apics->nchildren; i++) {
 202                struct Apicst *temp = apics->children[i]->tbl;
 203
 204                if (temp->type == ASlapic)
 205                        os_coreid_lookup[temp->lapic.id] = 0;
 206        }
 207        int os_coreid = 0;
 208
 209        for (int i = 0; i <= max_apic_id; i++)
 210                if (os_coreid_lookup[i] == 0)
 211                        os_coreid_lookup[i] = os_coreid++;
 212}
 213
/* Fill in core_list with topology info derived from each core's apic_id.
 * core_bits/cpu_bits are the CPUID-leaf-0xB shift widths of the core and
 * package fields within an apic_id (see topology_init()). */
static void init_core_list(uint32_t core_bits, uint32_t cpu_bits)
{
        /* Assuming num_cores and max_apic_id have been set, we can allocate
         * our core_list to the proper size. Initialize all entries to 0s to
         * begin with. */
        core_list = kzmalloc(num_cores * sizeof(struct core_info), 0);

        /* Loop through all possible apic_ids and fill in the core_list array
         * with *relative* topology info. We will change this relative info to
         * absolute info in a future step. As part of this step, we update our
         * os_coreid_lookup array to contain the proper value. */
        int os_coreid = 0;
        int max_cpus = (1 << cpu_bits);
        int max_cores_per_cpu = (1 << core_bits);
        int max_logical_cores = (1 << (core_bits + cpu_bits));
        int raw_socket_id = 0, cpu_id = 0, core_id = 0;

        for (int apic_id = 0; apic_id <= max_apic_id; apic_id++) {
                /* -1 marks a hole in the apic_id space (no such core). */
                if (os_coreid_lookup[apic_id] != -1) {
                        /* Decompose the apic_id: low core_bits = core within
                         * cpu, next cpu_bits = cpu within socket, remaining
                         * high bits = raw socket id. */
                        raw_socket_id = apic_id & ~(max_logical_cores - 1);
                        cpu_id = (apic_id >> core_bits) & (max_cpus - 1);
                        core_id = apic_id & (max_cores_per_cpu - 1);

                        core_list[os_coreid].numa_id =
                                find_numa_domain(apic_id);
                        core_list[os_coreid].raw_socket_id = raw_socket_id;
                        /* -1 = "socket not assigned yet"; resolved below in
                         * set_socket_ids(). */
                        core_list[os_coreid].socket_id = -1;
                        core_list[os_coreid].cpu_id = cpu_id;
                        core_list[os_coreid].core_id = core_id;
                        core_list[os_coreid].apic_id = apic_id;
                        os_coreid++;
                }
        }

        /* In general, the various id's set in the previous step are all unique
         * in terms of representing the topology (i.e. all cores under the same
         * socket have the same socket_id set), but these id's are not
         * necessarily contiguous, and are only relative to the level of the
         * hierarchy they exist at (e.g.  cpu_id 4 may exist under *both*
         * socket_id 0 and socket_id 1). In this step, we squash these id's down
         * so they are contiguous. In a following step, we will make them all
         * absolute instead of relative. */
        adjust_ids(offsetof(struct core_info, numa_id));
        adjust_ids(offsetof(struct core_info, raw_socket_id));
        adjust_ids(offsetof(struct core_info, cpu_id));
        adjust_ids(offsetof(struct core_info, core_id));

        /* We haven't yet set the socket id of each core yet. So far, all we've
         * extracted is a "raw" socket id from the top bits in our apic id, but
         * we need to condense these down into something workable for a socket
         * id, per numa domain. OSDev has an algorithm for doing so
         * (http://wiki.osdev.org/Detecting_CPU_Topology_%2880x86%29).
         * We adapt it for our setup. */
        set_socket_ids();
}
 269
 270static void init_core_list_flat(void)
 271{
 272        /* Assuming num_cores and max_apic_id have been set, we can allocate our
 273         * core_list to the proper size. Initialize all entries to 0s to being
 274         * with. */
 275        core_list = kzmalloc(num_cores * sizeof(struct core_info), 0);
 276
 277        /* Loop through all possible apic_ids and fill in the core_list array
 278         * with flat topology info. */
 279        int os_coreid = 0;
 280
 281        for (int apic_id = 0; apic_id <= max_apic_id; apic_id++) {
 282                if (os_coreid_lookup[apic_id] != -1) {
 283                        core_list[os_coreid].numa_id = 0;
 284                        core_list[os_coreid].raw_socket_id = 0;
 285                        core_list[os_coreid].socket_id = 0;
 286                        core_list[os_coreid].cpu_id = 0;
 287                        core_list[os_coreid].core_id = os_coreid;
 288                        core_list[os_coreid].apic_id = apic_id;
 289                        os_coreid++;
 290                }
 291        }
 292}
 293
/* Derive the per-level counts (sockets_per_numa, cpus_per_socket, ...) from
 * the relative ids already stored in core_list. */
static void set_remaining_topology_info(void)
{
        /* Assuming we have our core_list set up with relative topology info,
         * loop through our core_list and calculate the other statistics that
         * we hold in our cpu_topology_info struct.
         *
         * This counting relies on ids within one parent node increasing as we
         * walk the list: each time an id exceeds the largest one seen so far,
         * it is a node we haven't counted yet.  last_numa is kept for symmetry
         * but never compared against (num_numa is already set). */
        int last_numa = -1, last_socket = -1, last_cpu = -1, last_core = -1;
        for (int i = 0; i < num_cores; i++) {
                if (core_list[i].socket_id > last_socket) {
                        last_socket = core_list[i].socket_id;
                        sockets_per_numa++;
                }
                if (core_list[i].cpu_id > last_cpu) {
                        last_cpu = core_list[i].cpu_id;
                        cpus_per_socket++;
                }
                if (core_list[i].core_id > last_core) {
                        last_core = core_list[i].core_id;
                        cores_per_cpu++;
                }
        }
        /* The remaining totals are products of the per-level counts. */
        cores_per_socket = cpus_per_socket * cores_per_cpu;
        cores_per_numa = sockets_per_numa * cores_per_socket;
        cpus_per_numa = sockets_per_numa * cpus_per_socket;
        num_sockets = sockets_per_numa * num_numa;
        num_cpus = cpus_per_socket * num_sockets;
}
 320
 321static void update_core_list_with_absolute_ids(void)
 322{
 323        /* Fix up our core_list to have absolute id's at every level. */
 324        for (int i = 0; i < num_cores; i++) {
 325                struct core_info *c = &core_list[i];
 326                c->socket_id = num_sockets/num_numa * c->numa_id + c->socket_id;
 327                c->cpu_id = num_cpus/num_sockets * c->socket_id + c->cpu_id;
 328                c->core_id = num_cores/num_cpus * c->cpu_id + c->core_id;
 329        }
 330}
 331
/* Build the full NUMA-aware topology.  The call order matters: counts and
 * max_apic_id must be known before the lookup table is allocated, which must
 * exist before the core_list is filled in. */
static void build_topology(uint32_t core_bits, uint32_t cpu_bits)
{
        set_num_cores();
        set_num_numa();
        set_max_apic_id();
        init_os_coreid_lookup();
        init_core_list(core_bits, cpu_bits);
        set_remaining_topology_info();
        update_core_list_with_absolute_ids();
}
 342
/* Build a flat topology (single NUMA domain, single socket/cpu).  Used when
 * CPUID or ACPI gives us no usable topology information.  The ids produced by
 * init_core_list_flat() are already absolute, so no final fixup pass is
 * needed. */
static void build_flat_topology(void)
{
        set_num_cores();
        num_numa = 1;
        set_max_apic_id();
        init_os_coreid_lookup();
        init_core_list_flat();
        set_remaining_topology_info();
}
 352
 353void topology_init(void)
 354{
 355        uint32_t eax, ebx, ecx, edx;
 356        int smt_leaf, core_leaf;
 357        uint32_t core_bits = 0, cpu_bits = 0;
 358
 359        eax = 0x0000000b;
 360        ecx = 1;
 361        cpuid(eax, ecx, &eax, &ebx, &ecx, &edx);
 362        core_leaf = (ecx >> 8) & 0x00000002;
 363        if (core_leaf == 2) {
 364                cpu_bits = eax;
 365                eax = 0x0000000b;
 366                ecx = 0;
 367                cpuid(eax, ecx, &eax, &ebx, &ecx, &edx);
 368                smt_leaf = (ecx >> 8) & 0x00000001;
 369                if (smt_leaf == 1) {
 370                        core_bits = eax;
 371                        cpu_bits = cpu_bits - core_bits;
 372                }
 373        }
 374        /* BIOSes are not strictly required to put NUMA information
 375         * into the ACPI table. If there is no information the safest
 376         * thing to do is assume it's a non-NUMA system, i.e. flat. */
 377        if (cpu_bits && get_num_numa())
 378                build_topology(core_bits, cpu_bits);
 379        else
 380                build_flat_topology();
 381}
 382
 383void print_cpu_topology(void)
 384{
 385        printk("num_numa: %d, num_sockets: %d, num_cpus: %d, num_cores: %d\n",
 386               num_numa, num_sockets, num_cpus, num_cores);
 387        for (int i = 0; i < num_cores; i++) {
 388                printk("OScoreid: %3d, HWcoreid: %3d, RawSocketid: %3d, "
 389                       "Numa Domain: %3d, Socket: %3d, Cpu: %3d, Core: %3d\n",
 390                       i,
 391                       core_list[i].apic_id,
 392                       core_list[i].numa_id,
 393                       core_list[i].raw_socket_id,
 394                       core_list[i].socket_id,
 395                       core_list[i].cpu_id,
 396                       core_list[i].core_id);
 397        }
 398}
 399