akaros/kern/arch/x86/devarch.c
/*
 * This file is part of the UCB release of Plan 9. It is subject to the license
 * terms in the LICENSE file found in the top-level directory of this
 * distribution and at http://akaros.cs.berkeley.edu/files/Plan9License. No
 * part of the UCB release of Plan 9, including this file, may be copied,
 * modified, propagated, or distributed except according to the terms contained
 * in the LICENSE file.
 */

#include <ros/memops.h>
#include <kmalloc.h>
#include <kref.h>
#include <kthread.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <err.h>
#include <pmap.h>
#include <umem.h>
#include <smp.h>
#include <net/ip.h>
#include <time.h>
#include <bitops.h>
#include <core_set.h>
#include <address_range.h>
#include <arch/ros/perfmon.h>
#include <arch/topology.h>
#include <arch/perfmon.h>
#include <arch/ros/msr-index.h>
#include <arch/msr.h>
#include <arch/devarch.h>

#define REAL_MEM_SIZE (1024 * 1024)
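/* Per-open-chan state for the perf file: a perfmon session plus the
 * serialized response from the most recent command, guarded by resp_lock. */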
struct perf_context {
        struct perfmon_session *ps;
        qlock_t resp_lock;
        size_t resp_size;
        uint8_t *resp;
};
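/* Each io_map records one allocated or reserved I/O port range.  iomap chains
 * them in ascending order of start port, drawing from a static pool of 32
 * entries kept on the free list. */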
struct io_map {
        struct io_map *next;
        int reserved;
        char tag[13];
        uint32_t start;
        uint32_t end;
};

static struct {
        spinlock_t lock;
        struct io_map *map;
        struct io_map *free;
        struct io_map maps[32];         // some initial free maps
        qlock_t ql;                     // lock for reading map
} iomap;
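/* Qid paths for the files this device serves (see archdir below); they appear
 * under the #arch device namespace. */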
enum {
        Qdir = 0,
        Qioalloc = 1,
        Qiob,
        Qiow,
        Qiol,
        Qgdb,
        Qrealmem,
        Qmsr,
        Qperf,
        Qcstate,
        Qpstate,

        Qmax,
};

enum {
        Linelen = 31,
};
struct dev archdevtab;
static struct dirtab archdir[Qmax] = {
        {".", {Qdir, 0, QTDIR}, 0, 0555},
        {"ioalloc", {Qioalloc, 0}, 0, 0444},
        {"iob", {Qiob, 0}, 0, 0666},
        {"iow", {Qiow, 0}, 0, 0666},
        {"iol", {Qiol, 0}, 0, 0666},
        {"gdb", {Qgdb, 0}, 0, 0660},
        {"realmem", {Qrealmem, 0}, 0, 0444},
        {"msr", {Qmsr, 0}, 0, 0666},
        {"perf", {Qperf, 0}, 0, 0666},
        {"c-state", {Qcstate, 0}, 0, 0666},
        {"p-state", {Qpstate, 0}, 0, 0666},
};

/* White list entries must not overlap. */
#define MSR_MAX_VAR_COUNTERS 16
#define MSR_MAX_FIX_COUNTERS 4
static struct address_range msr_rd_wlist[] = {
        ADDRESS_RANGE(0x00000000, 0xffffffff),
};
static struct address_range msr_wr_wlist[] = {
        ADDRESS_RANGE(MSR_IA32_PERFCTR0,
                      MSR_IA32_PERFCTR0 + MSR_MAX_VAR_COUNTERS - 1),
        ADDRESS_RANGE(MSR_ARCH_PERFMON_EVENTSEL0,
                      MSR_ARCH_PERFMON_EVENTSEL0 + MSR_MAX_VAR_COUNTERS - 1),
        ADDRESS_RANGE(MSR_IA32_PERF_CTL, MSR_IA32_PERF_CTL),
        ADDRESS_RANGE(MSR_CORE_PERF_FIXED_CTR0,
                      MSR_CORE_PERF_FIXED_CTR0 + MSR_MAX_FIX_COUNTERS - 1),
        ADDRESS_RANGE(MSR_CORE_PERF_FIXED_CTR_CTRL,
                      MSR_CORE_PERF_GLOBAL_OVF_CTRL),
        ADDRESS_RANGE(MSR_IA32_MPERF, MSR_IA32_APERF),
};
int gdbactive = 0;
//
//  Allocate some I/O port space and remember who it was
//  allocated to.  If port < 0, find a free region.
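//  On success, returns the starting port.  Note that if the io_map pool is
//  exhausted, the port is still returned but the range goes unrecorded.
//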
int ioalloc(int port, int size, int align, char *tag)
{
        struct io_map *map, **l;
        int i;

        spin_lock(&iomap.lock);
        if (port < 0) {
                // find a free port above 0x400 and below 0x1000
                port = 0x400;
                for (l = &iomap.map; *l; l = &(*l)->next) {
                        map = *l;
                        if (map->start < 0x400)
                                continue;
                        i = map->start - port;
                        if (i > size)
                                break;
                        if (align > 0)
                                port = ((port + align - 1) / align) * align;
                        else
                                port = map->end;
                }
                if (*l == NULL) {
                        spin_unlock(&iomap.lock);
                        return -1;
                }
        } else {
                // Only 64KB I/O space on the x86.
                if ((port + size) > 0x10000) {
                        spin_unlock(&iomap.lock);
                        return -1;
                }
                // see if the space clashes with previously allocated ports
                for (l = &iomap.map; *l; l = &(*l)->next) {
                        map = *l;
                        if (map->end <= port)
                                continue;
                        if (map->reserved && map->start == port &&
                            map->end == port + size) {
                                map->reserved = 0;
                                spin_unlock(&iomap.lock);
                                return map->start;
                        }
                        if (map->start >= port + size)
                                break;
                        spin_unlock(&iomap.lock);
                        return -1;
                }
        }
        map = iomap.free;
        if (map == NULL) {
                printd("ioalloc: out of maps\n");
                spin_unlock(&iomap.lock);
                return port;
        }
        iomap.free = map->next;
        map->next = *l;
        map->start = port;
        map->end = port + size;
        strlcpy(map->tag, tag, sizeof(map->tag));
        *l = map;

        archdir[0].qid.vers++;

        spin_unlock(&iomap.lock);
        return map->start;
}
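/* Return the range starting at port to the free pool; silently does nothing
 * if no allocation starts at that port. */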
void iofree(int port)
{
        struct io_map *map, **l;

        spin_lock(&iomap.lock);
        for (l = &iomap.map; *l; l = &(*l)->next) {
                if ((*l)->start == port) {
                        map = *l;
                        *l = map->next;
                        map->next = iomap.free;
                        iomap.free = map;
                        break;
                }
                if ((*l)->start > port)
                        break;
        }
        archdir[0].qid.vers++;
        spin_unlock(&iomap.lock);
}
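/* Returns 1 if [start, end) overlaps no allocated range, else 0.  Note this
 * walks the map without taking iomap.lock. */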
int iounused(int start, int end)
{
        struct io_map *map;

        for (map = iomap.map; map; map = map->next) {
                if (((start >= map->start) && (start < map->end)) ||
                    ((start <= map->start) && (end > map->start)))
                        return 0;
        }
        return 1;
}
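/* Seed the free list and pre-allocate any excluded ranges.  Currently
 * disabled on Akaros: the panic below fires before any initialization. */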
void ioinit(void)
{
        int i;
        char *s;
        char *excluded = "";

        panic("Akaros doesn't do IO port allocation yet.  Don't init.");
        for (i = 0; i < ARRAY_SIZE(iomap.maps) - 1; i++)
                iomap.maps[i].next = &iomap.maps[i + 1];
        iomap.maps[i].next = NULL;
        iomap.free = iomap.maps;

        s = excluded;
        while (s && *s != '\0' && *s != '\n') {
                char *ends;
                int io_s, io_e;

                io_s = (int)strtol(s, &ends, 0);
                if (ends == NULL || ends == s || *ends != '-') {
                        printd("ioinit: cannot parse option string\n");
                        break;
                }
                s = ++ends;

                io_e = (int)strtol(s, &ends, 0);
                if (ends && *ends == ',')
                        *ends++ = '\0';
                s = ends;

                ioalloc(io_s, io_e - io_s + 1, 0, "pre-allocated");
        }
}
// Reserve a range to be ioalloced later.
// This is in particular useful for exchangeable cards, such
// as pcmcia and cardbus cards.
int ioreserve(int unused_int, int size, int align, char *tag)
{
        struct io_map *map, **l;
        int i, port;

        spin_lock(&iomap.lock);
        // find a free port above 0x400 and below 0x1000
        port = 0x400;
        for (l = &iomap.map; *l; l = &(*l)->next) {
                map = *l;
                if (map->start < 0x400)
                        continue;
                i = map->start - port;
                if (i > size)
                        break;
                if (align > 0)
                        port = ((port + align - 1) / align) * align;
                else
                        port = map->end;
        }
        if (*l == NULL) {
                spin_unlock(&iomap.lock);
                return -1;
        }
        map = iomap.free;
        if (map == NULL) {
                printd("ioreserve: out of maps\n");
                spin_unlock(&iomap.lock);
                return port;
        }
        iomap.free = map->next;
        map->next = *l;
        map->start = port;
        map->end = port + size;
        map->reserved = 1;
        strlcpy(map->tag, tag, sizeof(map->tag));
        *l = map;

        archdir[0].qid.vers++;

        spin_unlock(&iomap.lock);
        return map->start;
}
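/* Throw EPERM unless [start, end) lies in one of the standard VGA register
 * windows or is entirely unallocated. */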
static void checkport(int start, int end)
{
        /* standard vga regs are OK */
        if (start >= 0x2b0 && end <= 0x2df + 1)
                return;
        if (start >= 0x3c0 && end <= 0x3da + 1)
                return;

        if (iounused(start, end))
                return;
        error(EPERM, ERROR_FIXME);
}
static struct chan *archattach(char *spec)
{
        return devattach(archdevtab.name, spec);
}

struct walkqid *archwalk(struct chan *c, struct chan *nc, char **name,
                         unsigned int nname)
{
        return devwalk(c, nc, name, nname, archdir, Qmax, devgen);
}

static size_t archstat(struct chan *c, uint8_t *dp, size_t n)
{
        archdir[Qrealmem].length = REAL_MEM_SIZE;

        return devstat(c, dp, n, archdir, Qmax, devgen);
}
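/* Allocate a perf_context and open its perfmon session.  The waserror()
 * block frees the context if session creation throws. */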
static struct perf_context *arch_create_perf_context(void)
{
        ERRSTACK(1);
        struct perf_context *pc = kzmalloc(sizeof(struct perf_context),
                                           MEM_WAIT);

        if (waserror()) {
                kfree(pc);
                nexterror();
        }
        qlock_init(&pc->resp_lock);
        pc->ps = perfmon_create_session();
        poperror();

        return pc;
}
/* Called after the last reference (FD / chan) to pc is closed. */
static void arch_free_perf_context(struct perf_context *pc)
{
        perfmon_close_session(pc->ps);
        kfree(pc->resp);
        kfree(pc);
}
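/* Deserialize a core set from the command stream: a little-endian u32 byte
 * count followed by that many bitmap bytes, one bit per core.  Returns the
 * advanced read pointer; throws EBADMSG on a short buffer. */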
static const uint8_t *arch_read_core_set(struct core_set *cset,
                                         const uint8_t *kptr,
                                         const uint8_t *ktop)
{
        int i, nb;
        uint32_t n;

        error_check(EBADMSG, (kptr + sizeof(uint32_t)) <= ktop);
        kptr = get_le_u32(kptr, &n);
        error_check(EBADMSG, (kptr + n) <= ktop);
        core_set_init(cset);
        nb = MIN((int) n * 8, num_cores);
        for (i = 0; i < nb; i++) {
                if (test_bit(i, (const unsigned long *) kptr))
                        core_set_setcpu(cset, i);
        }

        return kptr + n;
}
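/*
 * Execute one perfmon command from userspace.  The buffer layout, as parsed
 * below (all integers little-endian):
 *
 *   u8  cmd                        PERFMON_CMD_* selector
 *   ... command-specific payload, e.g. for PERFMON_CMD_COUNTER_OPEN:
 *   u64 event, u64 flags, u64 trigger_count, u64 user_data
 *   u32 nbytes, u8 bitmap[nbytes]  core set, one bit per core
 *
 * Any response is stashed in pc->resp and served by later reads of Qperf.
 * Returns the number of command bytes consumed.
 */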
static long arch_perf_write(struct perf_context *pc, const void *udata,
                            long usize)
{
        ERRSTACK(1);
        void *kdata;
        const uint8_t *kptr, *ktop;

        kdata = user_memdup_errno(current, udata, usize);
        if (unlikely(!kdata))
                return -1;
        qlock(&pc->resp_lock);
        if (waserror()) {
                qunlock(&pc->resp_lock);
                kfree(kdata);
                nexterror();
        }
        /* Fresh command, reset the response buffer */
        kfree(pc->resp);
        pc->resp = NULL;
        pc->resp_size = 0;

        kptr = kdata;
        ktop = kptr + usize;
        error_check(EBADMSG, (kptr + 1) <= ktop);
        switch (*kptr++) {
        case PERFMON_CMD_COUNTER_OPEN: {
                int ped;
                struct perfmon_event pev;
                struct core_set cset;

                error_check(EBADMSG, (kptr + 4 * sizeof(uint64_t)) <= ktop);
                perfmon_init_event(&pev);
                kptr = get_le_u64(kptr, &pev.event);
                kptr = get_le_u64(kptr, &pev.flags);
                kptr = get_le_u64(kptr, &pev.trigger_count);
                kptr = get_le_u64(kptr, &pev.user_data);
                kptr = arch_read_core_set(&cset, kptr, ktop);

                ped = perfmon_open_event(&cset, pc->ps, &pev);

                pc->resp_size = sizeof(uint32_t);
                pc->resp = kmalloc(pc->resp_size, MEM_WAIT);
                put_le_u32(pc->resp, (uint32_t) ped);
                break;
        }
        case PERFMON_CMD_COUNTER_STATUS: {
                uint32_t ped;
                uint8_t *rptr;
                struct perfmon_status *pef;

                error_check(EBADMSG, (kptr + sizeof(uint32_t)) <= ktop);
                kptr = get_le_u32(kptr, &ped);

                pef = perfmon_get_event_status(pc->ps, (int) ped);

                pc->resp_size = sizeof(uint32_t) + num_cores * sizeof(uint64_t);
                pc->resp = kmalloc(pc->resp_size, MEM_WAIT);
                rptr = put_le_u32(pc->resp, num_cores);
                for (int i = 0; i < num_cores; i++)
                        rptr = put_le_u64(rptr, pef->cores_values[i]);

                perfmon_free_event_status(pef);
                break;
        }
        case PERFMON_CMD_COUNTER_CLOSE: {
                uint32_t ped;

                error_check(EBADMSG, (kptr + sizeof(uint32_t)) <= ktop);
                kptr = get_le_u32(kptr, &ped);

                perfmon_close_event(pc->ps, (int) ped);
                break;
        }
        case PERFMON_CMD_CPU_CAPS: {
                uint8_t *rptr;
                struct perfmon_cpu_caps pcc;

                perfmon_get_cpu_caps(&pcc);

                pc->resp_size = 6 * sizeof(uint32_t);
                pc->resp = kmalloc(pc->resp_size, MEM_WAIT);

                rptr = put_le_u32(pc->resp, pcc.perfmon_version);
                rptr = put_le_u32(rptr, pcc.proc_arch_events);
                rptr = put_le_u32(rptr, pcc.bits_x_counter);
                rptr = put_le_u32(rptr, pcc.counters_x_proc);
                rptr = put_le_u32(rptr, pcc.bits_x_fix_counter);
                rptr = put_le_u32(rptr, pcc.fix_counters_x_proc);
                break;
        }
        default:
                error(EINVAL, "Invalid perfmon command: 0x%x", kptr[-1]);
        }
        poperror();
        qunlock(&pc->resp_lock);
        kfree(kdata);

        return (long) (kptr - (const uint8_t *) kdata);
}
static struct chan *archopen(struct chan *c, int omode)
{
        c = devopen(c, omode, archdir, Qmax, devgen);
        switch ((uint32_t) c->qid.path) {
        case Qperf:
                if (!perfmon_supported())
                        error(ENODEV, "perf is not supported");
                assert(!c->aux);
                c->aux = arch_create_perf_context();
                break;
        }

        return c;
}

static void archclose(struct chan *c)
{
        switch ((uint32_t) c->qid.path) {
        case Qperf:
                if (c->aux) {
                        arch_free_perf_context((struct perf_context *) c->aux);
                        c->aux = NULL;
                }
                break;
        }
}
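/* Read handler: raw port reads for iob/iow/iol, whitelisted MSR reads fanned
 * out to every available core, the perf response buffer, the C/P-state
 * values, and the ioalloc listing rendered one Linelen-byte row per map. */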
static size_t archread(struct chan *c, void *a, size_t n, off64_t offset)
{
        char *buf, *p;
        int err, port;
        uint64_t *values;
        uint16_t *sp;
        uint32_t *lp;
        struct io_map *map;
        struct core_set cset;
        struct msr_address msra;
        struct msr_value msrv;

        switch ((uint32_t) c->qid.path) {
        case Qdir:
                return devdirread(c, a, n, archdir, Qmax, devgen);
        case Qgdb:
                p = gdbactive ? "1" : "0";
                return readstr(offset, a, n, p);
        case Qiob:
                port = offset;
                checkport(offset, offset + n);
                for (p = a; port < offset + n; port++)
                        *p++ = inb(port);
                return n;
        case Qiow:
                if (n & 1)
                        error(EINVAL, ERROR_FIXME);
                checkport(offset, offset + n);
                sp = a;
                for (port = offset; port < offset + n; port += 2)
                        *sp++ = inw(port);
                return n;
        case Qiol:
                if (n & 3)
                        error(EINVAL, ERROR_FIXME);
                checkport(offset, offset + n);
                lp = a;
                for (port = offset; port < offset + n; port += 4)
                        *lp++ = inl(port);
                return n;
        case Qioalloc:
                break;
        case Qrealmem:
                return readmem(offset, a, n, KADDR(0), REAL_MEM_SIZE);
        case Qmsr:
                if (!address_range_find(msr_rd_wlist, ARRAY_SIZE(msr_rd_wlist),
                                        (uintptr_t) offset))
                        error(EPERM, "MSR 0x%x not in read whitelist",
                              (uint32_t) offset);
                core_set_init(&cset);
                core_set_fill_available(&cset);
                msr_set_address(&msra, (uint32_t) offset);
                values = kzmalloc(num_cores * sizeof(uint64_t),
                                  MEM_WAIT);
                if (!values)
                        error(ENOMEM, ERROR_FIXME);
                msr_set_values(&msrv, values, num_cores);

                err = msr_cores_read(&cset, &msra, &msrv);

                if (likely(!err)) {
                        if (n >= num_cores * sizeof(uint64_t)) {
                                if (!memcpy_to_user_errno(current, a, values,
                                                          num_cores *
                                                          sizeof(uint64_t)))
                                        n = num_cores * sizeof(uint64_t);
                                else
                                        n = -1;
                        } else {
                                kfree(values);
                                error(ERANGE, "Not enough space for MSR read");
                        }
                } else {
                        switch (-err) {
                        case (EFAULT):
                                error(-err, "read_msr() faulted on MSR 0x%x",
                                      (uint32_t) offset);
                        case (ERANGE):
                                error(-err, "Not enough space for MSR read");
                        }
                        error(-err, "MSR read failed");
                }
                kfree(values);
                return n;
        case Qperf: {
                struct perf_context *pc = (struct perf_context *) c->aux;

                assert(pc);
                qlock(&pc->resp_lock);
                if (pc->resp && ((size_t) offset < pc->resp_size)) {
                        n = MIN(n, (long) pc->resp_size - (long) offset);
                        if (memcpy_to_user_errno(current, a, pc->resp + offset,
                                                 n))
                                n = -1;
                } else {
                        n = 0;
                }
                qunlock(&pc->resp_lock);

                return n;
        }
        case Qcstate:
                return readnum_hex(offset, a, n, get_cstate(), NUMSIZE32);
        case Qpstate:
                return readnum_hex(offset, a, n, get_pstate(), NUMSIZE32);
        default:
                error(EINVAL, ERROR_FIXME);
        }

        if ((buf = kzmalloc(n, 0)) == NULL)
                error(ENOMEM, ERROR_FIXME);
        p = buf;
        n = n / Linelen;
        offset = offset / Linelen;

        switch ((uint32_t) c->qid.path) {
        case Qioalloc:
                spin_lock(&iomap.lock);
                for (map = iomap.map; n > 0 && map != NULL; map = map->next) {
                        if (offset-- > 0)
                                continue;
                        snprintf(p, n * Linelen, "%#8x %#8x %-12.12s\n",
                                 map->start,
                                 map->end - 1, map->tag);
                        p += Linelen;
                        n--;
                }
                spin_unlock(&iomap.lock);
                break;
        }

        n = p - buf;
        memmove(a, buf, n);
        kfree(buf);

        return n;
}
static ssize_t cstate_write(void *ubuf, size_t len, off64_t off)
{
        set_cstate(strtoul_from_ubuf(ubuf, len, off));
        /* Poke the other cores so they use the new C-state. */
        send_broadcast_ipi(I_POKE_CORE);
        return len;
}

static void __smp_set_pstate(void *arg)
{
        unsigned int val = (unsigned int)(unsigned long)arg;

        set_pstate(val);
}

static ssize_t pstate_write(void *ubuf, size_t len, off64_t off)
{
        struct core_set all_cores;

        core_set_init(&all_cores);
        core_set_fill_available(&all_cores);
        smp_do_in_cores(&all_cores, __smp_set_pstate,
                        (void*)strtoul_from_ubuf(ubuf, len, off));
        return len;
}
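/* Write handler: mirrors archread for the port files; Qmsr writes one u64 to
 * the same whitelisted MSR on every available core. */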
static size_t archwrite(struct chan *c, void *a, size_t n, off64_t offset)
{
        char *p;
        int port, err;
        uint64_t value;
        uint16_t *sp;
        uint32_t *lp;
        struct core_set cset;
        struct msr_address msra;
        struct msr_value msrv;

        switch ((uint32_t) c->qid.path) {
        case Qgdb:
                p = a;
                if (n != 1)
                        error(EINVAL, "Gdb: Write one byte, '1' or '0'");
                if (*p == '1')
                        gdbactive = 1;
                else if (*p == '0')
                        gdbactive = 0;
                else
                        error(EINVAL, "Gdb: must be 1 or 0");
                return 1;
        case Qiob:
                p = a;
                checkport(offset, offset + n);
                for (port = offset; port < offset + n; port++)
                        outb(port, *p++);
                return n;
        case Qiow:
                if (n & 1)
                        error(EINVAL, ERROR_FIXME);
                checkport(offset, offset + n);
                sp = a;
                for (port = offset; port < offset + n; port += 2)
                        outw(port, *sp++);
                return n;
        case Qiol:
                if (n & 3)
                        error(EINVAL, ERROR_FIXME);
                checkport(offset, offset + n);
                lp = a;
                for (port = offset; port < offset + n; port += 4)
                        outl(port, *lp++);
                return n;
        case Qmsr:
                if (!address_range_find(msr_wr_wlist, ARRAY_SIZE(msr_wr_wlist),
                                        (uintptr_t) offset))
                        error(EPERM, "MSR 0x%x not in write whitelist",
                              (uint32_t) offset);
                if (n != sizeof(uint64_t))
                        error(EINVAL, "Tried to write more than a u64 (%lu)",
                              n);
                if (memcpy_from_user_errno(current, &value, a, sizeof(value)))
                        return -1;

                core_set_init(&cset);
                core_set_fill_available(&cset);
                msr_set_address(&msra, (uint32_t) offset);
                msr_set_value(&msrv, value);

                err = msr_cores_write(&cset, &msra, &msrv);
                if (unlikely(err)) {
                        switch (-err) {
                        case (EFAULT):
                                error(-err, "write_msr() faulted on MSR 0x%x",
                                      (uint32_t) offset);
                        case (ERANGE):
                                error(-err, "Not enough space for MSR write");
                        }
                        error(-err, "MSR write failed");
                }
                return sizeof(uint64_t);
        case Qperf: {
                struct perf_context *pc = (struct perf_context *) c->aux;

                assert(pc);

                return arch_perf_write(pc, a, n);
        }
        case Qcstate:
                return cstate_write(a, n, 0);
        case Qpstate:
                return pstate_write(a, n, 0);
        default:
                error(EINVAL, ERROR_FIXME);
        }
        return 0;
}
static void archinit(void)
{
        int ret;

        ret = address_range_init(msr_rd_wlist, ARRAY_SIZE(msr_rd_wlist));
        assert(!ret);
        ret = address_range_init(msr_wr_wlist, ARRAY_SIZE(msr_wr_wlist));
        assert(!ret);
}
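/* Device table entry for #arch; operations without arch-specific behavior
 * point at the generic dev* defaults. */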
struct dev archdevtab __devtab = {
        .name = "arch",

        .reset = devreset,
        .init = archinit,
        .shutdown = devshutdown,
        .attach = archattach,
        .walk = archwalk,
        .stat = archstat,
        .open = archopen,
        .create = devcreate,
        .close = archclose,
        .read = archread,
        .bread = devbread,
        .write = archwrite,
        .bwrite = devbwrite,
        .remove = devremove,
        .wstat = devwstat,
};
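/* Hard-reset the machine: pulse the ICHx reset-control register (0xcf9),
 * then fall back to the 8042 keyboard-controller reset if that didn't take. */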
void archreset(void)
{
        int i;

        /*
         * And sometimes there is no keyboard...
         *
         * The reset register (0xcf9) is usually in one of the bridge
         * chips. The actual location and sequence could be extracted from
         * ACPI but why bother, this is the end of the line anyway.
         print("Takes a licking and keeps on ticking...\n");
         */
        i = inb(0xcf9); /* ICHx reset control */
        i &= 0x06;
        outb(0xcf9, i | 0x02);  /* SYS_RST */
        udelay(1000);
        outb(0xcf9, i | 0x06);  /* RST_CPU transition */

        udelay(100 * 1000);

        /* some broken hardware -- as well as qemu -- might
         * never reboot anyway with cf9. This is a standard
         * keyboard reboot sequence known to work on really
         * broken stuff -- like qemu. If there is no
         * keyboard it will do no harm.
         */
        for (;;) {
                (void)inb(0x64);
                outb(0x64, 0xFE);
                udelay(100 * 1000);
        }
}