akaros/user/perfmon/pfmlib_intel_x86.c
<<
>>
Prefs
   1/* pfmlib_intel_x86.c : common code for Intel X86 processors
   2 *
   3 * Copyright (c) 2009 Google, Inc
   4 * Contributed by Stephane Eranian <eranian@gmail.com>
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
  10 * of the Software, and to permit persons to whom the Software is furnished to do so,
  11 * subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in all
  14 * copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  17 * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  18 * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  19 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  20 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
  21 * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  22 *
  23 * This file implements the common code for all Intel X86 processors.
  24 */
  25#include <sys/types.h>
  26#include <string.h>
  27#include <stdlib.h>
  28#include <stdio.h>
  29#include <stdarg.h>
  30
  31/* private headers */
  32#include "pfmlib_priv.h"
  33#include "pfmlib_intel_x86_priv.h"
  34
/*
 * Table of generic event modifiers ("attributes") common to Intel x86
 * core PMUs.
 *
 * NOTE(review): the entry order appears to correspond to the
 * INTEL_X86_ATTR_* index values used by the encoder's switch on a->idx
 * below — confirm before reordering entries.
 */
const pfmlib_attr_desc_t intel_x86_mods[]={
	PFM_ATTR_B("k", "monitor at priv level 0"),		/* monitor priv level 0 */
	PFM_ATTR_B("u", "monitor at priv level 1, 2, 3"),	/* monitor priv level 1, 2, 3 */
	PFM_ATTR_B("e", "edge level (may require counter-mask >= 1)"), /* edge */
	PFM_ATTR_B("i", "invert"),				/* invert */
	PFM_ATTR_I("c", "counter-mask in range [0-255]"),	/* counter-mask */
	PFM_ATTR_B("t", "measure any thread"),			/* monitor on both threads */
	PFM_ATTR_I("ldlat", "load latency threshold (cycles, [3-65535])"),	/* load latency threshold */
	PFM_ATTR_B("intx", "monitor only inside transactional memory region"),
	PFM_ATTR_B("intxcp", "do not count occurrences inside aborted transactional memory region"),
	PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */
};
  47
/* cached CPU identification (family/model/stepping), filled in lazily by
 * pfm_intel_x86_detect(); family == 0 means "not yet detected" */
pfm_intel_x86_config_t pfm_intel_x86_cfg;
  49
/*
 * Execute the CPUID instruction for leaf 'op' and return the resulting
 * eax/ebx/ecx/edx register values through *a, *b, *c, *d.
 *
 * .byte 0x53 == push ebx. it's universal for 32 and 64 bit
 * .byte 0x5b == pop ebx.
 * Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode.
 * Using the opcode directly avoids this problem.
 */
static inline void
cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
{
  /* ebx is saved/restored by hand and its value routed through esi
   * ("=S" constraint), so the compiler never sees ebx clobbered */
  __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b"
       : "=a" (*a),
	     "=S" (*b),
		 "=c" (*c),
		 "=d" (*d)
       : "a" (op));
}
  66
  67static void
  68pfm_intel_x86_display_reg(void *this, pfmlib_event_desc_t *e)
  69{
  70        const intel_x86_entry_t *pe = this_pe(this);
  71        pfm_intel_x86_reg_t reg;
  72        int i;
  73
  74        reg.val = e->codes[0];
  75
  76        /*
  77         * handle generic counters
  78         */
  79        __pfm_vbprintf("[0x%"PRIx64" event_sel=0x%x umask=0x%x os=%d usr=%d "
  80                       "en=%d int=%d inv=%d edge=%d cnt_mask=%d",
  81                        reg.val,
  82                        reg.sel_event_select,
  83                        reg.sel_unit_mask,
  84                        reg.sel_os,
  85                        reg.sel_usr,
  86                        reg.sel_en,
  87                        reg.sel_int,
  88                        reg.sel_inv,
  89                        reg.sel_edge,
  90                        reg.sel_cnt_mask);
  91
  92        if (pe[e->event].modmsk & _INTEL_X86_ATTR_T)
  93                __pfm_vbprintf(" any=%d", reg.sel_anythr);
  94
  95        __pfm_vbprintf("]", e->fstr);
  96
  97        for (i = 1 ; i < e->count; i++)
  98                __pfm_vbprintf(" [0x%"PRIx64"]", e->codes[i]);
  99
 100        __pfm_vbprintf(" %s\n", e->fstr);
 101
 102}
 103
 104/*
 105 * number of HW modifiers
 106 */
 107static int
 108intel_x86_num_mods(void *this, int idx)
 109{
 110        const intel_x86_entry_t *pe = this_pe(this);
 111        unsigned int mask;
 112
 113        mask = pe[idx].modmsk;
 114        return pfmlib_popcnt(mask);
 115}
 116
 117int
 118intel_x86_attr2mod(void *this, int pidx, int attr_idx)
 119{
 120        const intel_x86_entry_t *pe = this_pe(this);
 121        size_t x;
 122        int n, numasks;
 123
 124        numasks = intel_x86_num_umasks(this, pidx);
 125        n = attr_idx - numasks;
 126
 127        pfmlib_for_each_bit(x, pe[pidx].modmsk) {
 128                if (n == 0)
 129                        break;
 130                n--;
 131        }
 132        return x;
 133}
 134
 135/*
 136 * detect processor model using cpuid()
 137 * based on documentation
 138 * http://www.intel.com/Assets/PDF/appnote/241618.pdf
 139 */
 140int
 141pfm_intel_x86_detect(void)
 142{
 143        unsigned int a, b, c, d;
 144        char buffer[64];
 145
 146        if (pfm_intel_x86_cfg.family)
 147                return PFM_SUCCESS;
 148
 149        cpuid(0, &a, &b, &c, &d);
 150        strncpy(&buffer[0], (char *)(&b), 4);
 151        strncpy(&buffer[4], (char *)(&d), 4);
 152        strncpy(&buffer[8], (char *)(&c), 4);
 153        buffer[12] = '\0';
 154
 155        /* must be Intel */
 156        if (strcmp(buffer, "GenuineIntel"))
 157                return PFM_ERR_NOTSUPP;
 158
 159        cpuid(1, &a, &b, &c, &d);
 160
 161        pfm_intel_x86_cfg.family = (a >> 8) & 0xf;  // bits 11 - 8
 162        pfm_intel_x86_cfg.model  = (a >> 4) & 0xf;  // Bits  7 - 4
 163        pfm_intel_x86_cfg.stepping = a & 0xf;       // Bits 0 - 3
 164
 165        /* extended family */
 166        if (pfm_intel_x86_cfg.family == 0xf)
 167                pfm_intel_x86_cfg.family += (a >> 20) & 0xff;
 168
 169        /* extended model */
 170        if (pfm_intel_x86_cfg.family >= 0x6)
 171                pfm_intel_x86_cfg.model += ((a >> 16) & 0xf) << 4;
 172
 173        return PFM_SUCCESS;
 174}
 175
 176int pfm_intel_x86_model_detect(void *this)
 177{
 178        pfmlib_pmu_t *pmu = this;
 179        const int *p;
 180        int ret;
 181
 182        ret = pfm_intel_x86_detect();
 183        if (ret != PFM_SUCCESS)
 184                return ret;
 185
 186        if (pfm_intel_x86_cfg.family != pmu->cpu_family)
 187                return PFM_ERR_NOTSUPP;
 188
 189        for (p = pmu->cpu_models; *p; p++) {
 190                if (*p == pfm_intel_x86_cfg.model)
 191                        return PFM_SUCCESS;
 192        }
 193        return PFM_ERR_NOTSUPP;
 194}
 195
/*
 * For each unit-mask group whose bit is set in 'msk' (i.e. groups the
 * caller supplied no umask for), append that group's default umask(s) to
 * e->attrs and OR their unit-mask code into *umask.
 *
 * max_grpid, when != INTEL_X86_MAX_GRPID, caps which groups may
 * contribute defaults. Returns PFM_ERR_UMASK when a group has no usable
 * default, PFM_SUCCESS otherwise. On success e->nattrs is updated to
 * include the added attributes.
 */
int
pfm_intel_x86_add_defaults(void *this, pfmlib_event_desc_t *e,
			   unsigned int msk,
			   uint64_t *umask,
			   unsigned int max_grpid)
{
	const intel_x86_entry_t *pe = this_pe(this);
	const intel_x86_entry_t *ent;
	unsigned int i;
	int j, k, added, skip;
	int idx;

	/* k indexes the next free slot in e->attrs */
	k = e->nattrs;
	ent = pe+e->event;

	/* iterate over group ids i while consuming the group bitmask */
	for(i=0; msk; msk >>=1, i++) {

		if (!(msk & 0x1))
			continue;

		added = skip = 0;
		/*
		 * must scan list of possible attributes
		 * (not all possible attributes)
		 */
		for (j = 0; j < e->npattrs; j++) {
			if (e->pattrs[j].ctrl != PFM_ATTR_CTRL_PMU)
				continue;

			if (e->pattrs[j].type != PFM_ATTR_UMASK)
				continue;

			idx = e->pattrs[j].idx;

			/* only umasks belonging to this group */
			if (ent->umasks[idx].grpid != i)
				continue;

			/* group above the cap: silently skipped, not an error */
			if (max_grpid != INTEL_X86_MAX_GRPID && i > max_grpid) {
				skip = 1;
				continue;
			}

			/* group explicitly declares it has no default */
			if (intel_x86_uflag(this, e->event, idx, INTEL_X86_GRP_DFL_NONE)) {
				skip = 1;
				continue;
			}

			/* umask is default for group */
			if (intel_x86_uflag(this, e->event, idx, INTEL_X86_DFL)) {
				DPRINT("added default %s for group %d j=%d idx=%d ucode=0x%"PRIx64"\n",
					ent->umasks[idx].uname,
					i,
					j,
					idx,
					ent->umasks[idx].ucode);
				/*
				 * default could be an alias, but
				 * ucode must reflect actual code
				 */
				*umask |= ent->umasks[idx].ucode >> 8;

				e->attrs[k].id = j; /* pattrs index */
				e->attrs[k].ival = 0;
				k++;

				added++;
				/* exclusive groups: one default is enough, stop here */
				if (intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL))
					goto done;

				/* a default umask may itself impose a group cap;
				 * two competing caps is a table inconsistency */
				if (intel_x86_uflag(this, e->event, idx, INTEL_X86_EXCL_GRP_GT)) {
					if (max_grpid != INTEL_X86_MAX_GRPID) {
						DPRINT("two max_grpid, old=%d new=%d\n", max_grpid, ent->umasks[idx].grpid);
						return PFM_ERR_UMASK;
					}
					max_grpid = ent->umasks[idx].grpid;
				}
			}
		}
		if (!added && !skip) {
			DPRINT("no default found for event %s unit mask group %d (max_grpid=%d)\n", ent->name, i, max_grpid);
			return PFM_ERR_UMASK;
		}
	}
	DPRINT("max_grpid=%d nattrs=%d k=%d umask=0x%"PRIx64"\n", max_grpid, e->nattrs, k, *umask);
done:
	e->nattrs = k;
	return PFM_SUCCESS;
}
 284
/*
 * Validate PEBS usage: when PEBS is requested, every selected umask must
 * itself support PEBS; otherwise the combination is rejected.
 */
static int
intel_x86_check_pebs(void *this, pfmlib_event_desc_t *e)
{
	const intel_x86_entry_t *pe = this_pe(this);
	pfm_event_attr_info_t *a;
	int numasks = 0, pebs = 0;
	int i;

#if 1
	/* NOTE(review): the check is disabled in this port — the function
	 * unconditionally reports success here; confirm this is intended */
	if (1) // !intel_x86_requesting_pebs(e))
		return PFM_SUCCESS;
#endif

	/*
	 * if event has no umask and is PEBS, then we are okay
	 */
	if (!pe[e->event].numasks
	    && intel_x86_eflag(this, e->event, INTEL_X86_PEBS))
		return PFM_SUCCESS;

	/*
	 * if the event sets PEBS, then it means at least one umask
	 * supports PEBS, so we need to check
	 */
	for (i = 0; i < e->nattrs; i++) {
		a = attr(e, i);

		if (a->ctrl != PFM_ATTR_CTRL_PMU)
			continue;

		if (a->type == PFM_ATTR_UMASK) {
			/* count number of umasks */
			numasks++;
			/* and those that support PEBS */
			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_PEBS))
				pebs++;
		}
	}
	/*
	 * pass if user requested only PEBS  umasks
	 */
	return pebs != numasks ? PFM_ERR_FEATCOMB : PFM_SUCCESS;
}
 328
 329static int
 330intel_x86_check_max_grpid(void *this, pfmlib_event_desc_t *e, int max_grpid)
 331{
 332        const intel_x86_entry_t *pe;
 333        pfm_event_attr_info_t *a;
 334        int i, grpid;
 335
 336        DPRINT("check: max_grpid=%d\n", max_grpid);
 337        pe = this_pe(this);
 338
 339        for (i = 0; i < e->nattrs; i++) {
 340                a = attr(e, i);
 341
 342                if (a->ctrl != PFM_ATTR_CTRL_PMU)
 343                        continue;
 344
 345                if (a->type == PFM_ATTR_UMASK) {
 346                        grpid = pe[e->event].umasks[a->idx].grpid;
 347                        if (grpid > max_grpid)
 348                                return PFM_ERR_FEATCOMB;
 349                }
 350        }
 351        return PFM_SUCCESS;
 352}
 353
/*
 * Generic event encoder for Intel x86 core PMUs.
 *
 * Builds the PERFEVTSEL-style register value for event e: starts from the
 * hardcoded event code, folds in user-supplied umasks and modifiers,
 * fills in per-group default umasks where missing, validates the
 * combination (exclusive groups, NCOMBO, PEBS, max_grpid, required
 * modifiers, edge/cmask), and produces both e->codes[] and the canonical
 * event string e->fstr.
 *
 * Returns PFM_SUCCESS or a PFM_ERR_* code.
 */
static int
pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e)

{
	pfmlib_pmu_t *pmu = this;
	pfm_event_attr_info_t *a;
	const intel_x86_entry_t *pe;
	pfm_intel_x86_reg_t reg;
	unsigned int grpmsk, ugrpmsk = 0;	/* all groups vs. groups seen */
	uint64_t umask1, umask2, ucode, last_ucode = ~0ULL;
	unsigned int modhw = 0;		/* modifiers hardwired by the umasks */
	unsigned int plmmsk = 0;	/* priv-level modifiers explicitly set */
	int umodmsk = 0, modmsk_r = 0;	/* user-set vs. required modifiers */
	int k, ret, id;
	unsigned int max_grpid = INTEL_X86_MAX_GRPID;
	unsigned int last_grpid =  INTEL_X86_MAX_GRPID;
	unsigned int grpid;
	int ldlat = 0, ldlat_um = 0;
	int grpcounts[INTEL_X86_NUM_GRP];	/* # umasks chosen per group */
	int ncombo[INTEL_X86_NUM_GRP];		/* group contains a NCOMBO umask */

	memset(grpcounts, 0, sizeof(grpcounts));
	memset(ncombo, 0, sizeof(ncombo));

	pe     = this_pe(this);

	e->fstr[0] = '\0';

	/*
	 * preset certain fields from event code
	 * including modifiers
	 */
	reg.val = pe[e->event].code;

	grpmsk = (1 << pe[e->event].ngrp)-1;

	/* take into account hardcoded umask */
	umask1 = (reg.val >> 8) & 0xff;
	umask2 = 0;

	modmsk_r = pe[e->event].modmsk_req;

	/* first pass: fold every user-supplied attribute into the encoding */
	for (k = 0; k < e->nattrs; k++) {
		a = attr(e, k);

		if (a->ctrl != PFM_ATTR_CTRL_PMU)
			continue;

		if (a->type == PFM_ATTR_UMASK) {
			grpid = pe[e->event].umasks[a->idx].grpid;

			/*
			 * certain event groups are meant to be
			 * exclusive, i.e., only unit masks of one group
			 * can be used
			 */
			if (last_grpid != INTEL_X86_MAX_GRPID && grpid != last_grpid
			    && intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) {
				DPRINT("exclusive unit mask group error\n");
				return PFM_ERR_FEATCOMB;
			}
			/*
			 * selecting certain umasks in a group may exclude any umasks
			 * from any groups with a higher index
			 *
			 * enforcement requires looking at the grpid of all the umasks
			 */
			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_EXCL_GRP_GT))
				max_grpid = grpid;

			/*
			 * upper layer has removed duplicates
			 * so if we come here more than once, it is for two
			 * distinct umasks
			 *
			 * NCOMBO=no combination of unit masks within the same
			 * umask group
			 */
			++grpcounts[grpid];

			/* mark that we have a umask with NCOMBO in this group */
			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_NCOMBO))
				ncombo[grpid] = 1;

			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_LDLAT))
				ldlat_um = 1;
			/*
			 * if more than one umask in this group but one is marked
			 * with ncombo, then fail. It is okay to combine umask within
			 * a group as long as none is tagged with NCOMBO
			 */
			if (grpcounts[grpid] > 1 && ncombo[grpid])  {
				DPRINT("umask %s does not support unit mask combination within group %d\n", pe[e->event].umasks[a->idx].uname, grpid);
				return PFM_ERR_FEATCOMB;
			}

			last_grpid = grpid;
			ucode     = pe[e->event].umasks[a->idx].ucode;
			modhw    |= pe[e->event].umasks[a->idx].modhw;
			umask2   |= ucode >> 8;
			ugrpmsk  |= 1 << pe[e->event].umasks[a->idx].grpid;

			modmsk_r |= pe[e->event].umasks[a->idx].umodmsk_req;

			/* some umasks override the event-select byte itself;
			 * all such umasks must agree on the code */
			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_CODE_OVERRIDE)) {
				if (last_ucode != ~0ULL && (ucode & 0xff) != last_ucode) {
					DPRINT("cannot override event with two different codes for %s\n", pe[e->event].name);
					return PFM_ERR_FEATCOMB;
				}
				last_ucode = ucode & 0xff;
				reg.sel_event_select = last_ucode;
			}
		} else if (a->type == PFM_ATTR_RAW_UMASK) {

			/* there can only be one RAW_UMASK per event */

			/* sanity check */
			if (a->idx & ~0xff) {
				DPRINT("raw umask is 8-bit wide\n");
				return PFM_ERR_ATTR;
			}
			/* override umask */
			umask2 = a->idx & 0xff;
			ugrpmsk = grpmsk;
		} else {
			/* hardware modifier; reject if the value is hardwired
			 * by one of the selected umasks (modhw) */
			uint64_t ival = e->attrs[k].ival;
			switch(a->idx) {
				case INTEL_X86_ATTR_I: /* invert */
					if (modhw & _INTEL_X86_ATTR_I)
						return PFM_ERR_ATTR_SET;
					reg.sel_inv = !!ival;
					umodmsk |= _INTEL_X86_ATTR_I;
					break;
				case INTEL_X86_ATTR_E: /* edge */
					if (modhw & _INTEL_X86_ATTR_E)
						return PFM_ERR_ATTR_SET;
					reg.sel_edge = !!ival;
					umodmsk |= _INTEL_X86_ATTR_E;
					break;
				case INTEL_X86_ATTR_C: /* counter-mask */
					if (modhw & _INTEL_X86_ATTR_C)
						return PFM_ERR_ATTR_SET;
					if (ival > 255)
						return PFM_ERR_ATTR_VAL;
					reg.sel_cnt_mask = ival;
					umodmsk |= _INTEL_X86_ATTR_C;
					break;
				case INTEL_X86_ATTR_U: /* USR */
					if (modhw & _INTEL_X86_ATTR_U)
						return PFM_ERR_ATTR_SET;
					reg.sel_usr = !!ival;
					plmmsk |= _INTEL_X86_ATTR_U;
					umodmsk |= _INTEL_X86_ATTR_U;
					break;
				case INTEL_X86_ATTR_K: /* OS */
					if (modhw & _INTEL_X86_ATTR_K)
						return PFM_ERR_ATTR_SET;
					reg.sel_os = !!ival;
					plmmsk |= _INTEL_X86_ATTR_K;
					umodmsk |= _INTEL_X86_ATTR_K;
					break;
				case INTEL_X86_ATTR_T: /* anythread (v3 and above) */
					if (modhw & _INTEL_X86_ATTR_T)
						return PFM_ERR_ATTR_SET;
					reg.sel_anythr = !!ival;
					umodmsk |= _INTEL_X86_ATTR_T;
					break;
				case INTEL_X86_ATTR_LDLAT: /* load latency */
					if (ival < 3 || ival > 65535)
						return PFM_ERR_ATTR_VAL;
					ldlat = ival;
					break;
				case INTEL_X86_ATTR_INTX: /* in_tx */
					if (modhw & _INTEL_X86_ATTR_INTX)
						return PFM_ERR_ATTR_SET;
					reg.sel_intx = !!ival;
					umodmsk |= _INTEL_X86_ATTR_INTX;
					break;
				case INTEL_X86_ATTR_INTXCP: /* in_tx_cp */
					if (modhw & _INTEL_X86_ATTR_INTXCP)
						return PFM_ERR_ATTR_SET;
					reg.sel_intxcp = !!ival;
					umodmsk |= _INTEL_X86_ATTR_INTXCP;
					break;
			}
		}
	}

	/*
	 * handle case where no priv level mask was passed.
	 * then we use the dfl_plm
	 */
	if (!(plmmsk & (_INTEL_X86_ATTR_K|_INTEL_X86_ATTR_U))) {
		if ((e->dfl_plm & PFM_PLM0) && (pmu->supported_plm & PFM_PLM0))
			reg.sel_os = 1;
		if ((e->dfl_plm & PFM_PLM3) && (pmu->supported_plm & PFM_PLM3))
			reg.sel_usr = 1;
	}
	/*
	 * check that there is at least one unit mask in each unit
	 * mask group
	 */
	if ((ugrpmsk != grpmsk && !intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) || ugrpmsk == 0) {
		/* ugrpmsk now holds only the groups still missing a umask */
		ugrpmsk ^= grpmsk;
		ret = pfm_intel_x86_add_defaults(this, e, ugrpmsk, &umask2, max_grpid);
		if (ret != PFM_SUCCESS)
			return ret;
	}

	ret = intel_x86_check_pebs(this, e);
	if (ret != PFM_SUCCESS)
		return ret;

	/*
	 * check no umask violates the max_grpid constraint
	 */
	if (max_grpid != INTEL_X86_MAX_GRPID) {
		ret = intel_x86_check_max_grpid(this, e, max_grpid);
		if (ret != PFM_SUCCESS) {
			DPRINT("event %s: umask from grp > %d\n", pe[e->event].name, max_grpid);
			return ret;
		}
	}

	/* all required modifiers must have been supplied by the user */
	if (modmsk_r && (umodmsk ^ modmsk_r)) {
		DPRINT("required modifiers missing: 0x%x\n", modmsk_r);
		return PFM_ERR_ATTR;
	}
	/*
	 * reorder all the attributes such that the fstr appears always
	 * the same regardless of how the attributes were submitted.
	 */
	evt_strcat(e->fstr, "%s", pe[e->event].name);
	pfmlib_sort_attr(e);
	for(k=0; k < e->nattrs; k++) {
		a = attr(e, k);
		if (a->ctrl != PFM_ATTR_CTRL_PMU)
			continue;
		if (a->type == PFM_ATTR_UMASK)
			evt_strcat(e->fstr, ":%s", pe[e->event].umasks[a->idx].uname);
		else if (a->type == PFM_ATTR_RAW_UMASK)
			evt_strcat(e->fstr, ":0x%x", a->idx);
	}

	/* Nehalem-style offcore events carry the umask in a second MSR word */
	if (intel_x86_eflag(this, e->event, INTEL_X86_NHM_OFFCORE)) {
		e->codes[1] = umask2;
		e->count = 2;
		umask2 = 0;
	} else {
		e->count = 1;
	}

	if (ldlat && !ldlat_um) {
		DPRINT("passed ldlat= but not using ldlat umask\n");
		return PFM_ERR_ATTR;
	}

	/*
	 * force a default ldlat (will not appear in display_reg)
	 */
	if (ldlat_um && !ldlat) {
		DPRINT("missing ldlat= for umask, forcing to default %d cycles\n", INTEL_X86_LDLAT_DEFAULT);
		ldlat = INTEL_X86_LDLAT_DEFAULT;
	}

	if (ldlat && ldlat_um) {
		e->codes[1] = ldlat;
		e->count = 2;
	}

	/* take into account hardcoded modifiers, so use or on reg.val */
	reg.val     |= (umask1 | umask2)  << 8;

	reg.sel_en   = 1; /* force enable bit to 1 */
	reg.sel_int  = 1; /* force APIC int to 1 */

	e->codes[0] = reg.val;

DPRINT("sel_edge=%d cnt=%d\n", reg.sel_edge, reg.sel_cnt_mask);
	/*
	 * on recent processors (except Atom), edge requires cmask >=1
	 */
	if ((pmu->flags & INTEL_X86_PMU_FL_ECMASK)
	    && reg.sel_edge && !reg.sel_cnt_mask) {
		DPRINT("edge requires cmask >= 1\n");
		return PFM_ERR_ATTR;
	}

	/*
	 * decode ALL modifiers
	 */
	for (k = 0; k < e->npattrs; k++) {
		if (e->pattrs[k].ctrl != PFM_ATTR_CTRL_PMU)
			continue;

		if (e->pattrs[k].type == PFM_ATTR_UMASK)
			continue;

		id = e->pattrs[k].idx;
		switch(id) {
		case INTEL_X86_ATTR_U:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_usr);
			break;
		case INTEL_X86_ATTR_K:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_os);
			break;
		case INTEL_X86_ATTR_E:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_edge);
			break;
		case INTEL_X86_ATTR_I:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_inv);
			break;
		case INTEL_X86_ATTR_C:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_cnt_mask);
			break;
		case INTEL_X86_ATTR_T:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_anythr);
			break;
		case INTEL_X86_ATTR_LDLAT:
			evt_strcat(e->fstr, ":%s=%d", intel_x86_mods[id].name, ldlat);
			break;
		case INTEL_X86_ATTR_INTX:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_intx);
			break;
		case INTEL_X86_ATTR_INTXCP:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_intxcp);
			break;
		}
	}
	return PFM_SUCCESS;
}
 685
 686int
 687pfm_intel_x86_get_encoding(void *this, pfmlib_event_desc_t *e)
 688{
 689        int ret;
 690
 691        ret = pfm_intel_x86_encode_gen(this, e);
 692        if (ret != PFM_SUCCESS)
 693                return ret;
 694
 695        pfm_intel_x86_display_reg(this, e);
 696
 697        return PFM_SUCCESS;
 698}
 699
 700int
 701pfm_intel_x86_get_event_first(void *this)
 702{
 703        pfmlib_pmu_t *p = this;
 704
 705        return p->pme_count ? 0 : -1;
 706}
 707
 708int
 709pfm_intel_x86_get_event_next(void *this, int idx)
 710{
 711        pfmlib_pmu_t *p = this;
 712
 713        if (idx >= (p->pme_count-1))
 714                return -1;
 715
 716        return idx+1;
 717}
 718
 719int
 720pfm_intel_x86_event_is_valid(void *this, int pidx)
 721{
 722        pfmlib_pmu_t *p = this;
 723        return pidx >= 0 && pidx < p->pme_count;
 724}
 725
 726int
 727pfm_intel_x86_validate_table(void *this, FILE *fp)
 728{
 729        pfmlib_pmu_t *pmu = this;
 730        const intel_x86_entry_t *pe = this_pe(this);
 731        int ndfl[INTEL_X86_NUM_GRP];
 732        int i, j, error = 0;
 733        unsigned int u, v;
 734        int npebs;
 735
 736        if (!pmu->atdesc) {
 737                fprintf(fp, "pmu: %s missing attr_desc\n", pmu->name);
 738                error++;
 739        }
 740
 741        if (!pmu->supported_plm && pmu->type == PFM_PMU_TYPE_CORE) {
 742                fprintf(fp, "pmu: %s supported_plm not set\n", pmu->name);
 743                error++;
 744        }
 745
 746        for(i=0; i < pmu->pme_count; i++) {
 747
 748                if (!pe[i].name) {
 749                        fprintf(fp, "pmu: %s event%d: :: no name (prev event was %s)\n", pmu->name, i,
 750                        i > 1 ? pe[i-1].name : "??");
 751                        error++;
 752                }
 753
 754                if (!pe[i].desc) {
 755                        fprintf(fp, "pmu: %s event%d: %s :: no description\n", pmu->name, i, pe[i].name);
 756                        error++;
 757                }
 758
 759                if (!pe[i].cntmsk) {
 760                        fprintf(fp, "pmu: %s event%d: %s :: cntmsk=0\n", pmu->name, i, pe[i].name);
 761                        error++;
 762                }
 763
 764                if (pe[i].numasks && pe[i].ngrp == 0) {
 765                        fprintf(fp, "pmu: %s event%d: %s :: ngrp cannot be zero\n", pmu->name, i, pe[i].name);
 766                        error++;
 767                }
 768
 769                if (pe[i].numasks && pe[i].umasks == NULL) {
 770                        fprintf(fp, "pmu: %s event%d: %s :: numasks but no umasks\n", pmu->name, i, pe[i].name);
 771                        error++;
 772                }
 773
 774                if (pe[i].numasks == 0 && pe[i].umasks) {
 775                        fprintf(fp, "pmu: %s event%d: %s :: numasks=0 but umasks defined\n", pmu->name, i, pe[i].name);
 776                        error++;
 777                }
 778
 779                if (pe[i].numasks == 0 && pe[i].ngrp) {
 780                        fprintf(fp, "pmu: %s event%d: %s :: ngrp must be zero\n", pmu->name, i, pe[i].name);
 781                        error++;
 782                }
 783
 784                if (pe[i].ngrp >= INTEL_X86_NUM_GRP) {
 785                        fprintf(fp, "pmu: %s event%d: %s :: ngrp too big (max=%d)\n", pmu->name, i, pe[i].name, INTEL_X86_NUM_GRP);
 786                        error++;
 787                }
 788
 789                for (j=i+1; j < (int)pmu->pme_count; j++) {
 790                        if (pe[i].code == pe[j].code && !(pe[j].equiv || pe[i].equiv) && pe[j].cntmsk == pe[i].cntmsk) {
 791                                fprintf(fp, "pmu: %s events %s and %s have the same code 0x%x\n", pmu->name, pe[i].name, pe[j].name, pe[i].code);
 792                                error++;
 793                                }
 794                        }
 795
 796                for(j=0; j < INTEL_X86_NUM_GRP; j++)
 797                        ndfl[j] = 0;
 798
 799                for(j=0, npebs = 0; j < (int)pe[i].numasks; j++) {
 800
 801                        if (!pe[i].umasks[j].uname) {
 802                                fprintf(fp, "pmu: %s event%d: %s umask%d :: no name\n", pmu->name, i, pe[i].name, j);
 803                                error++;
 804                        }
 805                        if (pe[i].umasks[j].modhw && (pe[i].umasks[j].modhw | pe[i].modmsk) != pe[i].modmsk) {
 806                                fprintf(fp, "pmu: %s event%d: %s umask%d: %s :: modhw not subset of modmsk\n", pmu->name, i, pe[i].name, j, pe[i].umasks[j].uname);
 807                                error++;
 808                        }
 809
 810                        if (!pe[i].umasks[j].udesc) {
 811                                fprintf(fp, "pmu: %s event%d: umask%d: %s :: no description\n", pmu->name, i, j, pe[i].umasks[j].uname);
 812                                error++;
 813                        }
 814
 815                        if (pe[i].ngrp && pe[i].umasks[j].grpid >= pe[i].ngrp) {
 816                                fprintf(fp, "pmu: %s event%d: %s umask%d: %s :: invalid grpid %d (must be < %d)\n", pmu->name, i, pe[i].name, j, pe[i].umasks[j].uname, pe[i].umasks[j].grpid, pe[i].ngrp);
 817                                error++;
 818                        }
 819                        if (pe[i].umasks[j].uflags & INTEL_X86_DFL)
 820                                ndfl[pe[i].umasks[j].grpid]++;
 821
 822                        if (pe[i].umasks[j].uflags & INTEL_X86_PEBS)
 823                                npebs++;
 824                }
 825
 826                if (npebs && !intel_x86_eflag(this, i, INTEL_X86_PEBS)) {
 827                        fprintf(fp, "pmu: %s event%d: %s, pebs umasks but event pebs flag not set\n", pmu->name, i, pe[i].name);
 828                        error++;
 829                }
 830
 831                if (intel_x86_eflag(this, i, INTEL_X86_PEBS) && pe[i].numasks && npebs == 0) {
 832                        fprintf(fp, "pmu: %s event%d: %s, pebs event flag but not umask has pebs flag\n", pmu->name, i, pe[i].name);
 833                        error++;
 834                }
 835
 836                /* if only one umask, then ought to be default */
 837                if (pe[i].numasks == 1 && !(pe[i].umasks[0].uflags & INTEL_X86_DFL)) {
 838                        fprintf(fp, "pmu: %s event%d: %s, only one umask but no default\n", pmu->name, i, pe[i].name);
 839                        error++;
 840                }
 841
 842                if (pe[i].numasks) {
 843                        unsigned int *dfl_model = malloc(sizeof(*dfl_model) * pe[i].numasks);
 844                        if (!dfl_model)
 845                                goto skip_dfl;
 846                        for(u=0; u < pe[i].ngrp; u++) {
 847                                int l = 0, m;
 848                                for (v = 0; v < pe[i].numasks; v++) {
 849                                        if (pe[i].umasks[v].grpid != u)
 850                                                continue;
 851                                        if (pe[i].umasks[v].uflags & INTEL_X86_DFL) {
 852                                                for (m = 0; m < l; m++) {
 853                                                        if (dfl_model[m] == pe[i].umasks[v].umodel || dfl_model[m] == 0) {
 854                                                                fprintf(fp, "pmu: %s event%d: %s grpid %d has 2 default umasks\n", pmu->name, i, pe[i].name, u);
 855                                                                error++;
 856                                                        }
 857                                                }
 858                                                if (m == l)
 859                                                        dfl_model[l++] = pe[i].umasks[v].umodel;
 860                                        }
 861                                }
 862                        }
 863                        free(dfl_model);
 864                }
 865skip_dfl:
 866
 867                if (pe[i].flags & INTEL_X86_NCOMBO) {
 868                        fprintf(fp, "pmu: %s event%d: %s :: NCOMBO is unit mask only flag\n", pmu->name, i, pe[i].name);
 869                        error++;
 870                }
 871
 872                for(u=0; u < pe[i].numasks; u++) {
 873
 874                        if (pe[i].umasks[u].uequiv)
 875                                continue;
 876
 877                        if (pe[i].umasks[u].uflags & INTEL_X86_NCOMBO)
 878                                continue;
 879
 880                        for(v=j+1; v < pe[i].numasks; v++) {
 881                                if (pe[i].umasks[v].uequiv)
 882                                        continue;
 883                                if (pe[i].umasks[v].uflags & INTEL_X86_NCOMBO)
 884                                        continue;
 885                                if (pe[i].umasks[v].grpid != pe[i].umasks[u].grpid)
 886                                        continue;
 887                                if ((pe[i].umasks[u].ucode & pe[i].umasks[v].ucode) && pe[i].umasks[u].umodel == pe[i].umasks[v].umodel) {
 888                                        fprintf(fp, "pmu: %s event%d: %s :: umask %s and %s have overlapping code bits\n", pmu->name, i, pe[i].name, pe[i].umasks[u].uname, pe[i].umasks[v].uname);
 889                                        error++;
 890                                }
 891                        }
 892                }
 893        }
 894        return error ? PFM_ERR_INVAL : PFM_SUCCESS;
 895}
 896
 897int
 898pfm_intel_x86_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info)
 899{
 900        const intel_x86_entry_t *pe = this_pe(this);
 901        const pfmlib_attr_desc_t *atdesc = this_atdesc(this);
 902        int numasks, idx;
 903
 904        numasks = intel_x86_num_umasks(this, pidx);
 905        if (attr_idx < numasks) {
 906                idx = intel_x86_attr2umask(this, pidx, attr_idx);
 907                info->name = pe[pidx].umasks[idx].uname;
 908                info->desc = pe[pidx].umasks[idx].udesc;
 909                info->equiv= pe[pidx].umasks[idx].uequiv;
 910
 911                info->code = pe[pidx].umasks[idx].ucode;
 912                if (!intel_x86_uflag(this, pidx, idx, INTEL_X86_CODE_OVERRIDE))
 913                        info->code >>= 8;
 914
 915                info->type = PFM_ATTR_UMASK;
 916                info->is_dfl = intel_x86_uflag(this, pidx, idx, INTEL_X86_DFL);
 917                info->is_precise = intel_x86_uflag(this, pidx, idx, INTEL_X86_PEBS);
 918        } else {
 919                idx = intel_x86_attr2mod(this, pidx, attr_idx);
 920                info->name = atdesc[idx].name;
 921                info->desc = atdesc[idx].desc;
 922                info->type = atdesc[idx].type;
 923                info->equiv= NULL;
 924                info->code = idx;
 925                info->is_dfl = 0;
 926                info->is_precise = 0;
 927        }
 928
 929        info->ctrl = PFM_ATTR_CTRL_PMU;
 930        info->idx = idx; /* namespace specific index */
 931        info->dfl_val64 = 0;
 932
 933        return PFM_SUCCESS;
 934}
 935
 936int
 937pfm_intel_x86_get_event_info(void *this, int idx, pfm_event_info_t *info)
 938{
 939        const intel_x86_entry_t *pe = this_pe(this);
 940        pfmlib_pmu_t *pmu = this;
 941
 942        info->name  = pe[idx].name;
 943        info->desc  = pe[idx].desc;
 944        info->code  = pe[idx].code;
 945        info->equiv = pe[idx].equiv;
 946        info->idx   = idx; /* private index */
 947        info->pmu   = pmu->pmu;
 948        /*
 949         * no    umask: event supports PEBS
 950         * with umasks: at least one umask supports PEBS
 951         */
 952        info->is_precise = intel_x86_eflag(this, idx, INTEL_X86_PEBS);
 953
 954        info->nattrs  = intel_x86_num_umasks(this, idx);
 955        info->nattrs += intel_x86_num_mods(this, idx);
 956
 957        return PFM_SUCCESS;
 958}
 959
 960int
 961pfm_intel_x86_valid_pebs(pfmlib_event_desc_t *e)
 962{
 963        pfm_event_attr_info_t *a;
 964        int i, npebs = 0, numasks = 0;
 965
 966        /* first check at the event level */
 967        if (intel_x86_eflag(e->pmu, e->event, INTEL_X86_PEBS))
 968                return PFM_SUCCESS;
 969
 970        /*
 971         * next check the umasks
 972         *
 973         * we do not assume we are calling after
 974         * pfm_intel_x86_ge_event_encoding(), therefore
 975         * we check the unit masks again.
 976         * They must all be PEBS-capable.
 977         */
 978        for(i=0; i < e->nattrs; i++) {
 979
 980                a = attr(e, i);
 981
 982                if (a->ctrl != PFM_ATTR_CTRL_PMU || a->type != PFM_ATTR_UMASK)
 983                        continue;
 984
 985                numasks++;
 986                if (intel_x86_uflag(e->pmu, e->event, a->idx, INTEL_X86_PEBS))
 987                        npebs++;
 988        }
 989        return npebs == numasks ? PFM_SUCCESS : PFM_ERR_FEATCOMB;
 990}
 991
 992unsigned int
 993pfm_intel_x86_get_event_nattrs(void *this, int pidx)
 994{
 995        unsigned int nattrs;
 996        nattrs  = intel_x86_num_umasks(this, pidx);
 997        nattrs += intel_x86_num_mods(this, pidx);
 998        return nattrs;
 999}
1000
1001int
1002pfm_intel_x86_can_auto_encode(void *this, int pidx, int uidx)
1003{
1004        int numasks;
1005
1006        if (intel_x86_eflag(this, pidx, INTEL_X86_NO_AUTOENCODE))
1007                return 0;
1008
1009        numasks = intel_x86_num_umasks(this, pidx);
1010        if (uidx >= numasks)
1011                return 0;
1012
1013        return !intel_x86_uflag(this, pidx, uidx, INTEL_X86_NO_AUTOENCODE);
1014}
1015