parlib: Expand our printf hacks
[akaros.git] / user / perfmon / pfmlib_intel_x86.c
1 /* pfmlib_intel_x86.c : common code for Intel X86 processors
2  *
3  * Copyright (c) 2009 Google, Inc
4  * Contributed by Stephane Eranian <eranian@gmail.com>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
10  * of the Software, and to permit persons to whom the Software is furnished to do so,
11  * subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
17  * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
18  * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
20  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
21  * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  *
23  * This file implements the common code for all Intel X86 processors.
24  */
25 #include <sys/types.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include <stdio.h>
29 #include <stdarg.h>
30
31 /* private headers */
32 #include "pfmlib_priv.h"
33 #include "pfmlib_intel_x86_priv.h"
34
/*
 * Table of hardware event modifiers common to all Intel X86 core PMUs.
 * Order matters: entries are indexed by the INTEL_X86_ATTR_* constants
 * used in pfm_intel_x86_encode_gen() below. PFM_ATTR_B = boolean
 * modifier, PFM_ATTR_I = integer modifier.
 */
const pfmlib_attr_desc_t intel_x86_mods[]={
	PFM_ATTR_B("k", "monitor at priv level 0"),		/* monitor priv level 0 */
	PFM_ATTR_B("u", "monitor at priv level 1, 2, 3"),	/* monitor priv level 1, 2, 3 */
	PFM_ATTR_B("e", "edge level (may require counter-mask >= 1)"), /* edge */
	PFM_ATTR_B("i", "invert"),				/* invert */
	PFM_ATTR_I("c", "counter-mask in range [0-255]"),	/* counter-mask */
	PFM_ATTR_B("t", "measure any thread"),			/* monitor on both threads */
	PFM_ATTR_I("ldlat", "load latency threshold (cycles, [3-65535])"),	/* load latency threshold */
	PFM_ATTR_B("intx", "monitor only inside transactional memory region"),
	PFM_ATTR_B("intxcp", "do not count occurrences inside aborted transactional memory region"),
	PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */
};
47
48 pfm_intel_x86_config_t pfm_intel_x86_cfg;
49
50 /*
51  * .byte 0x53 == push ebx. it's universal for 32 and 64 bit
52  * .byte 0x5b == pop ebx.
53  * Some gcc's (4.1.2 on Core2) object to pairing push/pop and ebx in 64 bit mode.
54  * Using the opcode directly avoids this problem.
55  */
/*
 * Execute the CPUID instruction for leaf 'op'.
 * eax/ebx/ecx/edx results are stored through *a/*b/*c/*d.
 * ebx is preserved via the raw push/pop opcodes (see comment above);
 * its value is forwarded through esi, hence the "=S" output constraint.
 */
static inline void
cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, unsigned int *d)
{
  __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\tmovl %%ebx, %%esi\n\t.byte 0x5b"
       : "=a" (*a),
	     "=S" (*b),
		 "=c" (*c),
		 "=d" (*d)
       : "a" (op));
}
66
67 static void
68 pfm_intel_x86_display_reg(void *this, pfmlib_event_desc_t *e)
69 {
70         const intel_x86_entry_t *pe = this_pe(this);
71         pfm_intel_x86_reg_t reg;
72         int i;
73
74         reg.val = e->codes[0];
75
76         /*
77          * handle generic counters
78          */
79         __pfm_vbprintf("[0x%"PRIx64" event_sel=0x%x umask=0x%x os=%d usr=%d "
80                        "en=%d int=%d inv=%d edge=%d cnt_mask=%d",
81                         reg.val,
82                         reg.sel_event_select,
83                         reg.sel_unit_mask,
84                         reg.sel_os,
85                         reg.sel_usr,
86                         reg.sel_en,
87                         reg.sel_int,
88                         reg.sel_inv,
89                         reg.sel_edge,
90                         reg.sel_cnt_mask);
91
92         if (pe[e->event].modmsk & _INTEL_X86_ATTR_T)
93                 __pfm_vbprintf(" any=%d", reg.sel_anythr);
94
95         __pfm_vbprintf("]", e->fstr);
96
97         for (i = 1 ; i < e->count; i++)
98                 __pfm_vbprintf(" [0x%"PRIx64"]", e->codes[i]);
99
100         __pfm_vbprintf(" %s\n", e->fstr);
101
102 }
103
104 /*
105  * number of HW modifiers
106  */
107 static int
108 intel_x86_num_mods(void *this, int idx)
109 {
110         const intel_x86_entry_t *pe = this_pe(this);
111         unsigned int mask;
112
113         mask = pe[idx].modmsk;
114         return pfmlib_popcnt(mask);
115 }
116
117 int
118 intel_x86_attr2mod(void *this, int pidx, int attr_idx)
119 {
120         const intel_x86_entry_t *pe = this_pe(this);
121         size_t x;
122         int n, numasks;
123
124         numasks = intel_x86_num_umasks(this, pidx);
125         n = attr_idx - numasks;
126
127         pfmlib_for_each_bit(x, pe[pidx].modmsk) {
128                 if (n == 0)
129                         break;
130                 n--;
131         }
132         return x;
133 }
134
135 /*
136  * detect processor model using cpuid()
137  * based on documentation
138  * http://www.intel.com/Assets/PDF/appnote/241618.pdf
139  */
140 int
141 pfm_intel_x86_detect(void)
142 {
143         unsigned int a, b, c, d;
144         char buffer[64];
145
146         if (pfm_intel_x86_cfg.family)
147                 return PFM_SUCCESS;
148
149         cpuid(0, &a, &b, &c, &d);
150         strncpy(&buffer[0], (char *)(&b), 4);
151         strncpy(&buffer[4], (char *)(&d), 4);
152         strncpy(&buffer[8], (char *)(&c), 4);
153         buffer[12] = '\0';
154
155         /* must be Intel */
156         if (strcmp(buffer, "GenuineIntel"))
157                 return PFM_ERR_NOTSUPP;
158
159         cpuid(1, &a, &b, &c, &d);
160
161         pfm_intel_x86_cfg.family = (a >> 8) & 0xf;  // bits 11 - 8
162         pfm_intel_x86_cfg.model  = (a >> 4) & 0xf;  // Bits  7 - 4
163         pfm_intel_x86_cfg.stepping = a & 0xf;       // Bits 0 - 3
164
165         /* extended family */
166         if (pfm_intel_x86_cfg.family == 0xf)
167                 pfm_intel_x86_cfg.family += (a >> 20) & 0xff;
168
169         /* extended model */
170         if (pfm_intel_x86_cfg.family >= 0x6)
171                 pfm_intel_x86_cfg.model += ((a >> 16) & 0xf) << 4;
172
173         return PFM_SUCCESS;
174 }
175
176 int pfm_intel_x86_model_detect(void *this)
177 {
178         pfmlib_pmu_t *pmu = this;
179         const int *p;
180         int ret;
181
182         ret = pfm_intel_x86_detect();
183         if (ret != PFM_SUCCESS)
184                 return ret;
185
186         if (pfm_intel_x86_cfg.family != pmu->cpu_family)
187                 return PFM_ERR_NOTSUPP;
188
189         for (p = pmu->cpu_models; *p; p++) {
190                 if (*p == pfm_intel_x86_cfg.model)
191                         return PFM_SUCCESS;
192         }
193         return PFM_ERR_NOTSUPP;
194 }
195
/*
 * For every umask group whose bit is set in 'msk' (i.e., groups the
 * user supplied no umask for), append that group's default umask(s)
 * to e->attrs and OR their codes into *umask.
 *
 * Returns PFM_ERR_UMASK when a group has no usable default (unless the
 * group was deliberately skipped via GRP_DFL_NONE or the max_grpid
 * exclusion), PFM_SUCCESS otherwise. On success e->nattrs is updated
 * to include the appended defaults.
 */
int
pfm_intel_x86_add_defaults(void *this, pfmlib_event_desc_t *e,
			   unsigned int msk,
			   uint64_t *umask,
			   unsigned int max_grpid)
{
	const intel_x86_entry_t *pe = this_pe(this);
	const intel_x86_entry_t *ent;
	unsigned int i;
	int j, k, added, skip;
	int idx;

	/* k = next free slot in e->attrs; defaults are appended after user attrs */
	k = e->nattrs;
	ent = pe+e->event;

	/* walk each group bit; i tracks the group id of the current bit */
	for(i=0; msk; msk >>=1, i++) {

		if (!(msk & 0x1))
			continue;

		added = skip = 0;
		/*
		 * must scan list of possible attributes
		 * (not all possible attributes)
		 */
		for (j = 0; j < e->npattrs; j++) {
			if (e->pattrs[j].ctrl != PFM_ATTR_CTRL_PMU)
				continue;

			if (e->pattrs[j].type != PFM_ATTR_UMASK)
				continue;

			idx = e->pattrs[j].idx;

			if (ent->umasks[idx].grpid != i)
				continue;

			/* groups beyond max_grpid are excluded, not errors */
			if (max_grpid != INTEL_X86_MAX_GRPID && i > max_grpid) {
				skip = 1;
				continue;
			}

			/* group explicitly declares it has no default */
			if (intel_x86_uflag(this, e->event, idx, INTEL_X86_GRP_DFL_NONE)) {
				skip = 1;
				continue;
			}

			/* umask is default for group */
			if (intel_x86_uflag(this, e->event, idx, INTEL_X86_DFL)) {
				DPRINT("added default %s for group %d j=%d idx=%d ucode=0x%"PRIx64"\n",
					ent->umasks[idx].uname,
					i,
					j,
					idx,
					ent->umasks[idx].ucode);
				/*
				 * default could be an alias, but
				 * ucode must reflect actual code
				 */
				*umask |= ent->umasks[idx].ucode >> 8;

				e->attrs[k].id = j; /* pattrs index */
				e->attrs[k].ival = 0;
				k++;

				added++;
				/* exclusive-group events: one default is enough, stop */
				if (intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL))
					goto done;

				/* default may itself cap which later groups are allowed */
				if (intel_x86_uflag(this, e->event, idx, INTEL_X86_EXCL_GRP_GT)) {
					if (max_grpid != INTEL_X86_MAX_GRPID) {
						DPRINT("two max_grpid, old=%d new=%d\n", max_grpid, ent->umasks[idx].grpid);
						return PFM_ERR_UMASK;
					}
					max_grpid = ent->umasks[idx].grpid;
				}
			}
		}
		if (!added && !skip) {
			DPRINT("no default found for event %s unit mask group %d (max_grpid=%d)\n", ent->name, i, max_grpid);
			return PFM_ERR_UMASK;
		}
	}
	DPRINT("max_grpid=%d nattrs=%d k=%d umask=0x%"PRIx64"\n", max_grpid, e->nattrs, k, *umask);
done:
	e->nattrs = k;
	return PFM_SUCCESS;
}
284
/*
 * Verify that if PEBS was requested, every user-selected umask actually
 * supports PEBS (mixing PEBS and non-PEBS umasks cannot be encoded).
 *
 * NOTE(review): the #if 1 block below short-circuits the whole check —
 * everything after it is currently dead code. This looks like a
 * deliberate port-time hack (the commented-out condition suggests the
 * intent was "skip only when PEBS was not requested") — confirm before
 * removing.
 */
static int
intel_x86_check_pebs(void *this, pfmlib_event_desc_t *e)
{
	const intel_x86_entry_t *pe = this_pe(this);
	pfm_event_attr_info_t *a;
	int numasks = 0, pebs = 0;
	int i;

#if 1
	if (1) // !intel_x86_requesting_pebs(e))
		return PFM_SUCCESS;
#endif

	/*
	 * if event has no umask and is PEBS, then we are okay
	 */
	if (!pe[e->event].numasks
	    && intel_x86_eflag(this, e->event, INTEL_X86_PEBS))
		return PFM_SUCCESS;

	/*
	 * if the event sets PEBS, then it means at least one umask
	 * supports PEBS, so we need to check
	 */
	for (i = 0; i < e->nattrs; i++) {
		a = attr(e, i);

		if (a->ctrl != PFM_ATTR_CTRL_PMU)
			continue;

		if (a->type == PFM_ATTR_UMASK) {
			/* count number of umasks */
			numasks++;
			/* and those that support PEBS */
			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_PEBS))
				pebs++;
		}
	}
	/*
	 * pass if user requested only PEBS  umasks
	 */
	return pebs != numasks ? PFM_ERR_FEATCOMB : PFM_SUCCESS;
}
328
329 static int
330 intel_x86_check_max_grpid(void *this, pfmlib_event_desc_t *e, int max_grpid)
331 {
332         const intel_x86_entry_t *pe;
333         pfm_event_attr_info_t *a;
334         int i, grpid;
335
336         DPRINT("check: max_grpid=%d\n", max_grpid);
337         pe = this_pe(this);
338
339         for (i = 0; i < e->nattrs; i++) {
340                 a = attr(e, i);
341
342                 if (a->ctrl != PFM_ATTR_CTRL_PMU)
343                         continue;
344
345                 if (a->type == PFM_ATTR_UMASK) {
346                         grpid = pe[e->event].umasks[a->idx].grpid;
347                         if (grpid > max_grpid)
348                                 return PFM_ERR_FEATCOMB;
349                 }
350         }
351         return PFM_SUCCESS;
352 }
353
/*
 * Core encoder: translate event descriptor 'e' (event + user attributes)
 * into the event-select register value(s) in e->codes[] and the fully
 * qualified event string in e->fstr.
 *
 * Pass 1 walks user attributes, accumulating umask bits (umask2), group
 * bookkeeping (NCOMBO / exclusive-group / max_grpid constraints), and
 * modifier bits. Defaults are then filled in for untouched umask groups,
 * constraints are validated, and pass 2 re-walks all possible modifiers
 * to render the canonical fstr.
 *
 * Returns PFM_SUCCESS or a PFM_ERR_* code on invalid combinations.
 */
static int
pfm_intel_x86_encode_gen(void *this, pfmlib_event_desc_t *e)

{
	pfmlib_pmu_t *pmu = this;
	pfm_event_attr_info_t *a;
	const intel_x86_entry_t *pe;
	pfm_intel_x86_reg_t reg;
	unsigned int grpmsk, ugrpmsk = 0;
	uint64_t umask1, umask2, ucode, last_ucode = ~0ULL;
	unsigned int modhw = 0;		/* modifiers hardcoded by selected umasks */
	unsigned int plmmsk = 0;	/* priv-level modifiers the user supplied */
	int umodmsk = 0, modmsk_r = 0;	/* user-set modifiers / required modifiers */
	int k, ret, id;
	unsigned int max_grpid = INTEL_X86_MAX_GRPID;
	unsigned int last_grpid =  INTEL_X86_MAX_GRPID;
	unsigned int grpid;
	int ldlat = 0, ldlat_um = 0;
	int grpcounts[INTEL_X86_NUM_GRP];	/* umasks selected per group */
	int ncombo[INTEL_X86_NUM_GRP];		/* group contains an NCOMBO umask */

	memset(grpcounts, 0, sizeof(grpcounts));
	memset(ncombo, 0, sizeof(ncombo));

	pe     = this_pe(this);

	e->fstr[0] = '\0';

	/*
	 * preset certain fields from event code
	 * including modifiers
	 */
	reg.val = pe[e->event].code;

	/* bitmask with one bit per umask group of this event */
	grpmsk = (1 << pe[e->event].ngrp)-1;

	/* take into account hardcoded umask */
	umask1 = (reg.val >> 8) & 0xff;
	umask2 = 0;

	modmsk_r = pe[e->event].modmsk_req;

	/* pass 1: process user-supplied attributes */
	for (k = 0; k < e->nattrs; k++) {
		a = attr(e, k);

		if (a->ctrl != PFM_ATTR_CTRL_PMU)
			continue;

		if (a->type == PFM_ATTR_UMASK) {
			grpid = pe[e->event].umasks[a->idx].grpid;

			/*
			 * certain event groups are meant to be
			 * exclusive, i.e., only unit masks of one group
			 * can be used
			 */
			if (last_grpid != INTEL_X86_MAX_GRPID && grpid != last_grpid
			    && intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) {
				DPRINT("exclusive unit mask group error\n");
				return PFM_ERR_FEATCOMB;
			}
			/*
			 * selecting certain umasks in a group may exclude any umasks
			 * from any groups with a higher index
			 *
			 * enforcement requires looking at the grpid of all the umasks
			 */
			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_EXCL_GRP_GT))
				max_grpid = grpid;

			/*
			 * upper layer has removed duplicates
			 * so if we come here more than once, it is for two
			 * disinct umasks
			 *
			 * NCOMBO=no combination of unit masks within the same
			 * umask group
			 */
			++grpcounts[grpid];

			/* mark that we have a umask with NCOMBO in this group */
			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_NCOMBO))
				ncombo[grpid] = 1;

			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_LDLAT))
				ldlat_um = 1;
			/*
			 * if more than one umask in this group but one is marked
			 * with ncombo, then fail. It is okay to combine umask within
			 * a group as long as none is tagged with NCOMBO
			 */
			if (grpcounts[grpid] > 1 && ncombo[grpid])  {
				DPRINT("umask %s does not support unit mask combination within group %d\n", pe[e->event].umasks[a->idx].uname, grpid);
				return PFM_ERR_FEATCOMB;
			}

			last_grpid = grpid;
			ucode     = pe[e->event].umasks[a->idx].ucode;
			modhw    |= pe[e->event].umasks[a->idx].modhw;
			umask2   |= ucode >> 8;
			ugrpmsk  |= 1 << pe[e->event].umasks[a->idx].grpid;

			modmsk_r |= pe[e->event].umasks[a->idx].umodmsk_req;

			/* some umasks override the event-select code itself */
			if (intel_x86_uflag(this, e->event, a->idx, INTEL_X86_CODE_OVERRIDE)) {
				if (last_ucode != ~0ULL && (ucode & 0xff) != last_ucode) {
					DPRINT("cannot override event with two different codes for %s\n", pe[e->event].name);
					return PFM_ERR_FEATCOMB;
				}
				last_ucode = ucode & 0xff;
				reg.sel_event_select = last_ucode;
			}
		} else if (a->type == PFM_ATTR_RAW_UMASK) {

			/* there can only be one RAW_UMASK per event */

			/* sanity check */
			if (a->idx & ~0xff) {
				DPRINT("raw umask is 8-bit wide\n");
				return PFM_ERR_ATTR;
			}
			/* override umask */
			umask2 = a->idx & 0xff;
			ugrpmsk = grpmsk;
		} else {
			/* plain modifier: reject if a selected umask hardcodes it */
			uint64_t ival = e->attrs[k].ival;
			switch(a->idx) {
				case INTEL_X86_ATTR_I: /* invert */
					if (modhw & _INTEL_X86_ATTR_I)
						return PFM_ERR_ATTR_SET;
					reg.sel_inv = !!ival;
					umodmsk |= _INTEL_X86_ATTR_I;
					break;
				case INTEL_X86_ATTR_E: /* edge */
					if (modhw & _INTEL_X86_ATTR_E)
						return PFM_ERR_ATTR_SET;
					reg.sel_edge = !!ival;
					umodmsk |= _INTEL_X86_ATTR_E;
					break;
				case INTEL_X86_ATTR_C: /* counter-mask */
					if (modhw & _INTEL_X86_ATTR_C)
						return PFM_ERR_ATTR_SET;
					if (ival > 255)
						return PFM_ERR_ATTR_VAL;
					reg.sel_cnt_mask = ival;
					umodmsk |= _INTEL_X86_ATTR_C;
					break;
				case INTEL_X86_ATTR_U: /* USR */
					if (modhw & _INTEL_X86_ATTR_U)
						return PFM_ERR_ATTR_SET;
					reg.sel_usr = !!ival;
					plmmsk |= _INTEL_X86_ATTR_U;
					umodmsk |= _INTEL_X86_ATTR_U;
					break;
				case INTEL_X86_ATTR_K: /* OS */
					if (modhw & _INTEL_X86_ATTR_K)
						return PFM_ERR_ATTR_SET;
					reg.sel_os = !!ival;
					plmmsk |= _INTEL_X86_ATTR_K;
					umodmsk |= _INTEL_X86_ATTR_K;
					break;
				case INTEL_X86_ATTR_T: /* anythread (v3 and above) */
					if (modhw & _INTEL_X86_ATTR_T)
						return PFM_ERR_ATTR_SET;
					reg.sel_anythr = !!ival;
					umodmsk |= _INTEL_X86_ATTR_T;
					break;
				case INTEL_X86_ATTR_LDLAT: /* load latency */
					if (ival < 3 || ival > 65535)
						return PFM_ERR_ATTR_VAL;
					ldlat = ival;
					break;
				case INTEL_X86_ATTR_INTX: /* in_tx */
					if (modhw & _INTEL_X86_ATTR_INTX)
						return PFM_ERR_ATTR_SET;
					reg.sel_intx = !!ival;
					umodmsk |= _INTEL_X86_ATTR_INTX;
					break;
				case INTEL_X86_ATTR_INTXCP: /* in_tx_cp */
					if (modhw & _INTEL_X86_ATTR_INTXCP)
						return PFM_ERR_ATTR_SET;
					reg.sel_intxcp = !!ival;
					umodmsk |= _INTEL_X86_ATTR_INTXCP;
					break;
			}
		}
	}

	/*
	 * handle case where no priv level mask was passed.
	 * then we use the dfl_plm
	 */
	if (!(plmmsk & (_INTEL_X86_ATTR_K|_INTEL_X86_ATTR_U))) {
		if ((e->dfl_plm & PFM_PLM0) && (pmu->supported_plm & PFM_PLM0))
			reg.sel_os = 1;
		if ((e->dfl_plm & PFM_PLM3) && (pmu->supported_plm & PFM_PLM3))
			reg.sel_usr = 1;
	}
	/*
	 * check that there is at least of unit mask in each unit
	 * mask group
	 */
	if ((ugrpmsk != grpmsk && !intel_x86_eflag(this, e->event, INTEL_X86_GRP_EXCL)) || ugrpmsk == 0) {
		/* ugrpmsk now holds only the groups still missing a umask */
		ugrpmsk ^= grpmsk;
		ret = pfm_intel_x86_add_defaults(this, e, ugrpmsk, &umask2, max_grpid);
		if (ret != PFM_SUCCESS)
			return ret;
	}

	ret = intel_x86_check_pebs(this, e);
	if (ret != PFM_SUCCESS)
		return ret;

	/*
	 * check no umask violates the max_grpid constraint
	 */
	if (max_grpid != INTEL_X86_MAX_GRPID) {
		ret = intel_x86_check_max_grpid(this, e, max_grpid);
		if (ret != PFM_SUCCESS) {
			DPRINT("event %s: umask from grp > %d\n", pe[e->event].name, max_grpid);
			return ret;
		}
	}

	/* every required modifier must have been supplied by the user */
	if (modmsk_r && (umodmsk ^ modmsk_r)) {
		DPRINT("required modifiers missing: 0x%x\n", modmsk_r);
		return PFM_ERR_ATTR;
	}
	/*
	 * reorder all the attributes such that the fstr appears always
	 * the same regardless of how the attributes were submitted.
	 */
	evt_strcat(e->fstr, "%s", pe[e->event].name);
	pfmlib_sort_attr(e);
	for(k=0; k < e->nattrs; k++) {
		a = attr(e, k);
		if (a->ctrl != PFM_ATTR_CTRL_PMU)
			continue;
		if (a->type == PFM_ATTR_UMASK)
			evt_strcat(e->fstr, ":%s", pe[e->event].umasks[a->idx].uname);
		else if (a->type == PFM_ATTR_RAW_UMASK)
			evt_strcat(e->fstr, ":0x%x", a->idx);
	}

	/* Nehalem offcore: umask goes in a separate second code, not in codes[0] */
	if (intel_x86_eflag(this, e->event, INTEL_X86_NHM_OFFCORE)) {
		e->codes[1] = umask2;
		e->count = 2;
		umask2 = 0;
	} else {
		e->count = 1;
	}

	if (ldlat && !ldlat_um) {
		DPRINT("passed ldlat= but not using ldlat umask\n");
		return PFM_ERR_ATTR;
	}

	/*
	 * force a default ldlat (will not appear in display_reg)
	 */
	if (ldlat_um && !ldlat) {
		DPRINT("missing ldlat= for umask, forcing to default %d cycles\n", INTEL_X86_LDLAT_DEFAULT);
		ldlat = INTEL_X86_LDLAT_DEFAULT;
	}

	if (ldlat && ldlat_um) {
		e->codes[1] = ldlat;
		e->count = 2;
	}

	/* take into account hardcoded modifiers, so use or on reg.val */
	reg.val     |= (umask1 | umask2)  << 8;

	reg.sel_en   = 1; /* force enable bit to 1 */
	reg.sel_int  = 1; /* force APIC int to 1 */

	e->codes[0] = reg.val;

DPRINT("sel_edge=%d cnt=%d\n", reg.sel_edge, reg.sel_cnt_mask);
	/*
	 * on recent processors (except Atom), edge requires cmask >=1
	 */
	if ((pmu->flags & INTEL_X86_PMU_FL_ECMASK)
	    && reg.sel_edge && !reg.sel_cnt_mask) {
		DPRINT("edge requires cmask >= 1\n");
		return PFM_ERR_ATTR;
	}

	/*
	 * decode ALL modifiers
	 */
	for (k = 0; k < e->npattrs; k++) {
		if (e->pattrs[k].ctrl != PFM_ATTR_CTRL_PMU)
			continue;

		if (e->pattrs[k].type == PFM_ATTR_UMASK)
			continue;

		id = e->pattrs[k].idx;
		switch(id) {
		case INTEL_X86_ATTR_U:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_usr);
			break;
		case INTEL_X86_ATTR_K:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_os);
			break;
		case INTEL_X86_ATTR_E:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_edge);
			break;
		case INTEL_X86_ATTR_I:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_inv);
			break;
		case INTEL_X86_ATTR_C:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_cnt_mask);
			break;
		case INTEL_X86_ATTR_T:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_anythr);
			break;
		case INTEL_X86_ATTR_LDLAT:
			evt_strcat(e->fstr, ":%s=%d", intel_x86_mods[id].name, ldlat);
			break;
		case INTEL_X86_ATTR_INTX:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_intx);
			break;
		case INTEL_X86_ATTR_INTXCP:
			evt_strcat(e->fstr, ":%s=%lu", intel_x86_mods[id].name, reg.sel_intxcp);
			break;
		}
	}
	return PFM_SUCCESS;
}
685
686 int
687 pfm_intel_x86_get_encoding(void *this, pfmlib_event_desc_t *e)
688 {
689         int ret;
690
691         ret = pfm_intel_x86_encode_gen(this, e);
692         if (ret != PFM_SUCCESS)
693                 return ret;
694
695         pfm_intel_x86_display_reg(this, e);
696
697         return PFM_SUCCESS;
698 }
699
700 int
701 pfm_intel_x86_get_event_first(void *this)
702 {
703         pfmlib_pmu_t *p = this;
704
705         return p->pme_count ? 0 : -1;
706 }
707
708 int
709 pfm_intel_x86_get_event_next(void *this, int idx)
710 {
711         pfmlib_pmu_t *p = this;
712
713         if (idx >= (p->pme_count-1))
714                 return -1;
715
716         return idx+1;
717 }
718
719 int
720 pfm_intel_x86_event_is_valid(void *this, int pidx)
721 {
722         pfmlib_pmu_t *p = this;
723         return pidx >= 0 && pidx < p->pme_count;
724 }
725
726 int
727 pfm_intel_x86_validate_table(void *this, FILE *fp)
728 {
729         pfmlib_pmu_t *pmu = this;
730         const intel_x86_entry_t *pe = this_pe(this);
731         int ndfl[INTEL_X86_NUM_GRP];
732         int i, j, error = 0;
733         unsigned int u, v;
734         int npebs;
735
736         if (!pmu->atdesc) {
737                 fprintf(fp, "pmu: %s missing attr_desc\n", pmu->name);
738                 error++;
739         }
740
741         if (!pmu->supported_plm && pmu->type == PFM_PMU_TYPE_CORE) {
742                 fprintf(fp, "pmu: %s supported_plm not set\n", pmu->name);
743                 error++;
744         }
745
746         for(i=0; i < pmu->pme_count; i++) {
747
748                 if (!pe[i].name) {
749                         fprintf(fp, "pmu: %s event%d: :: no name (prev event was %s)\n", pmu->name, i,
750                         i > 1 ? pe[i-1].name : "??");
751                         error++;
752                 }
753
754                 if (!pe[i].desc) {
755                         fprintf(fp, "pmu: %s event%d: %s :: no description\n", pmu->name, i, pe[i].name);
756                         error++;
757                 }
758
759                 if (!pe[i].cntmsk) {
760                         fprintf(fp, "pmu: %s event%d: %s :: cntmsk=0\n", pmu->name, i, pe[i].name);
761                         error++;
762                 }
763
764                 if (pe[i].numasks && pe[i].ngrp == 0) {
765                         fprintf(fp, "pmu: %s event%d: %s :: ngrp cannot be zero\n", pmu->name, i, pe[i].name);
766                         error++;
767                 }
768
769                 if (pe[i].numasks && pe[i].umasks == NULL) {
770                         fprintf(fp, "pmu: %s event%d: %s :: numasks but no umasks\n", pmu->name, i, pe[i].name);
771                         error++;
772                 }
773
774                 if (pe[i].numasks == 0 && pe[i].umasks) {
775                         fprintf(fp, "pmu: %s event%d: %s :: numasks=0 but umasks defined\n", pmu->name, i, pe[i].name);
776                         error++;
777                 }
778
779                 if (pe[i].numasks == 0 && pe[i].ngrp) {
780                         fprintf(fp, "pmu: %s event%d: %s :: ngrp must be zero\n", pmu->name, i, pe[i].name);
781                         error++;
782                 }
783
784                 if (pe[i].ngrp >= INTEL_X86_NUM_GRP) {
785                         fprintf(fp, "pmu: %s event%d: %s :: ngrp too big (max=%d)\n", pmu->name, i, pe[i].name, INTEL_X86_NUM_GRP);
786                         error++;
787                 }
788
789                 for (j=i+1; j < (int)pmu->pme_count; j++) {
790                         if (pe[i].code == pe[j].code && !(pe[j].equiv || pe[i].equiv) && pe[j].cntmsk == pe[i].cntmsk) {
791                                 fprintf(fp, "pmu: %s events %s and %s have the same code 0x%x\n", pmu->name, pe[i].name, pe[j].name, pe[i].code);
792                                 error++;
793                                 }
794                         }
795
796                 for(j=0; j < INTEL_X86_NUM_GRP; j++)
797                         ndfl[j] = 0;
798
799                 for(j=0, npebs = 0; j < (int)pe[i].numasks; j++) {
800
801                         if (!pe[i].umasks[j].uname) {
802                                 fprintf(fp, "pmu: %s event%d: %s umask%d :: no name\n", pmu->name, i, pe[i].name, j);
803                                 error++;
804                         }
805                         if (pe[i].umasks[j].modhw && (pe[i].umasks[j].modhw | pe[i].modmsk) != pe[i].modmsk) {
806                                 fprintf(fp, "pmu: %s event%d: %s umask%d: %s :: modhw not subset of modmsk\n", pmu->name, i, pe[i].name, j, pe[i].umasks[j].uname);
807                                 error++;
808                         }
809
810                         if (!pe[i].umasks[j].udesc) {
811                                 fprintf(fp, "pmu: %s event%d: umask%d: %s :: no description\n", pmu->name, i, j, pe[i].umasks[j].uname);
812                                 error++;
813                         }
814
815                         if (pe[i].ngrp && pe[i].umasks[j].grpid >= pe[i].ngrp) {
816                                 fprintf(fp, "pmu: %s event%d: %s umask%d: %s :: invalid grpid %d (must be < %d)\n", pmu->name, i, pe[i].name, j, pe[i].umasks[j].uname, pe[i].umasks[j].grpid, pe[i].ngrp);
817                                 error++;
818                         }
819                         if (pe[i].umasks[j].uflags & INTEL_X86_DFL)
820                                 ndfl[pe[i].umasks[j].grpid]++;
821
822                         if (pe[i].umasks[j].uflags & INTEL_X86_PEBS)
823                                 npebs++;
824                 }
825
826                 if (npebs && !intel_x86_eflag(this, i, INTEL_X86_PEBS)) {
827                         fprintf(fp, "pmu: %s event%d: %s, pebs umasks but event pebs flag not set\n", pmu->name, i, pe[i].name);
828                         error++;
829                 }
830
831                 if (intel_x86_eflag(this, i, INTEL_X86_PEBS) && pe[i].numasks && npebs == 0) {
832                         fprintf(fp, "pmu: %s event%d: %s, pebs event flag but not umask has pebs flag\n", pmu->name, i, pe[i].name);
833                         error++;
834                 }
835
836                 /* if only one umask, then ought to be default */
837                 if (pe[i].numasks == 1 && !(pe[i].umasks[0].uflags & INTEL_X86_DFL)) {
838                         fprintf(fp, "pmu: %s event%d: %s, only one umask but no default\n", pmu->name, i, pe[i].name);
839                         error++;
840                 }
841
842                 if (pe[i].numasks) {
843                         unsigned int *dfl_model = malloc(sizeof(*dfl_model) * pe[i].numasks);
844                         if (!dfl_model)
845                                 goto skip_dfl;
846                         for(u=0; u < pe[i].ngrp; u++) {
847                                 int l = 0, m;
848                                 for (v = 0; v < pe[i].numasks; v++) {
849                                         if (pe[i].umasks[v].grpid != u)
850                                                 continue;
851                                         if (pe[i].umasks[v].uflags & INTEL_X86_DFL) {
852                                                 for (m = 0; m < l; m++) {
853                                                         if (dfl_model[m] == pe[i].umasks[v].umodel || dfl_model[m] == 0) {
854                                                                 fprintf(fp, "pmu: %s event%d: %s grpid %d has 2 default umasks\n", pmu->name, i, pe[i].name, u);
855                                                                 error++;
856                                                         }
857                                                 }
858                                                 if (m == l)
859                                                         dfl_model[l++] = pe[i].umasks[v].umodel;
860                                         }
861                                 }
862                         }
863                         free(dfl_model);
864                 }
865 skip_dfl:
866
867                 if (pe[i].flags & INTEL_X86_NCOMBO) {
868                         fprintf(fp, "pmu: %s event%d: %s :: NCOMBO is unit mask only flag\n", pmu->name, i, pe[i].name);
869                         error++;
870                 }
871
872                 for(u=0; u < pe[i].numasks; u++) {
873
874                         if (pe[i].umasks[u].uequiv)
875                                 continue;
876
877                         if (pe[i].umasks[u].uflags & INTEL_X86_NCOMBO)
878                                 continue;
879
880                         for(v=j+1; v < pe[i].numasks; v++) {
881                                 if (pe[i].umasks[v].uequiv)
882                                         continue;
883                                 if (pe[i].umasks[v].uflags & INTEL_X86_NCOMBO)
884                                         continue;
885                                 if (pe[i].umasks[v].grpid != pe[i].umasks[u].grpid)
886                                         continue;
887                                 if ((pe[i].umasks[u].ucode & pe[i].umasks[v].ucode) && pe[i].umasks[u].umodel == pe[i].umasks[v].umodel) {
888                                         fprintf(fp, "pmu: %s event%d: %s :: umask %s and %s have overlapping code bits\n", pmu->name, i, pe[i].name, pe[i].umasks[u].uname, pe[i].umasks[v].uname);
889                                         error++;
890                                 }
891                         }
892                 }
893         }
894         return error ? PFM_ERR_INVAL : PFM_SUCCESS;
895 }
896
897 int
898 pfm_intel_x86_get_event_attr_info(void *this, int pidx, int attr_idx, pfm_event_attr_info_t *info)
899 {
900         const intel_x86_entry_t *pe = this_pe(this);
901         const pfmlib_attr_desc_t *atdesc = this_atdesc(this);
902         int numasks, idx;
903
904         numasks = intel_x86_num_umasks(this, pidx);
905         if (attr_idx < numasks) {
906                 idx = intel_x86_attr2umask(this, pidx, attr_idx);
907                 info->name = pe[pidx].umasks[idx].uname;
908                 info->desc = pe[pidx].umasks[idx].udesc;
909                 info->equiv= pe[pidx].umasks[idx].uequiv;
910
911                 info->code = pe[pidx].umasks[idx].ucode;
912                 if (!intel_x86_uflag(this, pidx, idx, INTEL_X86_CODE_OVERRIDE))
913                         info->code >>= 8;
914
915                 info->type = PFM_ATTR_UMASK;
916                 info->is_dfl = intel_x86_uflag(this, pidx, idx, INTEL_X86_DFL);
917                 info->is_precise = intel_x86_uflag(this, pidx, idx, INTEL_X86_PEBS);
918         } else {
919                 idx = intel_x86_attr2mod(this, pidx, attr_idx);
920                 info->name = atdesc[idx].name;
921                 info->desc = atdesc[idx].desc;
922                 info->type = atdesc[idx].type;
923                 info->equiv= NULL;
924                 info->code = idx;
925                 info->is_dfl = 0;
926                 info->is_precise = 0;
927         }
928
929         info->ctrl = PFM_ATTR_CTRL_PMU;
930         info->idx = idx; /* namespace specific index */
931         info->dfl_val64 = 0;
932
933         return PFM_SUCCESS;
934 }
935
936 int
937 pfm_intel_x86_get_event_info(void *this, int idx, pfm_event_info_t *info)
938 {
939         const intel_x86_entry_t *pe = this_pe(this);
940         pfmlib_pmu_t *pmu = this;
941
942         info->name  = pe[idx].name;
943         info->desc  = pe[idx].desc;
944         info->code  = pe[idx].code;
945         info->equiv = pe[idx].equiv;
946         info->idx   = idx; /* private index */
947         info->pmu   = pmu->pmu;
948         /*
949          * no    umask: event supports PEBS
950          * with umasks: at least one umask supports PEBS
951          */
952         info->is_precise = intel_x86_eflag(this, idx, INTEL_X86_PEBS);
953
954         info->nattrs  = intel_x86_num_umasks(this, idx);
955         info->nattrs += intel_x86_num_mods(this, idx);
956
957         return PFM_SUCCESS;
958 }
959
960 int
961 pfm_intel_x86_valid_pebs(pfmlib_event_desc_t *e)
962 {
963         pfm_event_attr_info_t *a;
964         int i, npebs = 0, numasks = 0;
965
966         /* first check at the event level */
967         if (intel_x86_eflag(e->pmu, e->event, INTEL_X86_PEBS))
968                 return PFM_SUCCESS;
969
970         /*
971          * next check the umasks
972          *
973          * we do not assume we are calling after
974          * pfm_intel_x86_ge_event_encoding(), therefore
975          * we check the unit masks again.
976          * They must all be PEBS-capable.
977          */
978         for(i=0; i < e->nattrs; i++) {
979
980                 a = attr(e, i);
981
982                 if (a->ctrl != PFM_ATTR_CTRL_PMU || a->type != PFM_ATTR_UMASK)
983                         continue;
984
985                 numasks++;
986                 if (intel_x86_uflag(e->pmu, e->event, a->idx, INTEL_X86_PEBS))
987                         npebs++;
988         }
989         return npebs == numasks ? PFM_SUCCESS : PFM_ERR_FEATCOMB;
990 }
991
unsigned int
pfm_intel_x86_get_event_nattrs(void *this, int pidx)
{
	/* total attribute count = unit masks + hardware modifiers */
	return intel_x86_num_umasks(this, pidx)
	     + intel_x86_num_mods(this, pidx);
}
1000
1001 int
1002 pfm_intel_x86_can_auto_encode(void *this, int pidx, int uidx)
1003 {
1004         int numasks;
1005
1006         if (intel_x86_eflag(this, pidx, INTEL_X86_NO_AUTOENCODE))
1007                 return 0;
1008
1009         numasks = intel_x86_num_umasks(this, pidx);
1010         if (uidx >= numasks)
1011                 return 0;
1012
1013         return !intel_x86_uflag(this, pidx, uidx, INTEL_X86_NO_AUTOENCODE);
1014 }