1 /*-
2 * Copyright (c) 2014, 2015 Netflix Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28 #include <sys/types.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <sys/errno.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <getopt.h>
38 #include "eval_expr.h"
39 __FBSDID("$FreeBSD$");
40
/*
 * NOTE(review): the code that reads max_pmc_counters and run_all is outside
 * this part of the file; confirm their meaning there.
 */
static int max_pmc_counters = 1;
static int run_all = 0;

/* Number of entries in the vals[] array of struct counters. */
#define MAX_COUNTER_SLOTS 1024
/* Size in bytes of the counter_name[] array of struct counters. */
#define MAX_NLEN 64
/* Number of entries in the glob_cpu[] array. */
#define MAX_CPU 64
/* NOTE(review): presumably a verbosity flag; its readers are not visible here. */
static int verbose = 0;

/* Process environment, handed to execve() by my_popen(). */
extern char **environ;
/*
 * Each of the globals below is declared extern and then defined right away.
 * NOTE(review): struct expression presumably comes from eval_expr.h -- confirm.
 */
extern struct expression *master_exp;
struct expression *master_exp=NULL;

/*
 * NOTE(review): presumably the initial capacity used when valid_pmcs is
 * first allocated, with valid_pmc_cnt / pmc_allocated_cnt tracking used and
 * allocated entries -- confirm against the code that fills the list.
 */
#define PMC_INITIAL_ALLOC 512
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt=0;
60
61 /*
62 * The following two varients on popen and pclose with
63 * the cavet that they get you the PID so that you
64 * can supply it to pclose so it can send a SIGTERM
65 * to the process.
66 */
67 static FILE *
my_popen(const char * command,const char * dir,pid_t * p_pid)68 my_popen(const char *command, const char *dir, pid_t *p_pid)
69 {
70 FILE *io_out, *io_in;
71 int pdesin[2], pdesout[2];
72 char *argv[4];
73 pid_t pid;
74 char cmd[4];
75 char cmd2[1024];
76 char arg1[4];
77
78 if ((strcmp(dir, "r") != 0) &&
79 (strcmp(dir, "w") != 0)) {
80 errno = EINVAL;
81 return(NULL);
82 }
83 if (pipe(pdesin) < 0)
84 return (NULL);
85
86 if (pipe(pdesout) < 0) {
87 (void)close(pdesin[0]);
88 (void)close(pdesin[1]);
89 return (NULL);
90 }
91 strcpy(cmd, "sh");
92 strcpy(arg1, "-c");
93 strcpy(cmd2, command);
94 argv[0] = cmd;
95 argv[1] = arg1;
96 argv[2] = cmd2;
97 argv[3] = NULL;
98
99 switch (pid = fork()) {
100 case -1: /* Error. */
101 (void)close(pdesin[0]);
102 (void)close(pdesin[1]);
103 (void)close(pdesout[0]);
104 (void)close(pdesout[1]);
105 return (NULL);
106 /* NOTREACHED */
107 case 0: /* Child. */
108 /* Close out un-used sides */
109 (void)close(pdesin[1]);
110 (void)close(pdesout[0]);
111 /* Now prepare the stdin of the process */
112 close(0);
113 (void)dup(pdesin[0]);
114 (void)close(pdesin[0]);
115 /* Now prepare the stdout of the process */
116 close(1);
117 (void)dup(pdesout[1]);
118 /* And lets do stderr just in case */
119 close(2);
120 (void)dup(pdesout[1]);
121 (void)close(pdesout[1]);
122 /* Now run it */
123 execve("/bin/sh", argv, environ);
124 exit(127);
125 /* NOTREACHED */
126 }
127 /* Parent; assume fdopen can't fail. */
128 /* Store the pid */
129 *p_pid = pid;
130 if (strcmp(dir, "r") != 0) {
131 io_out = fdopen(pdesin[1], "w");
132 (void)close(pdesin[0]);
133 (void)close(pdesout[0]);
134 (void)close(pdesout[1]);
135 return(io_out);
136 } else {
137 /* Prepare the input stream */
138 io_in = fdopen(pdesout[0], "r");
139 (void)close(pdesout[1]);
140 (void)close(pdesin[0]);
141 (void)close(pdesin[1]);
142 return (io_in);
143 }
144 }
145
/*
 * my_pclose --
 *	Companion to my_popen().  Closes the stream, sends SIGTERM to
 *	the_pid and then waits for that process, retrying the wait whenever
 *	it is interrupted (EINTR).  Nothing is returned; the exit status is
 *	collected and discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;
	pid_t reaped;

	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	for (;;) {
		reaped = wait4(the_pid, &status, 0, (struct rusage *)0);
		if (reaped != -1 || errno != EINTR)
			break;
	}
}
167
/*
 * The recorded values of one named counter.  Entries are chained through
 * next_cpu; find_counter() walks that chain comparing counter_name.
 */
struct counters {
	struct counters *next_cpu;
	char counter_name[MAX_NLEN];		/* Name of counter */
	int cpu;				/* CPU we are on */
	int pos;				/* Index we are filling to. */
	uint64_t vals[MAX_COUNTER_SLOTS];	/* Recorded values, MAX_COUNTER_SLOTS entries */
	uint64_t sum;				/* Summary of entries */
};
176
/* NOTE(review): presumably one counter chain per CPU -- confirm with the code that fills it. */
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];

/* NOTE(review): cnts / ncnts look like a counter array and its length; their users are not visible here. */
extern struct counters *cnts;
struct counters *cnts=NULL;

extern int ncnts;
int ncnts=0;

/*
 * Pointer to a routine with the same signature as the metric functions in
 * this file (for example allocstall1()).
 */
extern int (*expression)(struct counters *, int);
int (*expression)(struct counters *, int);

/* NOTE(review): presumably set from the selected struct cpu_entry -- confirm. */
static const char *threshold=NULL;
static const char *command;
191
/*
 * Description of one named analysis.
 * NOTE(review): the tables that initialize these fields are outside this
 * part of the file; the notes on name, thresh, command and
 * counters_required are inferred from the field names -- confirm.
 */
struct cpu_entry {
	const char *name;		/* presumably the name the explain_name_*() routines match on */
	const char *thresh;		/* presumably the threshold text for this analysis */
	const char *command;		/* presumably the command used to collect the counters */
	/* Same signature as the metric routines in this file, e.g. allocstall1(). */
	int (*func)(struct counters *, int);
	int counters_required;		/* presumably how many counters func needs */
};
199
/*
 * A CPU type together with its table of analyses.
 * NOTE(review): number presumably counts the entries in ents -- confirm.
 */
struct cpu_type {
	char cputype[32];
	int number;
	struct cpu_entry *ents;
	/* Same signature as explain_name_sb(), explain_name_ib() and explain_name_has(). */
	void (*explain)(const char *name);
};
/* Declared extern and defined right away, like the other globals in this file. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
208
/*
 * explain_name_sb --
 *	Print to stdout the formula behind the analysis called name,
 *	followed by one line stating the value range worth investigating.
 *	An unrecognized name prints "Unknown name:<name>" and the closing
 *	line then reports "unknown entry".
 *
 *	The "splitload" text used to lack its opening parenthesis; it now
 *	reads like the same entry in explain_name_has().
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *name;	/* analysis name to match */
		const char *text;	/* formula text, printed verbatim */
		const char *thresh;	/* inserted into the closing line */
	} sb_tbl[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const char *mythresh = NULL;
	size_t i;

	for (i = 0; i < sizeof(sb_tbl) / sizeof(sb_tbl[0]); i++) {
		if (strcmp(name, sb_tbl[i].name) == 0) {
			/* The texts contain no '%', so "%s" prints them unchanged. */
			printf("%s", sb_tbl[i].text);
			mythresh = sb_tbl[i].thresh;
			break;
		}
	}
	if (mythresh == NULL) {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
287
/*
 * explain_name_ib --
 *	Print to stdout the formula behind the analysis called name,
 *	followed by one line stating the value range worth investigating.
 *	An unrecognized name prints "Unknown name:<name>" and the closing
 *	line then reports "unknown entry".
 *
 *	NOTE(review): the "cache2" text has an unbalanced '('; it is kept
 *	exactly as it was because the formula it describes is not visible
 *	from here.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *name;	/* analysis name to match */
		const char *text;	/* formula text, printed verbatim */
		const char *thresh;	/* inserted into the closing line */
	} ib_tbl[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  " MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  " LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const char *mythresh = NULL;
	size_t i;

	for (i = 0; i < sizeof(ib_tbl) / sizeof(ib_tbl[0]); i++) {
		if (strcmp(name, ib_tbl[i].name) == 0) {
			/* The texts contain no '%', so "%s" prints them unchanged. */
			printf("%s", ib_tbl[i].text);
			mythresh = ib_tbl[i].thresh;
			break;
		}
	}
	if (mythresh == NULL) {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
369
370
/*
 * explain_name_has --
 *	Print to stdout the formula behind the analysis called name,
 *	followed by one line stating the value range worth investigating.
 *	An unrecognized name prints "Unknown name:<name>" and the closing
 *	line then reports "unknown entry".
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *name;	/* analysis name to match */
		const char *text;	/* formula text, printed verbatim */
		const char *thresh;	/* inserted into the closing line */
	} has_tbl[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  " / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const char *mythresh = NULL;
	size_t i;

	for (i = 0; i < sizeof(has_tbl) / sizeof(has_tbl[0]); i++) {
		if (strcmp(name, has_tbl[i].name) == 0) {
			/* The texts contain no '%', so "%s" prints them unchanged. */
			printf("%s", has_tbl[i].text);
			mythresh = has_tbl[i].thresh;
			break;
		}
	}
	if (mythresh == NULL) {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
448
449
450
451 static struct counters *
find_counter(struct counters * base,const char * name)452 find_counter(struct counters *base, const char *name)
453 {
454 struct counters *at;
455 int len;
456
457 at = base;
458 len = strlen(name);
459 while(at) {
460 if (strncmp(at->counter_name, name, len) == 0) {
461 return(at);
462 }
463 at = at->next_cpu;
464 }
465 printf("Can't find counter %s\n", name);
466 printf("We have:\n");
467 at = base;
468 while(at) {
469 printf("- %s\n", at->counter_name);
470 at = at->next_cpu;
471 }
472 exit(-1);
473 }
474
475 static int
allocstall1(struct counters * cpu,int pos)476 allocstall1(struct counters *cpu, int pos)
477 {
478 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
479 int ret;
480 struct counters *partial;
481 struct counters *unhalt;
482 double un, par, res;
483 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
484 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
485 if (pos != -1) {
486 par = partial->vals[pos] * 1.0;
487 un = unhalt->vals[pos] * 1.0;
488 } else {
489 par = partial->sum * 1.0;
490 un = unhalt->sum * 1.0;
491 }
492 res = par/un;
493 ret = printf("%1.3f", res);
494 return(ret);
495 }
496
497 static int
allocstall2(struct counters * cpu,int pos)498 allocstall2(struct counters *cpu, int pos)
499 {
500 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
501 int ret;
502 struct counters *partial;
503 struct counters *unhalt;
504 double un, par, res;
505 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
506 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
507 if (pos != -1) {
508 par = partial->vals[pos] * 1.0;
509 un = unhalt->vals[pos] * 1.0;
510 } else {
511 par = partial->sum * 1.0;
512 un = unhalt->sum * 1.0;
513 }
514 res = par/un;
515 ret = printf("%1.3f", res);
516 return(ret);
517 }
518
519 static int
br_mispredict(struct counters * cpu,int pos)520 br_mispredict(struct counters *cpu, int pos)
521 {
522 struct counters *brctr;
523 struct counters *unhalt;
524 int ret;
525 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
526 double br, un, con, res;
527 con = 20.0;
528
529 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
530 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
531 if (pos != -1) {
532 br = brctr->vals[pos] * 1.0;
533 un = unhalt->vals[pos] * 1.0;
534 } else {
535 br = brctr->sum * 1.0;
536 un = unhalt->sum * 1.0;
537 }
538 res = (con * br)/un;
539 ret = printf("%1.3f", res);
540 return(ret);
541 }
542
543 static int
br_mispredictib(struct counters * cpu,int pos)544 br_mispredictib(struct counters *cpu, int pos)
545 {
546 struct counters *brctr;
547 struct counters *unhalt;
548 struct counters *clear, *clear2, *clear3;
549 struct counters *uops;
550 struct counters *recv;
551 struct counters *iss;
552 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
553 int ret;
554 /*
555 * (BR_MISP_RETIRED.ALL_BRANCHES /
556 * (BR_MISP_RETIRED.ALL_BRANCHES +
557 * MACHINE_CLEAR.COUNT) *
558 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
559 *
560 */
561 double br, cl, cl2, cl3, uo, re, un, con, res, is;
562 con = 4.0;
563
564 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
565 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
566 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
567 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
568 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
569 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
570 iss = find_counter(cpu, "UOPS_ISSUED.ANY");
571 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
572 if (pos != -1) {
573 br = brctr->vals[pos] * 1.0;
574 cl = clear->vals[pos] * 1.0;
575 cl2 = clear2->vals[pos] * 1.0;
576 cl3 = clear3->vals[pos] * 1.0;
577 uo = uops->vals[pos] * 1.0;
578 re = recv->vals[pos] * 1.0;
579 is = iss->vals[pos] * 1.0;
580 un = unhalt->vals[pos] * 1.0;
581 } else {
582 br = brctr->sum * 1.0;
583 cl = clear->sum * 1.0;
584 cl2 = clear2->sum * 1.0;
585 cl3 = clear3->sum * 1.0;
586 uo = uops->sum * 1.0;
587 re = recv->sum * 1.0;
588 is = iss->sum * 1.0;
589 un = unhalt->sum * 1.0;
590 }
591 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
592 ret = printf("%1.3f", res);
593 return(ret);
594 }
595
596
597 static int
br_mispredict_broad(struct counters * cpu,int pos)598 br_mispredict_broad(struct counters *cpu, int pos)
599 {
600 struct counters *brctr;
601 struct counters *unhalt;
602 struct counters *clear;
603 struct counters *uops;
604 struct counters *uops_ret;
605 struct counters *recv;
606 int ret;
607 double br, cl, uo, uo_r, re, con, un, res;
608
609 con = 4.0;
610
611 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
612 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
613 clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
614 uops = find_counter(cpu, "UOPS_ISSUED.ANY");
615 uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
616 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
617
618 if (pos != -1) {
619 un = unhalt->vals[pos] * 1.0;
620 br = brctr->vals[pos] * 1.0;
621 cl = clear->vals[pos] * 1.0;
622 uo = uops->vals[pos] * 1.0;
623 uo_r = uops_ret->vals[pos] * 1.0;
624 re = recv->vals[pos] * 1.0;
625 } else {
626 un = unhalt->sum * 1.0;
627 br = brctr->sum * 1.0;
628 cl = clear->sum * 1.0;
629 uo = uops->sum * 1.0;
630 uo_r = uops_ret->sum * 1.0;
631 re = recv->sum * 1.0;
632 }
633 res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
634 ret = printf("%1.3f", res);
635 return(ret);
636 }
637
638 static int
splitloadib(struct counters * cpu,int pos)639 splitloadib(struct counters *cpu, int pos)
640 {
641 int ret;
642 struct counters *mem;
643 struct counters *l1d, *ldblock;
644 struct counters *unhalt;
645 double un, memd, res, l1, ldb;
646 /*
647 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
648 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
649 */
650
651 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
652 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
653 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
654 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
655 if (pos != -1) {
656 memd = mem->vals[pos] * 1.0;
657 l1 = l1d->vals[pos] * 1.0;
658 ldb = ldblock->vals[pos] * 1.0;
659 un = unhalt->vals[pos] * 1.0;
660 } else {
661 memd = mem->sum * 1.0;
662 l1 = l1d->sum * 1.0;
663 ldb = ldblock->sum * 1.0;
664 un = unhalt->sum * 1.0;
665 }
666 res = ((l1 / memd) * ldb)/un;
667 ret = printf("%1.3f", res);
668 return(ret);
669 }
670
671
672 static int
splitload(struct counters * cpu,int pos)673 splitload(struct counters *cpu, int pos)
674 {
675 int ret;
676 struct counters *mem;
677 struct counters *unhalt;
678 double con, un, memd, res;
679 /* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
680
681 con = 5.0;
682 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
683 mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
684 if (pos != -1) {
685 memd = mem->vals[pos] * 1.0;
686 un = unhalt->vals[pos] * 1.0;
687 } else {
688 memd = mem->sum * 1.0;
689 un = unhalt->sum * 1.0;
690 }
691 res = (memd * con)/un;
692 ret = printf("%1.3f", res);
693 return(ret);
694 }
695
696
697 static int
splitload_sb(struct counters * cpu,int pos)698 splitload_sb(struct counters *cpu, int pos)
699 {
700 int ret;
701 struct counters *mem;
702 struct counters *unhalt;
703 double con, un, memd, res;
704 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
705
706 con = 5.0;
707 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
708 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
709 if (pos != -1) {
710 memd = mem->vals[pos] * 1.0;
711 un = unhalt->vals[pos] * 1.0;
712 } else {
713 memd = mem->sum * 1.0;
714 un = unhalt->sum * 1.0;
715 }
716 res = (memd * con)/un;
717 ret = printf("%1.3f", res);
718 return(ret);
719 }
720
721
722 static int
splitstore_sb(struct counters * cpu,int pos)723 splitstore_sb(struct counters *cpu, int pos)
724 {
725 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
726 int ret;
727 struct counters *mem_split;
728 struct counters *mem_stores;
729 double memsplit, memstore, res;
730 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
731 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
732 if (pos != -1) {
733 memsplit = mem_split->vals[pos] * 1.0;
734 memstore = mem_stores->vals[pos] * 1.0;
735 } else {
736 memsplit = mem_split->sum * 1.0;
737 memstore = mem_stores->sum * 1.0;
738 }
739 res = memsplit/memstore;
740 ret = printf("%1.3f", res);
741 return(ret);
742 }
743
744
745
746 static int
splitstore(struct counters * cpu,int pos)747 splitstore(struct counters *cpu, int pos)
748 {
749 /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
750 int ret;
751 struct counters *mem_split;
752 struct counters *mem_stores;
753 double memsplit, memstore, res;
754 mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
755 mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
756 if (pos != -1) {
757 memsplit = mem_split->vals[pos] * 1.0;
758 memstore = mem_stores->vals[pos] * 1.0;
759 } else {
760 memsplit = mem_split->sum * 1.0;
761 memstore = mem_stores->sum * 1.0;
762 }
763 res = memsplit/memstore;
764 ret = printf("%1.3f", res);
765 return(ret);
766 }
767
768
769 static int
contested(struct counters * cpu,int pos)770 contested(struct counters *cpu, int pos)
771 {
772 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
773 int ret;
774 struct counters *mem;
775 struct counters *unhalt;
776 double con, un, memd, res;
777
778 con = 60.0;
779 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
780 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
781 if (pos != -1) {
782 memd = mem->vals[pos] * 1.0;
783 un = unhalt->vals[pos] * 1.0;
784 } else {
785 memd = mem->sum * 1.0;
786 un = unhalt->sum * 1.0;
787 }
788 res = (memd * con)/un;
789 ret = printf("%1.3f", res);
790 return(ret);
791 }
792
793 static int
contested_has(struct counters * cpu,int pos)794 contested_has(struct counters *cpu, int pos)
795 {
796 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
797 int ret;
798 struct counters *mem;
799 struct counters *unhalt;
800 double con, un, memd, res;
801
802 con = 84.0;
803 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
804 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
805 if (pos != -1) {
806 memd = mem->vals[pos] * 1.0;
807 un = unhalt->vals[pos] * 1.0;
808 } else {
809 memd = mem->sum * 1.0;
810 un = unhalt->sum * 1.0;
811 }
812 res = (memd * con)/un;
813 ret = printf("%1.3f", res);
814 return(ret);
815 }
816
817 static int
contestedbroad(struct counters * cpu,int pos)818 contestedbroad(struct counters *cpu, int pos)
819 {
820 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
821 int ret;
822 struct counters *mem;
823 struct counters *mem2;
824 struct counters *unhalt;
825 double con, un, memd, memtoo, res;
826
827 con = 84.0;
828 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
829 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
830 mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
831
832 if (pos != -1) {
833 memd = mem->vals[pos] * 1.0;
834 memtoo = mem2->vals[pos] * 1.0;
835 un = unhalt->vals[pos] * 1.0;
836 } else {
837 memd = mem->sum * 1.0;
838 memtoo = mem2->sum * 1.0;
839 un = unhalt->sum * 1.0;
840 }
841 res = ((memd * con) + memtoo)/un;
842 ret = printf("%1.3f", res);
843 return(ret);
844 }
845
846
847 static int
blockstoreforward(struct counters * cpu,int pos)848 blockstoreforward(struct counters *cpu, int pos)
849 {
850 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
851 int ret;
852 struct counters *ldb;
853 struct counters *unhalt;
854 double con, un, ld, res;
855
856 con = 13.0;
857 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
858 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
859 if (pos != -1) {
860 ld = ldb->vals[pos] * 1.0;
861 un = unhalt->vals[pos] * 1.0;
862 } else {
863 ld = ldb->sum * 1.0;
864 un = unhalt->sum * 1.0;
865 }
866 res = (ld * con)/un;
867 ret = printf("%1.3f", res);
868 return(ret);
869 }
870
871 static int
cache2(struct counters * cpu,int pos)872 cache2(struct counters *cpu, int pos)
873 {
874 /* ** Suspect ***
875 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
876 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
877 */
878 int ret;
879 struct counters *mem1, *mem2, *mem3;
880 struct counters *unhalt;
881 double con1, con2, con3, un, me_1, me_2, me_3, res;
882
883 con1 = 26.0;
884 con2 = 43.0;
885 con3 = 60.0;
886 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
887 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
888 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
889 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
890 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
891 if (pos != -1) {
892 me_1 = mem1->vals[pos] * 1.0;
893 me_2 = mem2->vals[pos] * 1.0;
894 me_3 = mem3->vals[pos] * 1.0;
895 un = unhalt->vals[pos] * 1.0;
896 } else {
897 me_1 = mem1->sum * 1.0;
898 me_2 = mem2->sum * 1.0;
899 me_3 = mem3->sum * 1.0;
900 un = unhalt->sum * 1.0;
901 }
902 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
903 ret = printf("%1.3f", res);
904 return(ret);
905 }
906
907 static int
datasharing(struct counters * cpu,int pos)908 datasharing(struct counters *cpu, int pos)
909 {
910 /*
911 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
912 */
913 int ret;
914 struct counters *mem;
915 struct counters *unhalt;
916 double con, res, me, un;
917
918 con = 43.0;
919 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
920 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
921 if (pos != -1) {
922 me = mem->vals[pos] * 1.0;
923 un = unhalt->vals[pos] * 1.0;
924 } else {
925 me = mem->sum * 1.0;
926 un = unhalt->sum * 1.0;
927 }
928 res = (me * con)/un;
929 ret = printf("%1.3f", res);
930 return(ret);
931
932 }
933
934
935 static int
datasharing_has(struct counters * cpu,int pos)936 datasharing_has(struct counters *cpu, int pos)
937 {
938 /*
939 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
940 */
941 int ret;
942 struct counters *mem;
943 struct counters *unhalt;
944 double con, res, me, un;
945
946 con = 72.0;
947 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
948 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
949 if (pos != -1) {
950 me = mem->vals[pos] * 1.0;
951 un = unhalt->vals[pos] * 1.0;
952 } else {
953 me = mem->sum * 1.0;
954 un = unhalt->sum * 1.0;
955 }
956 res = (me * con)/un;
957 ret = printf("%1.3f", res);
958 return(ret);
959
960 }
961
962
963 static int
cache2ib(struct counters * cpu,int pos)964 cache2ib(struct counters *cpu, int pos)
965 {
966 /*
967 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
968 */
969 int ret;
970 struct counters *mem;
971 struct counters *unhalt;
972 double con, un, me, res;
973
974 con = 29.0;
975 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
976 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
977 if (pos != -1) {
978 me = mem->vals[pos] * 1.0;
979 un = unhalt->vals[pos] * 1.0;
980 } else {
981 me = mem->sum * 1.0;
982 un = unhalt->sum * 1.0;
983 }
984 res = (con * me)/un;
985 ret = printf("%1.3f", res);
986 return(ret);
987 }
988
989 static int
cache2has(struct counters * cpu,int pos)990 cache2has(struct counters *cpu, int pos)
991 {
992 /*
993 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
994 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
995 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
996 * / CPU_CLK_UNHALTED.THREAD_P
997 */
998 int ret;
999 struct counters *mem1, *mem2, *mem3;
1000 struct counters *unhalt;
1001 double con1, con2, con3, un, me1, me2, me3, res;
1002
1003 con1 = 36.0;
1004 con2 = 72.0;
1005 con3 = 84.0;
1006 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1007 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1008 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1009 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1010 if (pos != -1) {
1011 me1 = mem1->vals[pos] * 1.0;
1012 me2 = mem2->vals[pos] * 1.0;
1013 me3 = mem3->vals[pos] * 1.0;
1014 un = unhalt->vals[pos] * 1.0;
1015 } else {
1016 me1 = mem1->sum * 1.0;
1017 me2 = mem2->sum * 1.0;
1018 me3 = mem3->sum * 1.0;
1019 un = unhalt->sum * 1.0;
1020 }
1021 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1022 ret = printf("%1.3f", res);
1023 return(ret);
1024 }
1025
1026
1027 static int
cache2broad(struct counters * cpu,int pos)1028 cache2broad(struct counters *cpu, int pos)
1029 {
1030 /*
1031 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1032 */
1033 int ret;
1034 struct counters *mem;
1035 struct counters *unhalt;
1036 double con, un, me, res;
1037
1038 con = 36.0;
1039 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1040 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1041 if (pos != -1) {
1042 me = mem->vals[pos] * 1.0;
1043 un = unhalt->vals[pos] * 1.0;
1044 } else {
1045 me = mem->sum * 1.0;
1046 un = unhalt->sum * 1.0;
1047 }
1048 res = (con * me)/un;
1049 ret = printf("%1.3f", res);
1050 return(ret);
1051 }
1052
1053
1054 static int
cache1(struct counters * cpu,int pos)1055 cache1(struct counters *cpu, int pos)
1056 {
1057 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1058 int ret;
1059 struct counters *mem;
1060 struct counters *unhalt;
1061 double con, un, me, res;
1062
1063 con = 180.0;
1064 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1065 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1066 if (pos != -1) {
1067 me = mem->vals[pos] * 1.0;
1068 un = unhalt->vals[pos] * 1.0;
1069 } else {
1070 me = mem->sum * 1.0;
1071 un = unhalt->sum * 1.0;
1072 }
1073 res = (me * con)/un;
1074 ret = printf("%1.3f", res);
1075 return(ret);
1076 }
1077
1078 static int
cache1ib(struct counters * cpu,int pos)1079 cache1ib(struct counters *cpu, int pos)
1080 {
1081 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1082 int ret;
1083 struct counters *mem;
1084 struct counters *unhalt;
1085 double con, un, me, res;
1086
1087 con = 180.0;
1088 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1089 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1090 if (pos != -1) {
1091 me = mem->vals[pos] * 1.0;
1092 un = unhalt->vals[pos] * 1.0;
1093 } else {
1094 me = mem->sum * 1.0;
1095 un = unhalt->sum * 1.0;
1096 }
1097 res = (me * con)/un;
1098 ret = printf("%1.3f", res);
1099 return(ret);
1100 }
1101
1102
1103 static int
cache1broad(struct counters * cpu,int pos)1104 cache1broad(struct counters *cpu, int pos)
1105 {
1106 /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1107 int ret;
1108 struct counters *mem;
1109 struct counters *unhalt;
1110 double con, un, me, res;
1111
1112 con = 180.0;
1113 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1114 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1115 if (pos != -1) {
1116 me = mem->vals[pos] * 1.0;
1117 un = unhalt->vals[pos] * 1.0;
1118 } else {
1119 me = mem->sum * 1.0;
1120 un = unhalt->sum * 1.0;
1121 }
1122 res = (me * con)/un;
1123 ret = printf("%1.3f", res);
1124 return(ret);
1125 }
1126
1127
1128 static int
dtlb_missload(struct counters * cpu,int pos)1129 dtlb_missload(struct counters *cpu, int pos)
1130 {
1131 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1132 int ret;
1133 struct counters *dtlb_m, *dtlb_d;
1134 struct counters *unhalt;
1135 double con, un, d1, d2, res;
1136
1137 con = 7.0;
1138 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1139 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1140 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1141 if (pos != -1) {
1142 d1 = dtlb_m->vals[pos] * 1.0;
1143 d2 = dtlb_d->vals[pos] * 1.0;
1144 un = unhalt->vals[pos] * 1.0;
1145 } else {
1146 d1 = dtlb_m->sum * 1.0;
1147 d2 = dtlb_d->sum * 1.0;
1148 un = unhalt->sum * 1.0;
1149 }
1150 res = ((d1 * con) + d2)/un;
1151 ret = printf("%1.3f", res);
1152 return(ret);
1153 }
1154
1155 static int
dtlb_missstore(struct counters * cpu,int pos)1156 dtlb_missstore(struct counters *cpu, int pos)
1157 {
1158 /*
1159 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1160 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1161 */
1162 int ret;
1163 struct counters *dtsb_m, *dtsb_d;
1164 struct counters *unhalt;
1165 double con, un, d1, d2, res;
1166
1167 con = 7.0;
1168 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1169 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1170 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1171 if (pos != -1) {
1172 d1 = dtsb_m->vals[pos] * 1.0;
1173 d2 = dtsb_d->vals[pos] * 1.0;
1174 un = unhalt->vals[pos] * 1.0;
1175 } else {
1176 d1 = dtsb_m->sum * 1.0;
1177 d2 = dtsb_d->sum * 1.0;
1178 un = unhalt->sum * 1.0;
1179 }
1180 res = ((d1 * con) + d2)/un;
1181 ret = printf("%1.3f", res);
1182 return(ret);
1183 }
1184
1185 static int
itlb_miss(struct counters * cpu,int pos)1186 itlb_miss(struct counters *cpu, int pos)
1187 {
1188 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */
1189 int ret;
1190 struct counters *itlb;
1191 struct counters *unhalt;
1192 double un, d1, res;
1193
1194 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1195 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1196 if (pos != -1) {
1197 d1 = itlb->vals[pos] * 1.0;
1198 un = unhalt->vals[pos] * 1.0;
1199 } else {
1200 d1 = itlb->sum * 1.0;
1201 un = unhalt->sum * 1.0;
1202 }
1203 res = d1/un;
1204 ret = printf("%1.3f", res);
1205 return(ret);
1206 }
1207
1208
1209 static int
itlb_miss_broad(struct counters * cpu,int pos)1210 itlb_miss_broad(struct counters *cpu, int pos)
1211 {
1212 /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */
1213 int ret;
1214 struct counters *itlb;
1215 struct counters *unhalt;
1216 struct counters *four_k;
1217 double un, d1, res, k;
1218
1219 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1220 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1221 four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1222 if (pos != -1) {
1223 d1 = itlb->vals[pos] * 1.0;
1224 un = unhalt->vals[pos] * 1.0;
1225 k = four_k->vals[pos] * 1.0;
1226 } else {
1227 d1 = itlb->sum * 1.0;
1228 un = unhalt->sum * 1.0;
1229 k = four_k->sum * 1.0;
1230 }
1231 res = (7.0 * k + d1)/un;
1232 ret = printf("%1.3f", res);
1233 return(ret);
1234 }
1235
1236
1237 static int
icache_miss(struct counters * cpu,int pos)1238 icache_miss(struct counters *cpu, int pos)
1239 {
1240 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1241
1242 int ret;
1243 struct counters *itlb, *icache;
1244 struct counters *unhalt;
1245 double un, d1, ic, res;
1246
1247 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1248 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1249 icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1250 if (pos != -1) {
1251 d1 = itlb->vals[pos] * 1.0;
1252 ic = icache->vals[pos] * 1.0;
1253 un = unhalt->vals[pos] * 1.0;
1254 } else {
1255 d1 = itlb->sum * 1.0;
1256 ic = icache->sum * 1.0;
1257 un = unhalt->sum * 1.0;
1258 }
1259 res = (ic-d1)/un;
1260 ret = printf("%1.3f", res);
1261 return(ret);
1262
1263 }
1264
1265 static int
icache_miss_has(struct counters * cpu,int pos)1266 icache_miss_has(struct counters *cpu, int pos)
1267 {
1268 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1269
1270 int ret;
1271 struct counters *icache;
1272 struct counters *unhalt;
1273 double un, con, ic, res;
1274
1275 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1276 icache = find_counter(cpu, "ICACHE.MISSES");
1277 con = 36.0;
1278 if (pos != -1) {
1279 ic = icache->vals[pos] * 1.0;
1280 un = unhalt->vals[pos] * 1.0;
1281 } else {
1282 ic = icache->sum * 1.0;
1283 un = unhalt->sum * 1.0;
1284 }
1285 res = (con * ic)/un;
1286 ret = printf("%1.3f", res);
1287 return(ret);
1288
1289 }
1290
1291 static int
lcp_stall(struct counters * cpu,int pos)1292 lcp_stall(struct counters *cpu, int pos)
1293 {
1294 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1295 int ret;
1296 struct counters *ild;
1297 struct counters *unhalt;
1298 double un, d1, res;
1299
1300 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1301 ild = find_counter(cpu, "ILD_STALL.LCP");
1302 if (pos != -1) {
1303 d1 = ild->vals[pos] * 1.0;
1304 un = unhalt->vals[pos] * 1.0;
1305 } else {
1306 d1 = ild->sum * 1.0;
1307 un = unhalt->sum * 1.0;
1308 }
1309 res = d1/un;
1310 ret = printf("%1.3f", res);
1311 return(ret);
1312
1313 }
1314
1315
1316 static int
frontendstall(struct counters * cpu,int pos)1317 frontendstall(struct counters *cpu, int pos)
1318 {
1319 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1320 int ret;
1321 struct counters *idq;
1322 struct counters *unhalt;
1323 double con, un, id, res;
1324
1325 con = 4.0;
1326 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1327 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1328 if (pos != -1) {
1329 id = idq->vals[pos] * 1.0;
1330 un = unhalt->vals[pos] * 1.0;
1331 } else {
1332 id = idq->sum * 1.0;
1333 un = unhalt->sum * 1.0;
1334 }
1335 res = id/(un * con);
1336 ret = printf("%1.3f", res);
1337 return(ret);
1338 }
1339
1340 static int
clears(struct counters * cpu,int pos)1341 clears(struct counters *cpu, int pos)
1342 {
1343 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1344 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/
1345
1346 int ret;
1347 struct counters *clr1, *clr2, *clr3;
1348 struct counters *unhalt;
1349 double con, un, cl1, cl2, cl3, res;
1350
1351 con = 100.0;
1352 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1353 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1354 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1355 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1356
1357 if (pos != -1) {
1358 cl1 = clr1->vals[pos] * 1.0;
1359 cl2 = clr2->vals[pos] * 1.0;
1360 cl3 = clr3->vals[pos] * 1.0;
1361 un = unhalt->vals[pos] * 1.0;
1362 } else {
1363 cl1 = clr1->sum * 1.0;
1364 cl2 = clr2->sum * 1.0;
1365 cl3 = clr3->sum * 1.0;
1366 un = unhalt->sum * 1.0;
1367 }
1368 res = ((cl1 + cl2 + cl3) * con)/un;
1369 ret = printf("%1.3f", res);
1370 return(ret);
1371 }
1372
1373
1374
1375 static int
clears_broad(struct counters * cpu,int pos)1376 clears_broad(struct counters *cpu, int pos)
1377 {
1378 int ret;
1379 struct counters *clr1, *clr2, *clr3, *cyc;
1380 struct counters *unhalt;
1381 double con, un, cl1, cl2, cl3, cy, res;
1382
1383 con = 100.0;
1384 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1385 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1386 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1387 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1388 cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1389 if (pos != -1) {
1390 cl1 = clr1->vals[pos] * 1.0;
1391 cl2 = clr2->vals[pos] * 1.0;
1392 cl3 = clr3->vals[pos] * 1.0;
1393 cy = cyc->vals[pos] * 1.0;
1394 un = unhalt->vals[pos] * 1.0;
1395 } else {
1396 cl1 = clr1->sum * 1.0;
1397 cl2 = clr2->sum * 1.0;
1398 cl3 = clr3->sum * 1.0;
1399 cy = cyc->sum * 1.0;
1400 un = unhalt->sum * 1.0;
1401 }
1402 /* Formula not listed but extrapulated to add the cy ?? */
1403 res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1404 ret = printf("%1.3f", res);
1405 return(ret);
1406 }
1407
1408
1409
1410
1411
1412 static int
microassist(struct counters * cpu,int pos)1413 microassist(struct counters *cpu, int pos)
1414 {
1415 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1416 int ret;
1417 struct counters *idq;
1418 struct counters *unhalt;
1419 double un, id, res, con;
1420
1421 con = 4.0;
1422 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1423 idq = find_counter(cpu, "IDQ.MS_UOPS");
1424 if (pos != -1) {
1425 id = idq->vals[pos] * 1.0;
1426 un = unhalt->vals[pos] * 1.0;
1427 } else {
1428 id = idq->sum * 1.0;
1429 un = unhalt->sum * 1.0;
1430 }
1431 res = id/(un * con);
1432 ret = printf("%1.3f", res);
1433 return(ret);
1434 }
1435
1436
1437 static int
microassist_broad(struct counters * cpu,int pos)1438 microassist_broad(struct counters *cpu, int pos)
1439 {
1440 int ret;
1441 struct counters *idq;
1442 struct counters *unhalt;
1443 struct counters *uopiss;
1444 struct counters *uopret;
1445 double un, id, res, con, uoi, uor;
1446
1447 con = 4.0;
1448 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1449 idq = find_counter(cpu, "IDQ.MS_UOPS");
1450 uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1451 uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1452 if (pos != -1) {
1453 id = idq->vals[pos] * 1.0;
1454 un = unhalt->vals[pos] * 1.0;
1455 uoi = uopiss->vals[pos] * 1.0;
1456 uor = uopret->vals[pos] * 1.0;
1457 } else {
1458 id = idq->sum * 1.0;
1459 un = unhalt->sum * 1.0;
1460 uoi = uopiss->sum * 1.0;
1461 uor = uopret->sum * 1.0;
1462 }
1463 res = (uor/uoi) * (id/(un * con));
1464 ret = printf("%1.3f", res);
1465 return(ret);
1466 }
1467
1468
1469 static int
aliasing(struct counters * cpu,int pos)1470 aliasing(struct counters *cpu, int pos)
1471 {
1472 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1473 int ret;
1474 struct counters *ld;
1475 struct counters *unhalt;
1476 double un, lds, con, res;
1477
1478 con = 5.0;
1479 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1480 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1481 if (pos != -1) {
1482 lds = ld->vals[pos] * 1.0;
1483 un = unhalt->vals[pos] * 1.0;
1484 } else {
1485 lds = ld->sum * 1.0;
1486 un = unhalt->sum * 1.0;
1487 }
1488 res = (lds * con)/un;
1489 ret = printf("%1.3f", res);
1490 return(ret);
1491 }
1492
1493 static int
aliasing_broad(struct counters * cpu,int pos)1494 aliasing_broad(struct counters *cpu, int pos)
1495 {
1496 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1497 int ret;
1498 struct counters *ld;
1499 struct counters *unhalt;
1500 double un, lds, con, res;
1501
1502 con = 7.0;
1503 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1504 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1505 if (pos != -1) {
1506 lds = ld->vals[pos] * 1.0;
1507 un = unhalt->vals[pos] * 1.0;
1508 } else {
1509 lds = ld->sum * 1.0;
1510 un = unhalt->sum * 1.0;
1511 }
1512 res = (lds * con)/un;
1513 ret = printf("%1.3f", res);
1514 return(ret);
1515 }
1516
1517
1518 static int
fpassists(struct counters * cpu,int pos)1519 fpassists(struct counters *cpu, int pos)
1520 {
1521 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1522 int ret;
1523 struct counters *fp;
1524 struct counters *inst;
1525 double un, fpd, res;
1526
1527 inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1528 fp = find_counter(cpu, "FP_ASSIST.ANY");
1529 if (pos != -1) {
1530 fpd = fp->vals[pos] * 1.0;
1531 un = inst->vals[pos] * 1.0;
1532 } else {
1533 fpd = fp->sum * 1.0;
1534 un = inst->sum * 1.0;
1535 }
1536 res = fpd/un;
1537 ret = printf("%1.3f", res);
1538 return(ret);
1539 }
1540
1541 static int
otherassistavx(struct counters * cpu,int pos)1542 otherassistavx(struct counters *cpu, int pos)
1543 {
1544 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1545 int ret;
1546 struct counters *oth;
1547 struct counters *unhalt;
1548 double un, ot, con, res;
1549
1550 con = 75.0;
1551 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1552 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1553 if (pos != -1) {
1554 ot = oth->vals[pos] * 1.0;
1555 un = unhalt->vals[pos] * 1.0;
1556 } else {
1557 ot = oth->sum * 1.0;
1558 un = unhalt->sum * 1.0;
1559 }
1560 res = (ot * con)/un;
1561 ret = printf("%1.3f", res);
1562 return(ret);
1563 }
1564
1565 static int
otherassistsse(struct counters * cpu,int pos)1566 otherassistsse(struct counters *cpu, int pos)
1567 {
1568
1569 int ret;
1570 struct counters *oth;
1571 struct counters *unhalt;
1572 double un, ot, con, res;
1573
1574 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1575 con = 75.0;
1576 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1577 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1578 if (pos != -1) {
1579 ot = oth->vals[pos] * 1.0;
1580 un = unhalt->vals[pos] * 1.0;
1581 } else {
1582 ot = oth->sum * 1.0;
1583 un = unhalt->sum * 1.0;
1584 }
1585 res = (ot * con)/un;
1586 ret = printf("%1.3f", res);
1587 return(ret);
1588 }
1589
1590 static int
efficiency1(struct counters * cpu,int pos)1591 efficiency1(struct counters *cpu, int pos)
1592 {
1593
1594 int ret;
1595 struct counters *uops;
1596 struct counters *unhalt;
1597 double un, ot, con, res;
1598
1599 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1600 con = 4.0;
1601 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1602 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1603 if (pos != -1) {
1604 ot = uops->vals[pos] * 1.0;
1605 un = unhalt->vals[pos] * 1.0;
1606 } else {
1607 ot = uops->sum * 1.0;
1608 un = unhalt->sum * 1.0;
1609 }
1610 res = ot/(con * un);
1611 ret = printf("%1.3f", res);
1612 return(ret);
1613 }
1614
1615 static int
efficiency2(struct counters * cpu,int pos)1616 efficiency2(struct counters *cpu, int pos)
1617 {
1618
1619 int ret;
1620 struct counters *uops;
1621 struct counters *unhalt;
1622 double un, ot, res;
1623
1624 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1625 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1626 uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1627 if (pos != -1) {
1628 ot = uops->vals[pos] * 1.0;
1629 un = unhalt->vals[pos] * 1.0;
1630 } else {
1631 ot = uops->sum * 1.0;
1632 un = unhalt->sum * 1.0;
1633 }
1634 res = un/ot;
1635 ret = printf("%1.3f", res);
1636 return(ret);
1637 }
1638
1639 #define SANDY_BRIDGE_COUNT 20
1640 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1641 /*01*/ { "allocstall1", "thresh > .05",
1642 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1643 allocstall1, 2 },
1644 /* -- not defined for SB right (partial-rat_stalls) 02*/
1645 { "allocstall2", "thresh > .05",
1646 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1647 allocstall2, 2 },
1648 /*03*/ { "br_miss", "thresh >= .2",
1649 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1650 br_mispredict, 2 },
1651 /*04*/ { "splitload", "thresh >= .1",
1652 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1653 splitload_sb, 2 },
1654 /* 05*/ { "splitstore", "thresh >= .01",
1655 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1656 splitstore_sb, 2 },
1657 /*06*/ { "contested", "thresh >= .05",
1658 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1659 contested, 2 },
1660 /*07*/ { "blockstorefwd", "thresh >= .05",
1661 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1662 blockstoreforward, 2 },
1663 /*08*/ { "cache2", "thresh >= .2",
1664 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1665 cache2, 4 },
1666 /*09*/ { "cache1", "thresh >= .2",
1667 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1668 cache1, 2 },
1669 /*10*/ { "dtlbmissload", "thresh >= .1",
1670 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1671 dtlb_missload, 3 },
1672 /*11*/ { "dtlbmissstore", "thresh >= .05",
1673 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1674 dtlb_missstore, 3 },
1675 /*12*/ { "frontendstall", "thresh >= .15",
1676 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1677 frontendstall, 2 },
1678 /*13*/ { "clears", "thresh >= .02",
1679 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1680 clears, 4 },
1681 /*14*/ { "microassist", "thresh >= .05",
1682 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1683 microassist, 2 },
1684 /*15*/ { "aliasing_4k", "thresh >= .1",
1685 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1686 aliasing, 2 },
1687 /*16*/ { "fpassist", "look for a excessive value",
1688 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1689 fpassists, 2 },
1690 /*17*/ { "otherassistavx", "look for a excessive value",
1691 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1692 otherassistavx, 2},
1693 /*18*/ { "otherassistsse", "look for a excessive value",
1694 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1695 otherassistsse, 2 },
1696 /*19*/ { "eff1", "thresh < .9",
1697 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1698 efficiency1, 2 },
1699 /*20*/ { "eff2", "thresh > 1.0",
1700 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1701 efficiency2, 2 },
1702 };
1703
1704
1705 #define IVY_BRIDGE_COUNT 21
1706 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1707 /*1*/ { "eff1", "thresh < .75",
1708 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1709 efficiency1, 2 },
1710 /*2*/ { "eff2", "thresh > 1.0",
1711 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1712 efficiency2, 2 },
1713 /*3*/ { "itlbmiss", "thresh > .05",
1714 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1715 itlb_miss, 2 },
1716 /*4*/ { "icachemiss", "thresh > .05",
1717 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1718 icache_miss, 3 },
1719 /*5*/ { "lcpstall", "thresh > .05",
1720 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1721 lcp_stall, 2 },
1722 /*6*/ { "cache1", "thresh >= .2",
1723 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1724 cache1ib, 2 },
1725 /*7*/ { "cache2", "thresh >= .2",
1726 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1727 cache2ib, 2 },
1728 /*8*/ { "contested", "thresh >= .05",
1729 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1730 contested, 2 },
1731 /*9*/ { "datashare", "thresh >= .05",
1732 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1733 datasharing, 2 },
1734 /*10*/ { "blockstorefwd", "thresh >= .05",
1735 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1736 blockstoreforward, 2 },
1737 /*11*/ { "splitload", "thresh >= .1",
1738 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1739 splitloadib, 4 },
1740 /*12*/ { "splitstore", "thresh >= .01",
1741 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1742 splitstore, 2 },
1743 /*13*/ { "aliasing_4k", "thresh >= .1",
1744 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1745 aliasing, 2 },
1746 /*14*/ { "dtlbmissload", "thresh >= .1",
1747 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1748 dtlb_missload , 3},
1749 /*15*/ { "dtlbmissstore", "thresh >= .05",
1750 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1751 dtlb_missstore, 3 },
1752 /*16*/ { "br_miss", "thresh >= .2",
1753 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1754 br_mispredictib, 8 },
1755 /*17*/ { "clears", "thresh >= .02",
1756 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1757 clears, 4 },
1758 /*18*/ { "microassist", "thresh >= .05",
1759 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1760 microassist, 2 },
1761 /*19*/ { "fpassist", "look for a excessive value",
1762 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1763 fpassists, 2 },
1764 /*20*/ { "otherassistavx", "look for a excessive value",
1765 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1766 otherassistavx , 2},
1767 /*21*/ { "otherassistsse", "look for a excessive value",
1768 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1769 otherassistsse, 2 },
1770 };
1771
1772 #define HASWELL_COUNT 20
1773 static struct cpu_entry haswell[HASWELL_COUNT] = {
1774 /*1*/ { "eff1", "thresh < .75",
1775 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1776 efficiency1, 2 },
1777 /*2*/ { "eff2", "thresh > 1.0",
1778 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1779 efficiency2, 2 },
1780 /*3*/ { "itlbmiss", "thresh > .05",
1781 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1782 itlb_miss, 2 },
1783 /*4*/ { "icachemiss", "thresh > .05",
1784 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1785 icache_miss_has, 2 },
1786 /*5*/ { "lcpstall", "thresh > .05",
1787 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1788 lcp_stall, 2 },
1789 /*6*/ { "cache1", "thresh >= .2",
1790 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1791 cache1ib, 2 },
1792 /*7*/ { "cache2", "thresh >= .2",
1793 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1794 cache2has, 4 },
1795 /*8*/ { "contested", "thresh >= .05",
1796 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1797 contested_has, 2 },
1798 /*9*/ { "datashare", "thresh >= .05",
1799 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1800 datasharing_has, 2 },
1801 /*10*/ { "blockstorefwd", "thresh >= .05",
1802 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1803 blockstoreforward, 2 },
1804 /*11*/ { "splitload", "thresh >= .1",
1805 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1806 splitload , 2},
1807 /*12*/ { "splitstore", "thresh >= .01",
1808 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1809 splitstore, 2 },
1810 /*13*/ { "aliasing_4k", "thresh >= .1",
1811 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1812 aliasing, 2 },
1813 /*14*/ { "dtlbmissload", "thresh >= .1",
1814 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1815 dtlb_missload, 3 },
1816 /*15*/ { "br_miss", "thresh >= .2",
1817 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1818 br_mispredict, 2 },
1819 /*16*/ { "clears", "thresh >= .02",
1820 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1821 clears, 4 },
1822 /*17*/ { "microassist", "thresh >= .05",
1823 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1824 microassist, 2 },
1825 /*18*/ { "fpassist", "look for a excessive value",
1826 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1827 fpassists, 2 },
1828 /*19*/ { "otherassistavx", "look for a excessive value",
1829 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1830 otherassistavx, 2 },
1831 /*20*/ { "otherassistsse", "look for a excessive value",
1832 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1833 otherassistsse, 2 },
1834 };
1835
1836
/*
 * Print the formula behind the canned Broadwell expression "name",
 * followed by a hint on how to read the resulting value.
 *
 * name -- expression name as given to -e (e.g. "eff1", "br_miss").
 *
 * An unrecognized name is reported on stdout and the hint says
 * "unknown entry"; the function never fails.
 *
 * NOTE(review): the hints printed here for "datashare" (> .05) and
 * "microassist" (>= .05) differ from the thresholds stored in the
 * broadwell[] table (>= .05 and >= .2), and a few formulas name events
 * that differ from the ones the table's pmcstat commands collect
 * (cache1, datashare, br_miss).  Which side is right cannot be told
 * from this file, so they are left as they were.
 */
static void
explain_name_broad(const char *name)
{
	static const struct {
		const char *name;	/* expression name (-e argument) */
		const char *formula;	/* text printed, newline included */
		const char *thresh;	/* how to interpret the value */
	} broad_help[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		/* Event name spelled as in the broadwell[] pmcstat command. */
		{ "itlbmiss",
		  "Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "cache2",
		  "Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		/*
		 * Spelling of BR_MISP_RETIRED, ALL_BRANCHES_PS and
		 * UOPS_ISSUED corrected; the formula itself is unchanged.
		 */
		{ "br_miss",
		  "Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"
		  " (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"
		  "CPU_CLK_UNHALTED.THREAD * 4)\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "microassist",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
	};
	const char *mythresh = NULL;
	size_t i;

	for (i = 0; i < sizeof(broad_help) / sizeof(broad_help[0]); i++) {
		if (strcmp(name, broad_help[i].name) == 0) {
			/* "%s" keeps the text from being parsed as a format. */
			printf("%s", broad_help[i].formula);
			mythresh = broad_help[i].thresh;
			break;
		}
	}
	if (mythresh == NULL) {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1905
1906
1907 #define BROADWELL_COUNT 17
1908 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1909 /*1*/ { "eff1", "thresh < .75",
1910 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1911 efficiency1, 2 },
1912 /*2*/ { "eff2", "thresh > 1.0",
1913 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1914 efficiency2, 2 },
1915 /*3*/ { "itlbmiss", "thresh > .05",
1916 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1917 itlb_miss_broad, 3 },
1918 /*4*/ { "icachemiss", "thresh > .05",
1919 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1920 icache_miss_has, 2 },
1921 /*5*/ { "lcpstall", "thresh > .05",
1922 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1923 lcp_stall, 2 },
1924 /*6*/ { "cache1", "thresh >= .1",
1925 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1926 cache1broad, 2 },
1927 /*7*/ { "cache2", "thresh >= .2",
1928 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1929 cache2broad, 2 },
1930 /*8*/ { "contested", "thresh >= .05",
1931 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1932 contestedbroad, 2 },
1933 /*9*/ { "datashare", "thresh >= .05",
1934 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1935 datasharing_has, 2 },
1936 /*10*/ { "blockstorefwd", "thresh >= .05",
1937 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1938 blockstoreforward, 2 },
1939 /*11*/ { "aliasing_4k", "thresh >= .1",
1940 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1941 aliasing_broad, 2 },
1942 /*12*/ { "dtlbmissload", "thresh >= .1",
1943 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1944 dtlb_missload, 3 },
1945 /*13*/ { "br_miss", "thresh >= .2",
1946 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1947 br_mispredict_broad, 7 },
1948 /*14*/ { "clears", "thresh >= .02",
1949 "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1950 clears_broad, 5 },
1951 /*15*/ { "fpassist", "look for a excessive value",
1952 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1953 fpassists, 2 },
1954 /*16*/ { "otherassistavx", "look for a excessive value",
1955 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1956 otherassistavx, 2 },
1957 /*17*/ { "microassist", "thresh >= .2",
1958 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1",
1959 microassist_broad, 4 },
1960 };
1961
1962
1963 static void
set_sandybridge(void)1964 set_sandybridge(void)
1965 {
1966 strcpy(the_cpu.cputype, "SandyBridge PMC");
1967 the_cpu.number = SANDY_BRIDGE_COUNT;
1968 the_cpu.ents = sandy_bridge;
1969 the_cpu.explain = explain_name_sb;
1970 }
1971
1972 static void
set_ivybridge(void)1973 set_ivybridge(void)
1974 {
1975 strcpy(the_cpu.cputype, "IvyBridge PMC");
1976 the_cpu.number = IVY_BRIDGE_COUNT;
1977 the_cpu.ents = ivy_bridge;
1978 the_cpu.explain = explain_name_ib;
1979 }
1980
1981
1982 static void
set_haswell(void)1983 set_haswell(void)
1984 {
1985 strcpy(the_cpu.cputype, "HASWELL PMC");
1986 the_cpu.number = HASWELL_COUNT;
1987 the_cpu.ents = haswell;
1988 the_cpu.explain = explain_name_has;
1989 }
1990
1991
1992 static void
set_broadwell(void)1993 set_broadwell(void)
1994 {
1995 strcpy(the_cpu.cputype, "HASWELL PMC");
1996 the_cpu.number = BROADWELL_COUNT;
1997 the_cpu.ents = broadwell;
1998 the_cpu.explain = explain_name_broad;
1999 }
2000
2001
2002 static int
set_expression(const char * name)2003 set_expression(const char *name)
2004 {
2005 int found = 0, i;
2006 for(i=0 ; i< the_cpu.number; i++) {
2007 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2008 found = 1;
2009 expression = the_cpu.ents[i].func;
2010 command = the_cpu.ents[i].command;
2011 threshold = the_cpu.ents[i].thresh;
2012 if (the_cpu.ents[i].counters_required > max_pmc_counters) {
2013 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2014 the_cpu.ents[i].name,
2015 the_cpu.ents[i].counters_required, max_pmc_counters);
2016 printf("Sorry this test can not be run\n");
2017 if (run_all == 0) {
2018 exit(-1);
2019 } else {
2020 return(-1);
2021 }
2022 }
2023 break;
2024 }
2025 }
2026 if (!found) {
2027 printf("For CPU type %s we have no expression:%s\n",
2028 the_cpu.cputype, name);
2029 exit(-1);
2030 }
2031 return(0);
2032 }
2033
2034
2035
2036
2037
2038 static int
validate_expression(char * name)2039 validate_expression(char *name)
2040 {
2041 int i, found;
2042
2043 found = 0;
2044 for(i=0 ; i< the_cpu.number; i++) {
2045 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2046 found = 1;
2047 break;
2048 }
2049 }
2050 if (!found) {
2051 return(-1);
2052 }
2053 return (0);
2054 }
2055
2056 static void
do_expression(struct counters * cpu,int pos)2057 do_expression(struct counters *cpu, int pos)
2058 {
2059 if (expression == NULL)
2060 return;
2061 (*expression)(cpu, pos);
2062 }
2063
2064 static void
process_header(int idx,char * p)2065 process_header(int idx, char *p)
2066 {
2067 struct counters *up;
2068 int i, len, nlen;
2069 /*
2070 * Given header element idx, at p in
2071 * form 's/NN/nameof'
2072 * process the entry to pull out the name and
2073 * the CPU number.
2074 */
2075 if (strncmp(p, "s/", 2)) {
2076 printf("Check -- invalid header no s/ in %s\n",
2077 p);
2078 return;
2079 }
2080 up = &cnts[idx];
2081 up->cpu = strtol(&p[2], NULL, 10);
2082 len = strlen(p);
2083 for (i=2; i<len; i++) {
2084 if (p[i] == '/') {
2085 nlen = strlen(&p[(i+1)]);
2086 if (nlen < (MAX_NLEN-1)) {
2087 strcpy(up->counter_name, &p[(i+1)]);
2088 } else {
2089 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2090 }
2091 }
2092 }
2093 }
2094
2095 static void
build_counters_from_header(FILE * io)2096 build_counters_from_header(FILE *io)
2097 {
2098 char buffer[8192], *p;
2099 int i, len, cnt;
2100 size_t mlen;
2101
2102 /* We have a new start, lets
2103 * setup our headers and cpus.
2104 */
2105 if (fgets(buffer, sizeof(buffer), io) == NULL) {
2106 printf("First line can't be read from file err:%d\n", errno);
2107 return;
2108 }
2109 /*
2110 * Ok output is an array of counters. Once
2111 * we start to read the values in we must
2112 * put them in there slot to match there CPU and
2113 * counter being updated. We create a mass array
2114 * of the counters, filling in the CPU and
2115 * counter name.
2116 */
2117 /* How many do we get? */
2118 len = strlen(buffer);
2119 for (i=0, cnt=0; i<len; i++) {
2120 if (strncmp(&buffer[i], "s/", 2) == 0) {
2121 cnt++;
2122 for(;i<len;i++) {
2123 if (buffer[i] == ' ')
2124 break;
2125 }
2126 }
2127 }
2128 mlen = sizeof(struct counters) * cnt;
2129 cnts = malloc(mlen);
2130 ncnts = cnt;
2131 if (cnts == NULL) {
2132 printf("No memory err:%d\n", errno);
2133 return;
2134 }
2135 memset(cnts, 0, mlen);
2136 for (i=0, cnt=0; i<len; i++) {
2137 if (strncmp(&buffer[i], "s/", 2) == 0) {
2138 p = &buffer[i];
2139 for(;i<len;i++) {
2140 if (buffer[i] == ' ') {
2141 buffer[i] = 0;
2142 break;
2143 }
2144 }
2145 process_header(cnt, p);
2146 cnt++;
2147 }
2148 }
2149 if (verbose)
2150 printf("We have %d entries\n", cnt);
2151 }
2152 extern int max_to_collect;
2153 int max_to_collect = MAX_COUNTER_SLOTS;
2154
2155 static int
read_a_line(FILE * io)2156 read_a_line(FILE *io)
2157 {
2158 char buffer[8192], *p, *stop;
2159 int pos, i;
2160
2161 if (fgets(buffer, sizeof(buffer), io) == NULL) {
2162 return(0);
2163 }
2164 p = buffer;
2165 for (i=0; i<ncnts; i++) {
2166 pos = cnts[i].pos;
2167 cnts[i].vals[pos] = strtol(p, &stop, 0);
2168 cnts[i].pos++;
2169 cnts[i].sum += cnts[i].vals[pos];
2170 p = stop;
2171 }
2172 return (1);
2173 }
2174
2175 extern int cpu_count_out;
2176 int cpu_count_out=0;
2177
2178 static void
print_header(void)2179 print_header(void)
2180 {
2181 int i, cnt, printed_cnt;
2182
2183 printf("*********************************\n");
2184 for(i=0, cnt=0; i<MAX_CPU; i++) {
2185 if (glob_cpu[i]) {
2186 cnt++;
2187 }
2188 }
2189 cpu_count_out = cnt;
2190 for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2191 if (glob_cpu[i]) {
2192 printf("CPU%d", i);
2193 printed_cnt++;
2194 }
2195 if (printed_cnt == cnt) {
2196 printf("\n");
2197 break;
2198 } else {
2199 printf("\t");
2200 }
2201 }
2202 }
2203
2204 static void
lace_cpus_together(void)2205 lace_cpus_together(void)
2206 {
2207 int i, j, lace_cpu;
2208 struct counters *cpat, *at;
2209
2210 for(i=0; i<ncnts; i++) {
2211 cpat = &cnts[i];
2212 if (cpat->next_cpu) {
2213 /* Already laced in */
2214 continue;
2215 }
2216 lace_cpu = cpat->cpu;
2217 if (lace_cpu >= MAX_CPU) {
2218 printf("CPU %d to big\n", lace_cpu);
2219 continue;
2220 }
2221 if (glob_cpu[lace_cpu] == NULL) {
2222 glob_cpu[lace_cpu] = cpat;
2223 } else {
2224 /* Already processed this cpu */
2225 continue;
2226 }
2227 /* Ok look forward for cpu->cpu and link in */
2228 for(j=(i+1); j<ncnts; j++) {
2229 at = &cnts[j];
2230 if (at->next_cpu) {
2231 continue;
2232 }
2233 if (at->cpu == lace_cpu) {
2234 /* Found one */
2235 cpat->next_cpu = at;
2236 cpat = at;
2237 }
2238 }
2239 }
2240 }
2241
2242
2243 static void
process_file(char * filename)2244 process_file(char *filename)
2245 {
2246 FILE *io;
2247 int i;
2248 int line_at, not_done;
2249 pid_t pid_of_command=0;
2250
2251 if (filename == NULL) {
2252 io = my_popen(command, "r", &pid_of_command);
2253 } else {
2254 io = fopen(filename, "r");
2255 if (io == NULL) {
2256 printf("Can't process file %s err:%d\n",
2257 filename, errno);
2258 return;
2259 }
2260 }
2261 build_counters_from_header(io);
2262 if (cnts == NULL) {
2263 /* Nothing we can do */
2264 printf("Nothing to do -- no counters built\n");
2265 if (io) {
2266 fclose(io);
2267 }
2268 return;
2269 }
2270 lace_cpus_together();
2271 print_header();
2272 if (verbose) {
2273 for (i=0; i<ncnts; i++) {
2274 printf("Counter:%s cpu:%d index:%d\n",
2275 cnts[i].counter_name,
2276 cnts[i].cpu, i);
2277 }
2278 }
2279 line_at = 0;
2280 not_done = 1;
2281 while(not_done) {
2282 if (read_a_line(io)) {
2283 line_at++;
2284 } else {
2285 break;
2286 }
2287 if (line_at >= max_to_collect) {
2288 not_done = 0;
2289 }
2290 if (filename == NULL) {
2291 int cnt;
2292 /* For the ones we dynamically open we print now */
2293 for(i=0, cnt=0; i<MAX_CPU; i++) {
2294 do_expression(glob_cpu[i], (line_at-1));
2295 cnt++;
2296 if (cnt == cpu_count_out) {
2297 printf("\n");
2298 break;
2299 } else {
2300 printf("\t");
2301 }
2302 }
2303 }
2304 }
2305 if (filename) {
2306 fclose(io);
2307 } else {
2308 my_pclose(io, pid_of_command);
2309 }
2310 }
/*
 * CPUID access.
 *
 * cpuid(in, a, b, c, d) -- macro; on amd64 executes CPUID with "in" in
 *     %eax and stores %eax/%ebx/%ecx/%edx into a/b/c/d.  Expands to
 *     nothing elsewhere.
 * do_cpuid(ax, cx, p)   -- executes CPUID with ax in %eax and cx in
 *     %ecx and stores the four result registers in p[0..3].
 *
 * On other architectures do_cpuid() now reports all registers as zero
 * instead of leaving p[] untouched, so a caller never computes with
 * uninitialized values.
 */
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
  asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));

static __inline void
do_cpuid(u_int ax, u_int cx, u_int *p)
{
	__asm __volatile("cpuid"
	    : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
	    :  "0" (ax), "c" (cx) );
}

#else
#define cpuid(in, a, b, c, d)
static __inline void
do_cpuid(u_int ax, u_int cx, u_int *p)
{
	(void)ax;
	(void)cx;
	p[0] = p[1] = p[2] = p[3] = 0;
}

#endif
2331
2332 static void
get_cpuid_set(void)2333 get_cpuid_set(void)
2334 {
2335 unsigned long eax, ebx, ecx, edx;
2336 int model;
2337 pid_t pid_of_command=0;
2338 size_t sz, len;
2339 FILE *io;
2340 char linebuf[1024], *str;
2341 u_int reg[4];
2342
2343 eax = ebx = ecx = edx = 0;
2344
2345 cpuid(0, eax, ebx, ecx, edx);
2346 if (ebx == 0x68747541) {
2347 printf("AMD processors are not supported by this program\n");
2348 printf("Sorry\n");
2349 exit(0);
2350 } else if (ebx == 0x6972794) {
2351 printf("Cyrix processors are not supported by this program\n");
2352 printf("Sorry\n");
2353 exit(0);
2354 } else if (ebx == 0x756e6547) {
2355 printf("Genuine Intel\n");
2356 } else {
2357 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2358 exit(0);
2359 }
2360 cpuid(1, eax, ebx, ecx, edx);
2361 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2362 printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2363 switch (eax & 0xF00) {
2364 case 0x500: /* Pentium family processors */
2365 printf("Intel Pentium P5\n");
2366 goto not_supported;
2367 break;
2368 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */
2369 switch (model) {
2370 case 0x1:
2371 printf("Intel Pentium P6\n");
2372 goto not_supported;
2373 break;
2374 case 0x3:
2375 case 0x5:
2376 printf("Intel PII\n");
2377 goto not_supported;
2378 break;
2379 case 0x6: case 0x16:
2380 printf("Intel CL\n");
2381 goto not_supported;
2382 break;
2383 case 0x7: case 0x8: case 0xA: case 0xB:
2384 printf("Intel PIII\n");
2385 goto not_supported;
2386 break;
2387 case 0x9: case 0xD:
2388 printf("Intel PM\n");
2389 goto not_supported;
2390 break;
2391 case 0xE:
2392 printf("Intel CORE\n");
2393 goto not_supported;
2394 break;
2395 case 0xF:
2396 printf("Intel CORE2\n");
2397 goto not_supported;
2398 break;
2399 case 0x17:
2400 printf("Intel CORE2EXTREME\n");
2401 goto not_supported;
2402 break;
2403 case 0x1C: /* Per Intel document 320047-002. */
2404 printf("Intel ATOM\n");
2405 goto not_supported;
2406 break;
2407 case 0x1A:
2408 case 0x1E: /*
2409 * Per Intel document 253669-032 9/2009,
2410 * pages A-2 and A-57
2411 */
2412 case 0x1F: /*
2413 * Per Intel document 253669-032 9/2009,
2414 * pages A-2 and A-57
2415 */
2416 printf("Intel COREI7\n");
2417 goto not_supported;
2418 break;
2419 case 0x2E:
2420 printf("Intel NEHALEM\n");
2421 goto not_supported;
2422 break;
2423 case 0x25: /* Per Intel document 253669-033US 12/2009. */
2424 case 0x2C: /* Per Intel document 253669-033US 12/2009. */
2425 printf("Intel WESTMERE\n");
2426 goto not_supported;
2427 break;
2428 case 0x2F: /* Westmere-EX, seen in wild */
2429 printf("Intel WESTMERE\n");
2430 goto not_supported;
2431 break;
2432 case 0x2A: /* Per Intel document 253669-039US 05/2011. */
2433 printf("Intel SANDYBRIDGE\n");
2434 set_sandybridge();
2435 break;
2436 case 0x2D: /* Per Intel document 253669-044US 08/2012. */
2437 printf("Intel SANDYBRIDGE_XEON\n");
2438 set_sandybridge();
2439 break;
2440 case 0x3A: /* Per Intel document 253669-043US 05/2012. */
2441 printf("Intel IVYBRIDGE\n");
2442 set_ivybridge();
2443 break;
2444 case 0x3E: /* Per Intel document 325462-045US 01/2013. */
2445 printf("Intel IVYBRIDGE_XEON\n");
2446 set_ivybridge();
2447 break;
2448 case 0x3F: /* Per Intel document 325462-045US 09/2014. */
2449 printf("Intel HASWELL (Xeon)\n");
2450 set_haswell();
2451 break;
2452 case 0x3C: /* Per Intel document 325462-045US 01/2013. */
2453 case 0x45:
2454 case 0x46:
2455 printf("Intel HASWELL\n");
2456 set_haswell();
2457 break;
2458
2459 case 0x4e:
2460 case 0x5e:
2461 printf("Intel SKY-LAKE\n");
2462 goto not_supported;
2463 break;
2464 case 0x3D:
2465 case 0x47:
2466 printf("Intel BROADWELL\n");
2467 set_broadwell();
2468 break;
2469 case 0x4f:
2470 case 0x56:
2471 printf("Intel BROADWEL (Xeon)\n");
2472 set_broadwell();
2473 break;
2474
2475 case 0x4D:
2476 /* Per Intel document 330061-001 01/2014. */
2477 printf("Intel ATOM_SILVERMONT\n");
2478 goto not_supported;
2479 break;
2480 default:
2481 printf("Intel model 0x%x is not known -- sorry\n",
2482 model);
2483 goto not_supported;
2484 break;
2485 }
2486 break;
2487 case 0xF00: /* P4 */
2488 printf("Intel unknown model %d\n", model);
2489 goto not_supported;
2490 break;
2491 }
2492 do_cpuid(0xa, 0, reg);
2493 max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2494 printf("We have %d PMC counters to work with\n", max_pmc_counters);
2495 /* Ok lets load the list of all known PMC's */
2496 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2497 if (valid_pmcs == NULL) {
2498 /* Likely */
2499 pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2500 sz = sizeof(char *) * pmc_allocated_cnt;
2501 valid_pmcs = malloc(sz);
2502 if (valid_pmcs == NULL) {
2503 printf("No memory allocation fails at startup?\n");
2504 exit(-1);
2505 }
2506 memset(valid_pmcs, 0, sz);
2507 }
2508
2509 while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2510 if (linebuf[0] != '\t') {
2511 /* sometimes headers ;-) */
2512 continue;
2513 }
2514 len = strlen(linebuf);
2515 if (linebuf[(len-1)] == '\n') {
2516 /* Likely */
2517 linebuf[(len-1)] = 0;
2518 }
2519 str = &linebuf[1];
2520 len = strlen(str) + 1;
2521 valid_pmcs[valid_pmc_cnt] = malloc(len);
2522 if (valid_pmcs[valid_pmc_cnt] == NULL) {
2523 printf("No memory2 allocation fails at startup?\n");
2524 exit(-1);
2525 }
2526 memset(valid_pmcs[valid_pmc_cnt], 0, len);
2527 strcpy(valid_pmcs[valid_pmc_cnt], str);
2528 valid_pmc_cnt++;
2529 if (valid_pmc_cnt >= pmc_allocated_cnt) {
2530 /* Got to expand -- unlikely */
2531 char **more;
2532
2533 sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2534 more = malloc(sz);
2535 if (more == NULL) {
2536 printf("No memory3 allocation fails at startup?\n");
2537 exit(-1);
2538 }
2539 memset(more, 0, sz);
2540 memcpy(more, valid_pmcs, sz);
2541 pmc_allocated_cnt *= 2;
2542 free(valid_pmcs);
2543 valid_pmcs = more;
2544 }
2545 }
2546 my_pclose(io, pid_of_command);
2547 return;
2548 not_supported:
2549 printf("Not supported\n");
2550 exit(-1);
2551 }
2552
2553 static void
explain_all(void)2554 explain_all(void)
2555 {
2556 int i;
2557 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2558 printf("-------------------------------------------------------------\n");
2559 for(i=0; i<the_cpu.number; i++){
2560 printf("For -e %s ", the_cpu.ents[i].name);
2561 (*the_cpu.explain)(the_cpu.ents[i].name);
2562 printf("----------------------------\n");
2563 }
2564 }
2565
/*
 * Check that a single PMC can be programmed, and print the verdict.
 *
 * pmc        -- PMC name to test.
 * out_so_far -- number of characters the caller already printed on
 *               this line; the verdict is padded out to column 50.
 *
 * Runs "/usr/sbin/pmcstat -w .25 -c 0 -s <pmc>" through my_popen().
 * If the first output line contains "ERROR" the test fails; if it
 * contains the PMC name, the next line (the first sample) is printed
 * after "Pass".  Anything else is reported as a failure.
 *
 * Fixes relative to the earlier version:
 *  - the command is built with snprintf() and an over-long PMC name is
 *    rejected rather than overflowing the 1024 byte buffer;
 *  - an empty first line no longer reads line[-1];
 *  - leading blanks of the sample line are skipped one at a time; the
 *    old loop advanced twice per blank and could drop the first digit
 *    of the value.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command = 0;
	char my_command[1024];
	char line[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for (i = 0; i < len; i++) {
			printf(" ");
		}
	}
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* What we expect to find in the header line is the name itself. */
	len = (int)strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen - 1)] == '\n') {
		line[(llen - 1)] = 0;
		llen--;
	}
	for (i = 2; i < (llen - len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/* Skip the blanks in front of the sample value. */
			j = 0;
			while (line[j] == ' ')
				j++;
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for (k = 0; k < (20 - len); k++) {
					printf(" ");
				}
			}
			if (len) {
				/* The sample line still carries its newline. */
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);
}
2638
/*
 * Add "name" to the string list "vars" unless it is already there.
 *
 * vars    -- array of string pointers; slots from cur_cnt on must be NULL.
 * cur_cnt -- number of slots currently in use.
 * name    -- string to add; a private copy is stored.
 *
 * Returns 1 when the name was stored in vars[cur_cnt], 0 when one of
 * the first cur_cnt slots already holds an equal string.  Exits when
 * vars[cur_cnt] is unexpectedly occupied or memory runs out.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char **slot, **end = vars + cur_cnt;
	char *copy;
	size_t bytes;

	for (slot = vars; slot < end; slot++) {
		if (strcmp(*slot, name) == 0)
			return (0);	/* Already have */
	}
	if (*end != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		    cur_cnt, *end);
		exit(-1);
	}

	/* New name: store a zero-filled, NUL terminated copy. */
	bytes = strlen(name) + 1;
	copy = calloc(1, bytes);
	if (copy == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(copy, name, bytes);
	*end = copy;
	return (1);
}
2666
2667 static char *
build_command_for_exp(struct expression * exp)2668 build_command_for_exp(struct expression *exp)
2669 {
2670 /*
2671 * Build the pmcstat command to handle
2672 * the passed in expression.
2673 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2674 * where NNN and QQQ represent the PMC's in the expression
2675 * uniquely..
2676 */
2677 char forming[1024];
2678 int cnt_pmc, alloced_pmcs, i;
2679 struct expression *at;
2680 char **vars, *cmd;
2681 size_t mal;
2682
2683 alloced_pmcs = cnt_pmc = 0;
2684 /* first how many do we have */
2685 at = exp;
2686 while (at) {
2687 if (at->type == TYPE_VALUE_PMC) {
2688 cnt_pmc++;
2689 }
2690 at = at->next;
2691 }
2692 if (cnt_pmc == 0) {
2693 printf("No PMC's in your expression -- nothing to do!!\n");
2694 exit(0);
2695 }
2696 mal = cnt_pmc * sizeof(char *);
2697 vars = malloc(mal);
2698 if (vars == NULL) {
2699 printf("No memory\n");
2700 exit(-1);
2701 }
2702 memset(vars, 0, mal);
2703 at = exp;
2704 while (at) {
2705 if (at->type == TYPE_VALUE_PMC) {
2706 if(add_it_to(vars, alloced_pmcs, at->name)) {
2707 alloced_pmcs++;
2708 }
2709 }
2710 at = at->next;
2711 }
2712 /* Now we have a unique list in vars so create our command */
2713 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
2714 for(i=0; i<alloced_pmcs; i++) {
2715 mal += strlen(vars[i]) + 4; /* var + " -s " */
2716 }
2717 cmd = malloc((mal+2));
2718 if (cmd == NULL) {
2719 printf("%s out of mem\n", __FUNCTION__);
2720 exit(-1);
2721 }
2722 memset(cmd, 0, (mal+2));
2723 strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2724 at = exp;
2725 for(i=0; i<alloced_pmcs; i++) {
2726 sprintf(forming, " -s %s", vars[i]);
2727 strcat(cmd, forming);
2728 free(vars[i]);
2729 vars[i] = NULL;
2730 }
2731 free(vars);
2732 return(cmd);
2733 }
2734
2735 static int
user_expr(struct counters * cpu,int pos)2736 user_expr(struct counters *cpu, int pos)
2737 {
2738 int ret;
2739 double res;
2740 struct counters *var;
2741 struct expression *at;
2742
2743 at = master_exp;
2744 while (at) {
2745 if (at->type == TYPE_VALUE_PMC) {
2746 var = find_counter(cpu, at->name);
2747 if (var == NULL) {
2748 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2749 exit(-1);
2750 }
2751 if (pos != -1) {
2752 at->value = var->vals[pos] * 1.0;
2753 } else {
2754 at->value = var->sum * 1.0;
2755 }
2756 }
2757 at = at->next;
2758 }
2759 res = run_expr(master_exp, 1, NULL);
2760 ret = printf("%1.3f", res);
2761 return(ret);
2762 }
2763
2764
2765 static void
set_manual_exp(struct expression * exp)2766 set_manual_exp(struct expression *exp)
2767 {
2768 expression = user_expr;
2769 command = build_command_for_exp(exp);
2770 threshold = "User defined threshold";
2771 }
2772
2773 static void
run_tests(void)2774 run_tests(void)
2775 {
2776 int i, lenout;
2777 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2778 printf("------------------------------------------------------------------------\n");
2779 for(i=0; i<valid_pmc_cnt; i++) {
2780 lenout = printf("%s", valid_pmcs[i]);
2781 fflush(stdout);
2782 test_for_a_pmc(valid_pmcs[i], lenout);
2783 }
2784 }
2785 static void
list_all(void)2786 list_all(void)
2787 {
2788 int i, cnt, j;
2789 printf("PMC Abbreviation\n");
2790 printf("--------------------------------------------------------------\n");
2791 for(i=0; i<valid_pmc_cnt; i++) {
2792 cnt = printf("%s", valid_pmcs[i]);
2793 for(j=cnt; j<52; j++) {
2794 printf(" ");
2795 }
2796 printf("%%%d\n", i);
2797 }
2798 }
2799
2800
2801 int
main(int argc,char ** argv)2802 main(int argc, char **argv)
2803 {
2804 int i, j, cnt;
2805 char *filename=NULL;
2806 const char *name=NULL;
2807 int help_only = 0;
2808 int test_mode = 0;
2809 int test_at = 0;
2810
2811 get_cpuid_set();
2812 memset(glob_cpu, 0, sizeof(glob_cpu));
2813 while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2814 switch (i) {
2815 case 'A':
2816 run_all = 1;
2817 break;
2818 case 'L':
2819 list_all();
2820 return(0);
2821 case 'H':
2822 printf("**********************************\n");
2823 explain_all();
2824 printf("**********************************\n");
2825 return(0);
2826 break;
2827 case 'T':
2828 test_mode = 1;
2829 break;
2830 case 'E':
2831 master_exp = parse_expression(optarg);
2832 if (master_exp) {
2833 set_manual_exp(master_exp);
2834 }
2835 break;
2836 case 'e':
2837 if (validate_expression(optarg)) {
2838 printf("Unknown expression %s\n", optarg);
2839 return(0);
2840 }
2841 name = optarg;
2842 set_expression(optarg);
2843 break;
2844 case 'm':
2845 max_to_collect = strtol(optarg, NULL, 0);
2846 if (max_to_collect > MAX_COUNTER_SLOTS) {
2847 /* You can't collect more than max in array */
2848 max_to_collect = MAX_COUNTER_SLOTS;
2849 }
2850 break;
2851 case 'v':
2852 verbose++;
2853 break;
2854 case 'h':
2855 help_only = 1;
2856 break;
2857 case 'i':
2858 filename = optarg;
2859 break;
2860 case '?':
2861 default:
2862 use:
2863 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2864 argv[0]);
2865 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2866 printf("-v -- verbose dump debug type things -- you don't want this\n");
2867 printf("-m N -- maximum to collect is N measurments\n");
2868 printf("-e expr-name -- Do expression expr-name\n");
2869 printf("-E 'your expression' -- Do your expression\n");
2870 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2871 printf("-H -- Don't run anything, just explain all canned expressions\n");
2872 printf("-T -- Test all PMC's defined by this processor\n");
2873 printf("-A -- Run all canned tests\n");
2874 return(0);
2875 break;
2876 };
2877 }
2878 if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2879 (test_mode == 0) && (master_exp == NULL)) {
2880 printf("Without setting an expression we cannot dynamically gather information\n");
2881 printf("you must supply a filename (and you probably want verbosity)\n");
2882 goto use;
2883 }
2884 if (run_all && max_to_collect > 10) {
2885 max_to_collect = 3;
2886 }
2887 if (test_mode) {
2888 run_tests();
2889 return(0);
2890 }
2891 printf("*********************************\n");
2892 if ((master_exp == NULL) && name) {
2893 (*the_cpu.explain)(name);
2894 } else if (master_exp) {
2895 printf("Examine your expression ");
2896 print_exp(master_exp);
2897 printf("User defined threshold\n");
2898 }
2899 if (help_only) {
2900 return(0);
2901 }
2902 if (run_all) {
2903 more:
2904 name = the_cpu.ents[test_at].name;
2905 printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2906 test_at++;
2907 if (set_expression(name) == -1) {
2908 if (test_at >= the_cpu.number) {
2909 goto done;
2910 } else
2911 goto more;
2912 }
2913
2914 }
2915 process_file(filename);
2916 if (verbose >= 2) {
2917 for (i=0; i<ncnts; i++) {
2918 printf("Counter:%s cpu:%d index:%d\n",
2919 cnts[i].counter_name,
2920 cnts[i].cpu, i);
2921 for(j=0; j<cnts[i].pos; j++) {
2922 printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2923 }
2924 printf(" sum - %ld\n", (long int)cnts[i].sum);
2925 }
2926 }
2927 if (expression == NULL) {
2928 return(0);
2929 }
2930 if (max_to_collect > 1) {
2931 for(i=0, cnt=0; i<MAX_CPU; i++) {
2932 if (glob_cpu[i]) {
2933 do_expression(glob_cpu[i], -1);
2934 cnt++;
2935 if (cnt == cpu_count_out) {
2936 printf("\n");
2937 break;
2938 } else {
2939 printf("\t");
2940 }
2941 }
2942 }
2943 }
2944 if (run_all && (test_at < the_cpu.number)) {
2945 memset(glob_cpu, 0, sizeof(glob_cpu));
2946 ncnts = 0;
2947 printf("*********************************\n");
2948 goto more;
2949 } else if (run_all) {
2950 done:
2951 printf("*********************************\n");
2952 }
2953 return(0);
2954 }
2955