/*
 * pmu.c — x86 performance-monitoring-unit (PMU) functional tests:
 * GP and fixed counters, counter overflow / PMI delivery, counter-mask
 * (cmask) filtering, and RDPMC behavior.
 */
  1. #include "x86/msr.h"
  2. #include "x86/processor.h"
  3. #include "x86/apic-defs.h"
  4. #include "x86/apic.h"
  5. #include "x86/desc.h"
  6. #include "x86/isr.h"
  7. #include "x86/vm.h"
  8. #include "libcflat.h"
  9. #include <stdint.h>
  10. #define FIXED_CNT_INDEX 32
  11. #define PC_VECTOR 32
  12. #define EVNSEL_EVENT_SHIFT 0
  13. #define EVNTSEL_UMASK_SHIFT 8
  14. #define EVNTSEL_USR_SHIFT 16
  15. #define EVNTSEL_OS_SHIFT 17
  16. #define EVNTSEL_EDGE_SHIFT 18
  17. #define EVNTSEL_PC_SHIFT 19
  18. #define EVNTSEL_INT_SHIFT 20
  19. #define EVNTSEL_EN_SHIF 22
  20. #define EVNTSEL_INV_SHIF 23
  21. #define EVNTSEL_CMASK_SHIFT 24
  22. #define EVNTSEL_EN (1 << EVNTSEL_EN_SHIF)
  23. #define EVNTSEL_USR (1 << EVNTSEL_USR_SHIFT)
  24. #define EVNTSEL_OS (1 << EVNTSEL_OS_SHIFT)
  25. #define EVNTSEL_PC (1 << EVNTSEL_PC_SHIFT)
  26. #define EVNTSEL_INT (1 << EVNTSEL_INT_SHIFT)
  27. #define EVNTSEL_INV (1 << EVNTSEL_INV_SHIF)
  28. #define N 1000000
/* Software descriptor for one hardware counter under test. */
typedef struct {
	uint32_t ctr;		/* counter MSR address (GP or fixed-function) */
	uint32_t config;	/* EVNTSEL-style event select and flag bits */
	uint64_t count;		/* preload value on start, final count after stop */
	int idx;		/* bit index in GLOBAL_CTRL, set by global_enable() */
} pmu_counter_t;
/* CPUID.0xA EAX: PMU version and general-purpose counter geometry. */
union cpuid10_eax {
	struct {
		unsigned int version_id:8;	/* architectural PMU version (0 = no PMU) */
		unsigned int num_counters:8;	/* number of GP counters per core */
		unsigned int bit_width:8;	/* GP counter width in bits */
		unsigned int mask_length:8;	/* valid bits in the EBX event mask */
	} split;
	unsigned int full;
} eax;
/*
 * CPUID.0xA EBX: architectural event availability.  A SET bit means the
 * corresponding event is NOT available; bit order matches gp_events[].
 */
union cpuid10_ebx {
	struct {
		unsigned int no_unhalted_core_cycles:1;
		unsigned int no_instructions_retired:1;
		unsigned int no_unhalted_reference_cycles:1;
		unsigned int no_llc_reference:1;
		unsigned int no_llc_misses:1;
		unsigned int no_branch_instruction_retired:1;
		unsigned int no_branch_misses_retired:1;
	} split;
	unsigned int full;
} ebx;
/* CPUID.0xA EDX: fixed-function counter count and width. */
union cpuid10_edx {
	struct {
		unsigned int num_counters_fixed:5;
		unsigned int bit_width_fixed:8;
		unsigned int reserved:19;
	} split;
	unsigned int full;
} edx;
  64. struct pmu_event {
  65. const char *name;
  66. uint32_t unit_sel;
  67. int min;
  68. int max;
  69. } gp_events[] = {
  70. {"core cycles", 0x003c, 1*N, 50*N},
  71. {"instructions", 0x00c0, 10*N, 10.2*N},
  72. {"ref cycles", 0x013c, 0.1*N, 30*N},
  73. {"llc refference", 0x4f2e, 1, 2*N},
  74. {"llc misses", 0x412e, 1, 1*N},
  75. {"branches", 0x00c4, 1*N, 1.1*N},
  76. {"branch misses", 0x00c5, 0, 0.1*N},
  77. }, fixed_events[] = {
  78. {"fixed 1", MSR_CORE_PERF_FIXED_CTR0, 10*N, 10.2*N},
  79. {"fixed 2", MSR_CORE_PERF_FIXED_CTR0 + 1, 1*N, 30*N},
  80. {"fixed 3", MSR_CORE_PERF_FIXED_CTR0 + 2, 0.1*N, 30*N}
  81. };
/* Number of GP counters to exercise (capped to ARRAY_SIZE(gp_events) in main). */
static int num_counters;
/* Scratch buffer that loop() repeatedly reads to generate memory traffic. */
char *buf;
/*
 * Execute a fixed, known instruction stream: N iterations of one 64-byte-
 * strided load plus seven nops, driven by the LOOP instruction (ecx = N).
 * The gp_events min/max windows are presumably calibrated against this body.
 * NOTE(review): the asm reads *buf without a "memory" clobber/input —
 * appears tolerable since the buffer contents are never written; confirm.
 */
static inline void loop()
{
	unsigned long tmp, tmp2, tmp3;
	asm volatile("1: mov (%1), %2; add $64, %1; nop; nop; nop; nop; nop; nop; nop; loop 1b"
		: "=c"(tmp), "=r"(tmp2), "=r"(tmp3): "0"(N), "1"(buf));
}
/* Number of PMIs delivered; written by the ISR, polled by check_irq(). */
volatile uint64_t irq_received;

/* PMI handler: record the interrupt and acknowledge the local APIC. */
static void cnt_overflow(isr_regs_t *regs)
{
	irq_received++;
	apic_write(APIC_EOI, 0);
}
  96. static bool check_irq(void)
  97. {
  98. int i;
  99. irq_received = 0;
  100. irq_enable();
  101. for (i = 0; i < 100000 && !irq_received; i++)
  102. asm volatile("pause");
  103. irq_disable();
  104. return irq_received;
  105. }
  106. static bool is_gp(pmu_counter_t *evt)
  107. {
  108. return evt->ctr < MSR_CORE_PERF_FIXED_CTR0;
  109. }
  110. static int event_to_global_idx(pmu_counter_t *cnt)
  111. {
  112. return cnt->ctr - (is_gp(cnt) ? MSR_IA32_PERFCTR0 :
  113. (MSR_CORE_PERF_FIXED_CTR0 - FIXED_CNT_INDEX));
  114. }
  115. static struct pmu_event* get_counter_event(pmu_counter_t *cnt)
  116. {
  117. if (is_gp(cnt)) {
  118. int i;
  119. for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
  120. if (gp_events[i].unit_sel == (cnt->config & 0xffff))
  121. return &gp_events[i];
  122. } else
  123. return &fixed_events[cnt->ctr - MSR_CORE_PERF_FIXED_CTR0];
  124. return (void*)0;
  125. }
  126. static void global_enable(pmu_counter_t *cnt)
  127. {
  128. cnt->idx = event_to_global_idx(cnt);
  129. wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) |
  130. (1ull << cnt->idx));
  131. }
  132. static void global_disable(pmu_counter_t *cnt)
  133. {
  134. wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_CTRL) &
  135. ~(1ull << cnt->idx));
  136. }
  137. static void start_event(pmu_counter_t *evt)
  138. {
  139. wrmsr(evt->ctr, evt->count);
  140. if (is_gp(evt))
  141. wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
  142. evt->config | EVNTSEL_EN);
  143. else {
  144. uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
  145. int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
  146. uint32_t usrospmi = 0;
  147. if (evt->config & EVNTSEL_OS)
  148. usrospmi |= (1 << 0);
  149. if (evt->config & EVNTSEL_USR)
  150. usrospmi |= (1 << 1);
  151. if (evt->config & EVNTSEL_INT)
  152. usrospmi |= (1 << 3); // PMI on overflow
  153. ctrl = (ctrl & ~(0xf << shift)) | (usrospmi << shift);
  154. wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl);
  155. }
  156. global_enable(evt);
  157. }
  158. static void stop_event(pmu_counter_t *evt)
  159. {
  160. global_disable(evt);
  161. if (is_gp(evt))
  162. wrmsr(MSR_P6_EVNTSEL0 + event_to_global_idx(evt),
  163. evt->config & ~EVNTSEL_EN);
  164. else {
  165. uint32_t ctrl = rdmsr(MSR_CORE_PERF_FIXED_CTR_CTRL);
  166. int shift = (evt->ctr - MSR_CORE_PERF_FIXED_CTR0) * 4;
  167. wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, ctrl & ~(0xf << shift));
  168. }
  169. evt->count = rdmsr(evt->ctr);
  170. }
  171. static void measure(pmu_counter_t *evt, int count)
  172. {
  173. int i;
  174. for (i = 0; i < count; i++)
  175. start_event(&evt[i]);
  176. loop();
  177. for (i = 0; i < count; i++)
  178. stop_event(&evt[i]);
  179. }
  180. static bool verify_event(uint64_t count, struct pmu_event *e)
  181. {
  182. // printf("%lld >= %lld <= %lld\n", e->min, count, e->max);
  183. return count >= e->min && count <= e->max;
  184. }
  185. static bool verify_counter(pmu_counter_t *cnt)
  186. {
  187. return verify_event(cnt->count, get_counter_event(cnt));
  188. }
  189. static void check_gp_counter(struct pmu_event *evt)
  190. {
  191. pmu_counter_t cnt = {
  192. .ctr = MSR_IA32_PERFCTR0,
  193. .config = EVNTSEL_OS | EVNTSEL_USR | evt->unit_sel,
  194. };
  195. int i;
  196. for (i = 0; i < num_counters; i++, cnt.ctr++) {
  197. cnt.count = 0;
  198. measure(&cnt, 1);
  199. report("%s-%d", verify_event(cnt.count, evt), evt->name, i);
  200. }
  201. }
  202. static void check_gp_counters(void)
  203. {
  204. int i;
  205. for (i = 0; i < sizeof(gp_events)/sizeof(gp_events[0]); i++)
  206. if (!(ebx.full & (1 << i)))
  207. check_gp_counter(&gp_events[i]);
  208. else
  209. printf("GP event '%s' is disabled\n",
  210. gp_events[i].name);
  211. }
  212. static void check_fixed_counters(void)
  213. {
  214. pmu_counter_t cnt = {
  215. .config = EVNTSEL_OS | EVNTSEL_USR,
  216. };
  217. int i;
  218. for (i = 0; i < edx.split.num_counters_fixed; i++) {
  219. cnt.count = 0;
  220. cnt.ctr = fixed_events[i].unit_sel;
  221. measure(&cnt, 1);
  222. report("fixed-%d", verify_event(cnt.count, &fixed_events[i]), i);
  223. }
  224. }
  225. static void check_counters_many(void)
  226. {
  227. pmu_counter_t cnt[10];
  228. int i, n;
  229. for (i = 0, n = 0; n < num_counters; i++) {
  230. if (ebx.full & (1 << i))
  231. continue;
  232. cnt[n].count = 0;
  233. cnt[n].ctr = MSR_IA32_PERFCTR0 + n;
  234. cnt[n].config = EVNTSEL_OS | EVNTSEL_USR | gp_events[i].unit_sel;
  235. n++;
  236. }
  237. for (i = 0; i < edx.split.num_counters_fixed; i++) {
  238. cnt[n].count = 0;
  239. cnt[n].ctr = fixed_events[i].unit_sel;
  240. cnt[n].config = EVNTSEL_OS | EVNTSEL_USR;
  241. n++;
  242. }
  243. measure(cnt, n);
  244. for (i = 0; i < n; i++)
  245. if (!verify_counter(&cnt[i]))
  246. break;
  247. report("all counters", i == n);
  248. }
/*
 * Overflow test: first calibrate how many events one loop() run generates,
 * then preload each counter so that the same run wraps it past zero.
 * Verifies the final count, the GLOBAL_STATUS overflow bit, clearing that
 * bit via GLOBAL_OVF_CTRL, and PMI delivery (enabled on odd iterations only).
 */
static void check_counter_overflow(void)
{
	uint64_t count;
	int i;
	pmu_counter_t cnt = {
		.ctr = MSR_IA32_PERFCTR0,
		.config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
		.count = 0,
	};
	/* Calibration run: count = events produced by one loop(). */
	measure(&cnt, 1);
	count = cnt.count;
	/* clear status before test */
	wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, rdmsr(MSR_CORE_PERF_GLOBAL_STATUS));
	report_prefix_push("overflow");
	/* All GP counters, plus the first fixed counter on the last pass. */
	for (i = 0; i < num_counters + 1; i++, cnt.ctr++) {
		uint64_t status;
		int idx;
		if (i == num_counters)
			cnt.ctr = fixed_events[0].unit_sel;
		/* Alternate PMI enable so interrupt delivery itself is tested. */
		if (i % 2)
			cnt.config |= EVNTSEL_INT;
		else
			cnt.config &= ~EVNTSEL_INT;
		idx = event_to_global_idx(&cnt);
		/* Preload 1 - count: after ~count events the counter reads 1. */
		cnt.count = 1 - count;
		measure(&cnt, 1);
		report("cntr-%d", cnt.count == 1, i);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report("status-%d", status & (1ull << idx), i);
		/* Writing the status bits back to OVF_CTRL clears them. */
		wrmsr(MSR_CORE_PERF_GLOBAL_OVF_CTRL, status);
		status = rdmsr(MSR_CORE_PERF_GLOBAL_STATUS);
		report("status clear-%d", !(status & (1ull << idx)), i);
		report("irq-%d", check_irq() == (i % 2), i);
	}
	report_prefix_pop();
}
  285. static void check_gp_counter_cmask(void)
  286. {
  287. pmu_counter_t cnt = {
  288. .ctr = MSR_IA32_PERFCTR0,
  289. .config = EVNTSEL_OS | EVNTSEL_USR | gp_events[1].unit_sel /* instructions */,
  290. .count = 0,
  291. };
  292. cnt.config |= (0x2 << EVNTSEL_CMASK_SHIFT);
  293. measure(&cnt, 1);
  294. report("cmask", cnt.count < gp_events[1].min);
  295. }
/*
 * RDPMC test: write a 33-bit pattern into each counter MSR and read it
 * back via rdpmc.  The expected full read is the low 32 bits of val with
 * every implemented bit above 31 set (the expression below builds exactly
 * that mask from the CPUID-reported width — presumably reflecting
 * sign-extension of the MSR write; confirm against the SDM).
 */
static void check_rdpmc(void)
{
	uint64_t val = 0x1f3456789ull;
	int i;
	report_prefix_push("rdpmc");
	for (i = 0; i < num_counters; i++) {
		uint64_t x = (val & 0xffffffff) |
			((1ull << (eax.split.bit_width - 32)) - 1) << 32;
		wrmsr(MSR_IA32_PERFCTR0 + i, val);
		report("cntr-%d", rdpmc(i) == x, i);
		/* ECX bit 31 selects the "fast" read: low 32 bits only. */
		report("fast-%d", rdpmc(i | (1<<31)) == (u32)val, i);
	}
	for (i = 0; i < edx.split.num_counters_fixed; i++) {
		uint64_t x = (val & 0xffffffff) |
			((1ull << (edx.split.bit_width_fixed - 32)) - 1) << 32;
		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, val);
		/* ECX bit 30 selects the fixed-counter index space. */
		report("fixed cntr-%d", rdpmc(i | (1 << 30)) == x, i);
		report("fixed fast-%d", rdpmc(i | (3<<30)) == (u32)val, i);
	}
	report_prefix_pop();
}
/*
 * Entry point: query CPUID.0xA, set up paging/IDT/PMI handling, then run
 * the full PMU test battery.  Skips everything if no PMU is reported.
 */
int main(int ac, char **av)
{
	struct cpuid id = cpuid(10);	/* leaf 0xA: architectural PMU info */
	setup_vm();
	setup_idt();
	handle_irq(PC_VECTOR, cnt_overflow);
	buf = vmalloc(N*64);	/* backing store strided through by loop() */
	eax.full = id.a;
	ebx.full = id.b;
	edx.full = id.d;
	/* version_id == 0 means no architectural PMU is present. */
	if (!eax.split.version_id) {
		printf("No pmu is detected!\n");
		return report_summary();
	}
	printf("PMU version: %d\n", eax.split.version_id);
	printf("GP counters: %d\n", eax.split.num_counters);
	printf("GP counter width: %d\n", eax.split.bit_width);
	printf("Mask length: %d\n", eax.split.mask_length);
	printf("Fixed counters: %d\n", edx.split.num_counters_fixed);
	printf("Fixed counter width: %d\n", edx.split.bit_width_fixed);
	/* Never test more GP counters than we have event table entries for. */
	num_counters = eax.split.num_counters;
	if (num_counters > ARRAY_SIZE(gp_events))
		num_counters = ARRAY_SIZE(gp_events);
	/* Route PMIs to our vector. */
	apic_write(APIC_LVTPC, PC_VECTOR);
	check_gp_counters();
	check_fixed_counters();
	check_rdpmc();
	check_counters_many();
	check_counter_overflow();
	check_gp_counter_cmask();
	return report_summary();
}