Browse Source

Get accurate clock freq for KVM, dump cpu ids, cpu feature flags (#905)

* partial implementation of cpuid feature dump

Adds the following files to devarch (under /dev/)
- cpuidraw - raw dump of the cpuid records
- cpuidflags - dump of short names of current cpu features

- doesn't show amd-specific flags
- only dumps for one cpu

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>

* hz wip

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>

* Read accurate cpu clock freq when running in kvm, also tidy cpu flags

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>

* Remove cpu_flags.h

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>

* tidy

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>

* Fix for qemu clock freq in go9pcpu

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>

* Remove old debug code

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>

* tidy cpuid parsing

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>

* small print fix

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>

* fix build error

Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>
Graham MacDonald 1 month ago
parent
commit
e083596652
4 changed files with 341 additions and 21 deletions
  1. 61 9
      sys/src/9/amd64/archamd64.c
  2. 271 10
      sys/src/9/amd64/devarch.c
  3. 4 1
      util/GO9PCPU
  4. 5 1
      util/GO9PTERM

+ 61 - 9
sys/src/9/amd64/archamd64.c

@@ -16,6 +16,11 @@
 #undef DBG
 #define DBG iprint
 
+/* Hypervisor identified by cpuhypervisor() from the cpuid 0x40000000 signature. */
+typedef enum CpuHypervisor {
+	CpuHypervisorUnknown = 0,	/* no recognised signature */
+	CpuHypervisorKvm,		/* signature is "KVMKVMKVM" */
+} CpuHypervisor;
+
 static int
 cpuidinit(void)
 {
@@ -85,12 +90,34 @@ cpuidname(uint32_t *info0)
 		return vendorid;
 	}
 	return vendorid;
+}
 
+/*
+ * Identify the hypervisor we are running under, if any.
+ *
+ * cpuid leaf 0x40000000 yields a 12-byte vendor signature in ebx, ecx
+ * and edx (info[1..3]).  Only the KVM signature is recognised; anything
+ * else, or a failed cpuid, is reported as CpuHypervisorUnknown.
+ */
+CpuHypervisor
+cpuhypervisor(void)
+{
+	uint32_t info[4];
+
+	if (!cpuid(0x40000000, 0, info)) {
+		return CpuHypervisorUnknown;
+	}
+
+	/* info[1], info[2] and info[3] are contiguous: compare them as 12 raw bytes */
+	if (memcmp("KVMKVMKVM\0\0\0", &info[1], 12) == 0) {
+		return CpuHypervisorKvm;
+	}
+	return CpuHypervisorUnknown;
+}
 
+/*
+ * Read the TSC frequency advertised by the hypervisor.
+ *
+ * Leaf 0x40000010 reports the frequency in kHz in eax (info[0]).
+ * Returns the frequency in Hz, or 0 if the leaf could not be read.
+ */
+static int64_t
+cpuidhz_hypervisor(void)
+{
+	uint32_t info[4];
+	if (cpuid(0x40000010, 0, info)) {
+		/* widen first: a 32-bit kHz*1000 wraps for anything above ~4.29GHz */
+		return (int64_t)info[0] * 1000;
+	}
+	print("cpuidhz_hypervisor: couldn't read TSC freq for hypervisor\n");
+	return 0;
 }
 
 static int64_t
-cpuidhz(uint32_t *info0, uint32_t *info1)
+cpuidhz(uint32_t *info0, uint32_t *info1, CpuHypervisor hypervisor)
 {
 	int f, r;
 	int64_t hz;
@@ -103,8 +130,27 @@ cpuidhz(uint32_t *info0, uint32_t *info1)
 	DBG("vendorid: %s\n", vendorid);
 	DBG("CPUID Signature: %d\n", info1[0]);
 
+	uint8_t family_ext = (info1[0] & 0xff00000) >> 20;
+	uint8_t model_ext = (info1[0] & 0xf0000) >> 16;
+	uint8_t proctype = (info1[0] & 0x3000) >> 12;
+	uint8_t family = (info1[0] & 0xf00) >> 8;
+	uint8_t model = (info1[0] & 0xf0) >> 4;
+	uint8_t stepping = (info1[0] & 0xf);
+	print("CPUID family %x model %x proctype %x stepping %x model_ext %x family_ext %x hypervisor: %d\n",
+		family, model, proctype, stepping, model_ext, family_ext, hypervisor);
+
+	if (hypervisor != CpuHypervisorUnknown) {
+		hz = cpuidhz_hypervisor();
+		if (hz > 0) {
+			return hz;
+		}
+	}
+
 	if(strcmp("GenuineIntel", vendorid) == 0) {
-		switch(info1[0] & 0x0fff3ff0){
+		uint32_t cpusig = info1[0] & 0x0fff3ff0;
+		print("CPU Signature: %x\n", cpusig);
+
+		switch (cpusig) {
 		default:
 			return 0;
 		case 0x00000f30:		/* Xeon (MP), Pentium [4D] */
@@ -164,16 +210,14 @@ cpuidhz(uint32_t *info0, uint32_t *info1)
 		case 0x000806e0:		/* i7,5,3 85xx */
 		case 0x000906e0:		/* i7,5,3 77xx 8xxx */
 			/*
-			 * Get the FSB frequemcy.
+			 * Get the FSB frequency.
 			 * If processor has Enhanced Intel Speedstep Technology
 			 * then non-integer bus frequency ratios are possible.
 			 */
-			//print("CPUID EIST: %d\n", (info1[2] & 0x00000080));
-			if(info1[2] & 0x00000080){
+			if (info1[2] & 0x00000080) {
 				msr = rdmsr(0x198);
 				r = (msr>>40) & 0x1f;
-			}
-			else{
+			} else {
 				msr = 0;
 				r = rdmsr(0x2a) & 0x1f;
 			}
@@ -181,6 +225,7 @@ cpuidhz(uint32_t *info0, uint32_t *info1)
 //iprint("rdmsr Intel: %d\n", rdmsr(0x2a));
 //iprint("Intel msr.lo %d\n", r);
 //iprint("Intel msr.hi %d\n", f);
+
 			switch(f){
 			default:
 				return 0;
@@ -224,7 +269,10 @@ cpuidhz(uint32_t *info0, uint32_t *info1)
 		DBG("cpuidhz: 0x2a: %#llx hz %lld\n", rdmsr(0x2a), hz);
 	}
 	else if(strcmp("AuthenticAMD",vendorid) == 0){
-		switch(info1[0] & 0x0fff0ff0){
+		uint32_t cpusig = info1[0] & 0x0fff0ff0;
+		print("CPU Signature: %x\n", cpusig);
+
+		switch (cpusig) {
 		default:
 			return 0;
 		case 0x00050ff0:		/* K8 Athlon Venice 64 / Qemu64 */
@@ -304,7 +352,11 @@ archhz(void)
 		return 0;
 	}
 
-	hz = cpuidhz(info0, info1);
+	// If we're running under a hypervisor, try to get the TSC frequency from
+	// it first; the MSR-based detection below may not be accurate there.
+	CpuHypervisor hypervisor = cpuhypervisor();
+
+	hz = cpuidhz(info0, info1, hypervisor);
 	if(hz > 0 || machp()->machno != 0)
 		return hz;
 

+ 271 - 10
sys/src/9/amd64/devarch.c

@@ -16,6 +16,177 @@
 
 #include "ureg.h"
 
+/* Describes one cpuid feature bit and the short name printed for it. */
+typedef struct Cpuflag {
+	const char	*name;		/* short name (linux-like) */
+	uint32_t	eax;		/* cpuid leaf: the value passed in as eax */
+	uint8_t		infoidx;	/* result word holding the bit: 0=eax 1=ebx 2=ecx 3=edx */
+	uint8_t		bitidx;		/* bit number of the feature within that result word */
+} Cpuflag;
+
+// infoidx selects the cpuid result register: 0=eax 1=ebx 2=ecx 3=edx
+/*
+ * Table of known cpuid feature bits.  Each entry names the leaf (input eax),
+ * the result register and the bit within that register that signals the
+ * feature.  The readers in this file query every leaf with ecx = 0 only.
+ */
+Cpuflag cpuflags[] = {
+	/* name				eax 		info 	bit */
+	{ "fpu",			0x00000001,	3,	0, },
+	{ "vme",			0x00000001,	3,	1, },
+	{ "de",				0x00000001,	3,	2, },
+	{ "pse",			0x00000001,	3,	3, },
+	{ "tsc",			0x00000001,	3,	4, },
+	{ "msr",			0x00000001,	3,	5, },
+	{ "pae",			0x00000001,	3,	6, },
+	{ "mce",			0x00000001,	3,	7, },
+	{ "cx8",			0x00000001,	3,	8, },
+	{ "apic",			0x00000001,	3,	9, },
+	{ "sep",			0x00000001,	3,	11, },
+	{ "mtrr",			0x00000001,	3,	12, },
+	{ "pge",			0x00000001,	3,	13, },
+	{ "mca",			0x00000001,	3,	14, },
+	{ "cmov",			0x00000001,	3,	15, },
+	{ "pat",			0x00000001,	3,	16, },
+	{ "pse36",			0x00000001,	3,	17, },
+	{ "pn",				0x00000001,	3,	18, },
+	{ "clflush",			0x00000001,	3,	19, },
+	{ "dts",			0x00000001,	3,	21, },
+	{ "acpi",			0x00000001,	3,	22, },
+	{ "mmx",			0x00000001,	3,	23, },
+	{ "fxsr",			0x00000001,	3,	24, },
+	{ "sse",			0x00000001,	3,	25, },
+	{ "sse2",			0x00000001,	3,	26, },
+	{ "ss",				0x00000001,	3,	27, },
+	{ "ht",				0x00000001,	3,	28, },
+	{ "tm",				0x00000001,	3,	29, },
+	{ "ia64",			0x00000001,	3,	30, },
+	{ "pbe",			0x00000001,	3,	31, },
+	{ "pni",			0x00000001,	2,	0, },
+	{ "pclmulqdq",			0x00000001,	2,	1, },
+	{ "dtes64",			0x00000001,	2,	2, },
+	{ "monitor",			0x00000001,	2,	3, },
+	{ "ds_cpl",			0x00000001,	2,	4, },
+	{ "vmx",			0x00000001,	2,	5, },
+	{ "smx",			0x00000001,	2,	6, },
+	{ "est",			0x00000001,	2,	7, },
+	{ "tm2",			0x00000001,	2,	8, },
+	{ "ssse3",			0x00000001,	2,	9, },
+	{ "cid",			0x00000001,	2,	10, },
+	{ "sdbg",			0x00000001,	2,	11, },
+	{ "fma",			0x00000001,	2,	12, },
+	{ "cx16",			0x00000001,	2,	13, },
+	{ "xtpr",			0x00000001,	2,	14, },
+	{ "pdcm",			0x00000001,	2,	15, },
+	{ "pcid",			0x00000001,	2,	17, },
+	{ "dca",			0x00000001,	2,	18, },
+	{ "sse4_1",			0x00000001,	2,	19, },
+	{ "sse4_2",			0x00000001,	2,	20, },
+	{ "x2apic",			0x00000001,	2,	21, },
+	{ "movbe",			0x00000001,	2,	22, },
+	{ "popcnt",			0x00000001,	2,	23, },
+	{ "tsc_deadline_timer",		0x00000001,	2,	24, },
+	{ "aes",			0x00000001,	2,	25, },
+	{ "xsave",			0x00000001,	2,	26, },
+	{ "osxsave",			0x00000001,	2,	27, },
+	{ "avx",			0x00000001,	2,	28, },
+	{ "f16c",			0x00000001,	2,	29, },
+	{ "rdrand",			0x00000001,	2,	30, },
+	{ "hypervisor",			0x00000001,	2,	31, },
+	{ "lahf_lm",			0x80000001,	2,	0, },
+	{ "cmp_legacy",			0x80000001,	2,	1, },
+	{ "svm",			0x80000001,	2,	2, },
+	{ "extapic",			0x80000001,	2,	3, },
+	{ "cr8_legacy",			0x80000001,	2,	4, },
+	{ "abm",			0x80000001,	2,	5, },
+	{ "sse4a",			0x80000001,	2,	6, },
+	{ "misalignsse",		0x80000001,	2,	7, },
+	{ "3dnowprefetch",		0x80000001,	2,	8, },
+	{ "osvw",			0x80000001,	2,	9, },
+	{ "ibs",			0x80000001,	2,	10, },
+	{ "xop",			0x80000001,	2,	11, },
+	{ "skinit",			0x80000001,	2,	12, },
+	{ "wdt",			0x80000001,	2,	13, },
+	{ "lwp",			0x80000001,	2,	15, },
+	{ "fma4",			0x80000001,	2,	16, },
+	{ "tce",			0x80000001,	2,	17, },
+	{ "nodeid_msr",			0x80000001,	2,	19, },
+	{ "tbm",			0x80000001,	2,	21, },
+	{ "topoext",			0x80000001,	2,	22, },
+	{ "perfctr_core",		0x80000001,	2,	23, },
+	{ "perfctr_nb",			0x80000001,	2,	24, },
+	{ "bpext",			0x80000001,	2,	26, },
+	{ "ptsc",			0x80000001,	2,	27, },
+	{ "perfctr_llc",		0x80000001,	2,	28, },
+	{ "mwaitx",			0x80000001,	2,	29, },
+	{ "fsgsbase",			0x00000007,	1,	0, },
+	{ "tsc_adjust",			0x00000007,	1,	1, },
+	{ "bmi1",			0x00000007,	1,	3, },
+	{ "hle",			0x00000007,	1,	4, },
+	{ "avx2",			0x00000007,	1,	5, },
+	{ "smep",			0x00000007,	1,	7, },
+	{ "bmi2",			0x00000007,	1,	8, },
+	{ "erms",			0x00000007,	1,	9, },
+	{ "invpcid",			0x00000007,	1,	10, },
+	{ "rtm",			0x00000007,	1,	11, },
+	{ "cqm",			0x00000007,	1,	12, },
+	{ "mpx",			0x00000007,	1,	14, },
+	{ "rdt_a",			0x00000007,	1,	15, },
+	{ "avx512f",			0x00000007,	1,	16, },
+	{ "avx512dq",			0x00000007,	1,	17, },
+	{ "rdseed",			0x00000007,	1,	18, },
+	{ "adx",			0x00000007,	1,	19, },
+	{ "smap",			0x00000007,	1,	20, },
+	{ "avx512ifma",			0x00000007,	1,	21, },
+	{ "clflushopt",			0x00000007,	1,	23, },
+	{ "clwb",			0x00000007,	1,	24, },
+	{ "intel_pt",			0x00000007,	1,	25, },
+	{ "avx512pf",			0x00000007,	1,	26, },
+	{ "avx512er",			0x00000007,	1,	27, },
+	{ "avx512cd",			0x00000007,	1,	28, },
+	{ "sha_ni",			0x00000007,	1,	29, },
+	{ "avx512bw",			0x00000007,	1,	30, },
+	{ "avx512vl",			0x00000007,	1,	31, },
+	/*
+	 * NOTE(review): these four bits are defined for leaf 0x0d sub-leaf 1
+	 * (ecx = 1); with ecx = 0 eax holds the XCR0 state mask instead, so
+	 * they may be misreported until the table can express a sub-leaf.
+	 */
+	{ "xsaveopt",			0x0000000d,	0,	0, },
+	{ "xsavec",			0x0000000d,	0,	1, },
+	{ "xgetbv1",			0x0000000d,	0,	2, },
+	{ "xsaves",			0x0000000d,	0,	3, },
+	/*
+	 * NOTE(review): cqm_llc is sub-leaf 0 edx bit 1; the three entries
+	 * after it appear to belong to leaf 0x0f sub-leaf 1 (ecx = 1), so
+	 * cqm_mbm_total currently aliases the cqm_llc bit -- confirm.
+	 */
+	{ "cqm_llc",			0x0000000f,	3,	1, },
+	{ "cqm_occup_llc",		0x0000000f,	3,	0, },
+	{ "cqm_mbm_total",		0x0000000f,	3,	1, },
+	{ "cqm_mbm_local",		0x0000000f,	3,	2, },
+	/* thermal and power management bits are reported in eax of leaf 0x06 */
+	{ "dtherm",			0x00000006,	0,	0, },
+	{ "ida",			0x00000006,	0,	1, },
+	{ "arat",			0x00000006,	0,	2, },
+	{ "pln",			0x00000006,	0,	4, },
+	{ "pts",			0x00000006,	0,	6, },
+	{ "hwp",			0x00000006,	0,	7, },
+	{ "hwp_notify",			0x00000006,	0,	8, },
+	{ "hwp_act_window",		0x00000006,	0,	9, },
+	{ "hwp_epp",			0x00000006,	0,	10, },
+	{ "hwp_pkg_req",		0x00000006,	0,	11, },
+	{ "avx512vbmi",			0x00000007,	2,	1, },
+	{ "umip",			0x00000007,	2,	2, },
+	{ "pku",			0x00000007,	2,	3, },
+	{ "ospke",			0x00000007,	2,	4, },
+	{ "avx512_vbmi2",		0x00000007,	2,	6, },
+	{ "gfni",			0x00000007,	2,	8, },
+	{ "vaes",			0x00000007,	2,	9, },
+	{ "vpclmulqdq",			0x00000007,	2,	10, },
+	{ "avx512_vnni",		0x00000007,	2,	11, },
+	{ "avx512_bitalg",		0x00000007,	2,	12, },
+	{ "tme",			0x00000007,	2,	13, },
+	{ "avx512_vpopcntdq",		0x00000007,	2,	14, },
+	{ "la57",			0x00000007,	2,	16, },
+	{ "rdpid",			0x00000007,	2,	22, },
+	{ "cldemote",			0x00000007,	2,	25, },
+	{ "movdiri",			0x00000007,	2,	27, },
+	{ "movdir64b",			0x00000007,	2,	28, },
+	{ "avx512_4vnniw",		0x00000007,	3,	2, },
+	{ "avx512_4fmaps",		0x00000007,	3,	3, },
+	{ "tsx_force_abort",		0x00000007,	3,	13, },
+	{ "pconfig",			0x00000007,	3,	18, },
+	{ "spec_ctrl",			0x00000007,	3,	26, },
+	{ "intel_stibp",		0x00000007,	3,	27, },
+	{ "flush_l1d",			0x00000007,	3,	28, },
+	{ "arch_capabilities",		0x00000007,	3,	29, },
+	{ "spec_ctrl_ssbd",		0x00000007,	3,	31, },
+};
+
 typedef struct IOMap IOMap;
 struct IOMap
 {
@@ -529,8 +700,8 @@ cputyperead(Chan* c, void *a, int32_t n, int64_t off)
 	char buf[512], *s, *e;
 	char *vendorid;
 	uint32_t info0[4];
-	int i, k;
 
+	s = buf;
 	e = buf+sizeof buf;
 
 	if(!cpuidinfo(0, 0, info0)) {
@@ -539,17 +710,105 @@ cputyperead(Chan* c, void *a, int32_t n, int64_t off)
 	} else
 		vendorid = cpuidname(info0);
 
-	s = seprint(buf, e, "%s CPU @ %uMHz\ncpu cores: %d\n", vendorid, machp()->cpumhz, sys->nmach);
+	s = seprint(s, e, "%s CPU @ %uMHz\ncpu cores: %d\n", vendorid, machp()->cpumhz, sys->nmach);
+
+	return readstr(off, a, n, buf);
+}
+
+/*
+ * Count the cpuid leaves available in each of the three ranges:
+ * basic (from 0x00000000), hypervisor (from 0x40000000) and extended
+ * (from 0x80000000).  Leaf 0 of each range reports the highest leaf of
+ * that range in eax.  A count is left at 0 when the range cannot be
+ * read or the reported maximum does not belong to the range.
+ */
+static void
+get_cpuid_limits(int *num_basic, int *num_hypervisor, int *num_extended)
+{
+	uint32_t info[4];
+
+	*num_basic = 0;
+	*num_hypervisor = 0;
+	*num_extended = 0;
 
-	if(DBGFLG) {
-		k = machp()->CPU.ncpuinfoe - machp()->CPU.ncpuinfos;
-		if(k > 4)
-			k = 4;
-		for(i = 0; i < k; i++)
-			s = seprint(s, e, "%#8.8x %#8.8x %#8.8x %#8.8x\n",
-				machp()->CPU.cpuinfo[i][0], machp()->CPU.cpuinfo[i][1],
-				machp()->CPU.cpuinfo[i][2], machp()->CPU.cpuinfo[i][3]);
+	if (cpuid(0x00000000, 0, info)) {
+		*num_basic = info[0] + 1;
 	}
+	/*
+	 * Without a hypervisor (or without extended leaves) the query may
+	 * return unrelated data; only trust a maximum that lies in the same
+	 * 64K window as the range base, otherwise the subtraction wraps.
+	 */
+	if (cpuid(0x40000000, 0, info) && (info[0] & 0xffff0000) == 0x40000000) {
+		*num_hypervisor = info[0] - 0x40000000 + 1;
+	}
+	if (cpuid(0x80000000, 0, info) && (info[0] & 0xffff0000) == 0x80000000) {
+		*num_extended = info[0] - 0x80000000 + 1;
+	}
+}
+
+// Given an index into the valid cpuids, and the number of each range of values,
+// return the appropriate EAX value.
+//
+// Indices [0, num_basic) map to the basic leaves starting at 0, the next
+// num_hyp indices to the hypervisor leaves starting at 0x40000000, and
+// everything after that to the extended leaves starting at 0x80000000.
+// The result is unsigned because extended leaves do not fit in an int32_t.
+static uint32_t
+itoeax(int i, uint32_t num_basic, uint32_t num_hyp, uint32_t num_ext) {
+	uint32_t idx = (uint32_t)i;	/* compare like with like: the counts are unsigned */
+	uint32_t first_hyp = num_basic;
+	uint32_t first_ext = num_basic + num_hyp;
+
+	(void)num_ext;	/* only the start of the extended range is needed */
+
+	if (idx >= first_ext) {
+		return 0x80000000u + (idx - first_ext);
+	}
+	if (idx >= first_hyp) {
+		return 0x40000000u + (idx - first_hyp);
+	}
+	return idx;
+}
+
+// Output hex values of all valid cpuid values
+//
+// One line is produced per leaf, in the order basic, hypervisor, extended.
+// Each line holds six hex words: the leaf (input eax), the sub-leaf (input
+// ecx, always 0 here), then the eax, ebx, ecx and edx results.  Leaves for
+// which cpuid() fails are skipped.  Output beyond the buffer is truncated
+// by seprint.
+static int32_t
+cpuidrawread(Chan* c, void *a, int32_t n, int64_t off)
+{
+	char buf[4096];
+	char *s = buf;
+	char *e = buf+sizeof buf;
+	uint32_t info[4];
+
+	/* readstr needs a terminated string even if no leaf gets printed */
+	buf[0] = '\0';
+
+	int num_basic = 0, num_hyp = 0, num_ext = 0;
+	get_cpuid_limits(&num_basic, &num_hyp, &num_ext);
+
+	for (int i = 0; i < num_basic + num_hyp + num_ext; i++) {
+		uint32_t eax = itoeax(i, num_basic, num_hyp, num_ext);
+		if (!cpuid(eax, 0, info)) {
+			continue;
+		}
+		s = seprint(s, e, "%#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x\n",
+			eax, 0, info[0], info[1], info[2], info[3]);
+	}
+
+	return readstr(off, a, n, buf);
+}
+
+// Output cpu flag shortnames from cpuid values
+//
+// Walks every valid leaf (sub-leaf 0 only) and, for each cpuflags entry
+// that names that leaf, prints the entry's short name followed by a space
+// when its feature bit is set.  The list is terminated by a newline.
+static int32_t
+cpuidflagsread(Chan* c, void *a, int32_t n, int64_t off)
+{
+	char buf[4096];
+	char *s = buf;
+	char *e = buf+sizeof buf;
+	uint32_t info[4];
+
+	int num_basic = 0, num_hyp = 0, num_ext = 0;
+	get_cpuid_limits(&num_basic, &num_hyp, &num_ext);
+
+	int num_flags = nelem(cpuflags);
+
+	for (int i = 0; i < num_basic + num_hyp + num_ext; i++) {
+		uint32_t eax = itoeax(i, num_basic, num_hyp, num_ext);
+		if (!cpuid(eax, 0, info)) {
+			continue;
+		}
+
+		// Extract any flag names if this particular eax contains flags
+		for (int fi = 0; fi < num_flags; fi++) {
+			Cpuflag *flag = &cpuflags[fi];
+			if (flag->eax != eax) {
+				continue;
+			}
+
+			// unsigned mask: bit 31 must not be shifted into an int's sign bit
+			if (info[flag->infoidx] & (1u << flag->bitidx)) {
+				s = seprint(s, e, "%s ", flag->name);
+			}
+		}
+	}
+
+	s = seprint(s, e, "\n");
+
 	return readstr(off, a, n, buf);
 }
 
@@ -558,6 +817,8 @@ archinit(void)
 {
 	addarchfile("cputype", 0444, cputyperead, nil);
 	addarchfile("mtags", 0444, mtagsread, nil);
+	addarchfile("cpuidraw", 0444, cpuidrawread, nil);
+	addarchfile("cpuidflags", 0444, cpuidflagsread, nil);
 }
 
 void

+ 4 - 1
util/GO9PCPU

@@ -16,8 +16,11 @@ if [ "$(uname)" = "Linux" ] && [ -e /dev/kvm ]; then
 	fi
 fi
 
+# vmware-cpuid-freq=on,+invtsc makes qemu expose the hypervisor timing leaf
+# (cpuid 0x40000010) to the guest, which we use to read the TSC frequency
+
 read -r cmd <<EOF
-$kvmdo qemu-system-x86_64 -s -cpu Opteron_G1 -smp 4 -m 2048 $kvmflag \
+$kvmdo qemu-system-x86_64 -s -cpu max,vmware-cpuid-freq=on,+invtsc -smp 4 -m 2048 $kvmflag \
 -usb \
 -serial stdio \
 --machine $machineflag \

+ 5 - 1
util/GO9PTERM

@@ -23,8 +23,12 @@ fi
 # Provided is an example lsusb(8) output:
 #       Bus 002 Device 004: ID 056a:00e6 Wacom Co., Ltd TPCE6
 # Where 'X'== 2 and 'Y'== 4
+
+# vmware-cpuid-freq=on,+invtsc makes qemu expose the hypervisor timing leaf
+# (cpuid 0x40000010) to the guest, which we use to read the TSC frequency
+
 read -r cmd <<EOF
-$kvmdo qemu-system-x86_64 -s -cpu Opteron_G1 -smp 4 -m 2048 $kvmflag \
+$kvmdo qemu-system-x86_64 -s -cpu max,vmware-cpuid-freq=on,+invtsc -smp 4 -m 2048 $kvmflag \
 -usb \
 -serial stdio \
 --machine $machineflag \