access.c 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991
  1. #include "libcflat.h"
  2. #include "desc.h"
  3. #include "processor.h"
  4. #include "asm/page.h"
/* This test runs on a single CPU, so the CPU id is hard-wired to 0. */
#define smp_id() 0
/* Local boolean constants used together with _Bool. */
#define true 1
#define false 0
/* When set, each test case is printed before it runs and PASS is reported. */
static _Bool verbose = false;
/* One page-table entry. */
typedef unsigned long pt_element_t;
/* EBX and ECX of CPUID leaf 7; filled in by main(). */
static int cpuid_7_ebx;
static int cpuid_7_ecx;
/* AC_*_MASK bits that ac_test_bump_one() always keeps clear in at->flags. */
static int invalid_mask;
/* Address bits of a page-table entry: below bit 40, restricted by PAGE_MASK. */
#define PT_BASE_ADDR_MASK ((pt_element_t)((((pt_element_t)1 << 40) - 1) & PAGE_MASK))
/* Address mask used for large-page PDEs: PT_BASE_ADDR_MASK with bit 21 cleared. */
#define PT_PSE_BASE_ADDR_MASK (PT_BASE_ADDR_MASK & ~(1ull << 21))
/* Control-register bits toggled by the tests. */
#define CR0_WP_MASK (1UL << 16)
#define CR4_SMEP_MASK (1UL << 20)
/* Bits of the page-fault error code compared against expected_error. */
#define PFERR_PRESENT_MASK (1U << 0)
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK (1U << 2)
#define PFERR_RESERVED_MASK (1U << 3)
#define PFERR_FETCH_MASK (1U << 4)
#define PFERR_PK_MASK (1U << 5)
/* EFER MSR index and its NX-enable bit. */
#define MSR_EFER 0xc0000080
#define EFER_NX_MASK (1ull << 11)
  25. #define PT_INDEX(address, level) \
  26. ((address) >> (12 + ((level)-1) * 9)) & 511
  27. /*
  28. * page table access check tests
  29. */
/*
 * Bit positions inside ac_test_t.flags.  Every combination of these bits
 * (minus the ones rejected by ac_test_legal() or masked by invalid_mask)
 * describes one test case.
 */
enum {
    /* Bits placed in the PTE (level 1 entry). */
    AC_PTE_PRESENT_BIT,
    AC_PTE_WRITABLE_BIT,
    AC_PTE_USER_BIT,
    AC_PTE_ACCESSED_BIT,
    AC_PTE_DIRTY_BIT,
    AC_PTE_NX_BIT,
    AC_PTE_BIT51_BIT,
    /* Bits placed in the PDE (level 2 entry). */
    AC_PDE_PRESENT_BIT,
    AC_PDE_WRITABLE_BIT,
    AC_PDE_USER_BIT,
    AC_PDE_ACCESSED_BIT,
    AC_PDE_DIRTY_BIT,
    AC_PDE_PSE_BIT,
    AC_PDE_NX_BIT,
    AC_PDE_BIT51_BIT,
    AC_PDE_BIT13_BIT,
    /* Protection-key settings: PKRU AD/WD bits and use of a non-zero key. */
    AC_PKU_AD_BIT,
    AC_PKU_WD_BIT,
    AC_PKU_PKEY_BIT,
    /* Kind of access performed by the test. */
    AC_ACCESS_USER_BIT,
    AC_ACCESS_WRITE_BIT,
    AC_ACCESS_FETCH_BIT,
    AC_ACCESS_TWICE_BIT,
    /* CPU configuration in effect during the access. */
    AC_CPU_EFER_NX_BIT,
    AC_CPU_CR0_WP_BIT,
    AC_CPU_CR4_SMEP_BIT,
    AC_CPU_CR4_PKE_BIT,
    /* Number of flag bits; must stay last. */
    NR_AC_FLAGS
};
/*
 * Single-bit masks for each AC_*_BIT position.  The F() macro relies on the
 * "<name>_MASK" naming to test a flag by its base name.
 */
#define AC_PTE_PRESENT_MASK (1 << AC_PTE_PRESENT_BIT)
#define AC_PTE_WRITABLE_MASK (1 << AC_PTE_WRITABLE_BIT)
#define AC_PTE_USER_MASK (1 << AC_PTE_USER_BIT)
#define AC_PTE_ACCESSED_MASK (1 << AC_PTE_ACCESSED_BIT)
#define AC_PTE_DIRTY_MASK (1 << AC_PTE_DIRTY_BIT)
#define AC_PTE_NX_MASK (1 << AC_PTE_NX_BIT)
#define AC_PTE_BIT51_MASK (1 << AC_PTE_BIT51_BIT)
#define AC_PDE_PRESENT_MASK (1 << AC_PDE_PRESENT_BIT)
#define AC_PDE_WRITABLE_MASK (1 << AC_PDE_WRITABLE_BIT)
#define AC_PDE_USER_MASK (1 << AC_PDE_USER_BIT)
#define AC_PDE_ACCESSED_MASK (1 << AC_PDE_ACCESSED_BIT)
#define AC_PDE_DIRTY_MASK (1 << AC_PDE_DIRTY_BIT)
#define AC_PDE_PSE_MASK (1 << AC_PDE_PSE_BIT)
#define AC_PDE_NX_MASK (1 << AC_PDE_NX_BIT)
#define AC_PDE_BIT51_MASK (1 << AC_PDE_BIT51_BIT)
#define AC_PDE_BIT13_MASK (1 << AC_PDE_BIT13_BIT)
#define AC_PKU_AD_MASK (1 << AC_PKU_AD_BIT)
#define AC_PKU_WD_MASK (1 << AC_PKU_WD_BIT)
#define AC_PKU_PKEY_MASK (1 << AC_PKU_PKEY_BIT)
#define AC_ACCESS_USER_MASK (1 << AC_ACCESS_USER_BIT)
#define AC_ACCESS_WRITE_MASK (1 << AC_ACCESS_WRITE_BIT)
#define AC_ACCESS_FETCH_MASK (1 << AC_ACCESS_FETCH_BIT)
#define AC_ACCESS_TWICE_MASK (1 << AC_ACCESS_TWICE_BIT)
#define AC_CPU_EFER_NX_MASK (1 << AC_CPU_EFER_NX_BIT)
#define AC_CPU_CR0_WP_MASK (1 << AC_CPU_CR0_WP_BIT)
#define AC_CPU_CR4_SMEP_MASK (1 << AC_CPU_CR4_SMEP_BIT)
#define AC_CPU_CR4_PKE_MASK (1 << AC_CPU_CR4_PKE_BIT)
  87. const char *ac_names[] = {
  88. [AC_PTE_PRESENT_BIT] = "pte.p",
  89. [AC_PTE_ACCESSED_BIT] = "pte.a",
  90. [AC_PTE_WRITABLE_BIT] = "pte.rw",
  91. [AC_PTE_USER_BIT] = "pte.user",
  92. [AC_PTE_DIRTY_BIT] = "pte.d",
  93. [AC_PTE_NX_BIT] = "pte.nx",
  94. [AC_PTE_BIT51_BIT] = "pte.51",
  95. [AC_PDE_PRESENT_BIT] = "pde.p",
  96. [AC_PDE_ACCESSED_BIT] = "pde.a",
  97. [AC_PDE_WRITABLE_BIT] = "pde.rw",
  98. [AC_PDE_USER_BIT] = "pde.user",
  99. [AC_PDE_DIRTY_BIT] = "pde.d",
  100. [AC_PDE_PSE_BIT] = "pde.pse",
  101. [AC_PDE_NX_BIT] = "pde.nx",
  102. [AC_PDE_BIT51_BIT] = "pde.51",
  103. [AC_PDE_BIT13_BIT] = "pde.13",
  104. [AC_PKU_AD_BIT] = "pkru.ad",
  105. [AC_PKU_WD_BIT] = "pkru.wd",
  106. [AC_PKU_PKEY_BIT] = "pkey=1",
  107. [AC_ACCESS_WRITE_BIT] = "write",
  108. [AC_ACCESS_USER_BIT] = "user",
  109. [AC_ACCESS_FETCH_BIT] = "fetch",
  110. [AC_ACCESS_TWICE_BIT] = "twice",
  111. [AC_CPU_EFER_NX_BIT] = "efer.nx",
  112. [AC_CPU_CR0_WP_BIT] = "cr0.wp",
  113. [AC_CPU_CR4_SMEP_BIT] = "cr4.smep",
  114. [AC_CPU_CR4_PKE_BIT] = "cr4.pke",
  115. };
  116. static inline void *va(pt_element_t phys)
  117. {
  118. return (void *)phys;
  119. }
/* Simple bump allocator handing out pages for the page tables under test. */
typedef struct {
    pt_element_t pt_pool;       /* physical address of the start of the pool */
    unsigned pt_pool_size;      /* size of the pool in bytes */
    unsigned pt_pool_current;   /* byte offset of the next page to hand out */
} ac_pool_t;
/* Description of one access test together with its expected outcome. */
typedef struct {
    unsigned flags;             /* combination of AC_*_MASK bits */
    void *virt;                 /* virtual address that gets accessed */
    pt_element_t phys;          /* physical address placed in the leaf entry */
    pt_element_t *ptep;         /* location of the PTE; 0 when the PDE is a large page */
    pt_element_t expected_pte;  /* PTE value expected after the access */
    pt_element_t *pdep;         /* location of the PDE */
    pt_element_t expected_pde;  /* PDE value expected after the access */
    pt_element_t ignore_pde;    /* PDE bits excluded from the comparison */
    int expected_fault;         /* non-zero when a page fault is expected */
    unsigned expected_error;    /* expected page-fault error code (PFERR_* bits) */
} ac_test_t;
/*
 * Packed limit/linear-address pair.
 * NOTE(review): not referenced anywhere else in the visible part of this
 * file; presumably the operand layout of a descriptor-table register.
 */
typedef struct {
    unsigned short limit;
    unsigned long linear_addr;
} __attribute__((packed)) descriptor_table_t;
/* Forward declaration: ac_test_check() calls this before its definition. */
static void ac_test_show(ac_test_t *at);
/*
 * Try to load @val into CR4 and report what happened.
 *
 * The move is wrapped in ASM_TRY("1f") with label 1 placed right after it.
 * NOTE(review): ASM_TRY is defined outside this file; presumably it makes an
 * exception raised by the instruction resume at the given label.
 *
 * Returns the value of exception_vector(); callers compare it with
 * GP_VECTOR to detect a rejected CR4 value.
 */
int write_cr4_checking(unsigned long val)
{
    asm volatile(ASM_TRY("1f")
                 "mov %0,%%cr4\n\t"
                 "1:": : "r" (val));
    return exception_vector();
}
  149. void set_cr0_wp(int wp)
  150. {
  151. unsigned long cr0 = read_cr0();
  152. unsigned long old_cr0 = cr0;
  153. cr0 &= ~CR0_WP_MASK;
  154. if (wp)
  155. cr0 |= CR0_WP_MASK;
  156. if (old_cr0 != cr0)
  157. write_cr0(cr0);
  158. }
  159. void set_cr4_smep(int smep)
  160. {
  161. unsigned long cr4 = read_cr4();
  162. unsigned long old_cr4 = cr4;
  163. extern u64 ptl2[];
  164. cr4 &= ~CR4_SMEP_MASK;
  165. if (smep)
  166. cr4 |= CR4_SMEP_MASK;
  167. if (old_cr4 == cr4)
  168. return;
  169. if (smep)
  170. ptl2[2] &= ~PT_USER_MASK;
  171. write_cr4(cr4);
  172. if (!smep)
  173. ptl2[2] |= PT_USER_MASK;
  174. }
  175. void set_cr4_pke(int pke)
  176. {
  177. unsigned long cr4 = read_cr4();
  178. unsigned long old_cr4 = cr4;
  179. cr4 &= ~X86_CR4_PKE;
  180. if (pke)
  181. cr4 |= X86_CR4_PKE;
  182. if (old_cr4 == cr4)
  183. return;
  184. /* Check that protection keys do not affect accesses when CR4.PKE=0. */
  185. if ((read_cr4() & X86_CR4_PKE) && !pke) {
  186. write_pkru(0xfffffffc);
  187. }
  188. write_cr4(cr4);
  189. }
  190. void set_efer_nx(int nx)
  191. {
  192. unsigned long long efer = rdmsr(MSR_EFER);
  193. unsigned long long old_efer = efer;
  194. efer &= ~EFER_NX_MASK;
  195. if (nx)
  196. efer |= EFER_NX_MASK;
  197. if (old_efer != efer)
  198. wrmsr(MSR_EFER, efer);
  199. }
  200. static void ac_env_int(ac_pool_t *pool)
  201. {
  202. extern char page_fault, kernel_entry;
  203. set_idt_entry(14, &page_fault, 0);
  204. set_idt_entry(0x20, &kernel_entry, 3);
  205. pool->pt_pool = 33 * 1024 * 1024;
  206. pool->pt_pool_size = 120 * 1024 * 1024 - pool->pt_pool;
  207. pool->pt_pool_current = 0;
  208. }
  209. void ac_test_init(ac_test_t *at, void *virt)
  210. {
  211. wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NX_MASK);
  212. set_cr0_wp(1);
  213. at->flags = 0;
  214. at->virt = virt;
  215. at->phys = 32 * 1024 * 1024;
  216. }
  217. int ac_test_bump_one(ac_test_t *at)
  218. {
  219. at->flags = ((at->flags | invalid_mask) + 1) & ~invalid_mask;
  220. return at->flags < (1 << NR_AC_FLAGS);
  221. }
/*
 * F(NAME) is true when NAME##_MASK is set in the variable called "flags"
 * that must be in scope at the point of use.
 */
#define F(x) ((flags & x##_MASK) != 0)
  223. _Bool ac_test_legal(ac_test_t *at)
  224. {
  225. int flags = at->flags;
  226. if (F(AC_ACCESS_FETCH) && F(AC_ACCESS_WRITE))
  227. return false;
  228. /*
  229. * Since we convert current page to kernel page when cr4.smep=1,
  230. * we can't switch to user mode.
  231. */
  232. if (F(AC_ACCESS_USER) && F(AC_CPU_CR4_SMEP))
  233. return false;
  234. /*
  235. * Only test protection key faults if CR4.PKE=1.
  236. */
  237. if (!F(AC_CPU_CR4_PKE) &&
  238. (F(AC_PKU_AD) || F(AC_PKU_WD))) {
  239. return false;
  240. }
  241. /*
  242. * pde.bit13 checks handling of reserved bits in largepage PDEs. It is
  243. * meaningless if there is a PTE.
  244. */
  245. if (!F(AC_PDE_PSE) && F(AC_PDE_BIT13))
  246. return false;
  247. return true;
  248. }
  249. int ac_test_bump(ac_test_t *at)
  250. {
  251. int ret;
  252. ret = ac_test_bump_one(at);
  253. while (ret && !ac_test_legal(at))
  254. ret = ac_test_bump_one(at);
  255. return ret;
  256. }
  257. pt_element_t ac_test_alloc_pt(ac_pool_t *pool)
  258. {
  259. pt_element_t ret = pool->pt_pool + pool->pt_pool_current;
  260. pool->pt_pool_current += PAGE_SIZE;
  261. return ret;
  262. }
  263. _Bool ac_test_enough_room(ac_pool_t *pool)
  264. {
  265. return pool->pt_pool_current + 4 * PAGE_SIZE <= pool->pt_pool_size;
  266. }
  267. void ac_test_reset_pt_pool(ac_pool_t *pool)
  268. {
  269. pool->pt_pool_current = 0;
  270. }
  271. pt_element_t ac_test_permissions(ac_test_t *at, unsigned flags, bool writable,
  272. bool user, bool executable)
  273. {
  274. bool kwritable = !F(AC_CPU_CR0_WP) && !F(AC_ACCESS_USER);
  275. pt_element_t expected = 0;
  276. if (F(AC_ACCESS_USER) && !user)
  277. at->expected_fault = 1;
  278. if (F(AC_ACCESS_WRITE) && !writable && !kwritable)
  279. at->expected_fault = 1;
  280. if (F(AC_ACCESS_FETCH) && !executable)
  281. at->expected_fault = 1;
  282. if (F(AC_ACCESS_FETCH) && user && F(AC_CPU_CR4_SMEP))
  283. at->expected_fault = 1;
  284. if (user && !F(AC_ACCESS_FETCH) && F(AC_PKU_PKEY) && F(AC_CPU_CR4_PKE)) {
  285. if (F(AC_PKU_AD)) {
  286. at->expected_fault = 1;
  287. at->expected_error |= PFERR_PK_MASK;
  288. } else if (F(AC_ACCESS_WRITE) && F(AC_PKU_WD) && !kwritable) {
  289. at->expected_fault = 1;
  290. at->expected_error |= PFERR_PK_MASK;
  291. }
  292. }
  293. if (!at->expected_fault) {
  294. expected |= PT_ACCESSED_MASK;
  295. if (F(AC_ACCESS_WRITE))
  296. expected |= PT_DIRTY_MASK;
  297. }
  298. return expected;
  299. }
  300. void ac_emulate_access(ac_test_t *at, unsigned flags)
  301. {
  302. bool pde_valid, pte_valid;
  303. bool user, writable, executable;
  304. if (F(AC_ACCESS_USER))
  305. at->expected_error |= PFERR_USER_MASK;
  306. if (F(AC_ACCESS_WRITE))
  307. at->expected_error |= PFERR_WRITE_MASK;
  308. if (F(AC_ACCESS_FETCH))
  309. at->expected_error |= PFERR_FETCH_MASK;
  310. if (!F(AC_PDE_ACCESSED))
  311. at->ignore_pde = PT_ACCESSED_MASK;
  312. pde_valid = F(AC_PDE_PRESENT)
  313. && !F(AC_PDE_BIT51) && !F(AC_PDE_BIT13)
  314. && !(F(AC_PDE_NX) && !F(AC_CPU_EFER_NX));
  315. if (!pde_valid) {
  316. at->expected_fault = 1;
  317. if (F(AC_PDE_PRESENT)) {
  318. at->expected_error |= PFERR_RESERVED_MASK;
  319. } else {
  320. at->expected_error &= ~PFERR_PRESENT_MASK;
  321. }
  322. goto fault;
  323. }
  324. writable = F(AC_PDE_WRITABLE);
  325. user = F(AC_PDE_USER);
  326. executable = !F(AC_PDE_NX);
  327. if (F(AC_PDE_PSE)) {
  328. at->expected_pde |= ac_test_permissions(at, flags, writable, user,
  329. executable);
  330. goto no_pte;
  331. }
  332. at->expected_pde |= PT_ACCESSED_MASK;
  333. pte_valid = F(AC_PTE_PRESENT)
  334. && !F(AC_PTE_BIT51)
  335. && !(F(AC_PTE_NX) && !F(AC_CPU_EFER_NX));
  336. if (!pte_valid) {
  337. at->expected_fault = 1;
  338. if (F(AC_PTE_PRESENT)) {
  339. at->expected_error |= PFERR_RESERVED_MASK;
  340. } else {
  341. at->expected_error &= ~PFERR_PRESENT_MASK;
  342. }
  343. goto fault;
  344. }
  345. writable &= F(AC_PTE_WRITABLE);
  346. user &= F(AC_PTE_USER);
  347. executable &= !F(AC_PTE_NX);
  348. at->expected_pte |= ac_test_permissions(at, flags, writable, user,
  349. executable);
  350. no_pte:
  351. fault:
  352. if (!at->expected_fault)
  353. at->ignore_pde = 0;
  354. if (!F(AC_CPU_EFER_NX) && !F(AC_CPU_CR4_SMEP))
  355. at->expected_error &= ~PFERR_FETCH_MASK;
  356. }
  357. void ac_set_expected_status(ac_test_t *at)
  358. {
  359. invlpg(at->virt);
  360. if (at->ptep)
  361. at->expected_pte = *at->ptep;
  362. at->expected_pde = *at->pdep;
  363. at->ignore_pde = 0;
  364. at->expected_fault = 0;
  365. at->expected_error = PFERR_PRESENT_MASK;
  366. if (at->flags & AC_ACCESS_TWICE_MASK) {
  367. ac_emulate_access(at, at->flags & ~AC_ACCESS_WRITE_MASK
  368. & ~AC_ACCESS_FETCH_MASK & ~AC_ACCESS_USER_MASK);
  369. at->expected_fault = 0;
  370. at->expected_error = PFERR_PRESENT_MASK;
  371. at->ignore_pde = 0;
  372. }
  373. ac_emulate_access(at, at->flags);
  374. }
  375. void __ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool, u64 pd_page,
  376. u64 pt_page)
  377. {
  378. unsigned long root = read_cr3();
  379. int flags = at->flags;
  380. if (!ac_test_enough_room(pool))
  381. ac_test_reset_pt_pool(pool);
  382. at->ptep = 0;
  383. for (int i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
  384. pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
  385. unsigned index = PT_INDEX((unsigned long)at->virt, i);
  386. pt_element_t pte = 0;
  387. switch (i) {
  388. case 4:
  389. case 3:
  390. pte = pd_page ? pd_page : ac_test_alloc_pt(pool);
  391. pte |= PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
  392. break;
  393. case 2:
  394. if (!F(AC_PDE_PSE)) {
  395. pte = pt_page ? pt_page : ac_test_alloc_pt(pool);
  396. /* The protection key is ignored on non-leaf entries. */
  397. if (F(AC_PKU_PKEY))
  398. pte |= 2ull << 59;
  399. } else {
  400. pte = at->phys & PT_PSE_BASE_ADDR_MASK;
  401. pte |= PT_PAGE_SIZE_MASK;
  402. if (F(AC_PKU_PKEY))
  403. pte |= 1ull << 59;
  404. }
  405. if (F(AC_PDE_PRESENT))
  406. pte |= PT_PRESENT_MASK;
  407. if (F(AC_PDE_WRITABLE))
  408. pte |= PT_WRITABLE_MASK;
  409. if (F(AC_PDE_USER))
  410. pte |= PT_USER_MASK;
  411. if (F(AC_PDE_ACCESSED))
  412. pte |= PT_ACCESSED_MASK;
  413. if (F(AC_PDE_DIRTY))
  414. pte |= PT_DIRTY_MASK;
  415. if (F(AC_PDE_NX))
  416. pte |= PT64_NX_MASK;
  417. if (F(AC_PDE_BIT51))
  418. pte |= 1ull << 51;
  419. if (F(AC_PDE_BIT13))
  420. pte |= 1ull << 13;
  421. at->pdep = &vroot[index];
  422. break;
  423. case 1:
  424. pte = at->phys & PT_BASE_ADDR_MASK;
  425. if (F(AC_PKU_PKEY))
  426. pte |= 1ull << 59;
  427. if (F(AC_PTE_PRESENT))
  428. pte |= PT_PRESENT_MASK;
  429. if (F(AC_PTE_WRITABLE))
  430. pte |= PT_WRITABLE_MASK;
  431. if (F(AC_PTE_USER))
  432. pte |= PT_USER_MASK;
  433. if (F(AC_PTE_ACCESSED))
  434. pte |= PT_ACCESSED_MASK;
  435. if (F(AC_PTE_DIRTY))
  436. pte |= PT_DIRTY_MASK;
  437. if (F(AC_PTE_NX))
  438. pte |= PT64_NX_MASK;
  439. if (F(AC_PTE_BIT51))
  440. pte |= 1ull << 51;
  441. at->ptep = &vroot[index];
  442. break;
  443. }
  444. vroot[index] = pte;
  445. root = vroot[index];
  446. }
  447. ac_set_expected_status(at);
  448. }
  449. static void ac_test_setup_pte(ac_test_t *at, ac_pool_t *pool)
  450. {
  451. __ac_setup_specific_pages(at, pool, 0, 0);
  452. }
  453. static void ac_setup_specific_pages(ac_test_t *at, ac_pool_t *pool,
  454. u64 pd_page, u64 pt_page)
  455. {
  456. return __ac_setup_specific_pages(at, pool, pd_page, pt_page);
  457. }
  458. static void dump_mapping(ac_test_t *at)
  459. {
  460. unsigned long root = read_cr3();
  461. int flags = at->flags;
  462. int i;
  463. printf("Dump mapping: address: %p\n", at->virt);
  464. for (i = 4; i >= 1 && (i >= 2 || !F(AC_PDE_PSE)); --i) {
  465. pt_element_t *vroot = va(root & PT_BASE_ADDR_MASK);
  466. unsigned index = PT_INDEX((unsigned long)at->virt, i);
  467. pt_element_t pte = vroot[index];
  468. printf("------L%d: %lx\n", i, pte);
  469. root = vroot[index];
  470. }
  471. }
  472. static void ac_test_check(ac_test_t *at, _Bool *success_ret, _Bool cond,
  473. const char *fmt, ...)
  474. {
  475. va_list ap;
  476. char buf[500];
  477. if (!*success_ret) {
  478. return;
  479. }
  480. if (!cond) {
  481. return;
  482. }
  483. *success_ret = false;
  484. if (!verbose) {
  485. puts("\n");
  486. ac_test_show(at);
  487. }
  488. va_start(ap, fmt);
  489. vsnprintf(buf, sizeof(buf), fmt, ap);
  490. va_end(ap);
  491. printf("FAIL: %s\n", buf);
  492. dump_mapping(at);
  493. }
  494. static int pt_match(pt_element_t pte1, pt_element_t pte2, pt_element_t ignore)
  495. {
  496. pte1 &= ~ignore;
  497. pte2 &= ~ignore;
  498. return pte1 == pte2;
  499. }
  500. int ac_test_do_access(ac_test_t *at)
  501. {
  502. static unsigned unique = 42;
  503. int fault = 0;
  504. unsigned e;
  505. static unsigned char user_stack[4096];
  506. unsigned long rsp;
  507. _Bool success = true;
  508. int flags = at->flags;
  509. ++unique;
  510. if (!(unique & 65535)) {
  511. puts(".");
  512. }
  513. *((unsigned char *)at->phys) = 0xc3; /* ret */
  514. unsigned r = unique;
  515. set_cr0_wp(F(AC_CPU_CR0_WP));
  516. set_efer_nx(F(AC_CPU_EFER_NX));
  517. set_cr4_pke(F(AC_CPU_CR4_PKE));
  518. if (F(AC_CPU_CR4_PKE)) {
  519. /* WD2=AD2=1, WD1=F(AC_PKU_WD), AD1=F(AC_PKU_AD) */
  520. write_pkru(0x30 | (F(AC_PKU_WD) ? 8 : 0) |
  521. (F(AC_PKU_AD) ? 4 : 0));
  522. }
  523. set_cr4_smep(F(AC_CPU_CR4_SMEP));
  524. if (F(AC_ACCESS_TWICE)) {
  525. asm volatile (
  526. "mov $fixed2, %%rsi \n\t"
  527. "mov (%[addr]), %[reg] \n\t"
  528. "fixed2:"
  529. : [reg]"=r"(r), [fault]"=a"(fault), "=b"(e)
  530. : [addr]"r"(at->virt)
  531. : "rsi"
  532. );
  533. fault = 0;
  534. }
  535. asm volatile ("mov $fixed1, %%rsi \n\t"
  536. "mov %%rsp, %%rdx \n\t"
  537. "cmp $0, %[user] \n\t"
  538. "jz do_access \n\t"
  539. "push %%rax; mov %[user_ds], %%ax; mov %%ax, %%ds; pop %%rax \n\t"
  540. "pushq %[user_ds] \n\t"
  541. "pushq %[user_stack_top] \n\t"
  542. "pushfq \n\t"
  543. "pushq %[user_cs] \n\t"
  544. "pushq $do_access \n\t"
  545. "iretq \n"
  546. "do_access: \n\t"
  547. "cmp $0, %[fetch] \n\t"
  548. "jnz 2f \n\t"
  549. "cmp $0, %[write] \n\t"
  550. "jnz 1f \n\t"
  551. "mov (%[addr]), %[reg] \n\t"
  552. "jmp done \n\t"
  553. "1: mov %[reg], (%[addr]) \n\t"
  554. "jmp done \n\t"
  555. "2: call *%[addr] \n\t"
  556. "done: \n"
  557. "fixed1: \n"
  558. "int %[kernel_entry_vector] \n\t"
  559. "back_to_kernel:"
  560. : [reg]"+r"(r), "+a"(fault), "=b"(e), "=&d"(rsp)
  561. : [addr]"r"(at->virt),
  562. [write]"r"(F(AC_ACCESS_WRITE)),
  563. [user]"r"(F(AC_ACCESS_USER)),
  564. [fetch]"r"(F(AC_ACCESS_FETCH)),
  565. [user_ds]"i"(USER_DS),
  566. [user_cs]"i"(USER_CS),
  567. [user_stack_top]"r"(user_stack + sizeof user_stack),
  568. [kernel_entry_vector]"i"(0x20)
  569. : "rsi");
  570. asm volatile (".section .text.pf \n\t"
  571. "page_fault: \n\t"
  572. "pop %rbx \n\t"
  573. "mov %rsi, (%rsp) \n\t"
  574. "movl $1, %eax \n\t"
  575. "iretq \n\t"
  576. ".section .text");
  577. asm volatile (".section .text.entry \n\t"
  578. "kernel_entry: \n\t"
  579. "mov %rdx, %rsp \n\t"
  580. "jmp back_to_kernel \n\t"
  581. ".section .text");
  582. ac_test_check(at, &success, fault && !at->expected_fault,
  583. "unexpected fault");
  584. ac_test_check(at, &success, !fault && at->expected_fault,
  585. "unexpected access");
  586. ac_test_check(at, &success, fault && e != at->expected_error,
  587. "error code %x expected %x", e, at->expected_error);
  588. ac_test_check(at, &success, at->ptep && *at->ptep != at->expected_pte,
  589. "pte %x expected %x", *at->ptep, at->expected_pte);
  590. ac_test_check(at, &success,
  591. !pt_match(*at->pdep, at->expected_pde, at->ignore_pde),
  592. "pde %x expected %x", *at->pdep, at->expected_pde);
  593. if (success && verbose) {
  594. if (at->expected_fault) {
  595. printf("PASS (%x)\n", at->expected_error);
  596. } else {
  597. printf("PASS\n");
  598. }
  599. }
  600. return success;
  601. }
  602. static void ac_test_show(ac_test_t *at)
  603. {
  604. char line[5000];
  605. *line = 0;
  606. strcat(line, "test");
  607. for (int i = 0; i < NR_AC_FLAGS; ++i)
  608. if (at->flags & (1 << i)) {
  609. strcat(line, " ");
  610. strcat(line, ac_names[i]);
  611. }
  612. strcat(line, ": ");
  613. printf("%s", line);
  614. }
  615. /*
  616. * This test case is used to triger the bug which is fixed by
  617. * commit e09e90a5 in the kvm tree
  618. */
  619. static int corrupt_hugepage_triger(ac_pool_t *pool)
  620. {
  621. ac_test_t at1, at2;
  622. ac_test_init(&at1, (void *)(0x123400000000));
  623. ac_test_init(&at2, (void *)(0x666600000000));
  624. at2.flags = AC_CPU_CR0_WP_MASK | AC_PDE_PSE_MASK | AC_PDE_PRESENT_MASK;
  625. ac_test_setup_pte(&at2, pool);
  626. if (!ac_test_do_access(&at2))
  627. goto err;
  628. at1.flags = at2.flags | AC_PDE_WRITABLE_MASK;
  629. ac_test_setup_pte(&at1, pool);
  630. if (!ac_test_do_access(&at1))
  631. goto err;
  632. at1.flags |= AC_ACCESS_WRITE_MASK;
  633. ac_set_expected_status(&at1);
  634. if (!ac_test_do_access(&at1))
  635. goto err;
  636. at2.flags |= AC_ACCESS_WRITE_MASK;
  637. ac_set_expected_status(&at2);
  638. if (!ac_test_do_access(&at2))
  639. goto err;
  640. return 1;
  641. err:
  642. printf("corrupt_hugepage_triger test fail\n");
  643. return 0;
  644. }
  645. /*
  646. * This test case is used to triger the bug which is fixed by
  647. * commit 3ddf6c06e13e in the kvm tree
  648. */
  649. static int check_pfec_on_prefetch_pte(ac_pool_t *pool)
  650. {
  651. ac_test_t at1, at2;
  652. ac_test_init(&at1, (void *)(0x123406001000));
  653. ac_test_init(&at2, (void *)(0x123406003000));
  654. at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK;
  655. ac_setup_specific_pages(&at1, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
  656. at2.flags = at1.flags | AC_PTE_NX_MASK;
  657. ac_setup_specific_pages(&at2, pool, 30 * 1024 * 1024, 30 * 1024 * 1024);
  658. if (!ac_test_do_access(&at1)) {
  659. printf("%s: prepare fail\n", __FUNCTION__);
  660. goto err;
  661. }
  662. if (!ac_test_do_access(&at2)) {
  663. printf("%s: check PFEC on prefetch pte path fail\n",
  664. __FUNCTION__);
  665. goto err;
  666. }
  667. return 1;
  668. err:
  669. return 0;
  670. }
  671. /*
  672. * If the write-fault access is from supervisor and CR0.WP is not set on the
  673. * vcpu, kvm will fix it by adjusting pte access - it sets the W bit on pte
  674. * and clears U bit. This is the chance that kvm can change pte access from
  675. * readonly to writable.
  676. *
  677. * Unfortunately, the pte access is the access of 'direct' shadow page table,
  678. * means direct sp.role.access = pte_access, then we will create a writable
  679. * spte entry on the readonly shadow page table. It will cause Dirty bit is
  680. * not tracked when two guest ptes point to the same large page. Note, it
  681. * does not have other impact except Dirty bit since cr0.wp is encoded into
  682. * sp.role.
  683. *
  684. * Note: to trigger this bug, hugepage should be disabled on host.
  685. */
  686. static int check_large_pte_dirty_for_nowp(ac_pool_t *pool)
  687. {
  688. ac_test_t at1, at2;
  689. ac_test_init(&at1, (void *)(0x123403000000));
  690. ac_test_init(&at2, (void *)(0x666606000000));
  691. at2.flags = AC_PDE_PRESENT_MASK | AC_PDE_PSE_MASK;
  692. ac_test_setup_pte(&at2, pool);
  693. if (!ac_test_do_access(&at2)) {
  694. printf("%s: read on the first mapping fail.\n", __FUNCTION__);
  695. goto err;
  696. }
  697. at1.flags = at2.flags | AC_ACCESS_WRITE_MASK;
  698. ac_test_setup_pte(&at1, pool);
  699. if (!ac_test_do_access(&at1)) {
  700. printf("%s: write on the second mapping fail.\n", __FUNCTION__);
  701. goto err;
  702. }
  703. at2.flags |= AC_ACCESS_WRITE_MASK;
  704. ac_set_expected_status(&at2);
  705. if (!ac_test_do_access(&at2)) {
  706. printf("%s: write on the first mapping fail.\n", __FUNCTION__);
  707. goto err;
  708. }
  709. return 1;
  710. err:
  711. return 0;
  712. }
  713. static int check_smep_andnot_wp(ac_pool_t *pool)
  714. {
  715. ac_test_t at1;
  716. int err_prepare_andnot_wp, err_smep_andnot_wp;
  717. if (!(cpuid_7_ebx & (1 << 7))) {
  718. return 1;
  719. }
  720. ac_test_init(&at1, (void *)(0x123406001000));
  721. at1.flags = AC_PDE_PRESENT_MASK | AC_PTE_PRESENT_MASK |
  722. AC_PDE_USER_MASK | AC_PTE_USER_MASK |
  723. AC_PDE_ACCESSED_MASK | AC_PTE_ACCESSED_MASK |
  724. AC_CPU_CR4_SMEP_MASK |
  725. AC_CPU_CR0_WP_MASK |
  726. AC_ACCESS_WRITE_MASK;
  727. ac_test_setup_pte(&at1, pool);
  728. /*
  729. * Here we write the ro user page when
  730. * cr0.wp=0, then we execute it and SMEP
  731. * fault should happen.
  732. */
  733. err_prepare_andnot_wp = ac_test_do_access(&at1);
  734. if (!err_prepare_andnot_wp) {
  735. printf("%s: SMEP prepare fail\n", __FUNCTION__);
  736. goto clean_up;
  737. }
  738. at1.flags &= ~AC_ACCESS_WRITE_MASK;
  739. at1.flags |= AC_ACCESS_FETCH_MASK;
  740. ac_set_expected_status(&at1);
  741. err_smep_andnot_wp = ac_test_do_access(&at1);
  742. clean_up:
  743. set_cr4_smep(0);
  744. if (!err_prepare_andnot_wp)
  745. goto err;
  746. if (!err_smep_andnot_wp) {
  747. printf("%s: check SMEP without wp fail\n", __FUNCTION__);
  748. goto err;
  749. }
  750. return 1;
  751. err:
  752. return 0;
  753. }
  754. int ac_test_exec(ac_test_t *at, ac_pool_t *pool)
  755. {
  756. int r;
  757. if (verbose) {
  758. ac_test_show(at);
  759. }
  760. ac_test_setup_pte(at, pool);
  761. r = ac_test_do_access(at);
  762. return r;
  763. }
/* Signature of a hand-written regression test: returns 1 on success, 0 on failure. */
typedef int (*ac_test_fn)(ac_pool_t *pool);
/* Regression tests that ac_test_run() executes after the generated ones. */
const ac_test_fn ac_test_cases[] =
{
    corrupt_hugepage_triger,
    check_pfec_on_prefetch_pte,
    check_large_pte_dirty_for_nowp,
    check_smep_andnot_wp
};
  772. int ac_test_run(void)
  773. {
  774. ac_test_t at;
  775. ac_pool_t pool;
  776. int i, tests, successes;
  777. printf("run\n");
  778. tests = successes = 0;
  779. if (cpuid_7_ecx & (1 << 3)) {
  780. set_cr4_pke(1);
  781. set_cr4_pke(0);
  782. /* Now PKRU = 0xFFFFFFFF. */
  783. } else {
  784. unsigned long cr4 = read_cr4();
  785. tests++;
  786. if (write_cr4_checking(cr4 | X86_CR4_PKE) == GP_VECTOR) {
  787. successes++;
  788. invalid_mask |= AC_PKU_AD_MASK;
  789. invalid_mask |= AC_PKU_WD_MASK;
  790. invalid_mask |= AC_PKU_PKEY_MASK;
  791. invalid_mask |= AC_CPU_CR4_PKE_MASK;
  792. printf("CR4.PKE not available, disabling PKE tests\n");
  793. } else {
  794. printf("Set PKE in CR4 - expect #GP: FAIL!\n");
  795. set_cr4_pke(0);
  796. }
  797. }
  798. if (!(cpuid_7_ebx & (1 << 7))) {
  799. unsigned long cr4 = read_cr4();
  800. tests++;
  801. if (write_cr4_checking(cr4 | CR4_SMEP_MASK) == GP_VECTOR) {
  802. successes++;
  803. invalid_mask |= AC_CPU_CR4_SMEP_MASK;
  804. printf("CR4.SMEP not available, disabling SMEP tests\n");
  805. } else {
  806. printf("Set SMEP in CR4 - expect #GP: FAIL!\n");
  807. set_cr4_smep(0);
  808. }
  809. }
  810. ac_env_int(&pool);
  811. ac_test_init(&at, (void *)(0x123400000000 + 16 * smp_id()));
  812. do {
  813. ++tests;
  814. successes += ac_test_exec(&at, &pool);
  815. } while (ac_test_bump(&at));
  816. for (i = 0; i < ARRAY_SIZE(ac_test_cases); i++) {
  817. ++tests;
  818. successes += ac_test_cases[i](&pool);
  819. }
  820. printf("\n%d tests, %d failures\n", tests, tests - successes);
  821. return successes == tests;
  822. }
  823. int main()
  824. {
  825. int r;
  826. setup_idt();
  827. cpuid_7_ebx = cpuid(7).b;
  828. cpuid_7_ecx = cpuid(7).c;
  829. printf("starting test\n\n");
  830. r = ac_test_run();
  831. return r ? 0 : 1;
  832. }