cache-l2-pl310.c 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. /*
  2. * PL310 level 2 cache (non-architectural bag on the side)
  3. *
  4. * guaranteed to work incorrectly with default settings; must set Sharovr.
  5. *
  6. * clean & invalidate (wbinv) is buggy, so we work around erratum 588369
  7. * by disabling write-back and cache line-fill before, and restoring after.
  8. */
  9. #include "u.h"
  10. #include "../port/lib.h"
  11. #include "mem.h"
  12. #include "dat.h"
  13. #include "fns.h"
  14. #include "io.h"
  15. #include "../port/error.h"
  16. #include "arm.h"
  17. #define NWAYS(l2p) ((l2p)->auxctl & Assoc16way? 16: 8)
  18. #define L2P ((L2pl310 *)soc.l2cache)
  19. enum {
  20. L2size = 1024 * 1024, /* according to the tegra 2 manual */
  21. Wayszgran = 16 * KiB, /* granularity of way sizes */
  22. };
  23. typedef struct L2pl310 L2pl310;
  24. typedef struct Pl310op Pl310op;
  25. struct Pl310op {
  26. ulong pa;
  27. ulong _pad;
  28. ulong indexway;
  29. ulong way;
  30. };
  31. struct L2pl310 {
  32. ulong id;
  33. ulong type;
  34. uchar _pad0[0x100 - 0x8];
  35. ulong ctl;
  36. ulong auxctl;
  37. uchar _pad1[0x730 - 0x108]; /* boring regs */
  38. ulong sync;
  39. uchar _pad2[0x740 - 0x734];
  40. ulong r3p0sync; /* workaround for r3p0 bug */
  41. uchar _pad3[0x770 - 0x744];
  42. Pl310op inv; /* inv.indexway doesn't exist */
  43. uchar _pad4[0x7b0 - 0x780];
  44. Pl310op clean;
  45. uchar _pad5[0x7f0 - 0x7c0];
  46. Pl310op cleaninv;
  47. uchar _pad6[0xc00 - 0x7d0];
  48. ulong filtstart;
  49. ulong filtend;
  50. uchar _pad6[0xf40 - 0xc08];
  51. ulong debug;
  52. /* ... */
  53. };
  54. enum {
  55. /* ctl bits */
  56. L2enable = 1,
  57. /* auxctl bits */
  58. Ipref = 1<<29, /* prefetch enables */
  59. Dpref = 1<<28,
  60. Mbo = 1<<25,
  61. Sharovr = 1<<22, /* shared attribute override (i.e., work right!) */
  62. Parity = 1<<21,
  63. Waycfgshift= 17,
  64. Waycfgmask = (1<<3) - 1,
  65. Assoc16way = 1<<16,
  66. /*
  67. * optim'n to 0 cache lines; must be enabled in a9(?!).
  68. * set CpAClwr0line on all cpus 1st.
  69. */
  70. Fullline0= 1<<0,
  71. /* debug bits */
  72. Wt = 1<<1, /* write-through, not write-back */
  73. Nolinefill= 1<<0,
  74. Basecfg = Wt | Nolinefill,
  75. };
  76. static Lock l2lock;
  77. static int disallowed; /* by user: *l2off= in plan9.ini */
  78. static int l2ison;
  79. static int bg_op_running;
  80. static ulong waysmask;
  81. static Cacheimpl l2cacheimpl;
  82. static void
  83. awaitbgop(void)
  84. {
  85. while (bg_op_running)
  86. ;
  87. }
  88. static void
  89. getlock(void)
  90. {
  91. awaitbgop(); /* wait at normal PL first */
  92. ilock(&l2lock);
  93. awaitbgop(); /* wait under lock */
  94. }
  95. static void
  96. l2pl310sync(void)
  97. {
  98. L2P->sync = 0;
  99. coherence();
  100. }
  101. /* call this first to set sets/ways configuration */
  102. void
  103. l2pl310init(void)
  104. {
  105. int waysz, nways;
  106. ulong new;
  107. L2pl310 *l2p = L2P;
  108. static int configed;
  109. if (getconf("*l2off") != nil) {
  110. // iprint("l2 cache (pl310) disabled\n");
  111. disallowed = 1;
  112. return;
  113. }
  114. if (l2ison || configed)
  115. return;
  116. l2cache = &l2cacheimpl;
  117. cachedwb();
  118. /*
  119. * default config is:
  120. * l2: ext unified, 8 ways 512 sets 32 bytes/line => 128KB
  121. * but the tegra 2 manual says there's 1MB available.
  122. * ways or way-size may be fixed by hardware; the only way to tell
  123. * is to try to change the setting and read it back.
  124. */
  125. l2pl310sync();
  126. l2cache->inv();
  127. /* figure out number of ways */
  128. l2pl310sync();
  129. nways = NWAYS(l2p);
  130. if (!(l2p->auxctl & Assoc16way)) {
  131. l2p->auxctl |= Assoc16way;
  132. coherence();
  133. l2pl310sync();
  134. nways = NWAYS(l2p);
  135. // iprint("\nl2: was set for 8 ways, asked for 16, got %d\n", nways);
  136. }
  137. waysmask = MASK(nways);
  138. /* figure out way size (and thus number of sets) */
  139. waysz = L2size / nways;
  140. new = l2p->auxctl & ~(Waycfgmask << Waycfgshift) |
  141. (log2(waysz / Wayszgran) + 1) << Waycfgshift;
  142. l2p->auxctl = new;
  143. coherence();
  144. l2pl310sync();
  145. l2cache->inv();
  146. // iprint("\nl2: configed %d ways, %d sets (way size %d)\n", nways,
  147. // waysz / CACHELINESZ, waysz);
  148. if (l2p->auxctl != new)
  149. iprint("l2 config %#8.8lux didn't stick; is now %#8.8lux\n",
  150. new, l2p->auxctl);
  151. configed++;
  152. }
  153. void
  154. l2pl310info(Memcache *cp)
  155. {
  156. int pow2;
  157. ulong waysz;
  158. L2pl310 *l2p = L2P;
  159. memset(cp, 0, sizeof *cp);
  160. if (!l2ison)
  161. return;
  162. l2pl310init();
  163. assert((l2p->id >> 24) == 'A');
  164. cp->level = 2;
  165. cp->type = Unified;
  166. cp->external = Extcache;
  167. cp->setsways = Cara | Cawa | Cawt | Cawb;
  168. cp->l1ip = 3<<14; /* PIPT */
  169. cp->setsh = cp->waysh = 0; /* bag on the side */
  170. cp->linelen = CACHELINESZ;
  171. cp->log2linelen = log2(CACHELINESZ);
  172. cp->nways = NWAYS(l2p);
  173. pow2 = ((l2p->auxctl >> Waycfgshift) & Waycfgmask) - 1;
  174. if (pow2 < 0)
  175. pow2 = 0;
  176. waysz = (1 << pow2) * Wayszgran;
  177. cp->nsets = waysz / CACHELINESZ;
  178. }
  179. void
  180. l2pl310on(void)
  181. {
  182. ulong ctl;
  183. L2pl310 *l2p = L2P;
  184. if (getconf("*l2off") != nil) {
  185. // iprint("l2 cache (pl310) disabled\n");
  186. disallowed = 1;
  187. return;
  188. }
  189. if (l2ison)
  190. return;
  191. l2pl310init();
  192. l2cache->inv();
  193. /*
  194. * drain l1. can't turn it off (which would make locks not work)
  195. * because doing so makes references below to the l2 registers wedge
  196. * the system.
  197. */
  198. cacheuwbinv();
  199. cacheiinv();
  200. /*
  201. * this is only called once, on cpu0 at startup,
  202. * so we don't need locks here.
  203. * must do all configuration before enabling l2 cache.
  204. */
  205. l2p->filtend = 0;
  206. coherence();
  207. l2p->filtstart = 0; /* no enable bit */
  208. l2p->debug = 0; /* write-back, line fills allowed */
  209. coherence();
  210. ctl = l2p->auxctl;
  211. /* don't change number of sets & ways, but reset all else. */
  212. ctl &= Waycfgmask << Waycfgshift | Assoc16way;
  213. ctl |= Sharovr; /* actually work correctly for a change */
  214. ctl |= Mbo | Ipref | Dpref | Parity | Fullline0;
  215. l2p->auxctl = ctl;
  216. coherence();
  217. l2p->ctl |= L2enable;
  218. coherence();
  219. l2ison = 1;
  220. // iprint("l2 cache (pl310) now on\n");
  221. }
  222. void
  223. l2pl310off(void)
  224. {
  225. if (!l2ison)
  226. return;
  227. l2cache->wbinv();
  228. getlock();
  229. L2P->ctl &= ~L2enable;
  230. coherence();
  231. l2ison = 0;
  232. iunlock(&l2lock);
  233. }
  234. static void
  235. applyrange(ulong *reg, void *ava, int len)
  236. {
  237. uintptr va, endva;
  238. if (disallowed || !l2ison)
  239. return;
  240. if (len < 0)
  241. panic("l2cache*se called with negative length");
  242. endva = (uintptr)ava + len;
  243. for (va = (uintptr)ava & ~(CACHELINESZ-1); va < endva;
  244. va += CACHELINESZ)
  245. *reg = PADDR(va);
  246. l2pl310sync();
  247. }
  248. void
  249. l2pl310invse(void *va, int bytes)
  250. {
  251. uintptr start, end;
  252. L2pl310 *l2p = L2P;
  253. /*
  254. * if start & end addresses are not on cache-line boundaries,
  255. * flush first & last cachelines before invalidating.
  256. */
  257. start = (uintptr)va;
  258. end = start + bytes;
  259. getlock();
  260. if (start % CACHELINESZ != 0) {
  261. // iprint("l2pl310invse: unaligned start %#p from %#p\n", start,
  262. // getcallerpc(&va));
  263. applyrange(&l2p->clean.pa, va, 1);
  264. }
  265. if (end % CACHELINESZ != 0) {
  266. // iprint("l2pl310invse: unaligned end %#p from %#p\n", end,
  267. // getcallerpc(&va));
  268. applyrange(&l2p->clean.pa, (char *)va + bytes, 1);
  269. }
  270. applyrange(&l2p->inv.pa, va, bytes);
  271. iunlock(&l2lock);
  272. }
  273. void
  274. l2pl310wbse(void *va, int bytes)
  275. {
  276. getlock();
  277. applyrange(&L2P->clean.pa, va, bytes);
  278. iunlock(&l2lock);
  279. }
  280. /*
  281. * assume that ldrex/strex (thus locks) won't work when Wt in is effect,
  282. * so don't manipulate locks between setting and clearing Wt.
  283. */
  284. void
  285. l2pl310wbinvse(void *va, int bytes)
  286. {
  287. int odb;
  288. L2pl310 *l2p = L2P;
  289. if (!l2ison)
  290. return;
  291. getlock();
  292. applyrange(&l2p->clean.pa, va, bytes); /* paranoia */
  293. odb = l2p->debug;
  294. l2p->debug |= Wt | Nolinefill; /* erratum workaround */
  295. coherence();
  296. applyrange(&l2p->cleaninv.pa, va, bytes);
  297. l2p->debug = odb;
  298. iunlock(&l2lock);
  299. }
  300. /*
  301. * we want to wait for completion at normal PL.
  302. * if waiting is interrupted, interrupt code that calls
  303. * these ops could deadlock on a uniprocessor, so we only
  304. * give up l2lock before waiting on multiprocessors.
  305. * in this port, only cpu 0 gets interrupts other than local timer ones.
  306. */
  307. void
  308. l2pl310inv(void)
  309. {
  310. L2pl310 *l2p = L2P;
  311. if (disallowed)
  312. return;
  313. getlock();
  314. bg_op_running = 1;
  315. l2p->inv.way = waysmask;
  316. coherence();
  317. if (conf.nmach > 1)
  318. iunlock(&l2lock);
  319. while (l2p->inv.way & waysmask)
  320. ;
  321. if (conf.nmach > 1)
  322. ilock(&l2lock);
  323. l2pl310sync();
  324. bg_op_running = 0;
  325. iunlock(&l2lock);
  326. }
  327. /*
  328. * maximum time seen is 2542µs, typical is 625µs.
  329. */
  330. void
  331. l2pl310wb(void)
  332. {
  333. L2pl310 *l2p = L2P;
  334. if (disallowed || !l2ison)
  335. return;
  336. getlock();
  337. bg_op_running = 1;
  338. l2p->clean.way = waysmask;
  339. coherence();
  340. if (conf.nmach > 1)
  341. iunlock(&l2lock);
  342. while (l2p->clean.way & waysmask)
  343. ;
  344. if (conf.nmach > 1)
  345. ilock(&l2lock);
  346. l2pl310sync();
  347. bg_op_running = 0;
  348. iunlock(&l2lock);
  349. }
  350. void
  351. l2pl310wbinv(void)
  352. {
  353. int odb;
  354. L2pl310 *l2p = L2P;
  355. if (disallowed || !l2ison)
  356. return;
  357. l2pl310wb(); /* paranoia */
  358. getlock();
  359. bg_op_running = 1;
  360. odb = l2p->debug;
  361. l2p->debug |= Wt | Nolinefill; /* erratum workaround */
  362. coherence();
  363. l2p->cleaninv.way = waysmask;
  364. coherence();
  365. if (conf.nmach > 1)
  366. iunlock(&l2lock);
  367. while (l2p->cleaninv.way & waysmask)
  368. ;
  369. if (conf.nmach > 1)
  370. ilock(&l2lock);
  371. l2pl310sync();
  372. l2p->debug = odb;
  373. bg_op_running = 0;
  374. iunlock(&l2lock);
  375. }
  376. static Cacheimpl l2cacheimpl = {
  377. .info = l2pl310info,
  378. .on = l2pl310on,
  379. .off = l2pl310off,
  380. .inv = l2pl310inv,
  381. .wb = l2pl310wb,
  382. .wbinv = l2pl310wbinv,
  383. .invse = l2pl310invse,
  384. .wbse = l2pl310wbse,
  385. .wbinvse= l2pl310wbinvse,
  386. };