plat_ras.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. /*
  2. * Copyright (c) 2020-2021, NVIDIA Corporation. All rights reserved.
  3. *
  4. * SPDX-License-Identifier: BSD-3-Clause
  5. */
  6. #include <inttypes.h>
  7. #include <stdbool.h>
  8. #include <stdint.h>
  9. #include <common/debug.h>
  10. #include <lib/bakery_lock.h>
  11. #include <lib/cassert.h>
  12. #include <lib/extensions/ras.h>
  13. #include <lib/utils_def.h>
  14. #include <services/sdei.h>
  15. #include <plat/common/platform.h>
  16. #include <platform_def.h>
  17. #include <tegra194_ras_private.h>
  18. #include <tegra_def.h>
  19. #include <tegra_platform.h>
  20. #include <tegra_private.h>
/*
 * ERR<n>FR bits[63:32], it indicates supported RAS errors which can be enabled
 * by setting corresponding bits in ERR<n>CTLR
 */
#define ERR_FR_EN_BITS_MASK 0xFFFFFFFF00000000ULL

/*
 * Number of RAS errors will be cleared per 'tegra194_ras_corrected_err_clear'
 * function call.
 */
#define RAS_ERRORS_PER_CALL 8

/*
 * the max possible RAS node index value.
 */
#define RAS_NODE_INDEX_MAX 0x1FFFFFFFU

/*
 * bakery lock for platform RAS handler; serialises tegra194_ea_handler()
 * across CPUs so error reports from multiple cores do not interleave.
 */
static DEFINE_BAKERY_LOCK(ras_handler_lock);
#define ras_lock() bakery_lock_get(&ras_handler_lock)
#define ras_unlock() bakery_lock_release(&ras_handler_lock)
  39. /*
  40. * Function to handle an External Abort received at EL3.
  41. * This function is invoked by RAS framework.
  42. */
  43. static void tegra194_ea_handler(unsigned int ea_reason, uint64_t syndrome,
  44. void *cookie, void *handle, uint64_t flags)
  45. {
  46. int32_t ret;
  47. ras_lock();
  48. ERROR("MPIDR 0x%lx: exception reason=%u syndrome=0x%" PRIx64 "\n",
  49. read_mpidr(), ea_reason, syndrome);
  50. /* Call RAS EA handler */
  51. ret = ras_ea_handler(ea_reason, syndrome, cookie, handle, flags);
  52. if (ret != 0) {
  53. ERROR("RAS error handled!\n");
  54. ret = sdei_dispatch_event(TEGRA_SDEI_EP_EVENT_0 +
  55. plat_my_core_pos());
  56. if (ret != 0)
  57. ERROR("sdei_dispatch_event returned %d\n", ret);
  58. } else {
  59. ERROR("Not a RAS error!\n");
  60. }
  61. ras_unlock();
  62. }
  63. /*
  64. * Function to enable all supported RAS error report.
  65. *
  66. * Uncorrected errors are set to report as External abort (SError)
  67. * Corrected errors are set to report as interrupt.
  68. */
void tegra194_ras_enable(void)
{
	VERBOSE("%s\n", __func__);

	/* skip RAS enablement if not a silicon platform. */
	if (!tegra_platform_is_silicon()) {
		return;
	}

	/*
	 * Iterate for each group (num_idx ERRSELRs starting from idx_start);
	 * use a normal for loop instead of for_each_err_record_info to get rid
	 * of MISRA noise.
	 */
	for (uint32_t i = 0U; i < err_record_mappings.num_err_records; i++) {

		const struct err_record_info *info = &err_record_mappings.err_records[i];

		uint32_t idx_start = info->sysreg.idx_start;
		uint32_t num_idx = info->sysreg.num_idx;
		const struct ras_aux_data *aux_data = (const struct ras_aux_data *)info->aux_data;

		assert(aux_data != NULL);

		for (uint32_t j = 0; j < num_idx; j++) {

			/* ERR<n>CTLR register value to be programmed. */
			uint64_t err_ctrl = 0ULL;
			/* all supported errors for this node. */
			uint64_t err_fr;
			/* uncorrectable errors */
			uint64_t uncorr_errs;
			/* correctable errors */
			uint64_t corr_errs;

			/*
			 * Catch error if something wrong with the RAS aux data
			 * record table.
			 */
			assert(aux_data[j].err_ctrl != NULL);

			/*
			 * Write to ERRSELR_EL1 to select the RAS error node.
			 * Always program this at first to select corresponding
			 * RAS node before any other RAS register r/w.
			 */
			ser_sys_select_record(idx_start + j);

			/* ERR<n>FR bits[63:32] advertise the errors this node supports. */
			err_fr = read_erxfr_el1() & ERR_FR_EN_BITS_MASK;

			/* per-node hook provides the uncorrectable-error mask. */
			uncorr_errs = aux_data[j].err_ctrl();

			/* supported errors not marked uncorrectable are treated as corrected. */
			corr_errs = ~uncorr_errs & err_fr;

			/* enable error reporting */
			ERR_CTLR_ENABLE_FIELD(err_ctrl, ED);

			/* enable SError reporting for uncorrectable errors */
			if ((uncorr_errs & err_fr) != 0ULL) {
				ERR_CTLR_ENABLE_FIELD(err_ctrl, UE);
			}

			/* generate interrupt for corrected errors. */
			if (corr_errs != 0ULL) {
				ERR_CTLR_ENABLE_FIELD(err_ctrl, CFI);
			}

			/* enable the supported errors */
			err_ctrl |= err_fr;

			VERBOSE("errselr_el1:0x%x, erxfr:0x%" PRIx64 ", err_ctrl:0x%" PRIx64 "\n",
				idx_start + j, err_fr, err_ctrl);

			/* enable specified errors, or set to 0 if no supported error */
			write_erxctlr_el1(err_ctrl);
		}
	}
}
  129. /*
  130. * Function to clear RAS ERR<n>STATUS for corrected RAS error.
  131. *
  132. * This function clears number of 'RAS_ERRORS_PER_CALL' RAS errors at most.
  133. * 'cookie' - in/out cookie parameter to specify/store last visited RAS
  134. * error record index. it is set to '0' to indicate no more RAS
  135. * error record to clear.
  136. */
void tegra194_ras_corrected_err_clear(uint64_t *cookie)
{
	/*
	 * 'last_node' and 'last_idx' represent the last visited RAS node index
	 * from the previous function call. They are set to 0 when the first smc
	 * call is made or when all RAS errors have been visited by the
	 * subsequent multiple smc calls. The union packs both indices into a
	 * single 64-bit cookie value.
	 */
	union prev_record {
		struct record {
			uint32_t last_node;
			uint32_t last_idx;
		} rec;
		uint64_t value;
	} prev;

	/* write-one-to-clear pattern for a corrected-error ERR<n>STATUS. */
	uint64_t clear_ce_status = 0ULL;
	/* budget: clear at most this many error records per call. */
	int32_t nerrs_per_call = RAS_ERRORS_PER_CALL;
	uint32_t i;

	if (cookie == NULL) {
		return;
	}

	prev.value = *cookie;

	/* reject a cookie carrying out-of-range record indices. */
	if ((prev.rec.last_node >= RAS_NODE_INDEX_MAX) ||
		(prev.rec.last_idx >= RAS_NODE_INDEX_MAX)) {
		return;
	}

	/* build the status value that clears AV/V/OF/MV/CE fields. */
	ERR_STATUS_SET_FIELD(clear_ce_status, AV, 0x1UL);
	ERR_STATUS_SET_FIELD(clear_ce_status, V, 0x1UL);
	ERR_STATUS_SET_FIELD(clear_ce_status, OF, 0x1UL);
	ERR_STATUS_SET_FIELD(clear_ce_status, MV, 0x1UL);
	ERR_STATUS_SET_FIELD(clear_ce_status, CE, 0x3UL);

	/* resume the scan from the record group saved in the cookie. */
	for (i = prev.rec.last_node; i < err_record_mappings.num_err_records; i++) {
		const struct err_record_info *info = &err_record_mappings.err_records[i];
		uint32_t idx_start = info->sysreg.idx_start;
		uint32_t num_idx = info->sysreg.num_idx;
		uint32_t j;

		/*
		 * within the resumed group, continue after the last visited
		 * index; a zero cookie means a fresh scan from index 0.
		 */
		j = (i == prev.rec.last_node && prev.value != 0UL) ?
			(prev.rec.last_idx + 1U) : 0U;

		for (; j < num_idx; j++) {
			uint64_t status;
			uint32_t err_idx = idx_start + j;

			if (err_idx >= RAS_NODE_INDEX_MAX) {
				return;
			}

			/* select the node, then inspect its status register. */
			write_errselr_el1(err_idx);
			status = read_erxstatus_el1();

			if (ERR_STATUS_GET_FIELD(status, CE) != 0U) {
				write_erxstatus_el1(clear_ce_status);
			}

			--nerrs_per_call;

			/* only clear 'nerrs_per_call' errors each time. */
			if (nerrs_per_call <= 0) {
				prev.rec.last_idx = j;
				prev.rec.last_node = i;
				/* save last visited error record index
				 * into cookie.
				 */
				*cookie = prev.value;
				return;
			}
		}
	}

	/*
	 * finish if all ras error records are checked or provided index is out
	 * of range.
	 */
	*cookie = 0ULL;
}
  204. /* Function to probe an error from error record group. */
  205. static int32_t tegra194_ras_record_probe(const struct err_record_info *info,
  206. int *probe_data)
  207. {
  208. /* Skip probing if not a silicon platform */
  209. if (!tegra_platform_is_silicon()) {
  210. return 0;
  211. }
  212. return ser_probe_sysreg(info->sysreg.idx_start, info->sysreg.num_idx, probe_data);
  213. }
  214. /* Function to handle error from one given node */
static int32_t tegra194_ras_node_handler(uint32_t errselr, const char *name,
		const struct ras_error *errors, uint64_t status)
{
	bool found = false;
	uint32_t ierr = (uint32_t)ERR_STATUS_GET_FIELD(status, IERR);
	uint32_t serr = (uint32_t)ERR_STATUS_GET_FIELD(status, SERR);
	/* write-one-to-clear value accumulated while reporting, written back
	 * to ERR<n>STATUS at the end to acknowledge the handled fields. */
	uint64_t val = 0;

	/* not a valid error. */
	if (ERR_STATUS_GET_FIELD(status, V) == 0U) {
		return 0;
	}

	ERR_STATUS_SET_FIELD(val, V, 1);

	/* keep the log print same as linux arm64_ras driver. */
	ERROR("**************************************\n");
	ERROR("RAS Error in %s, ERRSELR_EL1=0x%x:\n", name, errselr);
	ERROR("\tStatus = 0x%" PRIx64 "\n", status);

	/* Print uncorrectable error information. */
	if (ERR_STATUS_GET_FIELD(status, UE) != 0U) {

		ERR_STATUS_SET_FIELD(val, UE, 1);
		ERR_STATUS_SET_FIELD(val, UET, 1);

		/* IERR to error message: look up the platform's per-node table. */
		for (uint32_t i = 0; errors[i].error_msg != NULL; i++) {
			if (ierr == errors[i].error_code) {
				ERROR("\tIERR = %s: 0x%x\n",
					errors[i].error_msg, ierr);
				found = true;
				break;
			}
		}

		if (!found) {
			ERROR("\tUnknown IERR: 0x%x\n", ierr);
		}

		ERROR("SERR = %s: 0x%x\n", ras_serr_to_str(serr), serr);

		/* Overflow, multiple errors have been detected. */
		if (ERR_STATUS_GET_FIELD(status, OF) != 0U) {
			ERROR("\tOverflow (there may be more errors) - "
				"Uncorrectable\n");
			ERR_STATUS_SET_FIELD(val, OF, 1);
		}

		ERROR("\tUncorrectable (this is fatal)\n");

		/* Miscellaneous Register Valid. */
		if (ERR_STATUS_GET_FIELD(status, MV) != 0U) {
			ERROR("\tMISC0 = 0x%lx\n", read_erxmisc0_el1());
			ERROR("\tMISC1 = 0x%lx\n", read_erxmisc1_el1());
			ERR_STATUS_SET_FIELD(val, MV, 1);
		}

		/* Address Valid. */
		if (ERR_STATUS_GET_FIELD(status, AV) != 0U) {
			ERROR("\tADDR = 0x%lx\n", read_erxaddr_el1());
			ERR_STATUS_SET_FIELD(val, AV, 1);
		}

		/* Deferred error */
		if (ERR_STATUS_GET_FIELD(status, DE) != 0U) {
			ERROR("\tDeferred error\n");
			ERR_STATUS_SET_FIELD(val, DE, 1);
		}
	} else {
		/* For corrected error, simply clear it. */
		VERBOSE("corrected RAS error is cleared: ERRSELR_EL1:0x%x, "
			"IERR:0x%x, SERR:0x%x\n", errselr, ierr, serr);
		ERR_STATUS_SET_FIELD(val, CE, 1);
	}

	ERROR("**************************************\n");

	/* Write to clear reported errors. */
	write_erxstatus_el1(val);

	/* error handled */
	return 0;
}
  283. /* Function to handle one error node from an error record group. */
  284. static int32_t tegra194_ras_record_handler(const struct err_record_info *info,
  285. int probe_data, const struct err_handler_data *const data __unused)
  286. {
  287. uint32_t num_idx = info->sysreg.num_idx;
  288. uint32_t idx_start = info->sysreg.idx_start;
  289. const struct ras_aux_data *aux_data = info->aux_data;
  290. const struct ras_error *errors;
  291. uint32_t offset;
  292. const char *node_name;
  293. uint64_t status = 0ULL;
  294. VERBOSE("%s\n", __func__);
  295. assert(probe_data >= 0);
  296. assert((uint32_t)probe_data < num_idx);
  297. offset = (uint32_t)probe_data;
  298. errors = aux_data[offset].error_records;
  299. node_name = aux_data[offset].name;
  300. assert(errors != NULL);
  301. /* Write to ERRSELR_EL1 to select the error record */
  302. ser_sys_select_record(idx_start + offset);
  303. /* Retrieve status register from the error record */
  304. status = read_erxstatus_el1();
  305. return tegra194_ras_node_handler(idx_start + offset, node_name,
  306. errors, status);
  307. }
/*
 * Instantiate RAS nodes.
 * Each *_RAS_NODE_LIST macro expands DEFINE_ONE_RAS_NODE once per node,
 * generating the per-node ERR<n>CTLR helper referenced by the aux data.
 */
PER_CORE_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE)
PER_CLUSTER_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE)
SCF_L3_BANK_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE)
CCPLEX_RAS_NODE_LIST(DEFINE_ONE_RAS_NODE)

/*
 * Instantiate RAS node groups (aux-data tables, one entry per node).
 * Each group size is bounded by RAS_NODE_INDEX_MAX so that node indices
 * derived from group offsets can never exceed the valid index range.
 */
static struct ras_aux_data per_core_ras_group[] = {
	PER_CORE_RAS_GROUP_NODES
};

CASSERT(ARRAY_SIZE(per_core_ras_group) < RAS_NODE_INDEX_MAX,
	assert_max_per_core_ras_group_size);

static struct ras_aux_data per_cluster_ras_group[] = {
	PER_CLUSTER_RAS_GROUP_NODES
};

CASSERT(ARRAY_SIZE(per_cluster_ras_group) < RAS_NODE_INDEX_MAX,
	assert_max_per_cluster_ras_group_size);

static struct ras_aux_data scf_l3_ras_group[] = {
	SCF_L3_BANK_RAS_GROUP_NODES
};

CASSERT(ARRAY_SIZE(scf_l3_ras_group) < RAS_NODE_INDEX_MAX,
	assert_max_scf_l3_ras_group_size);

static struct ras_aux_data ccplex_ras_group[] = {
	CCPLEX_RAS_GROUP_NODES
};

CASSERT(ARRAY_SIZE(ccplex_ras_group) < RAS_NODE_INDEX_MAX,
	assert_max_ccplex_ras_group_size);

/*
 * We have same probe and handler for each error record group, use a macro to
 * simplify the record definition.
 */
#define ADD_ONE_ERR_GROUP(errselr_start, group) \
	ERR_RECORD_SYSREG_V1((errselr_start), (uint32_t)ARRAY_SIZE((group)), \
			&tegra194_ras_record_probe, \
			&tegra194_ras_record_handler, (group))
/* RAS error record group information */
static struct err_record_info carmel_ras_records[] = {
	/*
	 * Per core ras error records
	 * ERRSELR starts from 0*256 + Logical_CPU_ID*16 + 0 to
	 * 0*256 + Logical_CPU_ID*16 + 5 for each group.
	 * 8 cores/groups, 6 * 8 nodes in total.
	 */
	ADD_ONE_ERR_GROUP(0x000, per_core_ras_group),
	ADD_ONE_ERR_GROUP(0x010, per_core_ras_group),
	ADD_ONE_ERR_GROUP(0x020, per_core_ras_group),
	ADD_ONE_ERR_GROUP(0x030, per_core_ras_group),
	ADD_ONE_ERR_GROUP(0x040, per_core_ras_group),
	ADD_ONE_ERR_GROUP(0x050, per_core_ras_group),
	ADD_ONE_ERR_GROUP(0x060, per_core_ras_group),
	ADD_ONE_ERR_GROUP(0x070, per_core_ras_group),

	/*
	 * Per cluster ras error records
	 * ERRSELR starts from 2*256 + Logical_Cluster_ID*16 + 0 to
	 * 2*256 + Logical_Cluster_ID*16 + 3.
	 * 4 clusters/groups, 3 * 4 nodes in total.
	 */
	ADD_ONE_ERR_GROUP(0x200, per_cluster_ras_group),
	ADD_ONE_ERR_GROUP(0x210, per_cluster_ras_group),
	ADD_ONE_ERR_GROUP(0x220, per_cluster_ras_group),
	ADD_ONE_ERR_GROUP(0x230, per_cluster_ras_group),

	/*
	 * SCF L3_Bank ras error records
	 * ERRSELR: 3*256 + L3_Bank_ID, L3_Bank_ID: 0-3
	 * 1 groups, 4 nodes in total.
	 */
	ADD_ONE_ERR_GROUP(0x300, scf_l3_ras_group),

	/*
	 * CCPLEX ras error records
	 * ERRSELR: 4*256 + Unit_ID, Unit_ID: 0 - 4
	 * 1 groups, 5 nodes in total.
	 */
	ADD_ONE_ERR_GROUP(0x400, ccplex_ras_group),
};

CASSERT(ARRAY_SIZE(carmel_ras_records) < RAS_NODE_INDEX_MAX,
	assert_max_carmel_ras_records_size);

/* hand the record table to the common RAS framework. */
REGISTER_ERR_RECORD_INFO(carmel_ras_records);

/* dummy RAS interrupt: no interrupt-driven RAS handling is registered. */
static struct ras_interrupt carmel_ras_interrupts[] = {};
REGISTER_RAS_INTERRUPTS(carmel_ras_interrupts);
  387. /*******************************************************************************
  388. * RAS handler for the platform
  389. ******************************************************************************/
void plat_ea_handler(unsigned int ea_reason, uint64_t syndrome, void *cookie,
		void *handle, uint64_t flags)
{
#if ENABLE_FEAT_RAS
	/* RAS support built in: route the abort to the Tegra194 RAS handler. */
	tegra194_ea_handler(ea_reason, syndrome, cookie, handle, flags);
#else
	/* RAS disabled at build time: fall back to the generic default handler. */
	plat_default_ea_handler(ea_reason, syndrome, cookie, handle, flags);
#endif
}