/*
 * Copyright (c) 2013-2023, Arm Limited and Contributors. All rights reserved.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <arch.h>
#include <asm_macros.S>
#include <assert_macros.S>
#include <common/bl_common.h>
#include <lib/xlat_tables/xlat_tables_defs.h>

	.globl	smc
	.globl	zero_normalmem
	.globl	zeromem
	.globl	memcpy16
	.globl	gpt_tlbi_by_pa_ll

	.globl	disable_mmu_el1
	.globl	disable_mmu_el3
	.globl	disable_mmu_icache_el1
	.globl	disable_mmu_icache_el3
	.globl	fixup_gdt_reloc
#if SUPPORT_VFP
	.globl	enable_vfp
#endif

func smc
	smc	#0
endfunc smc

/* -----------------------------------------------------------------------
 * void zero_normalmem(void *mem, unsigned int length);
 *
 * Initialise a region in normal memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: MMU must be enabled when using this function as it can only operate on
 *       normal memory. It is intended to be mainly used from C code when MMU
 *       is usually enabled.
 * -----------------------------------------------------------------------
 */
.equ	zero_normalmem, zeromem_dczva

/* -----------------------------------------------------------------------
 * void zeromem(void *mem, unsigned int length);
 *
 * Initialise a region of device memory to 0. This function complies with the
 * AAPCS and can be called from C code.
 *
 * NOTE: When data caches and MMU are enabled, zero_normalmem can usually be
 *       used instead for faster zeroing.
 * -----------------------------------------------------------------------
 */
func zeromem
	/* x2 is the address past the last zeroed address */
	add	x2, x0, x1
	/*
	 * Uses the fallback path that does not use the DC ZVA instruction and
	 * therefore does not need the MMU to be enabled.
	 */
	b	.Lzeromem_dczva_fallback_entry
endfunc zeromem
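
/*
 * Illustrative C-side usage of the two zeroing helpers above (a sketch only;
 * "scratch_buf" is a hypothetical buffer, not defined in this file):
 *
 *   extern void zeromem(void *mem, unsigned int length);
 *   extern void zero_normalmem(void *mem, unsigned int length);
 *
 *   zeromem(scratch_buf, sizeof(scratch_buf));        // safe with MMU off
 *   zero_normalmem(scratch_buf, sizeof(scratch_buf)); // MMU/dcache on, faster
 */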

/* -----------------------------------------------------------------------
 * void zeromem_dczva(void *mem, unsigned int length);
 *
 * Fill a region of normal memory of size "length" in bytes with null bytes.
 * The MMU must be enabled and the memory must be of Normal type. This is
 * because this function internally uses the DC ZVA instruction, which
 * generates an Alignment fault if used on any type of Device memory (see
 * section D3.4.9 of the ARMv8 ARM, issue k). When the MMU is disabled, all
 * memory behaves like Device-nGnRnE memory (see section D4.2.8), hence the
 * requirement on the MMU being enabled.
 *
 * NOTE: The code assumes that the block size as defined in the DCZID_EL0
 *       register is at least 16 bytes.
 * -----------------------------------------------------------------------
 */
func zeromem_dczva
	/*
	 * The function consists of a series of loops that zero memory one
	 * byte at a time, 16 bytes at a time or using the DC ZVA instruction
	 * to zero aligned blocks of bytes, whose size is assumed to be more
	 * than 16. In the case where the DC ZVA instruction cannot be used or
	 * if the first 16 bytes loop would overflow, there is a fallback path
	 * that does not use DC ZVA.
	 * Note: The fallback path is also used by the zeromem function that
	 *       branches to it directly.
	 *
	 *              +---------+   zeromem_dczva
	 *              |  entry  |
	 *              +----+----+
	 *                   |
	 *                   v
	 *              +---------+
	 *              | checks  |>o--------+ (If any check fails, fallback)
	 *              +----+----+          |
	 *                   |               |---------------+
	 *                   v               | Fallback path |
	 *            +------+------+        |---------------+
	 *            | 1 byte loop |        |
	 *            +------+------+ .Lzeromem_dczva_initial_1byte_aligned_end
	 *                   |               |
	 *                   v               |
	 *           +-------+-------+       |
	 *           | 16 bytes loop |       |
	 *           +-------+-------+       |
	 *                   |               |
	 *                   v               |
	 *            +------+------+ .Lzeromem_dczva_blocksize_aligned
	 *            | DC ZVA loop |        |
	 *            +------+------+        |
	 *   +----------+    |               |
	 *   |          |    |               |
	 *   |          v    v               |
	 *   |       +-------+-------+ .Lzeromem_dczva_final_16bytes_aligned
	 *   |       | 16 bytes loop |       |
	 *   |       +-------+-------+       |
	 *   |               |               |
	 *   |               v               |
	 *   |        +------+------+ .Lzeromem_dczva_final_1byte_aligned
	 *   |        | 1 byte loop |        |
	 *   |        +-------------+        |
	 *   |               |               |
	 *   |               v               |
	 *   |           +---+--+            |
	 *   |           | exit |            |
	 *   |           +------+            |
	 *   |                               |
	 *   |              +----------------+    +------------------+ zeromem
	 *   |              |  +------------------| zeromem function |
	 *   |              |  |                  +------------------+
	 *   |              v  v
	 *   |         +-------------+ .Lzeromem_dczva_fallback_entry
	 *   |         | 1 byte loop |
	 *   |         +------+------+
	 *   |                |
	 *   +----------------+
	 */

	/*
	 * Readable names for registers
	 *
	 * Registers x0, x1 and x2 are also set by zeromem which
	 * branches into the fallback path directly, so cursor, length and
	 * stop_address should not be retargeted to other registers.
	 */
	cursor       .req x0 /* Start address and then current address */
	length       .req x1 /* Length in bytes of the region to zero out */
	/* Reusing x1 as length is never used after block_mask is set */
	block_mask   .req x1 /* Bitmask of the block size read in DCZID_EL0 */
	stop_address .req x2 /* Address past the last zeroed byte */
	block_size   .req x3 /* Size of a block in bytes as read in DCZID_EL0 */
	tmp1         .req x4
	tmp2         .req x5

#if ENABLE_ASSERTIONS
	/*
	 * Check for M bit (MMU enabled) of the current SCTLR_EL(1|3)
	 * register value and panic if the MMU is disabled.
	 */
#if defined(IMAGE_BL1) || defined(IMAGE_BL31) || (defined(IMAGE_BL2) && \
	BL2_RUNS_AT_EL3)
	mrs	tmp1, sctlr_el3
#else
	mrs	tmp1, sctlr_el1
#endif

	tst	tmp1, #SCTLR_M_BIT
	ASM_ASSERT(ne)
#endif /* ENABLE_ASSERTIONS */

	/* stop_address is the address past the last to zero */
	add	stop_address, cursor, length

	/*
	 * Read dczid_el0; its 4 lowest bits hold log2(<block size in words>)
	 * (see the encoding of the dczid_el0 register).
	 */
	mrs	block_size, dczid_el0

	/*
	 * Select the 4 lowest bits and convert the extracted
	 * log2(<block size in words>) to <block size in bytes>
	 */
	ubfx	block_size, block_size, #0, #4
	mov	tmp2, #(1 << 2)
	lsl	block_size, tmp2, block_size
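
	/*
	 * Worked example of the computation above (illustrative figures, not
	 * tied to any particular core): a DCZID_EL0.BS value of 4 means the
	 * DC ZVA block is 2^4 = 16 words, so block_size = 4 << 4 = 64 bytes.
	 */
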
#if ENABLE_ASSERTIONS
	/*
	 * Assumes block size is at least 16 bytes to avoid manual realignment
	 * of the cursor at the end of the DCZVA loop.
	 */
	cmp	block_size, #16
	ASM_ASSERT(hs)
#endif
	/*
	 * It is not worth doing all the setup for a region smaller than a
	 * block, and this check protects against zeroing a whole block when
	 * the area to zero is smaller than that. Also, as it is assumed that
	 * the block size is at least 16 bytes, this protects the initial
	 * aligning loops from trying to zero 16 bytes when length is less
	 * than 16.
	 */
	cmp	length, block_size
	b.lo	.Lzeromem_dczva_fallback_entry

	/*
	 * Calculate the bitmask of the block alignment. It will never
	 * underflow as the block size is between 4 bytes and 2kB.
	 * block_mask = block_size - 1
	 */
	sub	block_mask, block_size, #1

	/*
	 * length alias should not be used after this point unless it is
	 * defined as a register other than block_mask's.
	 */
	.unreq	length

	/*
	 * If the start address is already aligned to zero block size, go
	 * straight to the cache zeroing loop. This is safe because at this
	 * point, the length cannot be smaller than a block size.
	 */
	tst	cursor, block_mask
	b.eq	.Lzeromem_dczva_blocksize_aligned

	/*
	 * Calculate the first block-size-aligned address. It is assumed that
	 * the zero block size is at least 16 bytes. This address is the last
	 * address of this initial loop.
	 */
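	/*
	 * The (cursor | block_mask) + 1 sequence below rounds cursor up to
	 * the next block boundary. Illustrative figures (not taken from any
	 * particular platform): with a 64-byte block (block_mask = 0x3f) and
	 * cursor = 0x1010, the result is (0x1010 | 0x3f) + 1 = 0x1040.
	 */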
	orr	tmp1, cursor, block_mask
	add	tmp1, tmp1, #1

	/*
	 * If the addition overflows, skip the cache zeroing loops. This is
	 * quite unlikely however.
	 */
	cbz	tmp1, .Lzeromem_dczva_fallback_entry

	/*
	 * If the first block-size-aligned address is past the last address,
	 * fallback to the simpler code.
	 */
	cmp	tmp1, stop_address
	b.hi	.Lzeromem_dczva_fallback_entry

	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 * It is safe to do this because tmp1 (the stop address of the initial
	 * 16 bytes loop) will never be greater than the final stop address.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_initial_1byte_aligned_end

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp2, cursor, #0xf
	add	tmp2, tmp2, #1
	/* If it overflows, fallback to the simple path (unlikely) */
	cbz	tmp2, .Lzeromem_dczva_fallback_entry
	/*
	 * Next aligned address cannot be after the stop address because the
	 * length cannot be smaller than 16 at this point.
	 */

	/* First loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp2
	b.ne	1b
.Lzeromem_dczva_initial_1byte_aligned_end:

	/*
	 * Second loop: we need to zero 16 bytes at a time from cursor to tmp1
	 * before being able to use the code that deals with block-size-aligned
	 * addresses.
	 */
	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Third loop: zero a block at a time using the DC ZVA cache block
	 * zeroing instruction.
	 */
.Lzeromem_dczva_blocksize_aligned:
	/*
	 * Calculate the last block-size-aligned address. If the result equals
	 * the start address, the loop will exit immediately.
	 */
	bic	tmp1, stop_address, block_mask
	cmp	cursor, tmp1
	b.hs	2f
1:
	/* Zero the block containing the cursor */
	dc	zva, cursor
	/* Increment the cursor by the size of a block */
	add	cursor, cursor, block_size
	cmp	cursor, tmp1
	b.lo	1b
2:

	/*
	 * Fourth loop: zero 16 bytes at a time and then byte per byte the
	 * remaining area
	 */
.Lzeromem_dczva_final_16bytes_aligned:
	/*
	 * Calculate the last 16-byte-aligned address. It is assumed that the
	 * block size will never be smaller than 16 bytes so that the current
	 * cursor is aligned to at least a 16-byte boundary.
	 */
	bic	tmp1, stop_address, #15

	cmp	cursor, tmp1
	b.hs	2f
1:
	stp	xzr, xzr, [cursor], #16
	cmp	cursor, tmp1
	b.lo	1b
2:

	/* Fifth and final loop: zero byte per byte */
.Lzeromem_dczva_final_1byte_aligned:
	cmp	cursor, stop_address
	b.eq	2f
1:
	strb	wzr, [cursor], #1
	cmp	cursor, stop_address
	b.ne	1b
2:
	ret

	/* Fallback for unaligned start addresses */
.Lzeromem_dczva_fallback_entry:
	/*
	 * If the start address is already aligned to 16 bytes, skip this loop.
	 */
	tst	cursor, #0xf
	b.eq	.Lzeromem_dczva_final_16bytes_aligned

	/* Calculate the next address aligned to 16 bytes */
	orr	tmp1, cursor, #15
	add	tmp1, tmp1, #1
	/* If it overflows, fallback to byte per byte zeroing */
	cbz	tmp1, .Lzeromem_dczva_final_1byte_aligned
	/* If the next aligned address is after the stop address, fall back */
	cmp	tmp1, stop_address
	b.hs	.Lzeromem_dczva_final_1byte_aligned

	/* Fallback entry loop: zero byte per byte */
1:
	strb	wzr, [cursor], #1
	cmp	cursor, tmp1
	b.ne	1b

	b	.Lzeromem_dczva_final_16bytes_aligned

	.unreq	cursor
	/*
	 * length is already unreq'ed to reuse the register for another
	 * variable.
	 */
	.unreq	stop_address
	.unreq	block_size
	.unreq	block_mask
	.unreq	tmp1
	.unreq	tmp2
endfunc zeromem_dczva

/* --------------------------------------------------------------------------
 * void memcpy16(void *dest, const void *src, unsigned int length)
 *
 * Copy length bytes from memory area src to memory area dest.
 * The memory areas should not overlap.
 * Destination and source addresses must be 16-byte aligned.
 * --------------------------------------------------------------------------
 */
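/*
 * Illustrative C-side call (a sketch; "dst" and "src" are hypothetical
 * 16-byte-aligned buffers, not defined in this file). The length itself need
 * not be a multiple of 16: the tail is copied byte by byte.
 *
 *   extern void memcpy16(void *dest, const void *src, unsigned int length);
 *   memcpy16(dst, src, len);
 */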
func memcpy16
#if ENABLE_ASSERTIONS
	orr	x3, x0, x1
	tst	x3, #0xf
	ASM_ASSERT(eq)
#endif
/* copy 16 bytes at a time */
m_loop16:
	cmp	x2, #16
	b.lo	m_loop1
	ldp	x3, x4, [x1], #16
	stp	x3, x4, [x0], #16
	sub	x2, x2, #16
	b	m_loop16
/* copy byte per byte */
m_loop1:
	cbz	x2, m_end
	ldrb	w3, [x1], #1
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	m_loop1
m_end:
	ret
endfunc memcpy16

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL3
 * ---------------------------------------------------------------------------
 */
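/*
 * disable_mmu_el3 and disable_mmu_icache_el3 share the do_disable_mmu_el3
 * body below; the mask prepared in x1 selects whether SCTLR_EL3.I is cleared
 * in addition to the M and C bits. The EL1 variants follow the same pattern.
 */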
func disable_mmu_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el3:
	mrs	x0, sctlr_el3
	bic	x0, x0, x1
	msr	sctlr_el3, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el3

func disable_mmu_icache_el3
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el3
endfunc disable_mmu_icache_el3

/* ---------------------------------------------------------------------------
 * Disable the MMU at EL1
 * ---------------------------------------------------------------------------
 */
func disable_mmu_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT)
do_disable_mmu_el1:
	mrs	x0, sctlr_el1
	bic	x0, x0, x1
	msr	sctlr_el1, x0
	isb	/* ensure MMU is off */
	dsb	sy
	ret
endfunc disable_mmu_el1

func disable_mmu_icache_el1
	mov	x1, #(SCTLR_M_BIT | SCTLR_C_BIT | SCTLR_I_BIT)
	b	do_disable_mmu_el1
endfunc disable_mmu_icache_el1

/* ---------------------------------------------------------------------------
 * Enable the use of VFP at EL3
 * ---------------------------------------------------------------------------
 */
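/*
 * Setting CPACR_EL1.FPEN stops FP/SIMD accesses being trapped at EL1/EL0,
 * and clearing CPTR_EL3.TFP stops them being trapped to EL3.
 */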
#if SUPPORT_VFP
func enable_vfp
	mrs	x0, cpacr_el1
	orr	x0, x0, #CPACR_VFP_BITS
	msr	cpacr_el1, x0
	mrs	x0, cptr_el3
	mov	x1, #AARCH64_CPTR_TFP
	bic	x0, x0, x1
	msr	cptr_el3, x0
	isb
	ret
endfunc enable_vfp
#endif

/* ---------------------------------------------------------------------------
 * Helper to fixup Global Descriptor table (GDT) and dynamic relocations
 * (.rela.dyn) at runtime.
 *
 * This function is meant to be used when the firmware is compiled with -fpie
 * and linked with -pie options. We rely on the linker script exporting
 * appropriate markers for start and end of the section. For GOT, we
 * expect __GOT_START__ and __GOT_END__. Similarly for .rela.dyn, we expect
 * __RELA_START__ and __RELA_END__.
 *
 * The function takes the limits of the memory to apply fixups to as
 * arguments (which are usually the limits of the relocatable BL image).
 * x0 - the start of the fixup region
 * x1 - the limit of the fixup region
 * These addresses have to be 4KB page aligned.
 * ---------------------------------------------------------------------------
 */
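/*
 * Presumed C-level view of this helper (a sketch inferred from the register
 * usage described above; the authoritative prototype lives in the C headers):
 *
 *   void fixup_gdt_reloc(uintptr_t start, uintptr_t limit);
 *
 * with both arguments 4KB aligned, e.g. the load-time base and limit of the
 * BL image being relocated.
 */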

/* Relocation codes */
#define	R_AARCH64_NONE		0
#define	R_AARCH64_RELATIVE	1027

func fixup_gdt_reloc
	mov	x6, x0
	mov	x7, x1

#if ENABLE_ASSERTIONS
	/* Test if the limits are 4KB aligned */
	orr	x0, x0, x1
	tst	x0, #(PAGE_SIZE_MASK)
	ASM_ASSERT(eq)
#endif
	/*
	 * Calculate the offset based on return address in x30.
	 * Assume that this function is called within a page at the start of
	 * fixup region.
	 */
	and	x2, x30, #~(PAGE_SIZE_MASK)
	subs	x0, x2, x6	/* Diff(S) = Current Address - Compiled Address */
	b.eq	3f		/* Diff(S) = 0. No relocation needed */

	adrp	x1, __GOT_START__
	add	x1, x1, :lo12:__GOT_START__
	adrp	x2, __GOT_END__
	add	x2, x2, :lo12:__GOT_END__

	/*
	 * GOT is an array of 64-bit addresses which must be fixed up as
	 * new_addr = old_addr + Diff(S).
	 * new_addr is the address the binary is currently executing from
	 * and old_addr is the address at compile time.
	 */
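	/*
	 * Equivalent C sketch of the loop below (illustrative only;
	 * got_start, got_end and diff stand for the values in x1, x2 and x0):
	 *
	 *   for (uint64_t *entry = got_start; entry < got_end; entry++)
	 *       if (*entry >= lower_limit && *entry <= upper_limit)
	 *           *entry += diff;
	 */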
1:	ldr	x3, [x1]
	/* Skip adding offset if address is < lower limit */
	cmp	x3, x6
	b.lo	2f

	/* Skip adding offset if address is > upper limit */
	cmp	x3, x7
	b.hi	2f
	add	x3, x3, x0
	str	x3, [x1]

2:	add	x1, x1, #8
	cmp	x1, x2
	b.lo	1b

	/* Starting dynamic relocations. Use adrp/add to get RELA_START and END */
3:	adrp	x1, __RELA_START__
	add	x1, x1, :lo12:__RELA_START__
	adrp	x2, __RELA_END__
	add	x2, x2, :lo12:__RELA_END__

	/*
	 * According to the ELF-64 specification, the RELA data structure is
	 * as follows:
	 *	typedef struct {
	 *		Elf64_Addr r_offset;
	 *		Elf64_Xword r_info;
	 *		Elf64_Sxword r_addend;
	 *	} Elf64_Rela;
	 *
	 * r_offset is address of reference
	 * r_info is symbol index and type of relocation (in this case
	 * code 1027 which corresponds to R_AARCH64_RELATIVE).
	 * r_addend is constant part of expression.
	 *
	 * Size of Elf64_Rela structure is 24 bytes.
	 */
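	/*
	 * Equivalent C sketch of the loop below (illustrative only;
	 * rela_start, rela_end and diff stand for the values in x1, x2, x0):
	 *
	 *   for (Elf64_Rela *r = rela_start; r < rela_end; r++) {
	 *       if (r->r_info == R_AARCH64_NONE)
	 *           continue;
	 *       if (r->r_addend >= lower_limit && r->r_addend <= upper_limit)
	 *           *(uint64_t *)(r->r_offset + diff) = r->r_addend + diff;
	 *   }
	 */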
	/* Skip R_AARCH64_NONE entry with code 0 */
1:	ldr	x3, [x1, #8]
	cbz	x3, 2f

#if ENABLE_ASSERTIONS
	/* Assert that the relocation type is R_AARCH64_RELATIVE */
	cmp	x3, #R_AARCH64_RELATIVE
	ASM_ASSERT(eq)
#endif
	ldr	x3, [x1]	/* r_offset */
	add	x3, x0, x3
	ldr	x4, [x1, #16]	/* r_addend */

	/* Skip adding offset if r_addend is < lower limit */
	cmp	x4, x6
	b.lo	2f

	/* Skip adding offset if r_addend entry is > upper limit */
	cmp	x4, x7
	b.hi	2f

	add	x4, x0, x4	/* Diff(S) + r_addend */
	str	x4, [x3]

2:	add	x1, x1, #24
	cmp	x1, x2
	b.lo	1b
	ret
endfunc fixup_gdt_reloc

/*
 * TODO: Currently only supports a size of 4KB; add support for other sizes
 * as well.
 */
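/*
 * Presumed C-level view (a sketch inferred from the argument checks below;
 * the authoritative prototype lives in the C headers):
 *
 *   void gpt_tlbi_by_pa_ll(uint64_t pa, size_t size);
 *
 * Invalidates TLB-cached GPT information for the 4KB-aligned physical
 * address "pa" using the TLBI RPALOS (by PA, last level) operation.
 */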
func gpt_tlbi_by_pa_ll
#if ENABLE_ASSERTIONS
	cmp	x1, #PAGE_SIZE_4KB
	ASM_ASSERT(eq)
	tst	x0, #(PAGE_SIZE_MASK)
	ASM_ASSERT(eq)
#endif
	lsr	x0, x0, #FOUR_KB_SHIFT	/* 4KB size encoding is zero */
	sys	#6, c8, c4, #7, x0	/* TLBI RPALOS, <Xt> */
	dsb	sy
	ret
endfunc gpt_tlbi_by_pa_ll