/* sparccpuid.S */
  /* When OPENSSL_FIPSCANISTER is defined, pull in <openssl/fipssyms.h>. */
#ifdef OPENSSL_FIPSCANISTER
#include <openssl/fipssyms.h>
#endif

  /*
   * ABI64 is defined when the compiler was asked for 64-bit code:
   * Sun C with -xarch=v9, or GNU C with -m64.
   */
#if (defined(__SUNPRO_C) && defined(__sparcv9)) || \
    (defined(__GNUC__) && defined(__arch64__))
# define ABI64
#endif

  /*
   * FRAME is the (negative) stack adjustment handed to every save
   * instruction in this file; BIAS is added to %sp/%fp before either
   * is used as an address or returned as a pointer.
   */
#ifdef ABI64
  .register %g2,#scratch
  .register %g3,#scratch
# define FRAME  -192
# define BIAS   2047
#else
# define FRAME  -96
# define BIAS   0
#endif
  .text
  .align  32
  .global OPENSSL_wipe_cpu
  .type   OPENSSL_wipe_cpu,#function
  ! OPENSSL_wipe_cpu -- zero the integer and floating-point registers.
  !
  ! Keep in mind that this does not excuse us from wiping the stack!
  ! This routine wipes registers, but not the backing store [which
  ! resides on the stack, toward lower addresses]. To make stack
  ! wiping easier, the return value is a pointer to the top of stack
  ! of the *caller* (%fp+BIAS as seen from inside this routine).
OPENSSL_wipe_cpu:
        save    %sp,FRAME,%sp
        nop
#ifdef __sun
#include <sys/trap.h>
        ta      ST_CLEAN_WINDOWS
#else
        call    .walk.reg.wins
#endif
        nop
        ! Position-independent address of .zero: the delay slot loads the
        ! distance from the call instruction to .zero, and .PIC.zero.up
        ! adds the address of the call (%o7) to it.
        call    .PIC.zero.up
        mov     .zero-(.-4),%o0
        ld      [%o0],%f0               ! %f0 = 0
        ld      [%o0],%f1               ! %f1 = 0
        subcc   %g0,1,%o0
        ! Following is V9 "rd %ccr,%o0" instruction. However! V8
        ! specification says that it ("rd %asr2,%o0" in V8 terms) does
        ! not cause illegal_instruction trap. It therefore can be used
        ! to determine if the CPU the code is executing on is V8- or
        ! V9-compliant, as V9 returns a distinct value of 0x99,
        ! "negative" and "borrow" bits set in both %icc and %xcc.
        .word   0x91408000              !rd     %ccr,%o0
        cmp     %o0,0x99
        bne     .v8
        nop
        ! Even though we do not use %fp register bank,
        ! we wipe it as memcpy might have used it...
        ! The words are hand-encoded so that a V8 assembler accepts them.
        .word   0xbfa00040              !fmovd  %f0,%f62
        .word   0xbba00040              !fmovd  %f0,%f60
        .word   0xb7a00040              !fmovd  %f0,%f58
        .word   0xb3a00040              !fmovd  %f0,%f56
        .word   0xafa00040              !fmovd  %f0,%f54
        .word   0xaba00040              !fmovd  %f0,%f52
        .word   0xa7a00040              !fmovd  %f0,%f50
        .word   0xa3a00040              !fmovd  %f0,%f48
        .word   0x9fa00040              !fmovd  %f0,%f46
        .word   0x9ba00040              !fmovd  %f0,%f44
        .word   0x97a00040              !fmovd  %f0,%f42
        .word   0x93a00040              !fmovd  %f0,%f40
        .word   0x8fa00040              !fmovd  %f0,%f38
        .word   0x8ba00040              !fmovd  %f0,%f36
        .word   0x87a00040              !fmovd  %f0,%f34
        .word   0x83a00040              !fmovd  %f0,%f32
        ! Common tail: clear %f2-%f31 interleaved with the integer
        ! registers of this window and %g1-%g5.
.v8:
        fmovs   %f1,%f31
        clr     %o0
        fmovs   %f0,%f30
        clr     %o1
        fmovs   %f1,%f29
        clr     %o2
        fmovs   %f0,%f28
        clr     %o3
        fmovs   %f1,%f27
        clr     %o4
        fmovs   %f0,%f26
        clr     %o5
        fmovs   %f1,%f25
        clr     %o7
        fmovs   %f0,%f24
        clr     %l0
        fmovs   %f1,%f23
        clr     %l1
        fmovs   %f0,%f22
        clr     %l2
        fmovs   %f1,%f21
        clr     %l3
        fmovs   %f0,%f20
        clr     %l4
        fmovs   %f1,%f19
        clr     %l5
        fmovs   %f0,%f18
        clr     %l6
        fmovs   %f1,%f17
        clr     %l7
        fmovs   %f0,%f16
        clr     %i0
        fmovs   %f1,%f15
        clr     %i1
        fmovs   %f0,%f14
        clr     %i2
        fmovs   %f1,%f13
        clr     %i3
        fmovs   %f0,%f12
        clr     %i4
        fmovs   %f1,%f11
        clr     %i5
        fmovs   %f0,%f10
        clr     %g1
        fmovs   %f1,%f9
        clr     %g2
        fmovs   %f0,%f8
        clr     %g3
        fmovs   %f1,%f7
        clr     %g4
        fmovs   %f0,%f6
        clr     %g5
        fmovs   %f1,%f5
        fmovs   %f0,%f4
        fmovs   %f1,%f3
        fmovs   %f0,%f2
        add     %fp,BIAS,%i0            ! return pointer to top of stack of the caller
        ret
        restore

  ! Eight zero bytes used as the source for clearing %f0/%f1.
.zero:
        .long   0x0,0x0

  ! Helper: turn the call-relative offset in %o0 into an absolute
  ! address by adding the address of the calling instruction (%o7).
.PIC.zero.up:
        retl
        add     %o0,%o7,%o0

#ifdef DEBUG
  .global walk_reg_wins
  .type   walk_reg_wins,#function
walk_reg_wins:
#endif
  ! Recursive helper used when ST_CLEAN_WINDOWS is not available.
  ! Each level takes a fresh window with save, recurses unless %i7
  ! equals %o7 or %o7 is zero, and on the way back clears the outs and
  ! locals of its window. %i0 receives %o0+1 (used for debugging).
.walk.reg.wins:
        save    %sp,FRAME,%sp
        cmp     %i7,%o7
        be      2f
        clr     %o0
        cmp     %o7,0                   ! compiler never cleans %o7...
        be      1f                      ! could have been a leaf function...
        clr     %o1
        call    .walk.reg.wins
        nop
1:
        clr     %o2
        clr     %o3
        clr     %o4
        clr     %o5
        clr     %o7
        clr     %l0
        clr     %l1
        clr     %l2
        clr     %l3
        clr     %l4
        clr     %l5
        clr     %l6
        clr     %l7
        add     %o0,1,%i0               ! used for debugging
2:
        ret
        restore
  .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
  .global OPENSSL_atomic_add
  .type   OPENSSL_atomic_add,#function
  .align  32
  ! OPENSSL_atomic_add -- atomically add %o1 to the 32-bit word at [%o0].
  !
  ! In:   %o0 = address of the 32-bit counter
  !       %o1 = amount to add
  ! Out:  %o0 = the new counter value, sign-extended from 32 bits
  !
  ! On a V9 CPU (always the case for ABI64 builds) the update is a
  ! compare-and-swap retry loop. In 32-bit builds running on a V8 CPU
  ! the counter word itself doubles as a spin lock: swap installs the
  ! sentinel -1 while the update is in flight, so a counter whose real
  ! value is -1 cannot be represented on that path.
OPENSSL_atomic_add:
#ifndef ABI64
        subcc   %g0,1,%o2
        .word   0x95408000              !rd     %ccr,%o2; reads 0x99 only on V9
        cmp     %o2,0x99
        be      .v9
        nop
        save    %sp,FRAME,%sp
        ba      .enter
        nop
#ifdef __sun
  ! Note that you do not have to link with libthread to call thr_yield,
  ! as libc provides a stub, which is overloaded the moment you link
  ! with *either* libpthread or libthread...
#define YIELD_CPU thr_yield
#else
  ! applies at least to Linux and FreeBSD... Feedback expected...
#define YIELD_CPU sched_yield
#endif
.spin:
        call    YIELD_CPU
        nop
.enter:
        ld      [%i0],%i2
        ! Cheap pre-check before the atomic swap. It has to test the same
        ! sentinel (-1) that the swap below installs; testing any other
        ! value never notices a held lock and makes that value unusable
        ! as a counter.
        cmp     %i2,-1
        be      .spin
        mov     -1,%i2                  ! delay slot: sentinel for the swap
        swap    [%i0],%i2               ! take the lock, fetch old value
        cmp     %i2,-1
        be      .spin                   ! somebody else holds it
        add     %i2,%i1,%i2             ! delay slot: new = old + amount
        stbar
        st      %i2,[%i0]               ! publish new value, releasing the lock
        sra     %i2,%g0,%i0
        ret
        restore
.v9:
#endif
        ld      [%o0],%o2
1:
        add     %o1,%o2,%o3
        .word   0xd7e2100a              !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
        cmp     %o2,%o3
        bne     1b
        mov     %o3,%o2                 ! cas is always fetching to dest. register
        add     %o1,%o2,%o0             ! OpenSSL expects the new value
        retl
        sra     %o0,%g0,%o0             ! we return signed int, remember?
  .size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
  .global _sparcv9_rdtick
  .align  32
  ! _sparcv9_rdtick -- read the V9 %tick register.
  !
  ! Out:  %o0 = value read from %tick
  !       %o1 = that value shifted right by 32
  !       Both are zero when the CPU does not answer the %ccr probe
  !       with 0x99, i.e. is not V9.
_sparcv9_rdtick:
        subcc   %g0,1,%o0
        .word   0x91408000              !rd     %ccr,%o0
        cmp     %o0,0x99
        bne     .notick
        clr     %o0                     ! delay slot: result defaults to 0
        .word   0x91410000              !rd     %tick,%o0
        retl
        .word   0x93323020              !srlx   %o0,32,%o1
.notick:
        retl
        clr     %o1
  .type   _sparcv9_rdtick,#function
  .size   _sparcv9_rdtick,.-_sparcv9_rdtick
  .global _sparcv9_vis1_probe
  .align  8
  ! _sparcv9_vis1_probe -- execute two hand-encoded VIS1 instructions:
  ! fxor on %f0 and a load from the stack (%sp+BIAS+2) through
  ! ASI_FP16_P. Takes no arguments and sets no return value;
  ! presumably the caller detects a missing VIS1 unit by catching the
  ! resulting trap -- confirm against the caller.
_sparcv9_vis1_probe:
        .word   0x81b00d80              !fxor   %f0,%f0,%f0
        add     %sp,BIAS+2,%o1
        retl
        .word   0xc19a5a40              !ldda   [%o1]ASI_FP16_P,%f0
  .type   _sparcv9_vis1_probe,#function
  .size   _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
  ! Probe and instrument VIS1 instruction. Output is number of cycles it
  ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
  ! is slow (documented to be 6 cycles on T2) and the core is in-order
  ! single-issue, it should be possible to distinguish Tx reliably...
  ! Observed return values are:
  !
  !     UltraSPARC IIe          7
  !     UltraSPARC III          7
  !     UltraSPARC T1           24
  !
  ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
  !
  ! It would be possible to detect specifically US-T1 by instrumenting
  ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
  ! a lot of %tick-s, couple of thousand on Linux...
  !
  ! Out:  %o0 = smallest of the four measured %tick intervals
  .global _sparcv9_vis1_instrument
  .align  8
_sparcv9_vis1_instrument:
        ! five %tick samples with a pair of fxor between each
        .word   0x91410000              !rd     %tick,%o0
        .word   0x81b00d80              !fxor   %f0,%f0,%f0
        .word   0x85b08d82              !fxor   %f2,%f2,%f2
        .word   0x93410000              !rd     %tick,%o1
        .word   0x81b00d80              !fxor   %f0,%f0,%f0
        .word   0x85b08d82              !fxor   %f2,%f2,%f2
        .word   0x95410000              !rd     %tick,%o2
        .word   0x81b00d80              !fxor   %f0,%f0,%f0
        .word   0x85b08d82              !fxor   %f2,%f2,%f2
        .word   0x97410000              !rd     %tick,%o3
        .word   0x81b00d80              !fxor   %f0,%f0,%f0
        .word   0x85b08d82              !fxor   %f2,%f2,%f2
        .word   0x99410000              !rd     %tick,%o4

        ! calculate intervals
        sub     %o1,%o0,%o0
        sub     %o2,%o1,%o1
        sub     %o3,%o2,%o2
        sub     %o4,%o3,%o3

        ! find minimum value; each annulled branch executes the mov in
        ! its delay slot only when the running minimum is the larger one
        cmp     %o0,%o1
        .word   0x38680002              !bgu,a  %xcc,.+8
        mov     %o1,%o0
        cmp     %o0,%o2
        .word   0x38680002              !bgu,a  %xcc,.+8
        mov     %o2,%o0
        cmp     %o0,%o3
        .word   0x38680002              !bgu,a  %xcc,.+8
        mov     %o3,%o0
        retl
        nop
  .type   _sparcv9_vis1_instrument,#function
  .size   _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
  .global _sparcv9_vis2_probe
  .align  8
  ! _sparcv9_vis2_probe -- execute one hand-encoded bshuffle in the
  ! delay slot of the return. Takes no arguments and sets no return
  ! value; presumably the caller catches the trap raised when the
  ! instruction is not implemented -- confirm against the caller.
_sparcv9_vis2_probe:
        retl
        .word   0x81b00980              !bshuffle       %f0,%f0,%f0
  .type   _sparcv9_vis2_probe,#function
  .size   _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
  .global _sparcv9_fmadd_probe
  .align  8
  ! _sparcv9_fmadd_probe -- clear %f0 and %f2 with fxor, then execute
  ! one hand-encoded fmaddd in the delay slot of the return. Takes no
  ! arguments and sets no return value; presumably the caller catches
  ! the trap raised when fmaddd is not implemented -- confirm against
  ! the caller.
_sparcv9_fmadd_probe:
        .word   0x81b00d80              !fxor   %f0,%f0,%f0
        .word   0x85b08d82              !fxor   %f2,%f2,%f2
        retl
        .word   0x81b80440              !fmaddd %f0,%f0,%f2,%f0
  .type   _sparcv9_fmadd_probe,#function
  .size   _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
  .global OPENSSL_cleanse
  .align  32
  ! OPENSSL_cleanse -- overwrite a buffer with zero bytes.
  !
  ! In:   %o0 = start of the buffer
  !       %o1 = length in bytes
  !
  ! Lengths up to 14 are cleared one byte at a time. Longer buffers are
  ! first byte-cleared up to an aligned address, then cleared with
  ! 8-byte stores on V9 or 4-byte stores on V8, and the remaining tail
  ! is handed back to the byte loop.
OPENSSL_cleanse:
        cmp     %o1,14
        nop
#ifdef ABI64
        bgu     %xcc,.Lot
#else
        bgu     .Lot
#endif
        cmp     %o1,0                   ! delay slot: flags for the test below
        bne     .Little
        nop
        retl                            ! nothing to do for zero length
        nop

  ! Byte-at-a-time loop; requires %o1 != 0 on entry.
.Little:
        stb     %g0,[%o0]
        subcc   %o1,1,%o1
        bnz     .Little
        add     %o0,1,%o0
        retl
        nop

  .align  32
.Lot:
#ifndef ABI64
        subcc   %g0,1,%g1
        ! %ccr reads back as 0x99 only on a V9 CPU; the same probe is
        ! explained in OPENSSL_wipe_cpu
        .word   0x83408000              !rd     %ccr,%g1
        cmp     %g1,0x99
        bne     .v8lot
        nop
#endif
  ! V9: clear single bytes until %o0 is 8-byte aligned.
.v9lot:
        andcc   %o0,7,%g0
        bz      .v9aligned
        nop
        stb     %g0,[%o0]
        sub     %o1,1,%o1
        ba      .v9lot
        add     %o0,1,%o0
  .align  16,0x01000000
  ! V9: 8 bytes per iteration while at least 8 remain. The branch is
  ! hand-encoded with a fixed displacement back to the stx, so nothing
  ! may be inserted between .v9aligned and the branch word.
.v9aligned:
        .word   0xc0720000              !stx    %g0,[%o0]
        sub     %o1,8,%o1
        andcc   %o1,-8,%g0
#ifdef ABI64
        .word   0x126ffffd              !bnz    %xcc,.v9aligned
#else
        .word   0x124ffffd              !bnz    %icc,.v9aligned
#endif
        add     %o0,8,%o0
        cmp     %o1,0
        bne     .Little                 ! finish the tail bytewise
        nop
        retl
        nop
#ifndef ABI64
  ! V8: clear single bytes until %o0 is 4-byte aligned.
.v8lot:
        andcc   %o0,3,%g0
        bz      .v8aligned
        nop
        stb     %g0,[%o0]
        sub     %o1,1,%o1
        ba      .v8lot
        add     %o0,1,%o0
        nop
  ! V8: 4 bytes per iteration while at least 4 remain.
.v8aligned:
        st      %g0,[%o0]
        sub     %o1,4,%o1
        andcc   %o1,-4,%g0
        bnz     .v8aligned
        add     %o0,4,%o0
        cmp     %o1,0
        bne     .Little                 ! finish the tail bytewise
        nop
        retl
        nop
#endif
  .type   OPENSSL_cleanse,#function
  .size   OPENSSL_cleanse,.-OPENSSL_cleanse
  .global _sparcv9_vis1_instrument_bus
  .align  8
  ! _sparcv9_vis1_instrument_bus -- fill an array of 32-bit words with
  ! %tick deltas measured around block load/store traffic.
  !
  ! In:   %o0 = out, address of the first 32-bit word
  !       %o1 = cnt, number of words to process
  ! Out:  %o0 = cnt as passed in
  !
  ! Registers: %o3 saved cnt, %o4 tick, %o5 lasttick, %g4 diff,
  !            %g1 address of out rounded down to a multiple of 64.
  ! Every round block-loads and block-stores the 64 bytes at %g1 and
  ! then adds diff to the word at out with cas. A cnt of 0 is not
  ! special-cased: the counter is decremented before it is tested.
_sparcv9_vis1_instrument_bus:
        mov     %o1,%o3                                 ! save cnt
        .word   0x99410000      !rd     %tick,%o4       ! tick
        mov     %o4,%o5                                 ! lasttick = tick
        set     0,%g4                                   ! diff

        ! warm-up round with diff = 0
        andn    %o0,63,%g1
        .word   0xc1985e00      !ldda   [%g1]0xf0,%f0   ! block load
        .word   0x8143e040      !membar #Sync
        .word   0xc1b85c00      !stda   %f0,[%g1]0xe0   ! block store and commit
        .word   0x8143e040      !membar #Sync
        ld      [%o0],%o4
        add     %o4,%g4,%g4
        .word   0xc9e2100c      !cas    [%o0],%o4,%g4

.Loop:
        .word   0x99410000      !rd     %tick,%o4
        sub     %o4,%o5,%g4                             ! diff=tick-lasttick
        mov     %o4,%o5                                 ! lasttick=tick

        andn    %o0,63,%g1
        .word   0xc1985e00      !ldda   [%g1]0xf0,%f0   ! block load
        .word   0x8143e040      !membar #Sync
        .word   0xc1b85c00      !stda   %f0,[%g1]0xe0   ! block store and commit
        .word   0x8143e040      !membar #Sync
        ld      [%o0],%o4
        add     %o4,%g4,%g4
        .word   0xc9e2100c      !cas    [%o0],%o4,%g4
        subcc   %o1,1,%o1                               ! --$cnt
        bnz     .Loop
        add     %o0,4,%o0                               ! ++$out

        retl
        mov     %o3,%o0                                 ! return saved cnt
  .type   _sparcv9_vis1_instrument_bus,#function
  .size   _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
  .global _sparcv9_vis1_instrument_bus2
  .align  8
  ! _sparcv9_vis1_instrument_bus2 -- like _sparcv9_vis1_instrument_bus,
  ! but the output pointer only advances when the measured delta
  ! differs from the previous one, and the number of rounds is capped.
  !
  ! In:   %o0 = out, address of the first 32-bit word
  !       %o1 = cnt, number of words available
  !       %o2 = max, upper bound on loop rounds
  ! Out:  %o0 = cnt minus the count still remaining on exit
  !
  ! Registers: %o3 saved cnt, %o1 remaining count times 4, %o4 tick,
  !            %o5 lasttick, %g4 diff, %g5 lastdiff,
  !            %g1 scratch (aligned address, then step of 0 or 4).
_sparcv9_vis1_instrument_bus2:
        mov     %o1,%o3                                 ! save cnt
        sll     %o1,2,%o1                               ! cnt*=4
        .word   0x99410000      !rd     %tick,%o4       ! tick
        mov     %o4,%o5                                 ! lasttick = tick
        set     0,%g4                                   ! diff

        ! warm-up round with diff = 0
        andn    %o0,63,%g1
        .word   0xc1985e00      !ldda   [%g1]0xf0,%f0   ! block load
        .word   0x8143e040      !membar #Sync
        .word   0xc1b85c00      !stda   %f0,[%g1]0xe0   ! block store and commit
        .word   0x8143e040      !membar #Sync
        ld      [%o0],%o4
        add     %o4,%g4,%g4
        .word   0xc9e2100c      !cas    [%o0],%o4,%g4

        .word   0x99410000      !rd     %tick,%o4       ! tick
        sub     %o4,%o5,%g4                             ! diff=tick-lasttick
        mov     %o4,%o5                                 ! lasttick=tick
        mov     %g4,%g5                                 ! lastdiff=diff
.Loop2:
        andn    %o0,63,%g1
        .word   0xc1985e00      !ldda   [%g1]0xf0,%f0   ! block load
        .word   0x8143e040      !membar #Sync
        .word   0xc1b85c00      !stda   %f0,[%g1]0xe0   ! block store and commit
        .word   0x8143e040      !membar #Sync
        ld      [%o0],%o4
        add     %o4,%g4,%g4
        .word   0xc9e2100c      !cas    [%o0],%o4,%g4
        subcc   %o2,1,%o2                               ! --max
        bz      .Ldone2
        nop

        .word   0x99410000      !rd     %tick,%o4       ! tick
        sub     %o4,%o5,%g4                             ! diff=tick-lasttick
        mov     %o4,%o5                                 ! lasttick=tick
        cmp     %g4,%g5
        mov     %g4,%g5                                 ! lastdiff=diff
        ! %g1 = 4 when diff != lastdiff, else 0 (mov leaves the flags alone)
        .word   0x83408000      !rd     %ccr,%g1
        and     %g1,4,%g1                               ! isolate zero flag
        xor     %g1,4,%g1                               ! flip zero flag
        subcc   %o1,%g1,%o1                             ! conditional --$cnt
        bnz     .Loop2
        add     %o0,%g1,%o0                             ! conditional ++$out
.Ldone2:
        srl     %o1,2,%o1                               ! back from bytes to words
        retl
        sub     %o3,%o1,%o0
  .type   _sparcv9_vis1_instrument_bus2,#function
  .size   _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
  .section ".init",#alloc,#execinstr
        ! Fragment placed in the .init section: calls OPENSSL_cpuid_setup,
        ! with a nop filling the delay slot.
        call    OPENSSL_cpuid_setup
        nop