sparccpuid.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  ! Build configuration. The FIPS symbol header is pulled in only when
  ! OPENSSL_FIPSCANISTER is defined.
  1. #ifdef OPENSSL_FIPSCANISTER
  2. #include <openssl/fipssyms.h>
  3. #endif
  ! A 64-bit build is recognised from the compiler: Sun C together with
  ! __sparcv9, or GCC together with __arch64__.
  4. #if defined(__SUNPRO_C) && defined(__sparcv9)
  5. # define ABI64 /* They've said -xarch=v9 at command line */
  6. #elif defined(__GNUC__) && defined(__arch64__)
  7. # define ABI64 /* They've said -m64 at command line */
  8. #endif
  ! The two macros defined next are the frame-size immediate handed to the
  ! save instructions below and the offset added to %sp or %fp wherever a
  ! stack address is formed. The 64-bit build additionally declares %g2
  ! and %g3 as scratch registers.
  9. #ifdef ABI64
  10. .register %g2,#scratch
  11. .register %g3,#scratch
  12. # define FRAME -192
  13. # define BIAS 2047
  14. #else
  15. # define FRAME -96
  16. # define BIAS 0
  17. #endif
  ! OPENSSL_wipe_cpu: zero the floating-point registers and the integer
  ! registers listed in the common tail below, then return (in %o0 as
  ! seen by the caller) %fp plus the ABI stack offset.
  18. .text
  19. .align 32
  20. .global OPENSSL_wipe_cpu
  21. .type OPENSSL_wipe_cpu,#function
  22. ! Keep in mind that this does not excuse us from wiping the stack!
  23. ! This routine wipes registers, but not the backing store [which
  24. ! resides on the stack, toward lower addresses]. To facilitate for
  25. ! stack wiping I return pointer to the top of stack of the *caller*.
  26. OPENSSL_wipe_cpu:
  27. save %sp,FRAME,%sp
  28. nop
  29. #ifdef __sun
  30. #include <sys/trap.h>
  ! Sun build: issue the ST_CLEAN_WINDOWS software trap (number taken
  ! from sys/trap.h) instead of walking the register windows by hand.
  31. ta ST_CLEAN_WINDOWS
  32. #else
  ! Other systems: walk the register windows with the local helper.
  33. call .walk.reg.wins
  34. #endif
  ! The nop below fills the delay slot of the call (plain nop after ta).
  35. nop
  ! Position-independent address of .zero: the delay slot puts the
  ! distance from the call instruction to .zero into %o0, and
  ! .PIC.zero.up adds %o7 (the address of that call) to it.
  36. call .PIC.zero.up
  37. mov .zero-(.-4),%o0
  ! Load the zero word into %f0 and %f1; they are the source of every
  ! floating-point move that follows.
  38. ld [%o0],%f0
  39. ld [%o0],%f1
  ! 0-1 sets the negative and borrow flags that the probe below reads.
  40. subcc %g0,1,%o0
  41. ! Following is V9 "rd %ccr,%o0" instruction. However! V8
  42. ! specification says that it ("rd %asr2,%o0" in V8 terms) does
  43. ! not cause illegal_instruction trap. It therefore can be used
  44. ! to determine if the CPU the code is executing on is V8- or
  45. ! V9-compliant, as V9 returns a distinct value of 0x99,
  46. ! "negative" and "borrow" bits set in both %icc and %xcc.
  47. .word 0x91408000 !rd %ccr,%o0
  48. cmp %o0,0x99
  ! Not V9: skip the sixteen hand-encoded double moves that clear the
  ! upper bank (%f32 through %f62).
  49. bne .v8
  50. nop
  51. ! Even though we do not use %fp register bank,
  52. ! we wipe it as memcpy might have used it...
  53. .word 0xbfa00040 !fmovd %f0,%f62
  54. .word 0xbba00040 !...
  55. .word 0xb7a00040
  56. .word 0xb3a00040
  57. .word 0xafa00040
  58. .word 0xaba00040
  59. .word 0xa7a00040
  60. .word 0xa3a00040
  61. .word 0x9fa00040
  62. .word 0x9ba00040
  63. .word 0x97a00040
  64. .word 0x93a00040
  65. .word 0x8fa00040
  66. .word 0x8ba00040
  67. .word 0x87a00040
  68. .word 0x83a00040 !fmovd %f0,%f32
  ! Common tail: copy the zero held in %f0/%f1 into %f31 down to %f2 and
  ! clear %o0-%o5, %o7, %l0-%l7, %i0-%i5 and %g1-%g5, interleaving the
  ! floating-point moves with the integer clears. %o6, %i6, %i7, %g6
  ! and %g7 are not touched.
  69. .v8: fmovs %f1,%f31
  70. clr %o0
  71. fmovs %f0,%f30
  72. clr %o1
  73. fmovs %f1,%f29
  74. clr %o2
  75. fmovs %f0,%f28
  76. clr %o3
  77. fmovs %f1,%f27
  78. clr %o4
  79. fmovs %f0,%f26
  80. clr %o5
  81. fmovs %f1,%f25
  82. clr %o7
  83. fmovs %f0,%f24
  84. clr %l0
  85. fmovs %f1,%f23
  86. clr %l1
  87. fmovs %f0,%f22
  88. clr %l2
  89. fmovs %f1,%f21
  90. clr %l3
  91. fmovs %f0,%f20
  92. clr %l4
  93. fmovs %f1,%f19
  94. clr %l5
  95. fmovs %f0,%f18
  96. clr %l6
  97. fmovs %f1,%f17
  98. clr %l7
  99. fmovs %f0,%f16
  100. clr %i0
  101. fmovs %f1,%f15
  102. clr %i1
  103. fmovs %f0,%f14
  104. clr %i2
  105. fmovs %f1,%f13
  106. clr %i3
  107. fmovs %f0,%f12
  108. clr %i4
  109. fmovs %f1,%f11
  110. clr %i5
  111. fmovs %f0,%f10
  112. clr %g1
  113. fmovs %f1,%f9
  114. clr %g2
  115. fmovs %f0,%f8
  116. clr %g3
  117. fmovs %f1,%f7
  118. clr %g4
  119. fmovs %f0,%f6
  120. clr %g5
  121. fmovs %f1,%f5
  122. fmovs %f0,%f4
  123. fmovs %f1,%f3
  124. fmovs %f0,%f2
  125. add %fp,BIAS,%i0 ! return pointer to caller´s top of stack
  ! restore runs in the delay slot of ret and pops the register window,
  ! so the value written to %i0 arrives in %o0 of the caller.
  126. ret
  127. restore
  ! .zero: eight bytes of zero; OPENSSL_wipe_cpu loads its first word
  ! into %f0 and %f1.
  128. .zero: .long 0x0,0x0
  ! .PIC.zero.up: leaf helper returning %o0 + %o7, i.e. the offset passed
  ! in %o0 rebased on the address of the calling instruction. The add
  ! executes in the delay slot of retl.
  129. .PIC.zero.up:
  130. retl
  131. add %o0,%o7,%o0
  ! .walk.reg.wins: recursive helper called by OPENSSL_wipe_cpu on
  ! non-Sun builds. Each level opens a new register window with save,
  ! clears the out and local registers of that window and hands a depth
  ! count back in %o0 of its caller. Debug builds also export the same
  ! code under a global name.
  132. #ifdef DEBUG
  133. .global walk_reg_wins
  134. .type walk_reg_wins,#function
  135. walk_reg_wins:
  136. #endif
  137. .walk.reg.wins:
  138. save %sp,FRAME,%sp
  ! Compare the return address of this level (%i7) with whatever value
  ! is found in %o7 of the freshly opened window; when they match the
  ! routine returns at once through label 2.
  ! NOTE(review): presumably a match means this window was already
  ! visited by an earlier walk - confirm.
  139. cmp %i7,%o7
  140. be 2f
  141. clr %o0
  142. cmp %o7,0 ! compiler never cleans %o7...
  143. be 1f ! could have been a leaf function...
  144. clr %o1
  ! %o7 was neither the return address nor zero: go one window deeper.
  145. call .walk.reg.wins
  146. nop
  ! Clear the remaining out registers and all locals of this window.
  147. 1: clr %o2
  148. clr %o3
  149. clr %o4
  150. clr %o5
  151. clr %o7
  152. clr %l0
  153. clr %l1
  154. clr %l2
  155. clr %l3
  156. clr %l4
  157. clr %l5
  158. clr %l6
  159. clr %l7
  160. add %o0,1,%i0 ! used for debugging
  161. 2: ret
  162. restore
  ! The size of OPENSSL_wipe_cpu is measured here, so the symbol spans
  ! the .zero data and both helper routines as well.
  163. .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
  ! OPENSSL_atomic_add: add %o1 to the 32-bit word at [%o0] and return the
  ! new value, sign-extended, in %o0.
  ! 64-bit builds always use the cas loop at the end. 32-bit builds probe
  ! the CPU at run time with rd %ccr and, when the probe does not report
  ! V9, fall back to a swap-based scheme that parks the value -1 in the
  ! word while the update is in progress.
  164. .global OPENSSL_atomic_add
  165. .type OPENSSL_atomic_add,#function
  166. .align 32
  167. OPENSSL_atomic_add:
  168. #ifndef ABI64
  169. subcc %g0,1,%o2
  170. .word 0x95408000 !rd %ccr,%o2, see comment above
  171. cmp %o2,0x99
  172. be .v9
  173. nop
  ! Pre-V9 path: open a register window (arguments become %i0 and %i1)
  ! because this path may call the yield routine.
  174. save %sp,FRAME,%sp
  175. ba .enter
  176. nop
  177. #ifdef __sun
  178. ! Note that you do not have to link with libthread to call thr_yield,
  179. ! as libc provides a stub, which is overloaded the moment you link
  180. ! with *either* libpthread or libthread...
  181. #define YIELD_CPU thr_yield
  182. #else
  183. ! applies at least to Linux and FreeBSD... Feedback expected...
  184. #define YIELD_CPU sched_yield
  185. #endif
  ! .spin: the word is busy, give up the CPU and then try again.
  186. .spin: call YIELD_CPU
  187. nop
  ! .enter: swap the marker -1 into the word and take the old value. An
  ! old value of -1 means another caller is in the middle of an update.
  ! The mov and the add each sit in a branch delay slot.
  ! NOTE(review): the pre-check compares against -4096 while the marker
  ! written by swap is -1, and a genuine stored value of -1 cannot be
  ! told apart from the marker - confirm which sentinel is intended.
  188. .enter: ld [%i0],%i2
  189. cmp %i2,-4096
  190. be .spin
  191. mov -1,%i2
  192. swap [%i0],%i2
  193. cmp %i2,-1
  194. be .spin
  195. add %i2,%i1,%i2
  ! Store the sum back, which also replaces the marker, and return it.
  196. stbar
  197. st %i2,[%i0]
  198. sra %i2,%g0,%i0
  199. ret
  200. restore
  201. .v9:
  202. #endif
  ! V9 path: classic compare-and-swap retry loop. %o2 is the expected old
  ! value, %o3 the candidate new value; after cas %o3 holds what memory
  ! contained, so a mismatch with %o2 means another writer got in first.
  203. ld [%o0],%o2
  204. 1: add %o1,%o2,%o3
  205. .word 0xd7e2100a !cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
  206. cmp %o2,%o3
  207. bne 1b
  208. mov %o3,%o2 ! cas is always fetching to dest. register
  209. add %o1,%o2,%o0 ! OpenSSL expects the new value
  210. retl
  211. sra %o0,%g0,%o0 ! we return signed int, remember?
  212. .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
  ! _sparcv9_rdtick: return the %tick counter in %o0 with its upper 32
  ! bits copied to %o1. When the rd %ccr probe does not report V9, both
  ! %o0 and %o1 are returned as zero.
  213. .global _sparcv9_rdtick
  214. .align 32
  215. _sparcv9_rdtick:
  216. subcc %g0,1,%o0
  217. .word 0x91408000 !rd %ccr,%o0
  218. cmp %o0,0x99
  ! The xor sits in the delay slot of bne, so %o0 is zeroed on both paths
  ! before the V9 path overwrites it with %tick.
  219. bne .notick
  220. xor %o0,%o0,%o0
  221. .word 0x91410000 !rd %tick,%o0
  222. retl
  223. .word 0x93323020 !srlx %o0,32,%o1
  224. .notick:
  ! Delay slot of retl clears %o1.
  225. retl
  226. xor %o1,%o1,%o1
  227. .type _sparcv9_rdtick,#function
  228. .size _sparcv9_rdtick,.-_sparcv9_rdtick
  ! _sparcv9_vis1_probe: execute two hand-encoded instructions, a load
  ! through ASI_FP16_P from the address %sp + stack offset + 2 into %f0,
  ! then fxor clearing %f0 in the delay slot of retl.
  ! NOTE(review): presumably the caller catches the illegal-instruction
  ! signal to learn whether VIS1 is available - confirm against caller.
  229. .global _sparcv9_vis1_probe
  230. .align 8
  231. _sparcv9_vis1_probe:
  232. add %sp,BIAS+2,%o1
  233. .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0
  234. retl
  235. .word 0x81b00d80 !fxor %f0,%f0,%f0
  236. .type _sparcv9_vis1_probe,#function
  237. .size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
  238. ! Probe and instrument VIS1 instruction. Output is number of cycles it
  239. ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
  240. ! is slow (documented to be 6 cycles on T2) and the core is in-order
  241. ! single-issue, it should be possible to distinguish Tx reliably...
  242. ! Observed return values are:
  243. !
  244. ! UltraSPARC IIe 7
  245. ! UltraSPARC III 7
  246. ! UltraSPARC T1 24
  247. ! SPARC T4 65(*)
  248. !
  249. ! (*) result has lesser to do with VIS instruction latencies, rdtick
  250. ! appears that slow, but it does the trick in sense that FP and
  251. ! VIS code paths are still slower than integer-only ones.
  252. !
  253. ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
  254. !
  255. ! It would be possible to detect specifically US-T1 by instrumenting
  256. ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
  257. ! a lot of %tick-s, couple of thousand on Linux...
  !
  ! Implementation: one pair of fxor instructions runs before the first
  ! %tick read, then four more pairs are each followed by a %tick read
  ! into %o1..%o4. The smallest of the four deltas is returned in %o0.
  258. .global _sparcv9_vis1_instrument
  259. .align 8
  260. _sparcv9_vis1_instrument:
  261. .word 0x81b00d80 !fxor %f0,%f0,%f0
  262. .word 0x85b08d82 !fxor %f2,%f2,%f2
  263. .word 0x91410000 !rd %tick,%o0
  264. .word 0x81b00d80 !fxor %f0,%f0,%f0
  265. .word 0x85b08d82 !fxor %f2,%f2,%f2
  266. .word 0x93410000 !rd %tick,%o1
  267. .word 0x81b00d80 !fxor %f0,%f0,%f0
  268. .word 0x85b08d82 !fxor %f2,%f2,%f2
  269. .word 0x95410000 !rd %tick,%o2
  270. .word 0x81b00d80 !fxor %f0,%f0,%f0
  271. .word 0x85b08d82 !fxor %f2,%f2,%f2
  272. .word 0x97410000 !rd %tick,%o3
  273. .word 0x81b00d80 !fxor %f0,%f0,%f0
  274. .word 0x85b08d82 !fxor %f2,%f2,%f2
  275. .word 0x99410000 !rd %tick,%o4
  276. ! calculate intervals
  277. sub %o1,%o0,%o0
  278. sub %o2,%o1,%o1
  279. sub %o3,%o2,%o2
  280. sub %o4,%o3,%o3
  281. ! find minimum value
  ! Each annulled branch targets the instruction after its own delay
  ! slot, so the mov in the slot runs only when %o0 is larger (unsigned
  ! 64-bit compare); the pair acts as a conditional move.
  282. cmp %o0,%o1
  283. .word 0x38680002 !bgu,a %xcc,.+8
  284. mov %o1,%o0
  285. cmp %o0,%o2
  286. .word 0x38680002 !bgu,a %xcc,.+8
  287. mov %o2,%o0
  288. cmp %o0,%o3
  289. .word 0x38680002 !bgu,a %xcc,.+8
  290. mov %o3,%o0
  291. retl
  292. nop
  293. .type _sparcv9_vis1_instrument,#function
  294. .size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
  ! _sparcv9_vis2_probe: execute one hand-encoded bshuffle on %f0 in the
  ! delay slot of retl and return.
  ! NOTE(review): presumably the caller detects VIS2 by whether this
  ! instruction traps - confirm against caller.
  295. .global _sparcv9_vis2_probe
  296. .align 8
  297. _sparcv9_vis2_probe:
  298. retl
  299. .word 0x81b00980 !bshuffle %f0,%f0,%f0
  300. .type _sparcv9_vis2_probe,#function
  301. .size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
  ! _sparcv9_fmadd_probe: clear %f0 and %f2 with fxor, then execute one
  ! hand-encoded fmaddd on them in the delay slot of retl.
  ! NOTE(review): presumably the caller detects fused multiply-add
  ! support by whether this instruction traps - confirm against caller.
  302. .global _sparcv9_fmadd_probe
  303. .align 8
  304. _sparcv9_fmadd_probe:
  305. .word 0x81b00d80 !fxor %f0,%f0,%f0
  306. .word 0x85b08d82 !fxor %f2,%f2,%f2
  307. retl
  308. .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0
  309. .type _sparcv9_fmadd_probe,#function
  310. .size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
  ! _sparcv9_rdcfr: read ancillary state register 26 into %o0 (the read
  ! is hand-encoded and executes in the delay slot of retl).
  ! NOTE(review): the routine name suggests %asr26 is the CFR register;
  ! confirm against the processor documentation.
  311. .global _sparcv9_rdcfr
  312. .align 8
  313. _sparcv9_rdcfr:
  314. retl
  315. .word 0x91468000 !rd %asr26,%o0
  316. .type _sparcv9_rdcfr,#function
  317. .size _sparcv9_rdcfr,.-_sparcv9_rdcfr
  ! _sparcv9_vis3_probe: execute one hand-encoded xmulx whose operands
  ! and destination are all %g0, in the delay slot of retl, so no
  ! register is modified.
  ! NOTE(review): presumably the caller detects VIS3 by whether this
  ! instruction traps - confirm against caller.
  318. .global _sparcv9_vis3_probe
  319. .align 8
  320. _sparcv9_vis3_probe:
  321. retl
  322. .word 0x81b022a0 !xmulx %g0,%g0,%g0
  323. .type _sparcv9_vis3_probe,#function
  324. .size _sparcv9_vis3_probe,.-_sparcv9_vis3_probe
  ! _sparcv9_random: execute the hand-encoded random instruction in the
  ! delay slot of retl, leaving its result in %o0.
  ! The symbol size is measured from the label of this routine itself;
  ! measuring from the preceding probe routine would overstate it.
  325. .global _sparcv9_random
  326. .align 8
  327. _sparcv9_random:
  328. retl
  329. .word 0x91b002a0 !random %o0
  330. .type _sparcv9_random,#function
  331. .size _sparcv9_random,.-_sparcv9_random
  ! OPENSSL_cleanse: store zero over %o1 bytes starting at address %o0.
  ! Lengths up to 14 use the byte loop at .Little. Longer buffers are
  ! first aligned byte by byte and then cleared with 8-byte stores (V9)
  ! or, in 32-bit builds on a pre-V9 CPU, with 4-byte stores; whatever
  ! is left over is finished by .Little.
  332. .global OPENSSL_cleanse
  333. .align 32
  334. OPENSSL_cleanse:
  335. cmp %o1,14
  336. nop
  337. #ifdef ABI64
  338. bgu %xcc,.Lot
  339. #else
  340. bgu .Lot
  341. #endif
  ! Delay slot of the branch above: test for a zero length.
  342. cmp %o1,0
  343. bne .Little
  344. nop
  345. retl
  346. nop
  ! .Little: one byte per iteration; the pointer increment sits in the
  ! delay slot of the loop branch.
  347. .Little:
  348. stb %g0,[%o0]
  349. subcc %o1,1,%o1
  350. bnz .Little
  351. add %o0,1,%o0
  352. retl
  353. nop
  354. .align 32
  355. .Lot:
  356. #ifndef ABI64
  357. subcc %g0,1,%g1
  358. ! see above for explanation
  359. .word 0x83408000 !rd %ccr,%g1
  360. cmp %g1,0x99
  361. bne .v8lot
  362. nop
  363. #endif
  ! .v9lot: store single bytes until the pointer is 8-byte aligned.
  364. .v9lot: andcc %o0,7,%g0
  365. bz .v9aligned
  366. nop
  367. stb %g0,[%o0]
  368. sub %o1,1,%o1
  369. ba .v9lot
  370. add %o0,1,%o0
  ! Padding uses the fill word 0x01000000, the SPARC nop encoding.
  371. .align 16,0x01000000
  ! .v9aligned: 8 bytes per iteration while at least 8 bytes remain.
  ! The loop branch is hand-encoded with a displacement of -3
  ! instructions (back to the stx), so no instruction may be inserted
  ! between the label and the branch.
  372. .v9aligned:
  373. .word 0xc0720000 !stx %g0,[%o0]
  374. sub %o1,8,%o1
  375. andcc %o1,-8,%g0
  376. #ifdef ABI64
  377. .word 0x126ffffd !bnz %xcc,.v9aligned
  378. #else
  379. .word 0x124ffffd !bnz %icc,.v9aligned
  380. #endif
  381. add %o0,8,%o0
  ! Fewer than 8 bytes left: finish with the byte loop, or return.
  382. cmp %o1,0
  383. bne .Little
  384. nop
  385. retl
  386. nop
  387. #ifndef ABI64
  ! .v8lot: same scheme with 4-byte alignment and 4-byte stores.
  388. .v8lot: andcc %o0,3,%g0
  389. bz .v8aligned
  390. nop
  391. stb %g0,[%o0]
  392. sub %o1,1,%o1
  393. ba .v8lot
  394. add %o0,1,%o0
  395. nop
  396. .v8aligned:
  397. st %g0,[%o0]
  398. sub %o1,4,%o1
  399. andcc %o1,-4,%g0
  400. bnz .v8aligned
  401. add %o0,4,%o0
  402. cmp %o1,0
  403. bne .Little
  404. nop
  405. retl
  406. nop
  407. #endif
  408. .type OPENSSL_cleanse,#function
  409. .size OPENSSL_cleanse,.-OPENSSL_cleanse
  ! _sparcv9_vis1_instrument_bus: %o0 = output pointer, %o1 = count.
  ! For each of count 32-bit slots: take the %tick delta since the
  ! previous sample, push the 64-byte-aligned line holding the slot
  ! through a block load and a block store with commit (each followed by
  ! membar #Sync), then add the delta to the slot with cas.
  ! A first pass with a zero delta touches the first slot before the
  ! loop starts. Returns the original count in %o0.
  410. .global _sparcv9_vis1_instrument_bus
  411. .align 8
  412. _sparcv9_vis1_instrument_bus:
  413. mov %o1,%o3 ! save cnt
  414. .word 0x99410000 !rd %tick,%o4 ! tick
  415. mov %o4,%o5 ! lasttick = tick
  416. set 0,%g4 ! diff
  ! %g1 = address of the slot rounded down to a multiple of 64.
  417. andn %o0,63,%g1
  418. .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
  419. .word 0x8143e040 !membar #Sync
  420. .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
  421. .word 0x8143e040 !membar #Sync
  ! Slot += diff: cas stores old + diff only if the slot still holds old.
  422. ld [%o0],%o4
  423. add %o4,%g4,%g4
  424. .word 0xc9e2100c !cas [%o0],%o4,%g4
  425. .Loop: .word 0x99410000 !rd %tick,%o4
  426. sub %o4,%o5,%g4 ! diff=tick-lasttick
  427. mov %o4,%o5 ! lasttick=tick
  428. andn %o0,63,%g1
  429. .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
  430. .word 0x8143e040 !membar #Sync
  431. .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
  432. .word 0x8143e040 !membar #Sync
  433. ld [%o0],%o4
  434. add %o4,%g4,%g4
  435. .word 0xc9e2100c !cas [%o0],%o4,%g4
  436. subcc %o1,1,%o1 ! --$cnt
  437. bnz .Loop
  438. add %o0,4,%o0 ! ++$out
  ! Delay slot of retl puts the saved count into the return register.
  439. retl
  440. mov %o3,%o0
  441. .type _sparcv9_vis1_instrument_bus,#function
  442. .size _sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
  ! _sparcv9_vis1_instrument_bus2: %o0 = output pointer, %o1 = count,
  ! %o2 = maximum number of probes.
  ! Every probe pushes the 64-byte-aligned line holding the current
  ! 32-bit slot through a block load and a block store with commit and
  ! adds the latest %tick delta to the slot with cas. The pointer and
  ! the remaining count (kept multiplied by 4) advance only when the new
  ! delta differs from the previous one. The loop ends when the count
  ! reaches zero or the probe budget in %o2 is used up.
  ! Returns the number of slots advanced in %o0.
  443. .global _sparcv9_vis1_instrument_bus2
  444. .align 8
  445. _sparcv9_vis1_instrument_bus2:
  446. mov %o1,%o3 ! save cnt
  447. sll %o1,2,%o1 ! cnt*=4
  448. .word 0x99410000 !rd %tick,%o4 ! tick
  449. mov %o4,%o5 ! lasttick = tick
  450. set 0,%g4 ! diff
  451. andn %o0,63,%g1
  452. .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
  453. .word 0x8143e040 !membar #Sync
  454. .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
  455. .word 0x8143e040 !membar #Sync
  456. ld [%o0],%o4
  457. add %o4,%g4,%g4
  458. .word 0xc9e2100c !cas [%o0],%o4,%g4
  459. .word 0x99410000 !rd %tick,%o4 ! tick
  460. sub %o4,%o5,%g4 ! diff=tick-lasttick
  461. mov %o4,%o5 ! lasttick=tick
  462. mov %g4,%g5 ! lastdiff=diff
  463. .Loop2:
  464. andn %o0,63,%g1
  465. .word 0xc1985e00 !ldda [%g1]0xf0,%f0 ! block load
  466. .word 0x8143e040 !membar #Sync
  467. .word 0xc1b85c00 !stda %f0,[%g1]0xe0 ! block store and commit
  468. .word 0x8143e040 !membar #Sync
  469. ld [%o0],%o4
  470. add %o4,%g4,%g4
  471. .word 0xc9e2100c !cas [%o0],%o4,%g4
  472. subcc %o2,1,%o2 ! --max
  473. bz .Ldone2
  474. nop
  475. .word 0x99410000 !rd %tick,%o4 ! tick
  476. sub %o4,%o5,%g4 ! diff=tick-lasttick
  477. mov %o4,%o5 ! lasttick=tick
  ! Branch-free step: the mov does not alter the condition codes, so
  ! rd %ccr still sees the result of the cmp. After the and/xor pair
  ! %g1 is 4 when diff differs from lastdiff and 0 when they are equal.
  478. cmp %g4,%g5
  479. mov %g4,%g5 ! lastdiff=diff
  480. .word 0x83408000 !rd %ccr,%g1
  481. and %g1,4,%g1 ! isolate zero flag
  482. xor %g1,4,%g1 ! flip zero flag
  483. subcc %o1,%g1,%o1 ! conditional --$cnt
  484. bnz .Loop2
  485. add %o0,%g1,%o0 ! conditional ++$out
  486. .Ldone2:
  ! Undo the scaling of the remaining count; the sub in the delay slot
  ! of retl returns saved count minus remaining count.
  487. srl %o1,2,%o1
  488. retl
  489. sub %o3,%o1,%o0
  490. .type _sparcv9_vis1_instrument_bus2,#function
  491. .size _sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
  ! Place a call to OPENSSL_cpuid_setup in the .init section; the nop
  ! fills the delay slot of the call.
  492. .section ".init",#alloc,#execinstr
  493. call OPENSSL_cpuid_setup
  494. nop