sparccpuid.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  /*
   * ABI selection.
   *
   * ABI64 is defined when the 64-bit ABI was requested on the command
   * line: -xarch=v9 for the Sun compiler, -m64 for GCC.
   */
#if (defined(__SUNPRO_C) && defined(__sparcv9)) || \
    (defined(__GNUC__) && defined(__arch64__))
# define ABI64
#endif

/*
 * FRAME is the stack-pointer adjustment handed to every "save" in this
 * file; BIAS is the offset added to the stack and frame pointers whenever
 * a real stack address has to be formed.
 */
#if defined(ABI64)
  .register	%g2,#scratch
  .register	%g3,#scratch
# define FRAME	-192
# define BIAS	2047
#else
# define FRAME	-96
# define BIAS	0
#endif
  .text
  .align	32
  .global	OPENSSL_wipe_cpu
  .type	OPENSSL_wipe_cpu,#function
  !
  ! OPENSSL_wipe_cpu - scrub integer and floating-point register state.
  !
  ! Only registers are wiped here. The register-window backing store,
  ! which resides on the stack toward lower addresses, is NOT touched,
  ! so the caller is still expected to wipe the stack. To make that
  ! easier the routine returns a pointer to the top of stack of the
  ! *caller* (frame pointer plus the stack bias).
  !
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp
	nop
#if defined(__sun)
#include <sys/trap.h>
	ta	ST_CLEAN_WINDOWS
#else
	call	.walk.reg.wins
#endif
	nop				! delay slot when the call above is assembled

	! Form the run-time address of .zero without absolute relocations:
	! the delay slot loads the distance from the call instruction to
	! .zero and the helper adds the call address left in %o7.
	call	.PIC.zero.up
	mov	.zero-(.-4),%o0
	ld	[%o0],%f0		! %f0 = 0
	ld	[%o0],%f1		! %f1 = 0

	subcc	%g0,1,%o0
	! The word below is the V9 "rd %ccr,%o0" instruction. The V8
	! specification says that it ("rd %asr2,%o0" in V8 terms) does not
	! cause an illegal_instruction trap, so it can be used to tell
	! whether the CPU is V8- or V9-compliant: V9 returns the distinct
	! value 0x99, i.e. "negative" and "borrow" set in both %icc and
	! %xcc.
	.word	0x91408000		!rd %ccr,%o0
	cmp	%o0,0x99
	bne	.wipe.v8
	nop

	! V9 only. The upper FP bank is not used by this code, but it is
	! wiped anyway because memcpy might have used it...
	.word	0xbfa00040		!fmovd %f0,%f62
	.word	0xbba00040		!...
	.word	0xb7a00040
	.word	0xb3a00040
	.word	0xafa00040
	.word	0xaba00040
	.word	0xa7a00040
	.word	0xa3a00040
	.word	0x9fa00040
	.word	0x9ba00040
	.word	0x97a00040
	.word	0x93a00040
	.word	0x8fa00040
	.word	0x8ba00040
	.word	0x87a00040
	.word	0x83a00040		!fmovd %f0,%f32

	! Common tail: FP copies of the zeroed %f0/%f1 interleaved with
	! integer register clears.
.wipe.v8:
	fmovs	%f1,%f31
	clr	%o0
	fmovs	%f0,%f30
	clr	%o1
	fmovs	%f1,%f29
	clr	%o2
	fmovs	%f0,%f28
	clr	%o3
	fmovs	%f1,%f27
	clr	%o4
	fmovs	%f0,%f26
	clr	%o5
	fmovs	%f1,%f25
	clr	%o7
	fmovs	%f0,%f24
	clr	%l0
	fmovs	%f1,%f23
	clr	%l1
	fmovs	%f0,%f22
	clr	%l2
	fmovs	%f1,%f21
	clr	%l3
	fmovs	%f0,%f20
	clr	%l4
	fmovs	%f1,%f19
	clr	%l5
	fmovs	%f0,%f18
	clr	%l6
	fmovs	%f1,%f17
	clr	%l7
	fmovs	%f0,%f16
	clr	%i0
	fmovs	%f1,%f15
	clr	%i1
	fmovs	%f0,%f14
	clr	%i2
	fmovs	%f1,%f13
	clr	%i3
	fmovs	%f0,%f12
	clr	%i4
	fmovs	%f1,%f11
	clr	%i5
	fmovs	%f0,%f10
	clr	%g1
	fmovs	%f1,%f9
	clr	%g2
	fmovs	%f0,%f8
	clr	%g3
	fmovs	%f1,%f7
	clr	%g4
	fmovs	%f0,%f6
	clr	%g5
	fmovs	%f1,%f5
	fmovs	%f0,%f4
	fmovs	%f1,%f3
	fmovs	%f0,%f2

	add	%fp,BIAS,%i0		! return value: top of stack of the caller
	ret
	restore

	! Two zero words used as the source for the FP wipe.
.zero:
	.long	0x0,0x0

	! Leaf helper: turns the offset passed in %o0 into an address by
	! adding the address of the calling instruction held in %o7.
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0

#ifdef DEBUG
  .global	walk_reg_wins
  .type	walk_reg_wins,#function
walk_reg_wins:
#endif
	! Recursive register-window walker. Each level opens a new window
	! with "save"; it returns straight away when %i7 equals %o7,
	! stops descending when %o7 is already zero, and otherwise recurses
	! once more. On the way back each level clears the outs and locals
	! it can reach. NOTE(review): the %i7 == %o7 test looks like the
	! wrap-around detector for the window file - confirm.
.walk.reg.wins:
	save	%sp,FRAME,%sp
	cmp	%i7,%o7
	be	.walk.ret
	clr	%o0			! delay slot
	cmp	%o7,0			! compiler never cleans %o7...
	be	.walk.clear		! could have been a leaf function...
	clr	%o1			! delay slot
	call	.walk.reg.wins
	nop
.walk.clear:
	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0		! used for debugging
.walk.ret:
	ret
	restore
  .size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
  .global	OPENSSL_atomic_add
  .type	OPENSSL_atomic_add,#function
  .align	32
  !
  ! OPENSSL_atomic_add - add %o1 to the 32-bit word at [%o0].
  !
  ! Returns the new value as a signed int. On V9 hardware (always, when
  ! built for the 64-bit ABI) a compare-and-swap loop is used. In a
  ! 32-bit build running on V8 hardware the word itself doubles as a
  ! lock: -1 is swapped in while the update is in flight, and the CPU is
  ! yielded while another updater holds it.
  !
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	.word	0x95408000		!rd %ccr,%o2, reads 0x99 only on V9
	cmp	%o2,0x99
	be	.aa.cas
	nop

	! ---- V8 path: swap-based lock ----
	save	%sp,FRAME,%sp
	ba	.aa.try
	nop
#if defined(__sun)
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define YIELD_CPU thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define YIELD_CPU sched_yield
#endif
.aa.yield:
	call	YIELD_CPU
	nop
.aa.try:
	ld	[%i0],%i2
	! NOTE(review): this pre-check compares against -4096 while the
	! value actually swapped in below is -1 - confirm which is meant.
	cmp	%i2,-4096
	be	.aa.yield
	mov	-1,%i2			! delay slot: lock token
	swap	[%i0],%i2		! %i2 = previous contents, [%i0] = -1
	cmp	%i2,-1
	be	.aa.yield		! somebody else holds the lock
	add	%i2,%i1,%i2		! delay slot: new value
	stbar
	st	%i2,[%i0]		! publish result, which also unlocks
	sra	%i2,%g0,%i0
	ret
	restore
.aa.cas:
#endif
	! ---- V9 path: compare-and-swap loop ----
	ld	[%o0],%o2
.aa.retry:
	add	%o1,%o2,%o3
	.word	0xd7e2100a		!cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	.aa.retry
	mov	%o3,%o2			! cas is always fetching to dest. register
	add	%o1,%o2,%o0		! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0		! we return signed int, remember?
  .size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
  .global	_sparcv9_rdtick
  .type	_sparcv9_rdtick,#function
  .align	32
  !
  ! _sparcv9_rdtick - read the %tick counter when running on V9.
  !
  ! V9: %o0 receives %tick and %o1 its upper 32 bits.
  ! V8: %o0 and %o1 are both returned as zero.
  !
_sparcv9_rdtick:
	subcc	%g0,1,%o0
	.word	0x91408000		!rd %ccr,%o0, reads 0x99 only on V9
	cmp	%o0,0x99
	bne	.notick
	xor	%o0,%o0,%o0		! delay slot: %o0 = 0 on both paths
	.word	0x91410000		!rd %tick,%o0
	retl
	.word	0x93323020		!srlx %o0,32,%o1
.notick:
	retl
	xor	%o1,%o1,%o1
  .size	_sparcv9_rdtick,.-_sparcv9_rdtick
  .global	_sparcv9_vis1_probe
  .type	_sparcv9_vis1_probe,#function
  .align	8
  !
  ! _sparcv9_vis1_probe - execute two VIS1 instructions: a short
  ! floating-point load from the stack through ASI_FP16_P, then fxor.
  ! NOTE(review): presumably the caller traps the illegal-instruction
  ! signal to learn whether VIS1 is available - confirm against caller.
  !
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1
	.word	0xc19a5a40		!ldda [%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80		!fxor %f0,%f0,%f0
  .size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
  !
  ! _sparcv9_vis1_instrument - probe and time the VIS1 unit.
  !
  ! The result is the number of cycles needed to execute rdtick plus a
  ! pair of VIS1 instructions. The US-Tx VIS unit is slow (documented to
  ! be 6 cycles on T2) and the core is in-order single-issue, so it
  ! should be possible to distinguish Tx reliably...
  ! Observed return values are:
  !
  !	UltraSPARC IIe		7
  !	UltraSPARC III		7
  !	UltraSPARC T1		24
  !	SPARC T4		65(*)
  !
  ! (*)	this result has less to do with VIS instruction latencies:
  !	rdtick itself appears to be that slow. It still does the trick,
  !	in the sense that FP and VIS code paths remain slower than
  !	integer-only ones.
  !
  ! Numbers for T2 and SPARC64 V-VII are more than welcome.
  !
  ! It would be possible to detect US-T1 specifically by instrumenting
  ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
  ! a lot of %tick-s, a couple of thousand on Linux...
  !
  .global	_sparcv9_vis1_instrument
  .type	_sparcv9_vis1_instrument,#function
  .align	8
_sparcv9_vis1_instrument:
	! five time stamps, each preceded by a pair of fxor instructions
	.word	0x81b00d80		!fxor %f0,%f0,%f0
	.word	0x85b08d82		!fxor %f2,%f2,%f2
	.word	0x91410000		!rd %tick,%o0
	.word	0x81b00d80		!fxor %f0,%f0,%f0
	.word	0x85b08d82		!fxor %f2,%f2,%f2
	.word	0x93410000		!rd %tick,%o1
	.word	0x81b00d80		!fxor %f0,%f0,%f0
	.word	0x85b08d82		!fxor %f2,%f2,%f2
	.word	0x95410000		!rd %tick,%o2
	.word	0x81b00d80		!fxor %f0,%f0,%f0
	.word	0x85b08d82		!fxor %f2,%f2,%f2
	.word	0x97410000		!rd %tick,%o3
	.word	0x81b00d80		!fxor %f0,%f0,%f0
	.word	0x85b08d82		!fxor %f2,%f2,%f2
	.word	0x99410000		!rd %tick,%o4

	! calculate the four intervals between consecutive stamps
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! find the minimum value; each annulled branch lets its delay-slot
	! mov execute only when the candidate is smaller than %o0
	cmp	%o0,%o1
	.word	0x38680002		!bgu,a %xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002		!bgu,a %xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002		!bgu,a %xcc,.+8
	mov	%o3,%o0

	retl
	nop
  .size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
  .global	_sparcv9_vis2_probe
  .type	_sparcv9_vis2_probe,#function
  .align	8
  !
  ! _sparcv9_vis2_probe - execute a single bshuffle instruction in the
  ! delay slot of the return.
  ! NOTE(review): presumably used under an illegal-instruction handler
  ! to detect VIS2 - confirm against caller.
  !
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980		!bshuffle %f0,%f0,%f0
  .size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
  .global	_sparcv9_fmadd_probe
  .type	_sparcv9_fmadd_probe,#function
  .align	8
  !
  ! _sparcv9_fmadd_probe - zero %f0 and %f2 with fxor, then execute one
  ! fmaddd in the delay slot of the return.
  ! NOTE(review): presumably used under an illegal-instruction handler
  ! to detect fused multiply-add support - confirm against caller.
  !
_sparcv9_fmadd_probe:
	.word	0x81b00d80		!fxor %f0,%f0,%f0
	.word	0x85b08d82		!fxor %f2,%f2,%f2
	retl
	.word	0x81b80440		!fmaddd %f0,%f0,%f2,%f0
  .size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
  .global	_sparcv9_rdcfr
  .type	_sparcv9_rdcfr,#function
  .align	8
  !
  ! _sparcv9_rdcfr - return the contents of %asr26 in %o0.
  ! NOTE(review): going by the symbol name, %asr26 is presumably the
  ! compatibility feature register - confirm.
  !
_sparcv9_rdcfr:
	retl
	.word	0x91468000		!rd %asr26,%o0
  .size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
  .global	_sparcv9_vis3_probe
  .type	_sparcv9_vis3_probe,#function
  .align	8
  !
  ! _sparcv9_vis3_probe - execute a single xmulx instruction, with %g0
  ! for every operand, in the delay slot of the return.
  ! NOTE(review): presumably used under an illegal-instruction handler
  ! to detect VIS3 - confirm against caller.
  !
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0		!xmulx %g0,%g0,%g0
  .size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
  .global	_sparcv9_random
  .type	_sparcv9_random,#function
  .align	8
  !
  ! _sparcv9_random - execute the "random" instruction, which writes
  ! its result to %o0, in the delay slot of the return.
  !
  ! The size expression below is taken relative to this function's own
  ! label. It used to subtract _sparcv9_vis3_probe, which made the
  ! recorded symbol size cover the preceding function as well.
  !
_sparcv9_random:
	retl
	.word	0x91b002a0		!random %o0
  .size	_sparcv9_random,.-_sparcv9_random
  .global	OPENSSL_cleanse
  .type	OPENSSL_cleanse,#function
  .align	32
  !
  ! OPENSSL_cleanse - overwrite a buffer with zero bytes.
  !
  !	%o0	buffer address
  !	%o1	number of bytes
  !
  ! Up to 14 bytes are cleared one byte at a time. Longer buffers are
  ! first byte-stepped to an aligned address, then cleared with 8-byte
  ! stores on V9 or 4-byte stores on V8, and any remainder goes back
  ! through the byte loop.
  !
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lbulk
#else
	bgu	.Lbulk
#endif
	cmp	%o1,0			! delay slot: is there anything to do?
	bne	.Lbytes
	nop
	retl
	nop

	! byte-at-a-time loop, entered only with a non-zero count
.Lbytes:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Lbytes
	add	%o0,1,%o0		! delay slot
	retl
	nop

  .align	32
.Lbulk:
#ifndef ABI64
	subcc	%g0,1,%g1
	! V9 "rd %ccr" reads 0x99 here; anything else means a V8 CPU
	.word	0x83408000		!rd %ccr,%g1
	cmp	%g1,0x99
	bne	.Lv8_align
	nop
#endif
	! ---- V9: step to an 8-byte boundary, then store doublewords ----
.Lv9_align:
	andcc	%o0,7,%g0
	bz	.Lv9_dwords
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.Lv9_align
	add	%o0,1,%o0		! delay slot
  .align	16,0x01000000
.Lv9_dwords:
	.word	0xc0720000		!stx %g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0		! at least 8 bytes left?
	! The branches below are hand-encoded with a fixed displacement
	! back to .Lv9_dwords; do not add instructions in between.
#ifdef ABI64
	.word	0x126ffffd		!bnz %xcc,.Lv9_dwords
#else
	.word	0x124ffffd		!bnz %icc,.Lv9_dwords
#endif
	add	%o0,8,%o0		! delay slot
	cmp	%o1,0
	bne	.Lbytes			! finish the tail bytewise
	nop
	retl
	nop
#ifndef ABI64
	! ---- V8: step to a 4-byte boundary, then store words ----
.Lv8_align:
	andcc	%o0,3,%g0
	bz	.Lv8_words
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.Lv8_align
	add	%o0,1,%o0		! delay slot
	nop
.Lv8_words:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0		! at least 4 bytes left?
	bnz	.Lv8_words
	add	%o0,4,%o0		! delay slot
	cmp	%o1,0
	bne	.Lbytes			! finish the tail bytewise
	nop
	retl
	nop
#endif
  .size	OPENSSL_cleanse,.-OPENSSL_cleanse
  .global	_sparcv9_vis1_instrument_bus
  .weak	_sparcv9_vis1_instrument_bus
  .type	_sparcv9_vis1_instrument_bus,#function
  .align	8
  !
  ! _sparcv9_vis1_instrument_bus - record %tick deltas around block
  ! load/store traffic.
  !
  !	%o0	out, pointer to 32-bit slots (advanced by 4 per pass)
  !	%o1	cnt, number of passes
  !
  ! Each pass block-loads and block-stores the 64-byte-aligned line
  ! containing the current slot, then uses cas to add the latest tick
  ! difference to that slot. The original cnt is returned.
  !
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3			! save cnt
	.word	0x99410000		!rd %tick,%o4	! tick
	mov	%o4,%o5			! lasttick = tick
	set	0,%g4			! diff

	! warm-up pass with diff = 0
	andn	%o0,63,%g1
	.word	0xc1985e00		!ldda [%g1]0xf0,%f0	! block load
	.word	0x8143e040		!membar #Sync
	.word	0xc1b85c00		!stda %f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040		!membar #Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c		!cas [%o0],%o4,%g4

.Lbus_loop:
	.word	0x99410000		!rd %tick,%o4
	sub	%o4,%o5,%g4		! diff=tick-lasttick
	mov	%o4,%o5			! lasttick=tick

	andn	%o0,63,%g1
	.word	0xc1985e00		!ldda [%g1]0xf0,%f0	! block load
	.word	0x8143e040		!membar #Sync
	.word	0xc1b85c00		!stda %f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040		!membar #Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c		!cas [%o0],%o4,%g4

	subcc	%o1,1,%o1		! --$cnt
	bnz	.Lbus_loop
	add	%o0,4,%o0		! ++$out (delay slot)

	retl
	mov	%o3,%o0			! return the saved cnt
  .size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
  .global	_sparcv9_vis1_instrument_bus2
  .weak	_sparcv9_vis1_instrument_bus2
  .type	_sparcv9_vis1_instrument_bus2,#function
  .align	8
  !
  ! _sparcv9_vis1_instrument_bus2 - variant of the bus probe that only
  ! moves to the next output slot when the tick difference changes.
  !
  !	%o0	out, pointer to 32-bit slots
  !	%o1	cnt, number of slots
  !	%o2	max, upper bound on loop passes
  !
  ! cnt is kept scaled by 4 so the same register value can step both
  ! the counter and the pointer. The value returned is the original cnt
  ! minus whatever is left of it when the loop ends.
  !
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3			! save cnt
	sll	%o1,2,%o1		! cnt*=4

	.word	0x99410000		!rd %tick,%o4	! tick
	mov	%o4,%o5			! lasttick = tick
	set	0,%g4			! diff

	! warm-up pass with diff = 0
	andn	%o0,63,%g1
	.word	0xc1985e00		!ldda [%g1]0xf0,%f0	! block load
	.word	0x8143e040		!membar #Sync
	.word	0xc1b85c00		!stda %f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040		!membar #Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c		!cas [%o0],%o4,%g4

	.word	0x99410000		!rd %tick,%o4	! tick
	sub	%o4,%o5,%g4		! diff=tick-lasttick
	mov	%o4,%o5			! lasttick=tick
	mov	%g4,%g5			! lastdiff=diff

.Lbus2_loop:
	andn	%o0,63,%g1
	.word	0xc1985e00		!ldda [%g1]0xf0,%f0	! block load
	.word	0x8143e040		!membar #Sync
	.word	0xc1b85c00		!stda %f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040		!membar #Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c		!cas [%o0],%o4,%g4

	subcc	%o2,1,%o2		! --max
	bz	.Lbus2_done
	nop

	.word	0x99410000		!rd %tick,%o4	! tick
	sub	%o4,%o5,%g4		! diff=tick-lasttick
	mov	%o4,%o5			! lasttick=tick
	cmp	%g4,%g5
	mov	%g4,%g5			! lastdiff=diff

	! %g1 becomes 4 when diff differs from lastdiff, 0 otherwise
	.word	0x83408000		!rd %ccr,%g1
	and	%g1,4,%g1		! isolate zero flag
	xor	%g1,4,%g1		! flip zero flag

	subcc	%o1,%g1,%o1		! conditional --$cnt
	bnz	.Lbus2_loop
	add	%o0,%g1,%o0		! conditional ++$out (delay slot)

.Lbus2_done:
	srl	%o1,2,%o1		! undo the *4 scaling
	retl
	sub	%o3,%o1,%o0		! original cnt minus the remainder
  .size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
  .section	".init",#alloc,#execinstr
  ! Fragment placed in the ".init" section: calls OPENSSL_cpuid_setup,
  ! which is defined outside this file. The nop fills the delay slot of
  ! the call.
	call	OPENSSL_cpuid_setup
	nop