x86_64cpuid.pl 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. #!/usr/bin/env perl
  2. $flavour = shift;
  3. $output = shift;
  4. if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
  5. $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
  6. $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  7. open OUT,"| \"$^X\" ${dir}perlasm/x86_64-xlate.pl $flavour $output";
  8. *STDOUT=*OUT;
  9. if ($win64) { $arg1="%rcx"; $arg2="%rdx"; }
  10. else { $arg1="%rdi"; $arg2="%rsi"; }
  11. print<<___;
  12. .extern OPENSSL_cpuid_setup
  13. .section .init
  14. call OPENSSL_cpuid_setup
  15. .text
  16. .globl OPENSSL_atomic_add
  17. .type OPENSSL_atomic_add,\@abi-omnipotent
  18. .align 16
  19. OPENSSL_atomic_add:
  20. movl ($arg1),%eax
  21. .Lspin: leaq ($arg2,%rax),%r8
  22. .byte 0xf0 # lock
  23. cmpxchgl %r8d,($arg1)
  24. jne .Lspin
  25. movl %r8d,%eax
  26. .byte 0x48,0x98 # cltq/cdqe
  27. ret
  28. .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
  29. .globl OPENSSL_rdtsc
  30. .type OPENSSL_rdtsc,\@abi-omnipotent
  31. .align 16
  32. OPENSSL_rdtsc:
  33. rdtsc
  34. shl \$32,%rdx
  35. or %rdx,%rax
  36. ret
  37. .size OPENSSL_rdtsc,.-OPENSSL_rdtsc
  38. .globl OPENSSL_ia32_cpuid
  39. .type OPENSSL_ia32_cpuid,\@abi-omnipotent
  40. .align 16
  41. OPENSSL_ia32_cpuid:
  42. mov %rbx,%r8
  43. xor %eax,%eax
  44. cpuid
  45. mov %eax,%r11d # max value for standard query level
  46. xor %eax,%eax
  47. cmp \$0x756e6547,%ebx # "Genu"
  48. setne %al
  49. mov %eax,%r9d
  50. cmp \$0x49656e69,%edx # "ineI"
  51. setne %al
  52. or %eax,%r9d
  53. cmp \$0x6c65746e,%ecx # "ntel"
  54. setne %al
  55. or %eax,%r9d # 0 indicates Intel CPU
  56. jz .Lintel
  57. cmp \$0x68747541,%ebx # "Auth"
  58. setne %al
  59. mov %eax,%r10d
  60. cmp \$0x69746E65,%edx # "enti"
  61. setne %al
  62. or %eax,%r10d
  63. cmp \$0x444D4163,%ecx # "cAMD"
  64. setne %al
  65. or %eax,%r10d # 0 indicates AMD CPU
  66. jnz .Lintel
  67. # AMD specific
  68. mov \$0x80000000,%eax
  69. cpuid
  70. cmp \$0x80000008,%eax
  71. jb .Lintel
  72. mov \$0x80000008,%eax
  73. cpuid
  74. movzb %cl,%r10 # number of cores - 1
  75. inc %r10 # number of cores
  76. mov \$1,%eax
  77. cpuid
  78. bt \$28,%edx # test hyper-threading bit
  79. jnc .Ldone
  80. shr \$16,%ebx # number of logical processors
  81. cmp %r10b,%bl
  82. ja .Ldone
  83. and \$0xefffffff,%edx # ~(1<<28)
  84. jmp .Ldone
  85. .Lintel:
  86. cmp \$4,%r11d
  87. mov \$-1,%r10d
  88. jb .Lnocacheinfo
  89. mov \$4,%eax
  90. mov \$0,%ecx # query L1D
  91. cpuid
  92. mov %eax,%r10d
  93. shr \$14,%r10d
  94. and \$0xfff,%r10d # number of cores -1 per L1D
  95. .Lnocacheinfo:
  96. mov \$1,%eax
  97. cpuid
  98. cmp \$0,%r9d
  99. jne .Lnotintel
  100. or \$0x00100000,%edx # use reserved 20th bit to engage RC4_CHAR
  101. and \$15,%ah
  102. cmp \$15,%ah # examine Family ID
  103. je .Lnotintel
  104. or \$0x40000000,%edx # use reserved bit to skip unrolled loop
  105. .Lnotintel:
  106. bt \$28,%edx # test hyper-threading bit
  107. jnc .Ldone
  108. and \$0xefffffff,%edx # ~(1<<28)
  109. cmp \$0,%r10d
  110. je .Ldone
  111. or \$0x10000000,%edx # 1<<28
  112. shr \$16,%ebx
  113. cmp \$1,%bl # see if cache is shared
  114. ja .Ldone
  115. and \$0xefffffff,%edx # ~(1<<28)
  116. .Ldone:
  117. shl \$32,%rcx
  118. mov %edx,%eax
  119. mov %r8,%rbx
  120. or %rcx,%rax
  121. ret
  122. .size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
  123. .globl OPENSSL_cleanse
  124. .type OPENSSL_cleanse,\@abi-omnipotent
  125. .align 16
  126. OPENSSL_cleanse:
  127. xor %rax,%rax
  128. cmp \$15,$arg2
  129. jae .Lot
  130. cmp \$0,$arg2
  131. je .Lret
  132. .Little:
  133. mov %al,($arg1)
  134. sub \$1,$arg2
  135. lea 1($arg1),$arg1
  136. jnz .Little
  137. .Lret:
  138. ret
  139. .align 16
  140. .Lot:
  141. test \$7,$arg1
  142. jz .Laligned
  143. mov %al,($arg1)
  144. lea -1($arg2),$arg2
  145. lea 1($arg1),$arg1
  146. jmp .Lot
  147. .Laligned:
  148. mov %rax,($arg1)
  149. lea -8($arg2),$arg2
  150. test \$-8,$arg2
  151. lea 8($arg1),$arg1
  152. jnz .Laligned
  153. cmp \$0,$arg2
  154. jne .Little
  155. ret
  156. .size OPENSSL_cleanse,.-OPENSSL_cleanse
  157. ___
  158. print<<___ if (!$win64);
  159. .globl OPENSSL_wipe_cpu
  160. .type OPENSSL_wipe_cpu,\@abi-omnipotent
  161. .align 16
  162. OPENSSL_wipe_cpu:
  163. pxor %xmm0,%xmm0
  164. pxor %xmm1,%xmm1
  165. pxor %xmm2,%xmm2
  166. pxor %xmm3,%xmm3
  167. pxor %xmm4,%xmm4
  168. pxor %xmm5,%xmm5
  169. pxor %xmm6,%xmm6
  170. pxor %xmm7,%xmm7
  171. pxor %xmm8,%xmm8
  172. pxor %xmm9,%xmm9
  173. pxor %xmm10,%xmm10
  174. pxor %xmm11,%xmm11
  175. pxor %xmm12,%xmm12
  176. pxor %xmm13,%xmm13
  177. pxor %xmm14,%xmm14
  178. pxor %xmm15,%xmm15
  179. xorq %rcx,%rcx
  180. xorq %rdx,%rdx
  181. xorq %rsi,%rsi
  182. xorq %rdi,%rdi
  183. xorq %r8,%r8
  184. xorq %r9,%r9
  185. xorq %r10,%r10
  186. xorq %r11,%r11
  187. leaq 8(%rsp),%rax
  188. ret
  189. .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
  190. ___
  191. print<<___ if ($win64);
  192. .globl OPENSSL_wipe_cpu
  193. .type OPENSSL_wipe_cpu,\@abi-omnipotent
  194. .align 16
  195. OPENSSL_wipe_cpu:
  196. pxor %xmm0,%xmm0
  197. pxor %xmm1,%xmm1
  198. pxor %xmm2,%xmm2
  199. pxor %xmm3,%xmm3
  200. pxor %xmm4,%xmm4
  201. pxor %xmm5,%xmm5
  202. xorq %rcx,%rcx
  203. xorq %rdx,%rdx
  204. xorq %r8,%r8
  205. xorq %r9,%r9
  206. xorq %r10,%r10
  207. xorq %r11,%r11
  208. leaq 8(%rsp),%rax
  209. ret
  210. .size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
  211. ___
  212. close STDOUT; # flush