2
0

x86_64cpuid.pl 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310
  1. #!/usr/bin/env perl
  # Command line: [flavour] [output].  A single argument that contains a dot
  # is taken to be the output file name and the flavour is left undefined.
  $flavour = shift;
  $output  = shift;
  if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

  # The Win64 register convention is used for nasm/masm/mingw64 flavours or
  # whenever the output file name ends in .asm.
  $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

  # Look for the translator next to this script, then in a perlasm/ subdirectory.
  $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
  ( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
  die "can't locate x86_64-xlate.pl";

  # Everything printed from here on is piped through the translator.  The
  # script and output paths are quoted so that directories containing spaces
  # survive the shell, and a failure to start the pipe aborts the run instead
  # of silently discarding the generated code.  The output argument is only
  # passed when one was given, so the argument list seen by the translator is
  # unchanged from before.
  open OUT,"| \"$^X\" \"$xlate\" $flavour ".(defined($output) ? "\"$output\"" : "")
      or die "can't call $xlate: $!";
  *STDOUT=*OUT;

  # First four integer argument registers of the selected calling convention.
  ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
                                   ("%rdi","%rsi","%rdx","%rcx"); # Unix order
  # Start of the generated assembly: put a call to OPENSSL_cpuid_setup into
  # the .init section and declare the hidden capability vector
  # OPENSSL_ia32cap_P before switching to .text for the routines that follow.
  print<<___;
  .extern OPENSSL_cpuid_setup
  .hidden OPENSSL_cpuid_setup
  .section .init
          call    OPENSSL_cpuid_setup

  # capability vector: 16-byte common symbol (third operand is the alignment)
  .hidden OPENSSL_ia32cap_P
  .comm   OPENSSL_ia32cap_P,16,4

  .text

  # OPENSSL_atomic_add
  #   Adds the second argument to the 32-bit word addressed by the first
  #   argument using a lock-prefixed compare-and-exchange retry loop.
  #   Returns the updated 32-bit value sign-extended to 64 bits in %rax.
  #   Scratch: %r8, flags.
  .globl  OPENSSL_atomic_add
  .type   OPENSSL_atomic_add,\@abi-omnipotent
  .align  16
  OPENSSL_atomic_add:
          movl    ($arg1),%eax            # %eax = value currently in memory
  .Lspin:
          leaq    ($arg2,%rax),%r8        # %r8 = observed value + addend
          .byte   0xf0                    # lock
          cmpxchgl %r8d,($arg1)           # store %r8d if memory still equals %eax
          jne     .Lspin                  # lost the race: %eax was reloaded, retry
          movl    %r8d,%eax               # return the value that was stored
          .byte   0x48,0x98               # cltq/cdqe
          ret
  .size   OPENSSL_atomic_add,.-OPENSSL_atomic_add

  # OPENSSL_rdtsc
  #   Executes rdtsc and returns the counter as one 64-bit value in %rax,
  #   combining the high half from %edx with the low half from %eax.
  #   Takes no arguments; modifies %rdx and flags.
  .globl  OPENSSL_rdtsc
  .type   OPENSSL_rdtsc,\@abi-omnipotent
  .align  16
  OPENSSL_rdtsc:
          rdtsc
          shl     \$32,%rdx               # move high half into bits 63..32
          or      %rdx,%rax               # %rax = high:low
          ret
  .size   OPENSSL_rdtsc,.-OPENSSL_rdtsc

  # OPENSSL_ia32_cpuid
  #   One argument, used as %rdi: base of a buffer whose 32-bit word at
  #   offset 8 receives the leaf 7 %ebx value (zero if leaf 7 is absent).
  #   Returns in %rax the leaf 1 feature words packed as %ecx:%edx, after
  #   the adjustments made below (reserved bits 20/30 of %edx reused as
  #   markers, hyper-threading bit re-evaluated, bit 11 of %ecx replaced by
  #   the AMD XOP flag, AVX-class bits cleared when the OS has not enabled
  #   XMM and YMM state).
  #   Register roles: %r8 = saved %rbx, %r9d = vendor marker then %ecx copy,
  #   %r10d = core count scratch then %edx copy, %r11d = max standard leaf.
  #   The instruction order matters: several mov instructions sit between a
  #   cmp and its conditional jump and rely on mov leaving flags untouched.
  .globl  OPENSSL_ia32_cpuid
  .type   OPENSSL_ia32_cpuid,\@function,1
  .align  16
  OPENSSL_ia32_cpuid:
          mov     %rbx,%r8                # save %rbx

          xor     %eax,%eax
          mov     %eax,8(%rdi)            # clear extended feature flags
          cpuid
          mov     %eax,%r11d              # max value for standard query level

          # vendor string check, %r9d stays 0 only for an exact Intel match
          xor     %eax,%eax
          cmp     \$0x756e6547,%ebx       # "Genu"
          setne   %al
          mov     %eax,%r9d
          cmp     \$0x49656e69,%edx       # "ineI"
          setne   %al
          or      %eax,%r9d
          cmp     \$0x6c65746e,%ecx       # "ntel"
          setne   %al
          or      %eax,%r9d               # 0 indicates Intel CPU
          jz      .Lintel

          # same test against the AMD vendor string, result in %r10d
          cmp     \$0x68747541,%ebx       # "Auth"
          setne   %al
          mov     %eax,%r10d
          cmp     \$0x69746E65,%edx       # "enti"
          setne   %al
          or      %eax,%r10d
          cmp     \$0x444D4163,%ecx       # "cAMD"
          setne   %al
          or      %eax,%r10d              # 0 indicates AMD CPU
          jnz     .Lintel                 # neither vendor: take the common path

          # AMD specific
          mov     \$0x80000000,%eax
          cpuid
          cmp     \$0x80000001,%eax
          jb      .Lintel                 # no extended leaves available
          mov     %eax,%r10d              # max extended query level
          mov     \$0x80000001,%eax
          cpuid
          or      %ecx,%r9d
          and     \$0x00000801,%r9d       # isolate AMD XOP bit, 1<<11

          cmp     \$0x80000008,%r10d
          jb      .Lintel

          mov     \$0x80000008,%eax
          cpuid
          movzb   %cl,%r10                # number of cores - 1
          inc     %r10                    # number of cores

          mov     \$1,%eax
          cpuid
          bt      \$28,%edx               # test hyper-threading bit
          jnc     .Lgeneric
          shr     \$16,%ebx               # number of logical processors
          cmp     %r10b,%bl
          ja      .Lgeneric               # more logical processors than cores
          and     \$0xefffffff,%edx       # ~(1<<28)
          jmp     .Lgeneric

  .Lintel:
          cmp     \$4,%r11d
          mov     \$-1,%r10d              # default when leaf 4 is unavailable
          jb      .Lnocacheinfo           # still testing the cmp above

          mov     \$4,%eax
          mov     \$0,%ecx                # query L1D
          cpuid
          mov     %eax,%r10d
          shr     \$14,%r10d
          and     \$0xfff,%r10d           # number of cores -1 per L1D

  .Lnocacheinfo:
          mov     \$1,%eax
          cpuid
          and     \$0xbfefffff,%edx       # force reserved bits to 0
          cmp     \$0,%r9d
          jne     .Lnotintel
          or      \$0x40000000,%edx       # set reserved bit#30 on Intel CPUs
          and     \$15,%ah
          cmp     \$15,%ah                # examine Family ID
          jne     .Lnotintel
          or      \$0x00100000,%edx       # set reserved bit#20 to engage RC4_CHAR
  .Lnotintel:
          bt      \$28,%edx               # test hyper-threading bit
          jnc     .Lgeneric
          and     \$0xefffffff,%edx       # ~(1<<28)
          cmp     \$0,%r10d
          je      .Lgeneric               # one core per L1D: leave bit 28 clear

          or      \$0x10000000,%edx       # 1<<28
          shr     \$16,%ebx
          cmp     \$1,%bl                 # see if cache is shared
          ja      .Lgeneric
          and     \$0xefffffff,%edx       # ~(1<<28)

  .Lgeneric:
          and     \$0x00000800,%r9d       # isolate AMD XOP flag
          and     \$0xfffff7ff,%ecx
          or      %ecx,%r9d               # merge AMD XOP flag

          mov     %edx,%r10d              # %r9d:%r10d is copy of %ecx:%edx

          cmp     \$7,%r11d
          jb      .Lno_extended_info
          mov     \$7,%eax
          xor     %ecx,%ecx
          cpuid
          mov     %ebx,8(%rdi)            # save extended feature flags

  .Lno_extended_info:
          bt      \$27,%r9d               # check OSXSAVE bit
          jnc     .Lclear_avx
          xor     %ecx,%ecx               # XCR0
          .byte   0x0f,0x01,0xd0          # xgetbv
          and     \$6,%eax                # isolate XMM and YMM state support
          cmp     \$6,%eax
          je      .Ldone
  .Lclear_avx:
          mov     \$0xefffe7ff,%eax       # ~(1<<28|1<<12|1<<11)
          and     %eax,%r9d               # clear AVX, FMA and AMD XOP bits
          andl    \$0xffffffdf,8(%rdi)    # clear AVX2, ~(1<<5)
  .Ldone:
          shl     \$32,%r9                # %ecx copy becomes the high half
          mov     %r10d,%eax              # %edx copy becomes the low half
          mov     %r8,%rbx                # restore %rbx
          or      %r9,%rax
          ret
  .size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

  # OPENSSL_cleanse
  #   Stores zero bytes over a buffer: first argument is the start address,
  #   second argument is the length in bytes.  Both argument registers are
  #   consumed; %rax is zero on return.
  #   Lengths below 15 are cleared one byte at a time.  Longer buffers are
  #   cleared bytewise until the address is 8-byte aligned, then 8 bytes at
  #   a time while at least 8 bytes remain, then bytewise for the tail.
  .globl  OPENSSL_cleanse
  .type   OPENSSL_cleanse,\@abi-omnipotent
  .align  16
  OPENSSL_cleanse:
          xor     %rax,%rax               # the fill value
          cmp     \$15,$arg2
          jae     .Lot                    # 15 bytes or more: use the wide path
          cmp     \$0,$arg2
          je      .Lret                   # nothing to do for an empty buffer

  # byte loop, entered with a non-zero remaining length
  .Little:
          mov     %al,($arg1)
          sub     \$1,$arg2               # sets ZF for the jnz below
          lea     1($arg1),$arg1          # lea keeps the flags from sub
          jnz     .Little
  .Lret:
          ret

  # wide path: single bytes until the address is a multiple of 8
  .align  16
  .Lot:
          test    \$7,$arg1
          jz      .Laligned
          mov     %al,($arg1)
          lea     -1($arg2),$arg2
          lea     1($arg1),$arg1
          jmp     .Lot

  # 8-byte stores while the remaining length is 8 or more
  .Laligned:
          mov     %rax,($arg1)
          lea     -8($arg2),$arg2
          test    \$-8,$arg2              # any bits at or above 8 left
          lea     8($arg1),$arg1          # lea keeps the flags from test
          jnz     .Laligned
          cmp     \$0,$arg2
          jne     .Little                 # 1..7 trailing bytes
          ret
  .size   OPENSSL_cleanse,.-OPENSSL_cleanse
___
  # OPENSSL_wipe_cpu: zeroes a set of XMM and general-purpose registers and
  # returns %rsp+8 in %rax.  The register set depends on the target ABI: the
  # Win64 variant touches only %xmm0-%xmm5 and leaves %rsi/%rdi alone, the
  # Unix variant clears %xmm0-%xmm15 and includes %rsi/%rdi.  Both variants
  # share one template; only the register lists differ.
  {
    my @xmm = $win64 ? (0..5) : (0..15);
    my @gpr = $win64 ? qw(rcx rdx r8 r9 r10 r11)
                     : qw(rcx rdx rsi rdi r8 r9 r10 r11);

    # One instruction per register, XMM registers first, in list order.
    my $wipe = join("\n", map("          pxor    %xmm$_,%xmm$_", @xmm),
                          map("          xorq    %$_,%$_", @gpr));

    print<<___;
  .globl  OPENSSL_wipe_cpu
  .type   OPENSSL_wipe_cpu,\@abi-omnipotent
  .align  16
  OPENSSL_wipe_cpu:
$wipe
          leaq    8(%rsp),%rax
          ret
  .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
  }
  # Hardware random number helpers.
  print<<___;

  # OPENSSL_ia32_rdrand
  #   Tries rdrand up to 8 times and stops at the first attempt that sets
  #   the carry flag.  The value is returned in %rax; if %rax is zero at
  #   that point, the remaining attempt counter in %rcx is returned
  #   instead.  When every attempt fails %rcx has counted down to zero, so
  #   the result is zero as long as the failed instruction left %rax zero.
  #   Modifies %rcx and flags.
  .globl  OPENSSL_ia32_rdrand
  .type   OPENSSL_ia32_rdrand,\@abi-omnipotent
  .align  16
  OPENSSL_ia32_rdrand:
          mov     \$8,%ecx                # attempt budget
  .Loop_rdrand:
          rdrand  %rax
          jc      .Lbreak_rdrand          # carry set: value is valid
          loop    .Loop_rdrand            # decrement %rcx, retry while non-zero
  .Lbreak_rdrand:
          cmp     \$0,%rax
          cmove   %rcx,%rax               # substitute the counter for a zero value
          ret
  .size   OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand

  # OPENSSL_ia32_rdseed
  #   Tries rdseed up to 8 times and stops at the first attempt that sets
  #   the carry flag.  The value is returned in %rax; if %rax is zero at
  #   that point, the remaining attempt counter in %rcx is returned
  #   instead.  When every attempt fails %rcx has counted down to zero, so
  #   the result is zero as long as the failed instruction left %rax zero.
  #   Modifies %rcx and flags.
  .globl  OPENSSL_ia32_rdseed
  .type   OPENSSL_ia32_rdseed,\@abi-omnipotent
  .align  16
  OPENSSL_ia32_rdseed:
          mov     \$8,%ecx                # attempt budget
  .Loop_rdseed:
          rdseed  %rax
          jc      .Lbreak_rdseed          # carry set: value is valid
          loop    .Loop_rdseed            # decrement %rcx, retry while non-zero
  .Lbreak_rdseed:
          cmp     \$0,%rax
          cmove   %rcx,%rax               # substitute the counter for a zero value
          ret
  .size   OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
___
  # Closing the pipe flushes the generated code and waits for the translator.
  # close returns false if the final write fails or the translator exits with
  # a non-zero status, so check it rather than reporting success with
  # truncated or missing output.
  close STDOUT or die "error closing STDOUT: $!";