2
0

add-cfi.x86_64.awk 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  # Insert GAS CFI directives ("call frame information") into x86-64 asm input
  BEGIN {
    # State carried across input lines:
    #   called       target of a "call" seen on the previous instruction line,
    #                used to detect the "call label; label:" trick
    #   line_number  number of input lines read so far, used for .loc
    #   in_function  nonzero while inside a function; CFI adjustment
    #                directives are only emitted while it is set
    called = ""
    line_number = 0
    in_function = 0

    # don't put CFI data in the .eh_frame ELF section (which we don't keep)
    print ".cfi_sections .debug_frame"
    # register the original source as file 1 so .loc directives can refer to it
    print ".file 1 \"" ARGV[1] "\""
  }
  function get_const1() {
    # First operand of a 2-operand instruction, assumed to be a constant:
    # find the leftmost (optionally negative) decimal or 0x-hex literal that is
    # directly followed by a comma in the current line, drop that comma and
    # let parse_const() turn the text into a number.
    match($0, /-?(0x[0-9a-fA-F]+|[0-9]+),/)
    return parse_const(substr($0, RSTART, RLENGTH - 1))
  }
  function canonicalize_reg(register) {
    # Map a register name (given without the leading "%") to the name of the
    # 64-bit register that contains it.

    # already a 64-bit name (rax, r8, ...): return unchanged
    if (match(register, /^r/))
      return register

    # 32-bit name (eax, esi, ...): replace the leading "e" with "r"
    if (match(register, /^e/))
      return "r" substr(register, 2)

    # 8-bit name (AH, AL, BH, BL, etc): keep the letter, rebuild as r?x
    if (match(register, /[hl]$/))
      return "r" substr(register, 1, 1) "x"

    # 16-bit name (AX, BX, CX, etc): just prepend "r"
    return "r" register
  }
  function get_reg() {
    # Return the canonical 64-bit name of the leftmost general-purpose register
    # mentioned in the current line. Only meaningful when the caller already
    # knows the line contains exactly one such register.
    match($0, /%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/)
    # skip the leading "%" of the matched text
    return canonicalize_reg(substr($0, RSTART + 1, RLENGTH - 1))
  }
  function get_reg1() {
    # First operand of a 2-operand instruction, assumed to be a register:
    # the match is a register immediately followed by a comma.
    match($0, /%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15),/)
    # drop the leading "%" and the trailing "," of the matched text
    return canonicalize_reg(substr($0, RSTART + 1, RLENGTH - 2))
  }
  function get_reg2() {
    # Second operand of a 2-operand instruction, assumed to be a register:
    # the match is a comma immediately followed by a register.
    match($0, /,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/)
    # drop the leading ",%" of the matched text
    return canonicalize_reg(substr($0, RSTART + 2, RLENGTH - 2))
  }
  function adjust_sp_offset(delta) {
    # Emit a .cfi_adjust_cfa_offset directive for a change of `delta` in the
    # stack offset. Nothing is emitted outside a function.
    if (!in_function)
      return
    printf ".cfi_adjust_cfa_offset %d\n", delta
  }
  {
    # Runs for every input line before any other rule: count the line and
    # rewrite $0 into a canonical form that the later patterns rely on.
    line_number++

    # delete comments ("#" or "//" through end of line)
    sub(/(#|\/\/).*/, "")

    # canonicalize whitespace: single spaces only, none around commas,
    # exactly one after a colon, none at either end of the line
    gsub(/[ \t]+/, " ") # mawk doesn't understand \s
    gsub(/ *, */, ",")
    gsub(/ *: */, ": ")
    sub(/ $/, "")
    sub(/^ /, "")
  }
  # Assembler directives: act on the few we care about, then pass every
  # directive through unchanged and stop processing the line.
  /^\./ {
    # a .cfi_startproc/.cfi_endproc pair should be within the same section,
    # otherwise clang will choke when generating ELF output; so close the
    # current function before a section switch
    if (in_function && $0 ~ /^\.(section|data|text)/) {
      print ".cfi_endproc"
      in_function = 0
    }

    # remember which symbols are functions; $2 is "name,@function", so
    # dropping its last 10 characters leaves the bare name
    if ($0 ~ /^\.type [a-zA-Z0-9_]+,@function/)
      functions[substr($2, 1, length($2) - 10)] = 1

    print
    next
  }
  /^[a-zA-Z0-9_]+:/ {
    # $1 is "name:" after whitespace canonicalization; strip the colon
    label = $1
    sub(/:$/, "", label)

    # "call label; label:" -- the previous instruction called this very
    # label, so note the resulting adjustment of the stack pointer
    if (label == called)
      adjust_sp_offset(8)

    if (functions[label]) {
      # entering a new function: close the one still open (if any), open a
      # new CFI region and forget all per-function register state
      if (in_function)
        print ".cfi_endproc"
      print ".cfi_startproc"
      in_function = 1
      for (register in saved)
        delete saved[register]
      for (register in dirty)
        delete dirty[register]
    }
    # no "next" here: an instruction may follow on the same line
  }
  # nothing left on the line after cleanup: emit nothing for it
  /^$/ { next }
  # Everything that reaches this point is echoed, preceded by a .loc directive
  # giving its line number in the original source. The pending "call" target
  # is forgotten here; the /call/ rule further down may set it again for
  # this line.
  {
    called = ""
    print ".loc 1 " line_number
    print
  }
  # KEEPING UP WITH THE STACK POINTER
  # %rsp should only be adjusted by pushing/popping or adding/subtracting constants
  #
  # The push/pop patterns are tied to the mnemonic position (start of the line
  # or right after "label: ") so that unrelated text such as "popcnt" or a
  # symbol operand containing "push"/"pop" is not mistaken for a stack
  # operation. Plain, "l"/"q"-suffixed and flag (pushf/popf) forms count as
  # 8 bytes.
  /^([a-zA-Z0-9_]+: )?pushf?[lq]?( |$)/ { adjust_sp_offset(8) }
  /^([a-zA-Z0-9_]+: )?popf?[lq]?( |$)/ { adjust_sp_offset(-8) }
  # 16-bit ("w"-suffixed) forms move %rsp by 2 bytes only
  /^([a-zA-Z0-9_]+: )?pushf?w( |$)/ { adjust_sp_offset(2) }
  /^([a-zA-Z0-9_]+: )?popf?w( |$)/ { adjust_sp_offset(-2) }
  # add/sub of a constant to %rsp; accept the "q" operand-size suffix as well
  # as the unsuffixed form (adding to %rsp shrinks the frame, hence the sign)
  /add[lq]? \$-?(0x[0-9a-fA-F]+|[0-9]+),%rsp/ { adjust_sp_offset(-get_const1()) }
  /sub[lq]? \$-?(0x[0-9a-fA-F]+|[0-9]+),%rsp/ { adjust_sp_offset(get_const1()) }
  # Return the label named by a direct "call"/"callq" in `line`, or "" when
  # the line has no such call (e.g. an indirect call).
  # For a numeric forward reference such as "call 1f" the trailing "f" is
  # dropped, so the result compares equal to the label text "1".
  function call_target(line) {
    if (match(line, /callq? [0-9]+f/)) # "forward" label
      line = substr(line, RSTART, RLENGTH - 1)
    else if (match(line, /callq? [0-9a-zA-Z_]+/))
      line = substr(line, RSTART, RLENGTH)
    else
      return ""
    # strip the mnemonic, leaving only the label
    sub(/^callq? /, "", line)
    return line
  }
  # Remember the target of a call so that the label rule can detect the
  # "call label; label:" trick on the next line. `called` has already been
  # reset to "" for this line by the rule that echoes it, so assigning ""
  # when no direct target is found changes nothing.
  /call/ {
    called = call_target($0)
  }
  # TRACKING REGISTER VALUES FROM THE PREVIOUS STACK FRAME
  #
  # The optional suffix covers "pushq" as well as the unsuffixed form; without
  # it a "pushq %rbx" would never be recorded as a saved register.
  /push[lq]? %r(ax|bx|cx|dx|si|di|bp|8|9|10|11|12|13|14|15)/ { # don't match "push (%reg)"
    # if a register is being pushed, and its value has not changed since the
    # beginning of this function, the pushed value can be used when printing
    # local variables at the next level up the stack
    # emit '.cfi_rel_offset' for that
    if (in_function) {
      register = get_reg()
      # only the first save of a still-unmodified register is recorded
      if (!saved[register] && !dirty[register]) {
        printf ".cfi_rel_offset %s,0\n", register
        saved[register] = 1
      }
    }
  }
  # A 64-bit register stored to [offset](%rsp): if the register still holds
  # the value it had on function entry, record where it was saved.
  # The optional suffix covers "movq" as well as the unsuffixed form.
  /mov[lq]? %r(ax|bx|cx|dx|si|di|bp|8|9|10|11|12|13|14|15),-?(0x[0-9a-fA-F]+|[0-9]+)?\(%rsp\)/ {
    if (in_function) {
      # %rsp is not in get_reg()'s register list, so this is the source register
      register = get_reg()
      if (match($0, /-?(0x[0-9a-fA-F]+|[0-9]+)\(%rsp\)/)) {
        # strip the 6 characters of "(%rsp)" to leave just the displacement
        offset = parse_const(substr($0, RSTART, RLENGTH-6))
      } else {
        # plain "(%rsp)" with no displacement
        offset = 0
      }
      if (!saved[register] && !dirty[register]) {
        printf ".cfi_rel_offset %s,%d\n", register, offset
        saved[register] = 1
      }
    }
  }
  # IF REGISTER VALUES ARE UNCEREMONIOUSLY TRASHED
  # ...then we want to know about it.
  #
  # Mark `register` as modified in the current function. The first time this
  # happens to a register that was never recorded as saved, and only while
  # inside a function, a .cfi_undefined directive is emitted for it.
  function trashed(register) {
    if (in_function) {
      if (!saved[register]) {
        if (!dirty[register])
          print ".cfi_undefined " register
      }
    }
    dirty[register] = 1
  }
  # this does NOT exhaustively check for all possible instructions which could
  # overwrite a register value inherited from the caller (just the common ones)
  #
  # Two-operand forms: the destination is the last operand on the line. These
  # patterns are unanchored at the mnemonic, so size-suffixed spellings
  # (movq, addq, xorl, ...) are already covered.
  /mov.*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ { trashed(get_reg2()) }
  /(add|addl|sub|subl|and|or|xor|lea|sal|sar|shl|shr).*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ {
    trashed(get_reg2())
  }
  # one-operand multiply writes rdx:rax; allow an operand-size suffix
  # (mulq, imull, ...) in addition to the bare mnemonic
  /^i?mul[bwlq]? [^,]*$/ { trashed("rax"); trashed("rdx") }
  /^i?mul.*,%[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)$/ { trashed(get_reg2()) }
  /^i?div/ { trashed("rax"); trashed("rdx") }
  # single-register read-modify-write or load; allow an operand-size suffix
  # (incl, decq, popq, ...) in addition to the bare mnemonic
  /(dec|inc|not|neg|pop)[bwlq]? %[er]?([abcd][xlh]|si|di|bp|8|9|10|11|12|13|14|15)/ { trashed(get_reg()) }
  /cpuid/ { trashed("rax"); trashed("rbx"); trashed("rcx"); trashed("rdx") }
  END {
    # close the CFI region of a function still open at end of input
    if (in_function) {
      print ".cfi_endproc"
    }
  }