2
0

x86_64-xlate.pl 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556
  1. #!/usr/bin/env perl
  2. # Ascetic x86_64 AT&T to MASM assembler translator by <appro>.
  3. #
  4. # Why AT&T to MASM and not vice versa? Several reasons. Because AT&T
  5. # format is way easier to parse. Because it's simpler to "gear" from
  6. # Unix ABI to Windows one [see cross-reference "card" at the end of
  7. # file]. Because Linux targets were available first...
  8. #
  9. # In addition the script also "distills" code suitable for GNU
  10. # assembler, so that it can be compiled with more rigid assemblers,
  11. # such as Solaris /usr/ccs/bin/as.
  12. #
  13. # This translator is not designed to convert *arbitrary* assembler
  14. # code from AT&T format to MASM one. It's designed to convert just
  15. # enough to provide for dual-ABI OpenSSL modules development...
  16. # There *are* limitations and you might have to modify your assembler
  17. # code or this script to achieve the desired result...
  18. #
  19. # Currently recognized limitations:
  20. #
  21. # - can't use multiple ops per line;
  22. # - indirect calls and jumps are not supported;
  23. #
  24. # Dual-ABI styling rules.
  25. #
  26. # 1. Adhere to Unix register and stack layout [see the end for
  27. # explanation].
  28. # 2. Forget about "red zone," stick to more traditional blended
  29. # stack frame allocation. If volatile storage is actually required
  30. # that is. If not, just leave the stack as is.
  31. # 3. Functions tagged with ".type name,@function" get crafted with
  32. # unified Win64 prologue and epilogue automatically. If you want
  33. # to take care of ABI differences yourself, tag functions as
  34. # ".type name,@abi-omnipotent" instead.
  35. # 4. To optimize the Win64 prologue you can specify number of input
  36. # arguments as ".type name,@function,N." Keep in mind that if N is
  37. # larger than 6, then you *have to* write "abi-omnipotent" code,
  38. # because >6 cases can't be addressed with unified prologue.
  39. # 5. Name local labels as .L*, do *not* use dynamic labels such as 1:
  40. # (sorry about latter).
  41. # 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is
  42. # required to identify the spots, where to inject Win64 epilogue!
  43. # But on the pros, it's then prefixed with rep automatically:-)
  44. # 7. Due to MASM limitations [and certain general counter-intuitivity
  45. # of ip-relative addressing] generation of position-independent
  46. # code is assisted by synthetic directive, .picmeup, which puts
  47. # address of the *next* instruction into target register.
  48. #
  49. # Example 1:
  50. # .picmeup %rax
  51. # lea .Label-.(%rax),%rax
  52. # Example 2:
  53. # .picmeup %rcx
  54. # .Lpic_point:
  55. # ...
  56. # lea .Label-.Lpic_point(%rcx),%rbp
  # First command-line argument names the output file; the remaining
  # arguments (or stdin) are read by the translation loop via <>.
  my $output = shift;

  # Re-open STDOUT onto $output unless STDOUT already refers to that very
  # file (same device and inode), e.g. because the caller redirected it.
  { my ($stddev,$stdino)=stat(STDOUT);
    my ($outdev,$outino)=stat($output);

    if ($stddev!=$outdev || $stdino!=$outino) {
        # The parenthesised call plus "or" makes the die reachable; with
        # 'open STDOUT,">$output" || die' the "||" binds to the (always
        # true) file name string and a failed open goes unnoticed.
        # Two-argument open is kept on purpose so that existing callers'
        # file-name conventions keep working.
        open(STDOUT,">$output") or die "can't open $output: $!";
    }
  }

  # MASM version encoded as major + minor*2**-16 + build*2**-32.
  my $masmref=8 + 50727*2**-32;    # 8.00.50727 shipped with VS2005

  # $masm is true (and holds a version number) only when MASM output is
  # requested, which is recognised by ".asm" in the output name.  A plain
  # conditional expression is used because "my $x = ... if (...)" has
  # undefined behaviour when the condition is false.
  my $masm=($output =~ /\.asm/) ? $masmref : undef;
  if ($masm && `ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/)
  {   # the build component is optional in the pattern, treat absence as 0
      $masm=$1 + $2*2**-16 + ($4||0)*2**-32;
  }

  # Translator state shared by the parser packages below.
  my $current_segment;     # name of the MASM segment currently open
  my $current_function;    # hash ref {name,abi,narg} of function being emitted
  { package opcode;    # pick up opcodes
      # Try to consume an instruction mnemonic from the front of the line.
      #   $_[1]  reference to the line being parsed; the mnemonic and any
      #          white space following it are removed on success.
      # Returns the invocant on success, undef otherwise.  The mnemonic is
      # stored in {op} and its AT&T size suffix, if recognised, in {sz}.
      # Methods are invoked on the package name, so the "object" is the
      # package-wide hash reached through a symbolic reference.
      sub re {
          my ($self,$lineref) = @_;    # single instance is enough...
          my $matched;

          if ($$lineref =~ /^([a-z][a-z0-9]*)/i) {
              my ($mnemonic,$consumed) = ($1,$+[0]);

              $matched = $self;
              $$lineref = substr($$lineref,$consumed);
              $$lineref =~ s/^\s+//;

              $self->{op} = $mnemonic;
              undef $self->{sz};
              if ($mnemonic =~ /^(movz)b.*/) {    # movz is pain...
                  $self->{op} = $1;
                  $self->{sz} = "b";
              }
              elsif ($mnemonic =~ /call/) {
                  $self->{sz} = "";
              }
              elsif ($mnemonic =~ /([a-z]{3,})([qlwb])$/) {
                  # three or more letters followed by a size suffix
                  $self->{op} = $1;
                  $self->{sz} = $2;
              }
          }
          $matched;
      }

      # Get the operand size; an optional argument supplies a size that is
      # recorded only when the mnemonic itself did not determine one.
      sub size {
          my ($self,$sz) = @_;

          if (defined($sz) && !defined($self->{sz})) {
              $self->{sz} = $sz;
          }
          $self->{sz};
      }

      # Render the mnemonic for the selected output flavour.  The optional
      # argument is the destination size, used only for movz in AT&T mode.
      sub out {
          my ($self,$dstsz) = @_;

          if ($masm) {
              $self->{op} =~ s/^movz/movzx/;
              return $self->{op} if ($self->{op} ne "ret");

              # "ret" is the spot where the Win64 epilogue gets injected
              my $epilogue = "";
              if ($current_function->{abi} eq "svr4") {
                  $epilogue = "mov rdi,QWORD PTR 8[rsp]\t;WIN64 epilogue\n\t".
                              "mov rsi,QWORD PTR 16[rsp]\n\t";
              }
              $self->{op} = $epilogue."DB\t0F3h,0C3h\t\t;repret";
              return $self->{op};
          }

          # movz is pain...
          return sprintf("%s%s%s",$self->{op},$self->{sz},$dstsz)
              if ($self->{op} eq "movz");
          return "$self->{op}"            if ($self->{op} =~ /^set/);
          return ".byte 0xf3,0xc3"        if ($self->{op} eq "ret");
          return "$self->{op}$self->{sz}";
      }
  }
  { package const;    # pick up constants, which start with $
      # Try to consume an immediate operand ("$" followed by everything up
      # to the next comma) from the front of the line.
      #   $_[1]  reference to the line being parsed; the operand and any
      #          white space following it are removed on success.
      # Returns the invocant on success, undef otherwise.
      sub re {
          my ($self,$lineref) = @_;    # single instance is enough...
          my $ret;

          if ($$lineref =~ /^\$([^,]+)/) {
              $self->{value} = $1;
              $ret = $self;
              $$lineref = substr($$lineref,$+[0]);
              $$lineref =~ s/^\s+//;
          }
          $ret;
      }

      # Constants carry no size information.  The method exists so that the
      # translation loop can call size() on any operand kind, including an
      # immediate in destination position, without dying on a missing
      # method; like ea::size it returns nothing.
      sub size {}

      # Render the constant for the selected output flavour.
      sub out {
          my $self = shift;

          if (!$masm) {
              # Solaris /usr/ccs/bin/as can't handle multiplications
              # in $self->{value}, so fold hex/octal literals to decimal
              # and evaluate simple binary */% expressions here.  The
              # string eval only ever sees digits, white space and one
              # arithmetic operator.
              $self->{value} =~ s/(?<![0-9a-f])(0[x0-9a-f]+)/oct($1)/egi;
              $self->{value} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;
              sprintf "\$%s",$self->{value};
          } else {
              # MASM spells hexadecimal as 0...h
              $self->{value} =~ s/0x([0-9a-f]+)/0$1h/ig;
              sprintf "%s",$self->{value};
          }
      }
  }
  { package ea;    # pick up effective addresses: expr(%reg,%reg,scale)
      # Try to consume a memory operand of the form disp(base,index,scale)
      # from the front of the line.
      #   $_[1]  reference to the line being parsed; the operand and any
      #          white space following it are removed on success.
      # Returns the invocant on success, undef otherwise.  The pieces are
      # stored in {label}, {base}, {index} and {scale} (scale defaults to
      # 1, index stays undefined when absent).
      sub re {
          my ($self,$lineref) = @_;    # single instance is enough...
          my $ret;

          if ($$lineref =~ /^([^\(,]*)\(([%\w,]+)\)/) {
              my ($label,$regs,$consumed) = ($1,$2,$+[0]);

              $self->{label} = $label;
              ($self->{base},$self->{index},$self->{scale})=split(/,/,$regs);
              $self->{scale} = 1 if (!defined($self->{scale}));
              $ret = $self;
              $$lineref = substr($$lineref,$consumed);
              $$lineref =~ s/^\s+//;
              $self->{base} =~ s/^%//;
              $self->{index} =~ s/^%// if (defined($self->{index}));
          }
          $ret;
      }

      # Effective addresses do not determine the operand size.
      sub size {}

      # Render the operand for the selected output flavour.
      #   $_[1]  operand size letter (b, w, l or q); only used for the
      #          "<SIZE> PTR" prefix in MASM mode.
      sub out {
          my ($self,$sz) = @_;

          # Silently convert all EAs to 64-bit. This is required for
          # elder GNU assembler and results in more compact code,
          # *but* most importantly AES module depends on this feature!
          foreach my $reg ("index","base") {
              next if (!defined($self->{$reg}));
              $self->{$reg} =~ s/^[er](.?[0-9xpi])[d]?$/r$1/;
          }

          if (!$masm) {
              # Solaris /usr/ccs/bin/as can't handle multiplications
              # in $self->{label}.  "use integer" also governs the string
              # eval below and makes the <<32>>32 shift pair operate on
              # signed integers.
              use integer;
              $self->{label} =~ s/(?<![0-9a-f])(0[x0-9a-f]+)/oct($1)/egi;
              $self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;
              $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg;

              if (defined($self->{index})) {
                  sprintf "%s(%%%s,%%%s,%d)",
                          $self->{label},$self->{base},
                          $self->{index},$self->{scale};
              } else {
                  sprintf "%s(%%%s)", $self->{label},$self->{base};
              }
          } else {
              my %szmap = ( b=>"BYTE", w=>"WORD", l=>"DWORD", q=>"QWORD" );

              $self->{label} =~ s/\./\$/g;                # .L* -> $L*
              $self->{label} =~ s/0x([0-9a-f]+)/0$1h/ig;  # MASM hex
              $self->{label} = "($self->{label})"
                  if ($self->{label} =~ /[\*\+\-\/]/);

              if (defined($self->{index})) {
                  sprintf "%s PTR %s[%s*%d+%s]",$szmap{$sz},
                          $self->{label},
                          $self->{index},$self->{scale},
                          $self->{base};
              } elsif ($self->{base} eq "rip") {
                  sprintf "%s PTR %s",$szmap{$sz},$self->{label};
              } else {
                  sprintf "%s PTR %s[%s]",$szmap{$sz},
                          $self->{label},$self->{base};
              }
          }
      }
  }
  { package register;    # pick up registers, which start with %.
      # Try to consume a register operand from the front of the line.
      #   $_[1]  reference to the line being parsed; the register and any
      #          white space following it are removed on success.
      # Returns a fresh object holding the name without "%" in {value},
      # or undef when the line does not start with a register.
      sub re {
          my ($class,$lineref) = @_;    # multiple instances...
          my $reg;

          if ($$lineref =~ /^%(\w+)/) {
              my ($name,$consumed) = ($1,$+[0]);

              $reg = bless { value => $name }, $class;
              $$lineref = substr($$lineref,$consumed);
              $$lineref =~ s/^\s+//;
          }
          $reg;
      }

      # Deduce the operand size letter (b, w, l or q) from the register
      # name; undef when the name fits none of the patterns.  The rules
      # are tried in order and the first one that matches wins.
      sub size {
          my $self = shift;
          my $name = $self->{value};
          my $width;

          foreach my $rule (
              [ qr/^r[\d]+b$/i,   "b" ],
              [ qr/^r[\d]+w$/i,   "w" ],
              [ qr/^r[\d]+d$/i,   "l" ],
              [ qr/^r[\w]+$/i,    "q" ],
              [ qr/^[a-d][hl]$/i, "b" ],
              [ qr/^[\w]{2}l$/i,  "b" ],
              [ qr/^[\w]{2}$/i,   "w" ],
              [ qr/^e[a-z]{2}$/i, "l" ],
          ) {
              my ($pattern,$letter) = @$rule;
              if ($name =~ $pattern) {
                  $width = $letter;
                  last;
              }
          }
          $width;
      }

      # Render the register: bare name for MASM, "%"-prefixed otherwise.
      sub out {
          my $self = shift;
          my $format = $masm ? "%s" : "%%%s";

          sprintf $format,$self->{value};
      }
  }
  { package label;    # pick up labels, which end with :
      # Try to consume a label (dots and word characters followed by a
      # colon) from the front of the line.
      #   $_[1]  reference to the line being parsed; the label and any
      #          white space following it are removed on success.
      # Returns the invocant on success, undef otherwise.  In MASM mode
      # the first ".L" in the label is rewritten to "$L".
      sub re {
          my ($self,$lineref) = @_;    # single instance is enough...
          my $found;

          if ($$lineref =~ /(^[\.\w]+\:)/) {
              my ($text,$consumed) = ($1,$+[0]);

              $found = $self;
              $$lineref = substr($$lineref,$consumed);
              $$lineref =~ s/^\s+//;
              $text =~ s/\.L/\$L/ if ($masm);
              $self->{value} = $text;
          }
          $found;
      }

      # Render the label.  In MASM mode the label naming the current
      # function becomes a PROC header, followed for "svr4" functions by
      # the Win64 prologue that moves the arguments into the Unix ABI
      # registers.
      sub out {
          my $self = shift;

          return $self->{value} if (!$masm);
          return $self->{value}
              if ($self->{value} ne "$current_function->{name}:");
          return "$current_function->{name} PROC"
              if (!($current_function->{abi} eq "svr4"));

          my $func = "$current_function->{name} PROC\n".
                     " mov QWORD PTR 8[rsp],rdi\t;WIN64 prologue\n".
                     " mov QWORD PTR 16[rsp],rsi\n";

          # without an explicit count assume all six register arguments
          my $narg = $current_function->{narg};
          $narg = 6 if (!defined($narg));

          # move number N is emitted only when more than N arguments exist
          my @moves = ( "rdi,rcx", "rsi,rdx", "rdx,r8", "rcx,r9",
                        "r8,QWORD PTR 40[rsp]", "r9,QWORD PTR 48[rsp]" );
          foreach my $n (0..$#moves) {
              $func .= " mov $moves[$n]\n" if ($narg>$n);
          }
          $func .= "\n";
          return $func;
      }
  }
  { package expr;    # pick up expressions
      # Catch-all operand: consume everything up to the next comma.
      #   $_[1]  reference to the line being parsed; the expression and
      #          any white space following it are removed on success.
      # Returns the invocant on success, undef when the line is empty or
      # starts with a comma.  In MASM mode every ".L" becomes "$L".
      sub re {
          my ($self,$lineref) = @_;    # single instance is enough...
          my $found;

          if ($$lineref =~ /(^[^,]+)/) {
              my ($text,$consumed) = ($1,$+[0]);

              $found = $self;
              $$lineref = substr($$lineref,$consumed);
              $$lineref =~ s/^\s+//;
              $text =~ s/\.L/\$L/g if ($masm);
              $self->{value} = $text;
          }
          $found;
      }

      # Render the expression exactly as stored by re().
      sub out {
          my $self = shift;

          return $self->{value};
      }
  }
  { package directive;    # pick up directives, which start with .
      # Try to consume an assembler directive (".word" at the start of the
      # line, optionally preceded by white space).
      #   $_[1]  reference to the line being parsed; set to "" whenever a
      #          directive is recognised.
      # Returns the invocant on success, undef otherwise.  The translated
      # text is left in {value}; in MASM mode it stays undefined for
      # directives that produce no output of their own.
      sub re {
          my ($self,$lineref) = @_;    # single instance is enough...
          my %opcode =    # lea 2f-1f(%rip),%dst; 1: nop; 2:
              ( "%rax"=>0x01058d48, "%rcx"=>0x010d8d48,
                "%rdx"=>0x01158d48, "%rbx"=>0x011d8d48,
                "%rsp"=>0x01258d48, "%rbp"=>0x012d8d48,
                "%rsi"=>0x01358d48, "%rdi"=>0x013d8d48,
                "%r8" =>0x01058d4c, "%r9" =>0x010d8d4c,
                "%r10"=>0x01158d4c, "%r11"=>0x011d8d4c,
                "%r12"=>0x01258d4c, "%r13"=>0x012d8d4c,
                "%r14"=>0x01358d4c, "%r15"=>0x013d8d4c );

          $$lineref =~ /^\s*(\.\w+)/ or return undef;
          my ($dir,$consumed) = ($1,$+[0]);

          if (!$masm) {
              # GNU-style output: pass the directive through, except for
              # the synthetic and non-portable ones.
              $$lineref =~ s/\@abi\-omnipotent/\@function/;
              $$lineref =~ s/\@function.*/\@function/;

              my $text;
              if ($$lineref =~ /\.picmeup\s+(%r[\w]+)/i) {
                  $text = sprintf "\t.long\t0x%x,0x90000000",$opcode{$1};
              }
              elsif ($$lineref =~ /\.asciz\s+"(.*)"$/) {
                  $text = ".byte\t".join(",",unpack("C*",$1),0);
              }
              elsif ($$lineref =~ /\.extern/) {
                  $text = "";    # swallow extern
              }
              else {
                  $text = $$lineref;
              }
              $self->{value} = $text;
              $$lineref = "";
              return $self;
          }

          # MASM output: everything after the directive word is its
          # argument string.
          undef $self->{value};
          my $args = substr($$lineref,$consumed);
          $args =~ s/^\s+//;
          $$lineref = $args;

          # The patterns are tried in this order; the first hit wins.
          if ($dir =~ /\.(text)/) {
              my $segment = "_$1\$";
              $segment =~ tr/a-z/A-Z/;

              # close the segment that is currently open, if any
              my $v = $current_segment ? "$current_segment\tENDS\n" : "";
              $current_segment = $segment;
              $v .= "$current_segment\tSEGMENT ";
              $v .= ($masm>=$masmref) ? "ALIGN(64)" : "PAGE";
              $v .= " 'CODE'";
              $self->{value} = $v;
          }
          elsif ($dir =~ /\.extern/) {
              $self->{value} = "EXTRN\t".$args.":BYTE";
          }
          elsif ($dir =~ /\.globl/) {
              $self->{value} = "PUBLIC\t".$args;
          }
          elsif ($dir =~ /\.type/) {
              # .type name,@function[,N] or .type name,@abi-omnipotent
              my ($sym,$type,$narg) = split(',',$args);

              if ($type eq "\@function") {
                  $current_function =
                      { name=>$sym, abi=>"svr4", narg=>$narg };
              }
              elsif ($type eq "\@abi-omnipotent") {
                  $current_function = { name=>$sym };
              }
          }
          elsif ($dir =~ /\.size/) {
              if (defined($current_function)) {
                  $self->{value} = "$current_function->{name}\tENDP";
                  undef $current_function;
              }
          }
          elsif ($dir =~ /\.align/) {
              $self->{value} = "ALIGN\t".$args;
          }
          elsif ($dir =~ /\.(byte|value|long|quad)/) {
              my $sz = substr($1,0,1);
              $sz =~ tr/bvlq/BWDQ/;

              # hexadecimal literals are respelled, anything else is
              # formatted as a number
              my $conv = sub {
                  my $var = shift;
                  return $var if ($var =~ s/0x([0-9a-f]+)/0$1h/i);
                  return sprintf("0%Xh",$var);
              };

              my @arr  = split(',',$args);
              my $last = pop(@arr);
              $self->{value} = "\tD$sz\t".
                               join(",",map { $conv->($_) } (@arr,$last));
          }
          elsif ($dir =~ /\.picmeup/) {
              $self->{value} =
                  sprintf "\tDD\t 0%Xh,090000000h",$opcode{$args};
          }
          elsif ($dir =~ /\.asciz/) {
              if ($args =~ /^"(.*)"$/) {
                  my @str = (unpack("C*",$1),0);

                  # full rows of 16 bytes, then whatever is left
                  while (@str>16) {
                      my @row = splice(@str,0,16);
                      $self->{value} .= "DB\t".join(",",@row)."\n";
                  }
                  $self->{value} .= "DB\t".join(",",@str) if (@str);
              }
          }

          $$lineref = "";
          return $self;
      }

      # Render the directive text prepared by re().
      sub out {
          my $self = shift;

          return $self->{value};
      }
  }
  # Main translation loop: read the input line by line, split every line
  # into label, directive or instruction with up to two operands, and
  # print the translation.  Whatever is left of the line after parsing is
  # echoed after the translated part.
  while(my $line=<>) {

      chomp($line);

      $line =~ s|[#!].*$||;     # get rid of asm-style comments...
      $line =~ s|/\*.*\*/||;    # ... and C-style comments...
      $line =~ s|^\s+||;        # ... and skip white spaces in beginning

      my ($label,$opcode,$dst,$src,$sz);

      if ($label=label->re(\$line)) { print $label->out(); }

      if (directive->re(\$line)) {
          printf "%s",directive->out();
      } elsif ($opcode=opcode->re(\$line)) { ARGUMENT: {

          # first operand: a register also tells the operand size
          if ($src=register->re(\$line)) { opcode->size($src->size()); }
          elsif ($src=const->re(\$line)) { }
          elsif ($src=ea->re(\$line))    { }
          elsif ($src=expr->re(\$line))  { }

          last ARGUMENT if ($line !~ /^,/);

          $line = substr($line,1); $line =~ s/^\s+//;

          # second operand
          if ($dst=register->re(\$line)) { opcode->size($dst->size()); }
          elsif ($dst=const->re(\$line)) { }
          elsif ($dst=ea->re(\$line))    { }

          } # ARGUMENT:

          $sz=opcode->size();

          if (defined($dst)) {
              if (!$masm) {
                  # AT&T operand order: source first
                  printf "\t%s\t%s,%s", $opcode->out($dst->size()),
                                        $src->out($sz),$dst->out($sz);
              } else {
                  # MASM operand order: destination first
                  printf "\t%s\t%s,%s", $opcode->out(),
                                        $dst->out($sz),$src->out($sz);
              }
          } elsif (defined($src)) {
              printf "\t%s\t%s",$opcode->out(),$src->out($sz);
          } else {
              printf "\t%s",$opcode->out();
          }
      }

      print $line,"\n";
  }

  # MASM wants the last segment closed and the module terminated.
  print "\n$current_segment\tENDS\nEND\n" if ($masm);

  # Output is buffered, so write errors (e.g. a full disk) may only show
  # up here; fail loudly instead of leaving a truncated file behind.
  close STDOUT or die "error closing STDOUT: $!";
  446. #################################################
  447. # Cross-reference x86_64 ABI "card"
  448. #
  449. # Unix Win64
  450. # %rax * *
  451. # %rbx - -
  452. # %rcx #4 #1
  453. # %rdx #3 #2
  454. # %rsi #2 -
  455. # %rdi #1 -
  456. # %rbp - -
  457. # %rsp - -
  458. # %r8 #5 #3
  459. # %r9 #6 #4
  460. # %r10 * *
  461. # %r11 * *
  462. # %r12 - -
  463. # %r13 - -
  464. # %r14 - -
  465. # %r15 - -
  466. #
  467. # (*) volatile register
  468. # (-) preserved by callee
  469. # (#) Nth argument, volatile
  470. #
  471. # In Unix terms top of stack is argument transfer area for arguments
  472. # which could not be accommodated in registers. Or in other words 7th
  473. # [integer] argument resides at 8(%rsp) upon function entry point.
  474. # 128 bytes above %rsp constitute a "red zone" which is not touched
  475. # by signal handlers and can be used as temporary storage without
  476. # allocating a frame.
  477. #
  478. # In Win64 terms N*8 bytes on top of stack is argument transfer area,
  479. # which belongs to/can be overwritten by callee. N is the number of
  480. # arguments passed to callee, *but* not less than 4! This means that
  481. # upon function entry point 5th argument resides at 40(%rsp), as well
  482. # as that 32 bytes from 8(%rsp) can always be used as temporary
  483. # storage [without allocating a frame]. One can actually argue that
  484. # one can assume a "red zone" above stack pointer under Win64 as well.
  485. # Point is that at apparently no occasion Windows kernel would alter
  486. # the area above user stack pointer in true asynchronous manner...
  487. #
  488. # All the above means that if assembler programmer adheres to Unix
  489. # register and stack layout, but disregards the "red zone" existence,
  490. # it's possible to use following prologue and epilogue to "gear" from
  491. # Unix to Win64 ABI in leaf functions with not more than 6 arguments.
  492. #
  493. # omnipotent_function:
  494. # ifdef WIN64
  495. # movq %rdi,8(%rsp)
  496. # movq %rsi,16(%rsp)
  497. # movq %rcx,%rdi ; if 1st argument is actually present
  498. # movq %rdx,%rsi ; if 2nd argument is actually ...
  499. # movq %r8,%rdx ; if 3rd argument is ...
  500. # movq %r9,%rcx ; if 4th argument ...
  501. # movq 40(%rsp),%r8 ; if 5th ...
  502. # movq 48(%rsp),%r9 ; if 6th ...
  503. # endif
  504. # ...
  505. # ifdef WIN64
  506. # movq 8(%rsp),%rdi
  507. # movq 16(%rsp),%rsi
  508. # endif
  509. # ret