/* gstaging.c */
/* This file is part of asmc, a bootstrapping OS with minimal seed
   Copyright (C) 2018 Giovanni Mascellani <gio@debian.org>
   https://gitlab.com/giomasce/asmc

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <https://www.gnu.org/licenses/>. */
  14. #include <stdio.h>
  15. #include "platform.h"
  16. #define MAX_TOKEN_LEN 128
  17. #define STACK_LEN 1024
  18. #define SYMBOL_TABLE_LEN 1024
  19. #define WRITE_LABEL_BUF_LEN 128
  20. #define TEMP_VAR "__temp"
  21. char write_label_buf[WRITE_LABEL_BUF_LEN];
  22. int atoi(char*);
  23. int sprintf(char *str, const char *format, ...);
  24. int find_symbol(const char *name, int *loc, int *arity);
  25. void add_symbol(const char *name, int loc, int arity);
  26. void add_symbol_wrapper(const char *name, int loc, int arity);
  27. void add_symbol_placeholder(const char *name, int arity);
  28. void fix_symbol_placeholder(const char *name, int loc, int arity);
  29. int *get_symbol_num();
  30. int *get_stage();
  31. int *get_label_num();
  32. char *get_stack_vars();
  33. int *get_block_depth();
  34. int *get_stack_depth();
  35. int *get_temp_depth();
  36. int *get_token_given_back();
  37. int *get_token_len();
  38. char *get_token_buf();
  39. char *get_buf2();
  40. int *get_read_fd();
  41. int *get_emit_fd();
  42. int *get_current_loc();
  43. int decode_number(const char *operand, unsigned int *num);
  44. int strcmp(const char *s1, const char *s2);
  45. void strcpy(char *d, const char *s);
  46. int strlen(const char *s);
  47. void init_symbols();
  48. void init_g_compiler();
  49. void assert(int cond);
  50. void assert(int cond) {
  51. if (!cond) {
  52. platform_panic();
  53. }
  54. }
  55. void emit(char c);
  56. void emit2(char c) {
  57. if (*get_stage() == 1) {
  58. platform_write_char(*get_emit_fd(), c);
  59. }
  60. (*get_current_loc())++;
  61. }
  62. void emit32(int x);
  63. void emit322(int x) {
  64. emit(x);
  65. emit(x >> 8);
  66. emit(x >> 16);
  67. emit(x >> 24);
  68. }
  69. void emit_str(char *x, int len);
  70. void emit_str2(char *x, int len) {
  71. while (len > 0) {
  72. emit(*x);
  73. x++;
  74. len--;
  75. }
  76. }
  77. int gen_label();
  78. int gen_label2() {
  79. return (*get_label_num())++;
  80. }
  81. char *write_label(int id);
  82. char *write_label2(int id) {
  83. sprintf(write_label_buf, ".%d", id);
  84. return write_label_buf;
  85. }
  86. int get_symbol(char *name, int *arity);
  87. int get_symbol2(char *name, int *arity) {
  88. if (*get_stage() == 1 || arity != 0) {
  89. int loc;
  90. int res = find_symbol(name, &loc, arity);
  91. assert(res);
  92. return loc;
  93. } else {
  94. return 0;
  95. }
  96. }
  97. void push_var(char *var_name, int temp);
  98. void push_var2(char *var_name, int temp) {
  99. int len = strlen(var_name);
  100. assert(len > 0);
  101. assert(len < MAX_TOKEN_LEN);
  102. assert(*get_stack_depth() < STACK_LEN);
  103. strcpy(get_stack_vars() + *get_stack_depth() * MAX_TOKEN_LEN, var_name);
  104. (*get_stack_depth())++;
  105. if (temp) {
  106. (*get_temp_depth())++;
  107. } else {
  108. assert(*get_temp_depth() == 0);
  109. }
  110. }
  111. void pop_var(int temp);
  112. void pop_var2(int temp) {
  113. assert(*get_stack_depth() > 0);
  114. (*get_stack_depth())--;
  115. if (temp) {
  116. assert(*get_temp_depth() > 0);
  117. (*get_temp_depth())--;
  118. }
  119. }
  120. int pop_temps();
  121. int pop_temps2() {
  122. while (*get_temp_depth() > 0) {
  123. pop_var(1);
  124. }
  125. }
  126. int find_in_stack(char *var_name);
  127. int find_in_stack2(char *var_name) {
  128. int i;
  129. for (i = 0; i < *get_stack_depth(); i++) {
  130. if (strcmp(var_name, get_stack_vars() + (*get_stack_depth() - 1 - i) * MAX_TOKEN_LEN) == 0) {
  131. return i;
  132. }
  133. }
  134. return -1;
  135. }
  136. int is_whitespace(char x);
  137. int is_whitespace2(char x) {
  138. return x == ' ' || x == '\t' || x == '\n';
  139. }
  140. char *get_token();
  141. char *get_token2() {
  142. if (*get_token_given_back()) {
  143. *get_token_given_back() = 0;
  144. return get_token_buf();
  145. }
  146. int x;
  147. *get_token_len() = 0;
  148. int state = 0;
  149. while (1) {
  150. x = platform_read_char(*get_read_fd());
  151. if (x == -1) {
  152. break;
  153. }
  154. int save_char = 0;
  155. if (state == 0) {
  156. if (is_whitespace(x)) {
  157. if (*get_token_len() > 0) {
  158. break;
  159. }
  160. } else if ((char) x == '#') {
  161. state = 1;
  162. } else {
  163. if ((char) x == '"') {
  164. state = 2;
  165. } else if ((char) x == '\'') {
  166. state = 4;
  167. }
  168. save_char = 1;
  169. }
  170. } else if (state == 1) {
  171. if ((char) x == '\n') {
  172. state = 0;
  173. if (*get_token_len() > 0) {
  174. break;
  175. }
  176. }
  177. } else if (state == 2) {
  178. if ((char) x == '"') {
  179. state = 0;
  180. } else if ((char) x == '\\') {
  181. state = 3;
  182. }
  183. save_char = 1;
  184. } else if (state == 3) {
  185. state = 2;
  186. save_char = 1;
  187. } else if (state == 4) {
  188. if ((char) x == '\'') {
  189. state = 0;
  190. } else if ((char) x == '\\') {
  191. state = 5;
  192. }
  193. save_char = 1;
  194. } else if (state == 5) {
  195. state = 4;
  196. save_char = 1;
  197. } else {
  198. assert(0);
  199. }
  200. if (save_char) {
  201. get_token_buf()[(*get_token_len())++] = (char) x;
  202. }
  203. }
  204. get_token_buf()[*get_token_len()] = '\0';
  205. return get_token_buf();
  206. }
  207. void give_back_token();
  208. void give_back_token2() {
  209. assert(!*get_token_given_back());
  210. *get_token_given_back() = 1;
  211. }
  212. char escaped(char x);
  213. char escaped2(char x) {
  214. if (x == 'n') { return '\n'; }
  215. if (x == 't') { return '\t'; }
  216. if (x == '0') { return '\0'; }
  217. if (x == '\\') { return '\\'; }
  218. if (x == '\'') { return '\''; }
  219. if (x == '"') { return '"'; }
  220. return 0;
  221. }
  222. void emit_escaped_string(char *s);
  223. void emit_escaped_string2(char *s) {
  224. assert(*s == '"');
  225. s++;
  226. while (1) {
  227. assert(*s != 0);
  228. if (*s == '"') {
  229. s++;
  230. assert(*s == 0);
  231. return;
  232. }
  233. if (*s == '\\') {
  234. s++;
  235. assert(*s != 0);
  236. emit(escaped(*s));
  237. } else {
  238. emit(*s);
  239. }
  240. s++;
  241. }
  242. }
  243. int decode_number_or_char(const char *operand, unsigned int *num);
  244. int decode_number_or_char2(const char *operand, unsigned int *num) {
  245. if (*operand == '\'') {
  246. if (operand[1] == '\\') {
  247. *num = escaped(operand[2]);
  248. assert(operand[3] == '\'');
  249. assert(operand[4] == 0);
  250. } else {
  251. *num = operand[1];
  252. assert(operand[2] == '\'');
  253. assert(operand[3] == 0);
  254. }
  255. return 1;
  256. } else {
  257. return decode_number(operand, num);
  258. }
  259. }
  260. int compute_rel(int addr);
  261. int compute_rel2(int addr) {
  262. return addr - *get_current_loc() - 4;
  263. }
  264. void push_expr(char *tok, int want_addr);
  265. void push_expr2(char *tok, int want_addr) {
  266. // Try to interpret as a number
  267. int val;
  268. if (decode_number_or_char(tok, &val)) {
  269. assert(!want_addr);
  270. push_var(TEMP_VAR, 1);
  271. emit(0x68); // push val
  272. emit32(val);
  273. return;
  274. }
  275. // Look for the name in the stack
  276. int pos = find_in_stack(tok);
  277. if (pos != -1) {
  278. if (want_addr) {
  279. push_var(TEMP_VAR, 1);
  280. emit_str("\x8d\x84\x24", 3); // lea eax, [esp+pos]
  281. emit32(4 * pos);
  282. emit(0x50); // push eax
  283. } else {
  284. push_var(TEMP_VAR, 1);
  285. emit_str("\xff\xb4\x24", 3); // push [esp+pos]
  286. emit32(4 * pos);
  287. }
  288. } else {
  289. int arity;
  290. int loc = get_symbol(tok, &arity);
  291. if (arity == -2) {
  292. assert(!want_addr);
  293. }
  294. if (want_addr || arity == -2) {
  295. push_var(TEMP_VAR, 1);
  296. emit(0x68); // push loc
  297. emit32(loc);
  298. } else {
  299. if (arity == -1) {
  300. push_var(TEMP_VAR, 1);
  301. emit(0xb8); // mov eax, loc
  302. emit32(loc);
  303. emit_str("\xff\x30", 2); // push [eax]
  304. } else {
  305. emit(0xe8); // call rel
  306. emit32(compute_rel(loc));
  307. emit_str("\x81\xc4", 2); // add esp, ...
  308. emit32(4 * arity);
  309. while (arity > 0) {
  310. pop_var(1);
  311. arity--;
  312. }
  313. push_var(TEMP_VAR, 1);
  314. emit(0x50); // push eax
  315. }
  316. }
  317. }
  318. }
  319. void push_expr_until_brace();
  320. void push_expr_until_brace2() {
  321. while (1) {
  322. char *tok = get_token();
  323. if (strcmp(tok, "{") == 0) {
  324. give_back_token();
  325. break;
  326. } else if (*tok == '"') {
  327. int str_lab = gen_label();
  328. int jmp_lab = gen_label();
  329. emit(0xe9); // jmp rel
  330. emit32(compute_rel(get_symbol(write_label(jmp_lab), 0)));
  331. add_symbol_wrapper(write_label(str_lab), *get_current_loc(), -1);
  332. emit_escaped_string(tok);
  333. emit(0);
  334. add_symbol_wrapper(write_label(jmp_lab), *get_current_loc(), -1);
  335. push_var(TEMP_VAR, 1);
  336. emit(0x68); // push val
  337. emit32(get_symbol(write_label(str_lab), 0));
  338. } else {
  339. // Check if we want the address
  340. int want_addr = 0;
  341. if (*tok == '@') {
  342. tok++;
  343. want_addr = 1;
  344. }
  345. push_expr(tok, want_addr);
  346. }
  347. }
  348. assert(*get_temp_depth() > 0);
  349. }
  350. int decode_number_or_symbol(char *str);
  351. int decode_number_or_symbol2(char *str) {
  352. int val;
  353. int res = decode_number_or_char(str, &val);
  354. if (res) {
  355. return val;
  356. }
  357. int arity;
  358. return get_symbol(str, &arity);
  359. }
  360. void parse_block();
  361. void parse_block2() {
  362. (*get_block_depth())++;
  363. int saved_stack_depth = *get_stack_depth();
  364. char *tok = get_token();
  365. assert(strcmp(tok, "{") == 0);
  366. while (1) {
  367. char *tok = get_token();
  368. //fprintf(stderr, "Parsing token %s\n", tok);
  369. assert(*tok != '\0');
  370. if (strcmp(tok, "}") == 0) {
  371. break;
  372. } else if (strcmp(tok, ";") == 0) {
  373. emit_str("\x81\xc4", 2); // add esp, ...
  374. emit32(4 * *get_temp_depth());
  375. pop_temps();
  376. } else if (strcmp(tok, "ret") == 0) {
  377. if (*get_temp_depth() > 0) {
  378. emit(0x58); // pop eax
  379. pop_var(1);
  380. }
  381. emit_str("\x81\xc4", 2); // add esp, ..
  382. emit32(4 * *get_stack_depth());
  383. emit_str("\x5d\xc3", 2); // pop ebp; ret
  384. } else if (strcmp(tok, "if") == 0) {
  385. push_expr_until_brace();
  386. int else_lab = gen_label();
  387. pop_var(1);
  388. emit_str("\x58\x83\xF8\x00\x0F\x84", 6); // pop eax; cmp eax, 0; je rel
  389. emit32(compute_rel(get_symbol(write_label(else_lab), 0)));
  390. parse_block();
  391. char *else_tok = get_token();
  392. if (strcmp(else_tok, "else") == 0) {
  393. int fi_lab = gen_label();
  394. emit(0xe9); // jmp rel
  395. emit32(compute_rel(get_symbol(write_label(fi_lab), 0)));
  396. add_symbol_wrapper(write_label(else_lab), *get_current_loc(), -1);
  397. parse_block();
  398. add_symbol_wrapper(write_label(fi_lab), *get_current_loc(), -1);
  399. } else {
  400. add_symbol_wrapper(write_label(else_lab), *get_current_loc(), -1);
  401. give_back_token();
  402. }
  403. } else if (strcmp(tok, "while") == 0) {
  404. int restart_lab = gen_label();
  405. int end_lab = gen_label();
  406. add_symbol_wrapper(write_label(restart_lab), *get_current_loc(), -1);
  407. push_expr_until_brace();
  408. pop_var(1);
  409. emit_str("\x58\x83\xF8\x00\x0F\x84", 6); // pop eax; cmp eax, 0; je rel
  410. emit32(compute_rel(get_symbol(write_label(end_lab), 0)));
  411. parse_block();
  412. emit(0xe9); // jmp rel
  413. emit32(compute_rel(get_symbol(write_label(restart_lab), 0)));
  414. add_symbol_wrapper(write_label(end_lab), *get_current_loc(), -1);
  415. } else if (*tok == '$') {
  416. char *name = tok + 1;
  417. assert(*name != '\0');
  418. push_var(name, 0);
  419. emit_str("\x83\xec\x04", 3); // sub esp, 4
  420. } else if (*tok == '\\') {
  421. char *arity_str = tok + 1;
  422. assert(*arity_str != '\0');
  423. int arity = decode_number_or_symbol(arity_str);
  424. pop_var(1);
  425. emit_str("\x58\xff\xd0", 3); // pop eax; call eax
  426. emit_str("\x81\xc4", 2); // add esp, ...
  427. emit32(4 * arity);
  428. while (arity > 0) {
  429. pop_var(1);
  430. arity--;
  431. }
  432. push_var(TEMP_VAR, 1);
  433. emit(0x50); // push eax
  434. } else if (*tok == '"') {
  435. int str_lab = gen_label();
  436. int jmp_lab = gen_label();
  437. emit(0xe9); // jmp rel
  438. emit32(compute_rel(get_symbol(write_label(jmp_lab), 0)));
  439. add_symbol_wrapper(write_label(str_lab), *get_current_loc(), -1);
  440. emit_escaped_string(tok);
  441. emit(0);
  442. add_symbol_wrapper(write_label(jmp_lab), *get_current_loc(), -1);
  443. push_var(TEMP_VAR, 1);
  444. emit(0x68); // push val
  445. emit32(get_symbol(write_label(str_lab), 0));
  446. } else {
  447. // Check if we want the address
  448. int want_addr = 0;
  449. if (*tok == '@') {
  450. tok++;
  451. want_addr = 1;
  452. }
  453. push_expr(tok, want_addr);
  454. }
  455. //fprintf(stderr, "Stack depth: %d; temp depth: %d; block depth: %d\n", *get_stack_depth(), *get_temp_depth(), *get_block_depth());
  456. }
  457. emit_str("\x81\xc4", 2); // add esp, ..
  458. assert(*get_stack_depth() >= saved_stack_depth);
  459. emit32(4 * (*get_stack_depth() - saved_stack_depth));
  460. *get_stack_depth() = saved_stack_depth;
  461. (*get_block_depth())--;
  462. }
  463. void parse();
  464. void parse2() {
  465. while (1) {
  466. char *tok = get_token();
  467. if (*tok == 0) {
  468. break;
  469. }
  470. if (strcmp(tok, "fun") == 0) {
  471. char *name = get_token();
  472. strcpy(get_buf2(), name);
  473. name = get_buf2();
  474. char *arity_str = get_token();
  475. int arity = atoi(arity_str);
  476. fix_symbol_placeholder(name, *get_current_loc(), arity);
  477. emit_str("\x55\x89\xe5", 3); // push ebp; mov ebp, esp
  478. parse_block();
  479. emit_str("\x5d\xc3", 2); // pop ebp; ret
  480. } else if (strcmp(tok, "ifun") == 0) {
  481. char *name = get_token();
  482. strcpy(get_buf2(), name);
  483. name = get_buf2();
  484. char *arity_str = get_token();
  485. int arity = atoi(arity_str);
  486. add_symbol_placeholder(name, arity);
  487. } else if (strcmp(tok, "const") == 0) {
  488. char *name = get_token();
  489. strcpy(get_buf2(), name);
  490. name = get_buf2();
  491. char *val_str = get_token();
  492. int val = decode_number_or_symbol(val_str);
  493. add_symbol_wrapper(name, val, -2);
  494. } else if (*tok == '$') {
  495. char *name = tok + 1;
  496. assert(*name != '\0');
  497. add_symbol_wrapper(name, *get_current_loc(), -1);
  498. emit32(0);
  499. } else if (*tok == '%') {
  500. char *name = tok + 1;
  501. assert(*name != '\0');
  502. add_symbol_wrapper(name, *get_current_loc(), -2);
  503. char *len_str = get_token();
  504. int len = decode_number_or_symbol(len_str);
  505. while (len > 0) {
  506. emit(0);
  507. len--;
  508. }
  509. } else {
  510. assert(0);
  511. }
  512. }
  513. }
  514. void emit_preamble() {
  515. /*
  516. 0: 8b 44 24 04 mov eax,DWORD PTR [esp+0x4]
  517. 4: 8b 4c 24 08 mov ecx,DWORD PTR [esp+0x8]
  518. 8: 89 01 mov DWORD PTR [ecx],eax
  519. a: c3 ret
  520. */
  521. add_symbol_wrapper("=", *get_current_loc(), 2);
  522. emit_str("\x8B\x44\x24\x04\x8B\x4C\x24\x08\x89\x01\xC3", 11);
  523. /*
  524. 0: 8b 44 24 04 mov eax,DWORD PTR [esp+0x4]
  525. 4: 8b 44 85 08 mov eax,DWORD PTR [ebp+eax*4+0x8]
  526. 8: c3 ret
  527. */
  528. add_symbol_wrapper("param", *get_current_loc(), 1);
  529. emit_str("\x8B\x44\x24\x04\x8B\x44\x85\x08\xC3", 9);
  530. /*
  531. 0: 8b 44 24 04 mov eax,DWORD PTR [esp+0x4]
  532. 4: 03 44 24 08 add eax,DWORD PTR [esp+0x8]
  533. 8: c3 ret
  534. */
  535. add_symbol_wrapper("+", *get_current_loc(), 2);
  536. emit_str("\x8B\x44\x24\x04\x03\x44\x24\x08\xC3", 9);
  537. /*
  538. 0: 8b 44 24 04 mov eax,DWORD PTR [esp+0x4]
  539. 4: 2b 44 24 08 sub eax,DWORD PTR [esp+0x8]
  540. 8: c3 ret
  541. */
  542. add_symbol_wrapper("-", *get_current_loc(), 2);
  543. emit_str("\x8B\x44\x24\x04\x2b\x44\x24\x08\xC3", 9);
  544. }
  545. void compile(int fd_in, int fd_out, int start_loc, int with_preamble);
  546. void compile2(int fd_in, int fd_out, int start_loc, int with_preamble) {
  547. *get_emit_fd() = fd_out;
  548. *get_read_fd() = fd_in;
  549. *get_block_depth() = 0;
  550. *get_stack_depth() = 0;
  551. *get_temp_depth() = 0;
  552. for (*get_stage() = 0; *get_stage() < 2; (*get_stage())++) {
  553. platform_reset_file(fd_in);
  554. *get_label_num() = 0;
  555. *get_current_loc() = start_loc;
  556. if (with_preamble) {
  557. emit_preamble();
  558. }
  559. parse();
  560. assert(*get_block_depth() == 0);
  561. assert(*get_stack_depth() == 0);
  562. assert(*get_temp_depth() == 0);
  563. }
  564. }
  565. int main(int argc, char **argv) {
  566. init_symbols();
  567. init_g_compiler();
  568. int fd_in = platform_open_file(argv[1]);
  569. int fd_out = 1;
  570. compile(fd_in, fd_out, 0x100000, 1);
  571. return 0;
  572. }