1
0

cc.c 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144
  1. /* This file is part of asmc, a bootstrapping OS with minimal seed
  2. Copyright (C) 2018 Giovanni Mascellani <gio@debian.org>
  3. https://gitlab.com/giomasce/asmc
  4. This program is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program. If not, see <https://www.gnu.org/licenses/>. */
  14. #include <stdio.h>
  15. #include <stdlib.h>
  16. #include <assert.h>
  17. #include <string.h>
  18. #include <sys/types.h>
  19. #include <sys/stat.h>
  20. #include <sys/mman.h>
  21. #include <fcntl.h>
  22. #include <unistd.h>
  /* One-byte operator tokens.  fix_operations() rewrites two-character
     operators (<<, >>, <=, >=, ==, !=, &&, ||) into these, and
     fix_operations2() rewrites call parentheses and unary '*' / '&'.
     All values have the high bit set; fix_strings() asserts that raw
     source bytes are below 0x80. */
  23. #define TOK_CALL_OPEN ((char) 0x80)
  24. #define TOK_CALL_CLOSED ((char) 0x81)
  25. #define TOK_SHL ((char) 0x82)
  26. #define TOK_SHR ((char) 0x83)
  27. #define TOK_LE ((char) 0x84)
  28. #define TOK_GE ((char) 0x85)
  29. #define TOK_EQ ((char) 0x86)
  30. #define TOK_NE ((char) 0x87)
  31. #define TOK_AND ((char) 0x88)
  32. #define TOK_OR ((char) 0x89)
  33. #define TOK_DEREF ((char) 0x8a)
  34. #define TOK_ADDR ((char) 0x8b)
  /* The same token bytes as string literals, grouped by precedence level;
     eval_expr() passes them to parse_expr() as pivot sets. */
  35. #define TOKS_CALL_OPEN "\x80"
  36. #define TOKS_AND "\x88"
  37. #define TOKS_OR "\x89"
  38. #define TOKS_EQ_NE "\x86\x87"
  39. #define TOKS_LE_GE "\x84\x85"
  40. #define TOKS_SHL_SHR "\x82\x83"
  41. #define TOKS_DEREF_ADDR "\x8a\x8b"
  /* Table sizes.  MAX_ID_LEN is the size of one name slot including the
     terminating NUL (names are asserted to be shorter than this).
     SYMBOL_TABLE_LEN doubles as the "not found" result of find_symbol(). */
  42. #define MAX_ID_LEN 128
  43. #define STACK_LEN 1024
  44. #define SYMBOL_TABLE_LEN 1024
  45. #define MAX_STRING_NUM 1024
  46. #define GEN_LAB_BUF_LEN 32
  /* Compiler state shared by every routine in this file. */
  /* Nesting level of compile_block(); it is 1 while the top-level block is
     being compiled. */
  47. int block_depth;
  /* Number of live entries in stack_vars. */
  48. int stack_depth;
  /* Address of the next byte to be emitted; advanced by emit(). */
  49. int current_loc;
  /* stack_depth recorded right after "__ret" is pushed at function entry. */
  50. int ret_depth;
  /* Set when a statement starts with "char"; makes '=' store a single byte. */
  51. int char_op;
  /* Number of entries used in symbol_names / symbol_locs. */
  52. int symbol_num;
  /* Pass number: 0 records symbols without output, 1 writes the bytes. */
  53. int stage;
  /* Number of string literals queued for create_strings(). */
  54. int string_num;
  /* Next label id returned by gen_id(). */
  55. int lab_id;
  /* Names of the values currently on the generated program's stack, one
     MAX_ID_LEN-byte slot per entry. */
  56. char stack_vars[MAX_ID_LEN * STACK_LEN];
  /* Symbol names (MAX_ID_LEN-byte slots) and their addresses. */
  57. char symbol_names[MAX_ID_LEN * SYMBOL_TABLE_LEN];
  58. int symbol_locs[SYMBOL_TABLE_LEN];
  /* For each queued string literal: its label id and its [begin, end) span
     in the source buffer, quotes included. */
  59. int string_ids[MAX_STRING_NUM];
  60. char *string_begin[MAX_STRING_NUM];
  61. char *string_end[MAX_STRING_NUM];
  /* Scratch buffer returned by gen_lab(); overwritten on every call. */
  62. char gen_lab_buf[GEN_LAB_BUF_LEN];
  63. int gen_id() {
  64. return lab_id++;
  65. }
  66. char *gen_lab(int id) {
  67. sprintf(gen_lab_buf, "__label%d", id);
  68. return gen_lab_buf;
  69. }
  /* Map the character that follows a backslash to the byte it denotes.
     Handles n, t, 0, backslash and single quote; anything else yields 0. */
  char escaped(char x) {
    switch (x) {
    case 'n':
      return '\n';
    case 't':
      return '\t';
    case '0':
      return '\0';
    case '\\':
      return '\\';
    case '\'':
      return '\'';
    default:
      return 0;
    }
  }
  /* Scan [begin, end) backwards, starting at end-1, and return the position
     where the nesting opened by `closed` characters is balanced again by
     `open` characters.  Mirrors find_matching(): if the last character is
     not `closed`, the depth is already zero and that character is returned.
     Returns 0 when the range is exhausted without balancing. */
  char *find_matching_rev(char open, char closed, char *begin, char *end) {
    int depth = 0;
    char *p = end;
    // Walk from the last character down to `begin` inclusive.
    while (p > begin) {
      p--;
      if (*p == closed) {
        depth++;
      } else if (*p == open) {
        depth--;
      }
      if (depth == 0) {
        return p;
      }
    }
    return 0;
  }
  /* Scan [begin, end) forwards and return the position where the nesting
     opened by `open` characters is balanced again by `closed` characters.
     If the first character is not `open`, the depth is already zero and
     `begin` itself is returned.  Returns 0 if the range ends unbalanced. */
  char *find_matching(char open, char closed, char *begin, char *end) {
    int nesting = 0;
    char *cur;
    for (cur = begin; cur < end; cur++) {
      if (*cur == open) {
        nesting += 1;
      } else if (*cur == closed) {
        nesting -= 1;
      }
      if (nesting == 0) {
        return cur;
      }
    }
    return 0;
  }
  /* Return 1 when the span [b1, e1) is exactly the NUL-terminated string
     b2 (same length, same bytes), otherwise 0. */
  int strncmp2(char *b1, char *e1, char *b2) {
    int len = strlen(b2);
    if (e1 - b1 != len) {
      return 0;
    }
    return strncmp(b1, b2, len) == 0;
  }
  113. int find_symbol(char *name) {
  114. int i;
  115. for (i = 0; i < symbol_num; i++) {
  116. if (strcmp(name, symbol_names + i * MAX_ID_LEN) == 0) {
  117. break;
  118. }
  119. }
  120. if (i == symbol_num) {
  121. i = SYMBOL_TABLE_LEN;
  122. }
  123. return i;
  124. }
  125. int find_symbol2(char *begin, char *end) {
  126. int i;
  127. for (i = 0; i < symbol_num; i++) {
  128. if (strncmp2(begin, end, symbol_names + i * MAX_ID_LEN)) {
  129. break;
  130. }
  131. }
  132. if (i == symbol_num) {
  133. i = SYMBOL_TABLE_LEN;
  134. }
  135. return i;
  136. }
  137. int get_symbol(char *name) {
  138. if (stage == 0) {
  139. return 0;
  140. } else if (stage == 1) {
  141. int pos = find_symbol(name);
  142. assert(pos != SYMBOL_TABLE_LEN);
  143. return symbol_locs[pos];
  144. } else {
  145. assert(0);
  146. }
  147. }
  148. int get_symbol2(char *begin, char *end) {
  149. if (stage == 0) {
  150. return 0;
  151. } else if (stage == 1) {
  152. int pos = find_symbol2(begin, end);
  153. assert(pos != SYMBOL_TABLE_LEN);
  154. return symbol_locs[pos];
  155. } else {
  156. assert(0);
  157. }
  158. }
  159. void add_symbol(char *name, int loc) {
  160. int len = strlen(name);
  161. assert(len > 0);
  162. assert(len < MAX_ID_LEN);
  163. if (stage == 0) {
  164. assert(find_symbol(name) == SYMBOL_TABLE_LEN);
  165. assert(symbol_num < SYMBOL_TABLE_LEN);
  166. symbol_locs[symbol_num] = loc;
  167. strcpy(symbol_names + symbol_num * MAX_ID_LEN, name);
  168. symbol_num = symbol_num + 1;
  169. } else if (stage == 1) {
  170. int idx = find_symbol(name);
  171. assert(idx < symbol_num);
  172. assert(symbol_locs[idx] == loc);
  173. } else {
  174. assert(0);
  175. }
  176. }
  177. void push_var(char *var_name) {
  178. int len = strlen(var_name);
  179. assert(len > 0);
  180. assert(len < MAX_ID_LEN);
  181. assert(stack_depth < STACK_LEN);
  182. strcpy(stack_vars + stack_depth * MAX_ID_LEN, var_name);
  183. stack_depth++;
  184. }
  185. void pop_var() {
  186. stack_depth--;
  187. }
  188. void pop_to_depth(int depth) {
  189. stack_depth = depth;
  190. }
  191. int find_in_stack(char *var_name) {
  192. int i;
  193. for (i = 0; i < stack_depth; i++) {
  194. if (strcmp(var_name, stack_vars + (stack_depth - 1 - i) * MAX_ID_LEN) == 0) {
  195. return i;
  196. }
  197. }
  198. return -1;
  199. }
  200. int find_in_stack2(char* begin, char *end) {
  201. int i;
  202. for (i = 0; i < stack_depth; i++) {
  203. if (strncmp2(begin, end, stack_vars + (stack_depth - 1 - i) * MAX_ID_LEN)) {
  204. return i;
  205. }
  206. }
  207. return -1;
  208. }
  209. void emit(char x) {
  210. if (stage == 1) {
  211. fwrite(&x, 1, 1, stdout);
  212. }
  213. current_loc++;
  214. }
  /* Emit `x` as four bytes, least significant byte first. */
  void emit32(int x) {
    int shift;
    for (shift = 0; shift < 32; shift += 8) {
      emit(x >> shift);
    }
  }
  /* Return 1 for space, tab and newline, 0 for every other character. */
  int is_whitespace(char x) {
    switch (x) {
    case ' ':
    case '\t':
    case '\n':
      return 1;
    default:
      return 0;
    }
  }
  /* Return 1 when `x` may appear in an identifier (ASCII letter, digit or
     underscore), otherwise 0. */
  int is_id(char x) {
    if (x == '_') {
      return 1;
    }
    if ('a' <= x && x <= 'z') {
      return 1;
    }
    if ('A' <= x && x <= 'Z') {
      return 1;
    }
    if ('0' <= x && x <= '9') {
      return 1;
    }
    return 0;
  }
  /* Delete every whitespace character from the buffer in place.
     Processing stops at `end` or at the first NUL, whichever comes first
     (callers pass end == 0 to mean "up to the NUL").  The compacted text is
     NUL-terminated unless it fills the range exactly up to `end`. */
  void remove_spaces(char *begin, char *end) {
    char *dst = begin;
    char *src;
    for (src = begin; src != end; src++) {
      char c = *src;
      if (c == '\0') {
        *dst = '\0';
        return;
      }
      if (!is_whitespace(c)) {
        *dst = c;
        dst++;
      }
    }
    if (dst != end) {
      *dst = '\0';
    }
  }
  /* Return the offset from `s` of the first occurrence of `c` in [s, e),
     or -1 if the range ends or a NUL is reached first. */
  int find_char(char *s, char *e, char c) {
    char *cur;
    for (cur = s; cur != e; cur++) {
      if (*cur == c) {
        return cur - s;
      }
      if (*cur == '\0') {
        return -1;
      }
    }
    return -1;
  }
  /* Return the offset from `s` of the last occurrence of `c` in [s, e),
     or -1 if it does not occur.  An empty range (s == e) yields -1 instead
     of reading memory before `s`. */
  int find_char_back(char *s, char *e, char c) {
    char *cur = e;
    while (cur > s) {
      cur--;
      if (*cur == c) {
        return cur - s;
      }
    }
    return -1;
  }
  /* Test whether s1 is a prefix of s2.  On success return the position in
     s2 just past the prefix; otherwise return 0. */
  char *isstrpref(char *s1, char *s2) {
    for (; *s1 != '\0'; s1++, s2++) {
      if (*s1 != *s2) {
        return 0;
      }
    }
    return s2;
  }
  /* Strip leading and trailing whitespace from the NUL-terminated string
     `buf`, in place. */
  void trimstr(char *buf) {
    char *src = buf;
    char *dst = buf;
    // Skip the leading whitespace, then slide the rest to the front.
    while (is_whitespace(*src)) {
      src++;
    }
    while (*src != '\0') {
      *dst = *src;
      dst++;
      src++;
    }
    *dst = '\0';
    // Chop trailing whitespace by moving the terminator backwards.
    while (dst > buf && is_whitespace(dst[-1])) {
      dst--;
      *dst = '\0';
    }
  }
  303. void fix_operations(char *exp) {
  304. while (*exp != '\0') {
  305. if (*(exp+1) != '\0') {
  306. if (*exp == '<' && *(exp+1) == '<') { *exp = TOK_SHL; *(exp+1) = ' '; }
  307. if (*exp == '>' && *(exp+1) == '>') { *exp = TOK_SHR; *(exp+1) = ' '; }
  308. if (*exp == '<' && *(exp+1) == '=') { *exp = TOK_LE; *(exp+1) = ' '; }
  309. if (*exp == '>' && *(exp+1) == '=') { *exp = TOK_GE; *(exp+1) = ' '; }
  310. if (*exp == '=' && *(exp+1) == '=') { *exp = TOK_EQ; *(exp+1) = ' '; }
  311. if (*exp == '!' && *(exp+1) == '=') { *exp = TOK_NE; *(exp+1) = ' '; }
  312. if (*exp == '&' && *(exp+1) == '&') { *exp = TOK_AND; *(exp+1) = ' '; }
  313. if (*exp == '|' && *(exp+1) == '|') { *exp = TOK_OR; *(exp+1) = ' '; }
  314. }
  315. exp++;
  316. }
  317. }
  318. void fix_operations2(char *exp) {
  319. int prev_is_id = 0;
  320. while (*exp != '\0') {
  321. if (!prev_is_id) {
  322. if (*exp == '*') { *exp = TOK_DEREF; }
  323. if (*exp == '&') { *exp = TOK_ADDR; }
  324. } else {
  325. if (*exp == '(') {
  326. char *match = find_matching('(', ')', exp, exp+strlen(exp));
  327. assert(match != 0);
  328. *exp = TOK_CALL_OPEN;
  329. *match = TOK_CALL_CLOSED;
  330. }
  331. }
  332. if (is_id(*exp) || *exp == ')') {
  333. prev_is_id = 1;
  334. } else {
  335. prev_is_id = 0;
  336. }
  337. exp++;
  338. }
  339. }
  /* Write the characters of [begin, end) to stdout, stopping early at a
     NUL. */
  void print(char *begin, char *end) {
    char *cur;
    for (cur = begin; cur < end && *cur != '\0'; cur++) {
      putchar(*cur);
    }
  }
  /* Pre-process the raw source text [begin, end) in place:
     - the contents of '...' and "..." literals (not the quotes themselves)
       are shifted by 0x80 so later passes never mistake them for syntax;
     - // comments, block comments and lines starting at '#' are blanked;
     - tabs and newlines outside literals become plain spaces.
     Every input byte is asserted to be below 0x80. */
  void fix_strings(char *begin, char *end) {
    enum { IN_CODE = 0, IN_CHAR = 1, IN_STRING = 2, IN_LINE = 3, IN_BLOCK = 4 };
    int state = IN_CODE;
    for (; begin < end; begin++) {
      assert((unsigned char) *begin < 0x80);
      switch (state) {
      case IN_CODE:
        if (*begin == '\'') {
          state = IN_CHAR;
        } else if (*begin == '"') {
          state = IN_STRING;
        } else if (begin + 1 < end && *begin == '/' && *(begin+1) == '/') {
          state = IN_LINE;
          *begin = ' ';
          begin++;
          *begin = ' ';
        } else if (begin + 1 < end && *begin == '/' && *(begin+1) == '*') {
          state = IN_BLOCK;
          *begin = ' ';
          begin++;
          *begin = ' ';
        } else if (*begin == '#') {
          // Treated like a line comment.
          state = IN_LINE;
          *begin = ' ';
        } else if (is_whitespace(*begin)) {
          *begin = ' ';
        }
        break;
      case IN_CHAR:
      case IN_STRING: {
        char closer = (state == IN_CHAR) ? '\'' : '"';
        if (*begin == closer) {
          state = IN_CODE;
        } else if (*begin == '\\') {
          // Shift the backslash and the escaped character together so an
          // escaped quote does not end the literal.
          assert(begin + 1 < end);
          *begin = *begin + 0x80;
          begin++;
          *begin = *begin + 0x80;
        } else {
          assert(*begin >= 0x20);
          *begin = *begin + 0x80;
        }
        break;
      }
      case IN_LINE:
        if (*begin == '\n') {
          state = IN_CODE;
        }
        *begin = ' ';
        break;
      case IN_BLOCK:
        if (begin + 1 < end && *begin == '*' && *(begin+1) == '/') {
          state = IN_CODE;
          *begin = ' ';
          begin++;
          *begin = ' ';
        } else {
          *begin = ' ';
        }
        break;
      }
    }
  }
  /* Advance `s` past a run of identifier characters and return the position
     of the first character that is not one.  A null `s` is returned
     unchanged. */
  char *find_id(char *s) {
    while (s != 0 && is_id(*s)) {
      s++;
    }
    return s;
  }
  /* Parse the unsigned integer literal in [begin, end).
     Decimal by default; a "0x" / "0X" prefix selects hexadecimal, with
     digits a-f in either case.  The whole range must be consumed and must
     contain at least one digit.
     Returns 1 and stores the value in *num on success.  Returns 0 on a
     malformed literal, in which case *num is 0.  Overflow wraps modulo
     UINT_MAX + 1. */
  int decode_number(char *begin, char *end, unsigned int *num) {
    unsigned int base = 10;
    unsigned int value = 0;
    *num = 0;
    // Only look at the second character if it is inside the range.
    if (end - begin >= 2 && begin[0] == '0' && (begin[1] == 'x' || begin[1] == 'X')) {
      base = 16;
      begin += 2;
    }
    if (begin >= end) {
      // No digits at all (empty input or a bare "0x").
      return 0;
    }
    for (; begin < end; begin++) {
      char c = *begin;
      unsigned int digit;
      if ('0' <= c && c <= '9') {
        digit = c - '0';
      } else if (base == 16 && 'a' <= c && c <= 'f') {
        digit = c - 'a' + 10;
      } else if (base == 16 && 'A' <= c && c <= 'F') {
        digit = c - 'A' + 10;
      } else {
        return 0;
      }
      value = value * base + digit;
    }
    *num = value;
    return 1;
  }
  461. void eval_expr(char *begin, char *end, int addr);
  462. int run_eval_expr(char *begin, char *op, char *end, int addr) {
  463. if (*op == '=') {
  464. eval_expr(op+1, end, 0);
  465. eval_expr(begin, op, 1);
  466. emit(0x58); // pop eax
  467. pop_var();
  468. emit(0x59); // pop ecx
  469. pop_var();
  470. if (char_op) {
  471. char_op = 0;
  472. emit(0x88); // mov [eax], cl
  473. emit(0x08);
  474. } else {
  475. emit(0x89); // mov [eax], ecx
  476. emit(0x08);
  477. }
  478. push_var("__temp");
  479. if (addr) {
  480. emit(0x50); // push eax
  481. } else {
  482. emit(0x51); // push ecx
  483. }
  484. } else if (*op == TOK_CALL_OPEN) {
  485. assert(!addr);
  486. char *match = find_matching(TOK_CALL_OPEN, TOK_CALL_CLOSED, op, end);
  487. assert(match != 0);
  488. char *params_begin = op + 1;
  489. char *params_end = end - 1;
  490. assert(params_end == match);
  491. int param_num = 0;
  492. while (1) {
  493. if (params_begin == params_end) {
  494. break;
  495. }
  496. int pos = find_char_back(params_begin, params_end, ',');
  497. if (pos != -1) {
  498. eval_expr(params_begin + pos + 1, params_end, 0);
  499. params_end = params_begin + pos;
  500. param_num++;
  501. } else {
  502. eval_expr(params_begin, params_end, 0);
  503. param_num++;
  504. break;
  505. }
  506. }
  507. eval_expr(begin, op, 1);
  508. pop_var();
  509. emit(0x58); // pop eax
  510. emit(0xff); // call eax
  511. emit(0xd0);
  512. emit(0x81); // add esp, ...
  513. emit(0xc4);
  514. emit32(4 * param_num);
  515. for (int i = 0; i < param_num; i++) {
  516. pop_var();
  517. }
  518. push_var("__temp");
  519. emit(0x50); // push eax
  520. } else if (*op == TOK_DEREF) {
  521. assert(begin == op);
  522. eval_expr(op+1, end, 0);
  523. pop_var();
  524. emit(0x58); // pop eax
  525. if (!addr) {
  526. emit(0xb8); // mov eax, [eax]
  527. emit(0x00);
  528. }
  529. push_var("__temp");
  530. emit(0x50); // push eax
  531. } else if (*op == TOK_ADDR) {
  532. assert(!addr);
  533. assert(begin == op);
  534. eval_expr(op+1, end, 1);
  535. } else if (*op == '!' || *op == '~') {
  536. assert(!addr);
  537. assert(begin == op);
  538. eval_expr(op+1, end, 0);
  539. pop_var();
  540. emit(0x58); // pop eax
  541. if (*op == '!') {
  542. emit(0x83); // cmp eax, 0
  543. emit(0xf8);
  544. emit(0x00);
  545. emit(0x74); // je 0x9
  546. emit(0x04);
  547. emit(0x31); // xor eax, eax
  548. emit(0xc0);
  549. emit(0xeb); // jmp 0xe
  550. emit(0x05);
  551. emit(0xb8); // mov eax, 1
  552. emit32(1);
  553. } else if (*op == '~') {
  554. emit(0xf7); // not eax
  555. emit(0xd0);
  556. } else {
  557. assert(0);
  558. }
  559. emit(0x50); // push eax
  560. push_var("__temp");
  561. } else {
  562. assert(!addr);
  563. eval_expr(op+1, end, 0);
  564. eval_expr(begin, op, 0);
  565. emit(0x58); // pop eax
  566. pop_var();
  567. emit(0x59); // pop ecx
  568. pop_var();
  569. if (*op == '+') {
  570. emit(0x01); // add eax, ecx
  571. emit(0xc8);
  572. } else if (*op == '-') {
  573. emit(0x29); // sub eax, ecx
  574. emit(0xc8);
  575. } else if (*op == '*') {
  576. emit(0xf7); // imul ecx
  577. emit(0xe9);
  578. } else if (*op == '/') {
  579. emit(0x31); // xor edx, edx
  580. emit(0xd2);
  581. emit(0xf7); // idiv ecx
  582. emit(0xf9);
  583. } else if (*op == '%') {
  584. emit(0x31); // xor edx, edx
  585. emit(0xd2);
  586. emit(0xf7); // idiv ecx
  587. emit(0xf9);
  588. emit(0x89); // mov eax, edx
  589. emit(0xd0);
  590. } else if (*op == TOK_SHL) {
  591. emit(0xd3); // shl eax, cl
  592. emit(0xe0);
  593. } else if (*op == TOK_SHR) {
  594. emit(0xd3); // shr eax, cl
  595. emit(0xe8);
  596. } else if (*op == '&') {
  597. emit(0x21); // and eax, ecx
  598. emit(0xc8);
  599. } else if (*op == '|') {
  600. emit(0x09); // or eax, ecx
  601. emit(0xc8);
  602. } else if (*op == TOK_AND) {
  603. emit(0x83); // cmp eax, 0
  604. emit(0xf8);
  605. emit(0x00);
  606. emit(0x74); // je 0x11
  607. emit(0x0c);
  608. emit(0x83); // cmp ecx, 0
  609. emit(0xf9);
  610. emit(0x00);
  611. emit(0x74); // je 0x11
  612. emit(0x07);
  613. emit(0xb8); // mov eax, 1
  614. emit32(1);
  615. emit(0xeb); // jmp 0x13
  616. emit(0x02);
  617. emit(0x31); // xor eax, eax
  618. emit(0xc0);
  619. } else if (*op == TOK_OR) {
  620. emit(0x83); // cmp eax, 0
  621. emit(0xf8);
  622. emit(0x00);
  623. emit(0x75); // jne 0xe
  624. emit(0x09);
  625. emit(0x83); // cmp ecx, 0
  626. emit(0xf9);
  627. emit(0x00);
  628. emit(0x75); // jne 0xe
  629. emit(0x04);
  630. emit(0x31); // xor eax, eax
  631. emit(0xc0);
  632. emit(0xeb); // jmp 0x13
  633. emit(0x05);
  634. emit(0xb8); // mov eax, 1
  635. emit32(1);
  636. } else {
  637. emit(0x39); // cmp eax, ecx
  638. emit(0xc8);
  639. if (*op == TOK_EQ) {
  640. emit(0x74); // je 0x8
  641. emit(0x04);
  642. } else if (*op == TOK_NE) {
  643. emit(0x75); // jne 0x8
  644. emit(0x04);
  645. } else if (*op == '<') {
  646. emit(0x7c); // jl 0x8
  647. emit(0x04);
  648. } else if (*op == TOK_LE) {
  649. emit(0x7e); // jle 0x8
  650. emit(0x04);
  651. } else if (*op == '>') {
  652. emit(0x7f); // jg 0x8
  653. emit(0x04);
  654. } else if (*op == TOK_GE) {
  655. emit(0x7d); // jge 0x8
  656. emit(0x04);
  657. } else {
  658. return 0;
  659. }
  660. emit(0x31); // xor eax, eax
  661. emit(0xc0);
  662. emit(0xeb); // jmp 0xd
  663. emit(0x05);
  664. emit(0xb8); // mov eax, 1
  665. emit32(1);
  666. }
  667. push_var("__temp");
  668. emit(0x50); // push eax
  669. }
  670. return 1;
  671. }
  /* Return 1 if character `x` occurs in the NUL-terminated string `set`,
     otherwise 0.  The terminating NUL itself never matches. */
  int is_in(char x, char *set) {
    for (; *set != '\0'; set++) {
      if (*set == x) {
        return 1;
      }
    }
    return 0;
  }
  /* Look in [begin, end) for the first character that belongs to `pivots`,
     skipping over balanced (...) and [...] groups, and hand the split to
     run_eval_expr().  dir == 0 scans left to right, any other value scans
     right to left.  Returns run_eval_expr()'s result, or 0 when no pivot is
     found at the top nesting level. */
  int parse_expr(char *begin, char *end, char *pivots, int dir, int addr) {
    char *p;
    if (dir == 0) {
      for (p = begin; p < end; p++) {
        if (is_in(*p, pivots)) {
          return run_eval_expr(begin, p, end, addr);
        }
        if (*p == '(') {
          // Jump to the closing bracket; the loop increment steps past it.
          p = find_matching('(', ')', p, end);
          assert(p != 0);
        } else if (*p == '[') {
          p = find_matching('[', ']', p, end);
          assert(p != 0);
        }
      }
      return 0;
    }
    for (p = end-1; p >= begin; p--) {
      if (is_in(*p, pivots)) {
        return run_eval_expr(begin, p, end, addr);
      }
      if (*p == ')') {
        // Jump to the opening bracket; the loop decrement steps past it.
        p = find_matching_rev('(', ')', begin, p+1);
        assert(p != 0);
      } else if (*p == ']') {
        p = find_matching_rev('[', ']', begin, p+1);
        assert(p != 0);
      }
    }
    return 0;
  }
  715. void eval_expr(char *begin, char *end, int addr) {
  716. if (*begin == '(') {
  717. char *match = find_matching('(', ')', begin, end);
  718. if (match == end-1) {
  719. eval_expr(begin+1, end-1, addr);
  720. }
  721. } else {
  722. if (parse_expr(begin, end, "=", 0, addr)) {
  723. } else if (parse_expr(begin, end, TOKS_OR, 1, addr)) {
  724. } else if (parse_expr(begin, end, TOKS_AND, 1, addr)) {
  725. } else if (parse_expr(begin, end, "|", 1, addr)) {
  726. } else if (parse_expr(begin, end, "&", 1, addr)) {
  727. } else if (parse_expr(begin, end, TOKS_EQ_NE, 1, addr)) {
  728. } else if (parse_expr(begin, end, TOKS_LE_GE "<>", 1, addr)) {
  729. } else if (parse_expr(begin, end, TOKS_SHL_SHR, 1, addr)) {
  730. } else if (parse_expr(begin, end, "+-", 1, addr)) {
  731. } else if (parse_expr(begin, end, "*/%", 1, addr)) {
  732. } else if (parse_expr(begin, end, "!~" TOKS_DEREF_ADDR, 0, addr)) {
  733. } else if (parse_expr(begin, end, TOKS_CALL_OPEN, 1, addr)) {
  734. } else {
  735. if ('0' <= *begin && *begin <= '9') {
  736. assert(!addr);
  737. int val;
  738. int res = decode_number(begin, end, &val);
  739. assert(res);
  740. emit(0x68); // push val
  741. emit32(val);
  742. push_var("__temp");
  743. } else if (*begin == '\'') {
  744. assert(*(end-1) == '\'');
  745. push_var("__temp");
  746. if (end - begin == 3) {
  747. emit(0x68); // push val
  748. emit32(*(begin+1) + 0x80);
  749. } else if (end - begin == 4) {
  750. assert(*(begin+1) == '\\' - 0x80);
  751. emit(0x68); // push val
  752. emit32(escaped(*(begin+2) + 0x80));
  753. } else {
  754. assert(0);
  755. }
  756. } else if (*begin == '"') {
  757. int id = gen_id();
  758. int var_addr = get_symbol(gen_lab(id));
  759. string_ids[string_num] = id;
  760. string_begin[string_num] = begin;
  761. string_end[string_num] = end;
  762. string_num++;
  763. assert(!addr);
  764. emit(0x68); // push addr
  765. emit32(var_addr);
  766. push_var("__temp");
  767. } else {
  768. int pos = find_in_stack2(begin, end);
  769. if (pos != -1) {
  770. if (addr) {
  771. emit(0x89); // mov eax, esp
  772. emit(0xe0);
  773. emit(0x05); // add eax, pos
  774. emit32(4 * pos);
  775. emit(0x50); // push eax
  776. push_var("__temp");
  777. } else {
  778. emit(0x8b); // mov eax, [esp+pos]
  779. emit(0x84);
  780. emit(0x24);
  781. emit32(4 * pos);
  782. emit(0x50); // push eax
  783. push_var("__temp");
  784. }
  785. } else {
  786. int var_addr = get_symbol2(begin, end);
  787. if (addr) {
  788. emit(0x68); // push addr
  789. emit32(var_addr);
  790. push_var("__temp");
  791. } else {
  792. emit(0xb8); // mov eax, addr
  793. emit32(var_addr);
  794. emit(0xff); // push DWORD [eax]
  795. emit(0x30);
  796. push_var("__temp");
  797. }
  798. }
  799. }
  800. }
  801. }
  802. }
  /* Tokenise the NUL-terminated expression `exp` in place (two-character
     operators, whitespace removal, call/unary markers), log it to stderr
     and generate code that leaves its value on the stack. */
  void compile_expression(char *exp) {
    fix_operations(exp);
    remove_spaces(exp, 0);
    fix_operations2(exp);
    fprintf(stderr, "Expression: %s\n", exp);
    eval_expr(exp, exp + strlen(exp), 0);
  }
  811. void compile_statement(char *begin, char *end) {
  812. *end = '\0';
  813. trimstr(begin);
  814. if (*begin == '\0') {
  815. return;
  816. }
  817. char *p;
  818. if (p = isstrpref("return", begin)) {
  819. assert(block_depth > 1);
  820. if (*p == '\0') {
  821. fprintf(stderr, "Empty return statement\n");
  822. } else {
  823. p++;
  824. fprintf(stderr, "Return statement: %s\n", p);
  825. compile_expression(p);
  826. emit(0x58); // pop eax
  827. pop_var();
  828. }
  829. int exit_stack_depth = stack_depth;
  830. emit(0x81); // add esp, ..
  831. emit(0xc4);
  832. emit32(4 * (exit_stack_depth - ret_depth));
  833. emit(0xc3); // ret
  834. } else if (p = isstrpref("int", begin)) {
  835. fprintf(stderr, "Declaration: %s\n", begin);
  836. char *name = p + 1;
  837. trimstr(name);
  838. fprintf(stderr, " declared name is: %s\n", name);
  839. if (block_depth == 1) {
  840. fprintf(stderr, " this is a top level declaration\n");
  841. add_symbol(name, current_loc);
  842. emit32(0);
  843. } else {
  844. push_var(name);
  845. emit(0x83); // sub esp, 4
  846. emit(0xec);
  847. emit(0x04);
  848. }
  849. } else {
  850. assert(block_depth > 1);
  851. char_op = 0;
  852. if (p = isstrpref("char", begin)) {
  853. char_op = 1;
  854. begin = p + 1;
  855. }
  856. fprintf(stderr, "Statement: %s\n", begin);
  857. compile_expression(begin);
  858. emit(0x83); // add esp, 4
  859. emit(0xc4);
  860. emit(0x04);
  861. pop_var();
  862. }
  863. }
  864. void process_fi(int *else_lab, int *fi_lab);
  865. void process_if(char *exp, int *else_lab, int *fi_lab) {
  866. process_fi(else_lab, fi_lab);
  867. *else_lab = gen_id();
  868. *fi_lab = gen_id();
  869. compile_expression(exp);
  870. pop_var();
  871. emit(0x58); // pop eax
  872. emit(0x83); // cmp eax, 0
  873. emit(0xf8);
  874. emit(0x00);
  875. emit(0x0f); // je ...
  876. emit(0x84);
  877. int disp = get_symbol(gen_lab(*else_lab)) - 4 - current_loc;
  878. emit32(disp);
  879. }
  880. void process_if_end(int *else_lab, int *fi_lab) {
  881. emit(0xe9); // jmp ...
  882. int disp = get_symbol(gen_lab(*fi_lab)) - 4 - current_loc;
  883. emit32(disp);
  884. }
  885. void process_else(int *else_lab, int *fi_lab) {
  886. assert(*else_lab != 0);
  887. add_symbol(gen_lab(*else_lab), current_loc);
  888. *else_lab = 0;
  889. }
  890. void process_elseif(char *exp, int *else_lab, int *fi_lab) {
  891. process_else(else_lab, fi_lab);
  892. *else_lab = gen_id();
  893. compile_expression(exp);
  894. pop_var();
  895. emit(0x58); // pop eax
  896. emit(0x83); // cmp eax, 0
  897. emit(0xf8);
  898. emit(0x00);
  899. emit(0x0f); // je ...
  900. emit(0x84);
  901. int disp = get_symbol(gen_lab(*else_lab)) - 4 - current_loc;
  902. emit32(disp);
  903. }
  904. void process_fi(int *else_lab, int *fi_lab) {
  905. if (*else_lab != 0) {
  906. add_symbol(gen_lab(*else_lab), current_loc);
  907. *else_lab = 0;
  908. }
  909. if (*fi_lab != 0) {
  910. add_symbol(gen_lab(*fi_lab), current_loc);
  911. *fi_lab = 0;
  912. }
  913. }
  914. void compile_block_with_head(char *def_begin, char *block_begin, char *block_end, int *else_lab, int *fi_lab);
  915. void compile_block(char *begin, char *end) {
  916. block_depth++;
  917. int saved_stack_depth = stack_depth;
  918. int else_lab = 0;
  919. int fi_lab = 0;
  920. while (1) {
  921. int semicolon_pos = find_char(begin, end, ';');
  922. int brace_pos = find_char(begin, end, '{');
  923. if (semicolon_pos == -1 && brace_pos == -1) {
  924. *end = '\0';
  925. trimstr(begin);
  926. assert(*begin == '\0');
  927. break;
  928. } else {
  929. if (semicolon_pos == -1) {
  930. semicolon_pos = brace_pos + 1;
  931. }
  932. if (brace_pos == -1) {
  933. brace_pos = semicolon_pos + 1;
  934. }
  935. if (semicolon_pos < brace_pos) {
  936. process_fi(&else_lab, &fi_lab);
  937. compile_statement(begin, begin + semicolon_pos);
  938. begin = begin + semicolon_pos + 1;
  939. } else {
  940. char *res = find_matching('{', '}', begin+brace_pos, end);
  941. compile_block_with_head(begin, begin+brace_pos, res, &else_lab, &fi_lab);
  942. begin = res + 1;
  943. }
  944. }
  945. }
  946. process_fi(&else_lab, &fi_lab);
  947. if (block_depth != 1) {
  948. int exit_stack_depth = stack_depth;
  949. pop_to_depth(saved_stack_depth);
  950. emit(0x81); // add esp, ..
  951. emit(0xc4);
  952. emit32(4 * (exit_stack_depth - saved_stack_depth));
  953. }
  954. block_depth--;
  955. }
  956. void compile_block_with_head(char *def_begin, char *block_begin, char *block_end, int *else_lab, int *fi_lab) {
  957. *block_begin = '\0';
  958. int param_num = 0;
  959. remove_spaces(def_begin, 0);
  960. int open_pos = find_char(def_begin, block_begin, '(');
  961. int closed_pos;
  962. if (open_pos != -1) {
  963. char *closed = find_matching('(', ')', def_begin + open_pos, block_begin);
  964. assert(closed != 0);
  965. closed_pos = closed - def_begin;
  966. assert(def_begin[closed_pos+1] == '\0');
  967. def_begin[open_pos] = '\0';
  968. def_begin[closed_pos] = '\0';
  969. }
  970. int must_process_if_end = 0;
  971. if (block_depth == 1) {
  972. // It must be a function
  973. assert(*else_lab == 0);
  974. assert(*fi_lab == 0);
  975. assert(open_pos != -1);
  976. fprintf(stderr, "Beginning of a function with name %s\n", def_begin);
  977. add_symbol(def_begin, current_loc);
  978. assert(stack_depth == 0);
  979. char *params_begin = def_begin + open_pos + 1;
  980. char *params_end = def_begin + closed_pos;
  981. while (1) {
  982. if (params_begin == params_end) {
  983. break;
  984. }
  985. int pos = find_char_back(params_begin, params_end, ',');
  986. if (pos != -1) {
  987. params_begin[pos] = '\0';
  988. push_var(params_begin + pos + 1);
  989. param_num++;
  990. fprintf(stderr, " with parameter %s\n", params_begin + pos + 1);
  991. params_end = params_begin + pos;
  992. } else {
  993. push_var(params_begin);
  994. param_num++;
  995. fprintf(stderr, " with parameter %s\n", params_begin);
  996. break;
  997. }
  998. }
  999. push_var("__ret");
  1000. ret_depth = stack_depth;
  1001. } else if (open_pos != -1 && strncmp2(def_begin, def_begin + open_pos, "if")) {
  1002. fprintf(stderr, "Begin of an if block: %s\n", def_begin + open_pos + 1);
  1003. must_process_if_end = 1;
  1004. process_if(def_begin + open_pos + 1, else_lab, fi_lab);
  1005. } else if (open_pos != -1 && strncmp2(def_begin, def_begin + open_pos, "elseif")) {
  1006. fprintf(stderr, "Begin of an else-if block: %s\n", def_begin + open_pos + 1);
  1007. must_process_if_end = 1;
  1008. process_elseif(def_begin + open_pos + 1, else_lab, fi_lab);
  1009. } else if (open_pos == -1 && strcmp(def_begin, "else") == 0) {
  1010. fprintf(stderr, "Begin of an else block\n");
  1011. must_process_if_end = 1;
  1012. process_else(else_lab, fi_lab);
  1013. } else {
  1014. assert(0);
  1015. }
  1016. compile_block(block_begin+1, block_end-1);
  1017. if (block_depth == 1) {
  1018. pop_var();
  1019. emit(0xc3); // ret
  1020. int i;
  1021. for (i = 0; i < param_num; i++) {
  1022. pop_var();
  1023. }
  1024. assert(stack_depth == 0);
  1025. }
  1026. if (must_process_if_end) {
  1027. process_if_end(else_lab, fi_lab);
  1028. }
  1029. fprintf(stderr, "End of block\n");
  1030. }
  1031. void create_strings() {
  1032. for (int i = 0; i < string_num; i++) {
  1033. add_symbol(gen_lab(string_ids[i]), current_loc);
  1034. char *begin = string_begin[i];
  1035. char *end = string_end[i];
  1036. assert(*begin == '"');
  1037. assert(*(end-1) == '"');
  1038. assert(end - begin >= 2);
  1039. begin++;
  1040. end--;
  1041. while (begin < end) {
  1042. if (*begin == '"') {
  1043. begin++;
  1044. assert(*begin == '"');
  1045. begin++;
  1046. } else {
  1047. assert(*begin < 0);
  1048. emit(*begin + 0x80);
  1049. begin++;
  1050. }
  1051. }
  1052. }
  1053. }
  1054. int main() {
  1055. symbol_num = 0;
  1056. for (stage = 0; stage < 2; stage++) {
  1057. lab_id = 1;
  1058. string_num = 0;
  1059. int fd = open("test.c", O_RDONLY);
  1060. int len = lseek(fd, 0, SEEK_END);
  1061. lseek(fd, 0, SEEK_SET);
  1062. char *src = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
  1063. fix_strings(src, src+len);
  1064. //remove_spaces(src, src+len);
  1065. //print(src, src+len);
  1066. block_depth = 0;
  1067. stack_depth = 0;
  1068. current_loc = 0x100000;
  1069. compile_block(src, src+len);
  1070. create_strings();
  1071. assert(block_depth == 0);
  1072. assert(stack_depth == 0);
  1073. }
  1074. return 0;
  1075. }