|
- /* This file is part of asmc, a bootstrapping OS with minimal seed
- Copyright (C) 2018 Giovanni Mascellani <gio@debian.org>
- https://gitlab.com/giomasce/asmc
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>. */
- #include <stdio.h>
- #include <stdlib.h>
- #include <assert.h>
- #include <string.h>
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <sys/mman.h>
- #include <fcntl.h>
- #include <unistd.h>
/* Single-byte operator tokens. fix_operations() and fix_operations2() rewrite
   two-character and context-dependent operators into these values, so that the
   expression parser can treat every operator as exactly one char. All values
   lie outside the 7-bit ASCII range. */
#define TOK_CALL_OPEN   ((char) 0x80)  // '(' opening a call argument list
#define TOK_CALL_CLOSED ((char) 0x81)  // ')' closing a call argument list
#define TOK_SHL         ((char) 0x82)  // "<<"
#define TOK_SHR         ((char) 0x83)  // ">>"
#define TOK_LE          ((char) 0x84)  // "<="
#define TOK_GE          ((char) 0x85)  // ">="
#define TOK_EQ          ((char) 0x86)  // "=="
#define TOK_NE          ((char) 0x87)  // "!="
#define TOK_AND         ((char) 0x88)  // "&&"
#define TOK_OR          ((char) 0x89)  // "||"
#define TOK_DEREF       ((char) 0x8a)  // unary '*'
#define TOK_ADDR        ((char) 0x8b)  // unary '&'

/* The same token values spelled as string literals, used as pivot sets for
   parse_expr(). */
#define TOKS_CALL_OPEN  "\x80"
#define TOKS_AND        "\x88"
#define TOKS_OR         "\x89"
#define TOKS_EQ_NE      "\x86\x87"
#define TOKS_LE_GE      "\x84\x85"
#define TOKS_SHL_SHR    "\x82\x83"
#define TOKS_DEREF_ADDR "\x8a\x8b"

/* Fixed capacities of the tables below. */
#define MAX_ID_LEN       128   // bytes reserved per identifier, terminator included
#define STACK_LEN        1024  // entries in stack_vars
#define SYMBOL_TABLE_LEN 1024  // entries in symbol_names / symbol_locs
#define MAX_STRING_NUM   1024  // entries in the string literal tables
#define GEN_LAB_BUF_LEN  32    // size of the buffer returned by gen_lab()

/* Compiler state. */
int block_depth;   // brace nesting level; 1 while compiling the top-level block
int stack_depth;   // number of names currently held in stack_vars
int current_loc;   // location of the next emitted byte
int ret_depth;     // stack_depth recorded right after pushing "__ret"
int char_op;       // non-zero: the next assignment stores a single byte
int symbol_num;    // number of entries used in the symbol table
int stage;         // pass number: 0 collects symbols, 1 writes output
int string_num;    // number of string literals recorded in the current pass
int lab_id;        // next id handed out by gen_id()

/* Names of the values on the modelled stack, MAX_ID_LEN bytes per slot. */
char stack_vars[MAX_ID_LEN * STACK_LEN];

/* Symbol table: name i is at symbol_names + i * MAX_ID_LEN, its location is
   symbol_locs[i]. */
char symbol_names[MAX_ID_LEN * SYMBOL_TABLE_LEN];
int symbol_locs[SYMBOL_TABLE_LEN];

/* String literals met while compiling expressions: label id plus the span of
   the literal (quotes included) in the source buffer. */
int string_ids[MAX_STRING_NUM];
char *string_begin[MAX_STRING_NUM];
char *string_end[MAX_STRING_NUM];

/* Scratch buffer overwritten by every call to gen_lab(). */
char gen_lab_buf[GEN_LAB_BUF_LEN];
- int gen_id() {
- return lab_id++;
- }
- char *gen_lab(int id) {
- sprintf(gen_lab_buf, "__label%d", id);
- return gen_lab_buf;
- }
/* Map the character following a backslash to the character it denotes.
   Recognised: n, t, 0, backslash and single quote. Any other input yields 0. */
char escaped(char x) {
  switch (x) {
  case 'n':
    return '\n';
  case 't':
    return '\t';
  case '0':
    return '\0';
  case '\\':
    return '\\';
  case '\'':
    return '\'';
  default:
    return 0;
  }
}
/* Scan [begin, end) backwards, starting at the last character, and return the
   position where the nesting of `closed`/`open` returns to zero.
   When the last character is `closed`, that is the position of its matching
   `open`; when the last character is neither, nesting is already zero and the
   last character itself is returned. Returns 0 if the range is empty or no
   such position exists.

   The loop condition used to be inverted (`begin >= end`), so the scan never
   ran for any non-empty range. */
char *find_matching_rev(char open, char closed, char *begin, char *end) {
  int depth = 0;
  char *cur = end;
  while (cur > begin) {
    cur--;
    if (*cur == closed) {
      depth++;
    } else if (*cur == open) {
      depth--;
    }
    if (depth == 0) {
      return cur;
    }
  }
  return 0;
}
/* Scan [begin, end) forwards and return the first position where the nesting
   of `open`/`closed` is zero after processing that character. When *begin is
   `open` this is its matching `closed`; when *begin is neither, begin itself
   is returned. Returns 0 if no such position exists in the range. */
char *find_matching(char open, char closed, char *begin, char *end) {
  int nesting = 0;
  for (char *cur = begin; cur < end; cur++) {
    if (*cur == open) {
      nesting++;
    } else if (*cur == closed) {
      nesting--;
    }
    if (nesting == 0) {
      return cur;
    }
  }
  return 0;
}
/* Compare the span [b1, e1) with the NUL-terminated string b2.
   Returns 1 when both have the same length and the same bytes, 0 otherwise. */
int strncmp2(char *b1, char *e1, char *b2) {
  int len = strlen(b2);
  if (e1 - b1 != len) {
    return 0;
  }
  return strncmp(b1, b2, len) == 0;
}
- int find_symbol(char *name) {
- int i;
- for (i = 0; i < symbol_num; i++) {
- if (strcmp(name, symbol_names + i * MAX_ID_LEN) == 0) {
- break;
- }
- }
- if (i == symbol_num) {
- i = SYMBOL_TABLE_LEN;
- }
- return i;
- }
- int find_symbol2(char *begin, char *end) {
- int i;
- for (i = 0; i < symbol_num; i++) {
- if (strncmp2(begin, end, symbol_names + i * MAX_ID_LEN)) {
- break;
- }
- }
- if (i == symbol_num) {
- i = SYMBOL_TABLE_LEN;
- }
- return i;
- }
- int get_symbol(char *name) {
- if (stage == 0) {
- return 0;
- } else if (stage == 1) {
- int pos = find_symbol(name);
- assert(pos != SYMBOL_TABLE_LEN);
- return symbol_locs[pos];
- } else {
- assert(0);
- }
- }
- int get_symbol2(char *begin, char *end) {
- if (stage == 0) {
- return 0;
- } else if (stage == 1) {
- int pos = find_symbol2(begin, end);
- assert(pos != SYMBOL_TABLE_LEN);
- return symbol_locs[pos];
- } else {
- assert(0);
- }
- }
- void add_symbol(char *name, int loc) {
- int len = strlen(name);
- assert(len > 0);
- assert(len < MAX_ID_LEN);
- if (stage == 0) {
- assert(find_symbol(name) == SYMBOL_TABLE_LEN);
- assert(symbol_num < SYMBOL_TABLE_LEN);
- symbol_locs[symbol_num] = loc;
- strcpy(symbol_names + symbol_num * MAX_ID_LEN, name);
- symbol_num = symbol_num + 1;
- } else if (stage == 1) {
- int idx = find_symbol(name);
- assert(idx < symbol_num);
- assert(symbol_locs[idx] == loc);
- } else {
- assert(0);
- }
- }
- void push_var(char *var_name) {
- int len = strlen(var_name);
- assert(len > 0);
- assert(len < MAX_ID_LEN);
- assert(stack_depth < STACK_LEN);
- strcpy(stack_vars + stack_depth * MAX_ID_LEN, var_name);
- stack_depth++;
- }
- void pop_var() {
- stack_depth--;
- }
- void pop_to_depth(int depth) {
- stack_depth = depth;
- }
- int find_in_stack(char *var_name) {
- int i;
- for (i = 0; i < stack_depth; i++) {
- if (strcmp(var_name, stack_vars + (stack_depth - 1 - i) * MAX_ID_LEN) == 0) {
- return i;
- }
- }
- return -1;
- }
- int find_in_stack2(char* begin, char *end) {
- int i;
- for (i = 0; i < stack_depth; i++) {
- if (strncmp2(begin, end, stack_vars + (stack_depth - 1 - i) * MAX_ID_LEN)) {
- return i;
- }
- }
- return -1;
- }
- void emit(char x) {
- if (stage == 1) {
- fwrite(&x, 1, 1, stdout);
- }
- current_loc++;
- }
/* Emit a 32-bit value as four bytes, least significant byte first. */
void emit32(int x) {
  for (int shift = 0; shift < 32; shift += 8) {
    emit(x >> shift);
  }
}
/* Return 1 for space, tab and newline, 0 for every other character. */
int is_whitespace(char x) {
  switch (x) {
  case ' ':
  case '\t':
  case '\n':
    return 1;
  default:
    return 0;
  }
}
/* Return 1 if x may appear in an identifier (ASCII letter, digit or
   underscore), 0 otherwise. */
int is_id(char x) {
  if ('a' <= x && x <= 'z') {
    return 1;
  }
  if ('A' <= x && x <= 'Z') {
    return 1;
  }
  if ('0' <= x && x <= '9') {
    return 1;
  }
  return x == '_';
}
/* Delete all whitespace from the text starting at `begin`, compacting it in
   place. Scanning stops at `end` or at the first NUL, whichever comes first
   (callers pass 0 as `end` to rely on the NUL alone). The result is
   NUL-terminated unless it fills the whole [begin, end) range. */
void remove_spaces(char *begin, char *end) {
  char *dst = begin;
  for (char *src = begin; src != end; src++) {
    if (*src == '\0') {
      *dst = '\0';
      return;
    }
    if (!is_whitespace(*src)) {
      *dst = *src;
      dst++;
    }
  }
  if (dst != end) {
    *dst = '\0';
  }
}
/* Find the first occurrence of c in [s, e), stopping early at a NUL byte.
   Returns the offset from s, or -1 if c does not occur before `e` or before
   the first NUL. The match test comes first, so searching for '\0' itself
   returns the offset of the terminator. */
int find_char(char *s, char *e, char c) {
  for (char *cur = s; cur != e; cur++) {
    if (*cur == c) {
      return cur - s;
    }
    if (*cur == '\0') {
      return -1;
    }
  }
  return -1;
}
/* Find the last occurrence of c in [s, e).
   Returns the offset from s, or -1 if c does not occur.
   An empty range now returns -1; previously it read the byte before `s` and
   kept scanning backwards outside the range. */
int find_char_back(char *s, char *e, char c) {
  char *cur = e;
  while (cur > s) {
    cur--;
    if (*cur == c) {
      return cur - s;
    }
  }
  return -1;
}
/* Test whether s1 is a prefix of s2.
   Returns the position in s2 right after the prefix, or 0 if s2 does not
   start with s1. An empty s1 matches and returns s2 itself. */
char *isstrpref(char *s1, char *s2) {
  for (; *s1 != '\0'; s1++, s2++) {
    if (*s1 != *s2) {
      return 0;
    }
  }
  return s2;
}
/* Strip leading and trailing whitespace from the NUL-terminated string in
   buf, in place: the content is moved to the start of the buffer and every
   trailing whitespace byte is overwritten with NUL.

   The trailing scan inspects the byte before the write position instead of
   stepping the pointer to buf - 1, which was undefined behaviour when the
   result was empty. */
void trimstr(char *buf) {
  char *src = buf;
  while (is_whitespace(*src)) {
    src++;
  }
  char *dst = buf;
  while (*src != '\0') {
    *dst = *src;
    dst++;
    src++;
  }
  *dst = '\0';
  while (dst > buf && is_whitespace(*(dst - 1))) {
    dst--;
    *dst = '\0';
  }
}
- void fix_operations(char *exp) {
- while (*exp != '\0') {
- if (*(exp+1) != '\0') {
- if (*exp == '<' && *(exp+1) == '<') { *exp = TOK_SHL; *(exp+1) = ' '; }
- if (*exp == '>' && *(exp+1) == '>') { *exp = TOK_SHR; *(exp+1) = ' '; }
- if (*exp == '<' && *(exp+1) == '=') { *exp = TOK_LE; *(exp+1) = ' '; }
- if (*exp == '>' && *(exp+1) == '=') { *exp = TOK_GE; *(exp+1) = ' '; }
- if (*exp == '=' && *(exp+1) == '=') { *exp = TOK_EQ; *(exp+1) = ' '; }
- if (*exp == '!' && *(exp+1) == '=') { *exp = TOK_NE; *(exp+1) = ' '; }
- if (*exp == '&' && *(exp+1) == '&') { *exp = TOK_AND; *(exp+1) = ' '; }
- if (*exp == '|' && *(exp+1) == '|') { *exp = TOK_OR; *(exp+1) = ' '; }
- }
- exp++;
- }
- }
- void fix_operations2(char *exp) {
- int prev_is_id = 0;
- while (*exp != '\0') {
- if (!prev_is_id) {
- if (*exp == '*') { *exp = TOK_DEREF; }
- if (*exp == '&') { *exp = TOK_ADDR; }
- } else {
- if (*exp == '(') {
- char *match = find_matching('(', ')', exp, exp+strlen(exp));
- assert(match != 0);
- *exp = TOK_CALL_OPEN;
- *match = TOK_CALL_CLOSED;
- }
- }
- if (is_id(*exp) || *exp == ')') {
- prev_is_id = 1;
- } else {
- prev_is_id = 0;
- }
- exp++;
- }
- }
/* Write the characters of [begin, end) to stdout, stopping early at the first
   NUL byte. */
void print(char *begin, char *end) {
  for (char *cur = begin; cur < end && *cur != '\0'; cur++) {
    putchar(*cur);
  }
}
/* Pre-process the raw source in [begin, end), in place:
     - outside literals, every whitespace character becomes a plain space;
     - "//" comments, slash-star comments and lines starting at '#' are
       blanked out with spaces (a '#' is treated like "//");
     - the content of '...' and "..." literals is shifted by 0x80 so later
       passes cannot mistake it for code; the quotes themselves are kept.
       A backslash and the character after it are both shifted, so an
       escaped quote does not end the literal.
   Every byte examined must be 7-bit ASCII (asserted). */
void fix_strings(char *begin, char *end) {
  enum { CODE, CHAR_LIT, STRING_LIT, LINE_COMMENT, BLOCK_COMMENT } state = CODE;
  for (; begin < end; begin++) {
    assert((unsigned char) *begin < 0x80);
    switch (state) {
    case CODE:
      if (*begin == '\'') {
        state = CHAR_LIT;
      } else if (*begin == '"') {
        state = STRING_LIT;
      } else if (begin + 1 < end && *begin == '/'
                 && (*(begin+1) == '/' || *(begin+1) == '*')) {
        state = *(begin+1) == '/' ? LINE_COMMENT : BLOCK_COMMENT;
        *begin = ' ';
        begin++;
        *begin = ' ';
      } else if (*begin == '#') {
        state = LINE_COMMENT;
        *begin = ' ';
      } else if (is_whitespace(*begin)) {
        *begin = ' ';
      }
      break;
    case CHAR_LIT:
    case STRING_LIT:
      if (*begin == (state == CHAR_LIT ? '\'' : '"')) {
        state = CODE;
      } else if (*begin == '\\') {
        assert(begin + 1 < end);
        *begin = *begin + 0x80;
        begin++;
        *begin = *begin + 0x80;
      } else {
        assert(*begin >= 0x20);
        *begin = *begin + 0x80;
      }
      break;
    case LINE_COMMENT:
      // The terminating newline is blanked too.
      if (*begin == '\n') {
        state = CODE;
      }
      *begin = ' ';
      break;
    case BLOCK_COMMENT:
      if (begin + 1 < end && *begin == '*' && *(begin+1) == '/') {
        state = CODE;
        *begin = ' ';
        begin++;
        *begin = ' ';
      } else {
        *begin = ' ';
      }
      break;
    }
  }
}
/* Skip over identifier characters starting at s and return the position of
   the first character that is not one. A null s is returned unchanged. */
char *find_id(char *s) {
  if (s == 0) {
    return s;
  }
  while (is_id(*s)) {
    s++;
  }
  return s;
}
/* Parse the unsigned integer literal spanning [begin, end) into *num.
   Accepted forms: decimal digits, or "0x"/"0X" followed by hexadecimal
   digits (either case). Returns 1 on success, 0 if the span is empty, has no
   digits after the prefix, or contains an invalid character; *num is
   unspecified on failure. Overflow wraps around (unsigned arithmetic).

   The hex prefix test used to compare the first character with both '0' and
   'x', so it could never match and every hexadecimal literal was rejected.
   The test is now also length-checked, so a one-character span is not read
   past its end. */
int decode_number(char *begin, char *end, unsigned int *num) {
  unsigned int base = 10;
  *num = 0;
  if (end - begin >= 2 && begin[0] == '0' && (begin[1] == 'x' || begin[1] == 'X')) {
    begin += 2;
    base = 16;
  }
  if (begin == end) {
    return 0;
  }
  for (; begin != end; begin++) {
    unsigned int digit;
    if ('0' <= *begin && *begin <= '9') {
      digit = *begin - '0';
    } else if (base == 16 && 'a' <= *begin && *begin <= 'f') {
      digit = *begin - 'a' + 10;
    } else if (base == 16 && 'A' <= *begin && *begin <= 'F') {
      digit = *begin - 'A' + 10;
    } else {
      return 0;
    }
    *num = *num * base + digit;
  }
  return 1;
}
- void eval_expr(char *begin, char *end, int addr);
- int run_eval_expr(char *begin, char *op, char *end, int addr) {
- if (*op == '=') {
- eval_expr(op+1, end, 0);
- eval_expr(begin, op, 1);
- emit(0x58); // pop eax
- pop_var();
- emit(0x59); // pop ecx
- pop_var();
- if (char_op) {
- char_op = 0;
- emit(0x88); // mov [eax], cl
- emit(0x08);
- } else {
- emit(0x89); // mov [eax], ecx
- emit(0x08);
- }
- push_var("__temp");
- if (addr) {
- emit(0x50); // push eax
- } else {
- emit(0x51); // push ecx
- }
- } else if (*op == TOK_CALL_OPEN) {
- assert(!addr);
- char *match = find_matching(TOK_CALL_OPEN, TOK_CALL_CLOSED, op, end);
- assert(match != 0);
- char *params_begin = op + 1;
- char *params_end = end - 1;
- assert(params_end == match);
- int param_num = 0;
- while (1) {
- if (params_begin == params_end) {
- break;
- }
- int pos = find_char_back(params_begin, params_end, ',');
- if (pos != -1) {
- eval_expr(params_begin + pos + 1, params_end, 0);
- params_end = params_begin + pos;
- param_num++;
- } else {
- eval_expr(params_begin, params_end, 0);
- param_num++;
- break;
- }
- }
- eval_expr(begin, op, 1);
- pop_var();
- emit(0x58); // pop eax
- emit(0xff); // call eax
- emit(0xd0);
- emit(0x81); // add esp, ...
- emit(0xc4);
- emit32(4 * param_num);
- for (int i = 0; i < param_num; i++) {
- pop_var();
- }
- push_var("__temp");
- emit(0x50); // push eax
- } else if (*op == TOK_DEREF) {
- assert(begin == op);
- eval_expr(op+1, end, 0);
- pop_var();
- emit(0x58); // pop eax
- if (!addr) {
- emit(0xb8); // mov eax, [eax]
- emit(0x00);
- }
- push_var("__temp");
- emit(0x50); // push eax
- } else if (*op == TOK_ADDR) {
- assert(!addr);
- assert(begin == op);
- eval_expr(op+1, end, 1);
- } else if (*op == '!' || *op == '~') {
- assert(!addr);
- assert(begin == op);
- eval_expr(op+1, end, 0);
- pop_var();
- emit(0x58); // pop eax
- if (*op == '!') {
- emit(0x83); // cmp eax, 0
- emit(0xf8);
- emit(0x00);
- emit(0x74); // je 0x9
- emit(0x04);
- emit(0x31); // xor eax, eax
- emit(0xc0);
- emit(0xeb); // jmp 0xe
- emit(0x05);
- emit(0xb8); // mov eax, 1
- emit32(1);
- } else if (*op == '~') {
- emit(0xf7); // not eax
- emit(0xd0);
- } else {
- assert(0);
- }
- emit(0x50); // push eax
- push_var("__temp");
- } else {
- assert(!addr);
- eval_expr(op+1, end, 0);
- eval_expr(begin, op, 0);
- emit(0x58); // pop eax
- pop_var();
- emit(0x59); // pop ecx
- pop_var();
- if (*op == '+') {
- emit(0x01); // add eax, ecx
- emit(0xc8);
- } else if (*op == '-') {
- emit(0x29); // sub eax, ecx
- emit(0xc8);
- } else if (*op == '*') {
- emit(0xf7); // imul ecx
- emit(0xe9);
- } else if (*op == '/') {
- emit(0x31); // xor edx, edx
- emit(0xd2);
- emit(0xf7); // idiv ecx
- emit(0xf9);
- } else if (*op == '%') {
- emit(0x31); // xor edx, edx
- emit(0xd2);
- emit(0xf7); // idiv ecx
- emit(0xf9);
- emit(0x89); // mov eax, edx
- emit(0xd0);
- } else if (*op == TOK_SHL) {
- emit(0xd3); // shl eax, cl
- emit(0xe0);
- } else if (*op == TOK_SHR) {
- emit(0xd3); // shr eax, cl
- emit(0xe8);
- } else if (*op == '&') {
- emit(0x21); // and eax, ecx
- emit(0xc8);
- } else if (*op == '|') {
- emit(0x09); // or eax, ecx
- emit(0xc8);
- } else if (*op == TOK_AND) {
- emit(0x83); // cmp eax, 0
- emit(0xf8);
- emit(0x00);
- emit(0x74); // je 0x11
- emit(0x0c);
- emit(0x83); // cmp ecx, 0
- emit(0xf9);
- emit(0x00);
- emit(0x74); // je 0x11
- emit(0x07);
- emit(0xb8); // mov eax, 1
- emit32(1);
- emit(0xeb); // jmp 0x13
- emit(0x02);
- emit(0x31); // xor eax, eax
- emit(0xc0);
- } else if (*op == TOK_OR) {
- emit(0x83); // cmp eax, 0
- emit(0xf8);
- emit(0x00);
- emit(0x75); // jne 0xe
- emit(0x09);
- emit(0x83); // cmp ecx, 0
- emit(0xf9);
- emit(0x00);
- emit(0x75); // jne 0xe
- emit(0x04);
- emit(0x31); // xor eax, eax
- emit(0xc0);
- emit(0xeb); // jmp 0x13
- emit(0x05);
- emit(0xb8); // mov eax, 1
- emit32(1);
- } else {
- emit(0x39); // cmp eax, ecx
- emit(0xc8);
- if (*op == TOK_EQ) {
- emit(0x74); // je 0x8
- emit(0x04);
- } else if (*op == TOK_NE) {
- emit(0x75); // jne 0x8
- emit(0x04);
- } else if (*op == '<') {
- emit(0x7c); // jl 0x8
- emit(0x04);
- } else if (*op == TOK_LE) {
- emit(0x7e); // jle 0x8
- emit(0x04);
- } else if (*op == '>') {
- emit(0x7f); // jg 0x8
- emit(0x04);
- } else if (*op == TOK_GE) {
- emit(0x7d); // jge 0x8
- emit(0x04);
- } else {
- return 0;
- }
- emit(0x31); // xor eax, eax
- emit(0xc0);
- emit(0xeb); // jmp 0xd
- emit(0x05);
- emit(0xb8); // mov eax, 1
- emit32(1);
- }
- push_var("__temp");
- emit(0x50); // push eax
- }
- return 1;
- }
/* Return 1 if x is one of the characters of the NUL-terminated string `set`,
   0 otherwise. The terminator itself never matches. */
int is_in(char x, char *set) {
  for (char *cur = set; *cur != '\0'; cur++) {
    if (*cur == x) {
      return 1;
    }
  }
  return 0;
}
/* Look for the first operator from `pivots` in [begin, end) that is not
   nested inside (...) or [...], and compile the expression around it with
   run_eval_expr(). dir == 0 scans left to right, any other value scans right
   to left. Returns the result of run_eval_expr(), or 0 when no pivot was
   found. */
int parse_expr(char *begin, char *end, char *pivots, int dir, int addr) {
  char *p;
  if (dir == 0) {
    for (p = begin; p < end; p++) {
      if (is_in(*p, pivots)) {
        return run_eval_expr(begin, p, end, addr);
      }
      if (*p == '(' || *p == '[') {
        // Jump to the matching closer; the loop step moves past it.
        char closer = *p == '(' ? ')' : ']';
        p = find_matching(*p, closer, p, end);
        assert(p != 0);
      }
    }
    return 0;
  }
  for (p = end - 1; p >= begin; p--) {
    if (is_in(*p, pivots)) {
      return run_eval_expr(begin, p, end, addr);
    }
    if (*p == ')' || *p == ']') {
      // Jump to the matching opener; the loop step moves before it.
      char opener = *p == ')' ? '(' : '[';
      p = find_matching_rev(opener, *p, begin, p+1);
      assert(p != 0);
    }
  }
  return 0;
}
- void eval_expr(char *begin, char *end, int addr) {
- if (*begin == '(') {
- char *match = find_matching('(', ')', begin, end);
- if (match == end-1) {
- eval_expr(begin+1, end-1, addr);
- }
- } else {
- if (parse_expr(begin, end, "=", 0, addr)) {
- } else if (parse_expr(begin, end, TOKS_OR, 1, addr)) {
- } else if (parse_expr(begin, end, TOKS_AND, 1, addr)) {
- } else if (parse_expr(begin, end, "|", 1, addr)) {
- } else if (parse_expr(begin, end, "&", 1, addr)) {
- } else if (parse_expr(begin, end, TOKS_EQ_NE, 1, addr)) {
- } else if (parse_expr(begin, end, TOKS_LE_GE "<>", 1, addr)) {
- } else if (parse_expr(begin, end, TOKS_SHL_SHR, 1, addr)) {
- } else if (parse_expr(begin, end, "+-", 1, addr)) {
- } else if (parse_expr(begin, end, "*/%", 1, addr)) {
- } else if (parse_expr(begin, end, "!~" TOKS_DEREF_ADDR, 0, addr)) {
- } else if (parse_expr(begin, end, TOKS_CALL_OPEN, 1, addr)) {
- } else {
- if ('0' <= *begin && *begin <= '9') {
- assert(!addr);
- int val;
- int res = decode_number(begin, end, &val);
- assert(res);
- emit(0x68); // push val
- emit32(val);
- push_var("__temp");
- } else if (*begin == '\'') {
- assert(*(end-1) == '\'');
- push_var("__temp");
- if (end - begin == 3) {
- emit(0x68); // push val
- emit32(*(begin+1) + 0x80);
- } else if (end - begin == 4) {
- assert(*(begin+1) == '\\' - 0x80);
- emit(0x68); // push val
- emit32(escaped(*(begin+2) + 0x80));
- } else {
- assert(0);
- }
- } else if (*begin == '"') {
- int id = gen_id();
- int var_addr = get_symbol(gen_lab(id));
- string_ids[string_num] = id;
- string_begin[string_num] = begin;
- string_end[string_num] = end;
- string_num++;
- assert(!addr);
- emit(0x68); // push addr
- emit32(var_addr);
- push_var("__temp");
- } else {
- int pos = find_in_stack2(begin, end);
- if (pos != -1) {
- if (addr) {
- emit(0x89); // mov eax, esp
- emit(0xe0);
- emit(0x05); // add eax, pos
- emit32(4 * pos);
- emit(0x50); // push eax
- push_var("__temp");
- } else {
- emit(0x8b); // mov eax, [esp+pos]
- emit(0x84);
- emit(0x24);
- emit32(4 * pos);
- emit(0x50); // push eax
- push_var("__temp");
- }
- } else {
- int var_addr = get_symbol2(begin, end);
- if (addr) {
- emit(0x68); // push addr
- emit32(var_addr);
- push_var("__temp");
- } else {
- emit(0xb8); // mov eax, addr
- emit32(var_addr);
- emit(0xff); // push DWORD [eax]
- emit(0x30);
- push_var("__temp");
- }
- }
- }
- }
- }
- }
/* Compile the NUL-terminated expression in exp. The text is rewritten in
   place (operators tokenised, whitespace removed), logged to stderr and then
   handed to eval_expr(), which leaves the value on the stack. */
void compile_expression(char *exp) {
  fix_operations(exp);
  remove_spaces(exp, 0);
  fix_operations2(exp);
  fprintf(stderr, "Expression: %s\n", exp);
  eval_expr(exp, exp + strlen(exp), 0);
}
- void compile_statement(char *begin, char *end) {
- *end = '\0';
- trimstr(begin);
- if (*begin == '\0') {
- return;
- }
- char *p;
- if (p = isstrpref("return", begin)) {
- assert(block_depth > 1);
- if (*p == '\0') {
- fprintf(stderr, "Empty return statement\n");
- } else {
- p++;
- fprintf(stderr, "Return statement: %s\n", p);
- compile_expression(p);
- emit(0x58); // pop eax
- pop_var();
- }
- int exit_stack_depth = stack_depth;
- emit(0x81); // add esp, ..
- emit(0xc4);
- emit32(4 * (exit_stack_depth - ret_depth));
- emit(0xc3); // ret
- } else if (p = isstrpref("int", begin)) {
- fprintf(stderr, "Declaration: %s\n", begin);
- char *name = p + 1;
- trimstr(name);
- fprintf(stderr, " declared name is: %s\n", name);
- if (block_depth == 1) {
- fprintf(stderr, " this is a top level declaration\n");
- add_symbol(name, current_loc);
- emit32(0);
- } else {
- push_var(name);
- emit(0x83); // sub esp, 4
- emit(0xec);
- emit(0x04);
- }
- } else {
- assert(block_depth > 1);
- char_op = 0;
- if (p = isstrpref("char", begin)) {
- char_op = 1;
- begin = p + 1;
- }
- fprintf(stderr, "Statement: %s\n", begin);
- compile_expression(begin);
- emit(0x83); // add esp, 4
- emit(0xc4);
- emit(0x04);
- pop_var();
- }
- }
- void process_fi(int *else_lab, int *fi_lab);
- void process_if(char *exp, int *else_lab, int *fi_lab) {
- process_fi(else_lab, fi_lab);
- *else_lab = gen_id();
- *fi_lab = gen_id();
- compile_expression(exp);
- pop_var();
- emit(0x58); // pop eax
- emit(0x83); // cmp eax, 0
- emit(0xf8);
- emit(0x00);
- emit(0x0f); // je ...
- emit(0x84);
- int disp = get_symbol(gen_lab(*else_lab)) - 4 - current_loc;
- emit32(disp);
- }
- void process_if_end(int *else_lab, int *fi_lab) {
- emit(0xe9); // jmp ...
- int disp = get_symbol(gen_lab(*fi_lab)) - 4 - current_loc;
- emit32(disp);
- }
- void process_else(int *else_lab, int *fi_lab) {
- assert(*else_lab != 0);
- add_symbol(gen_lab(*else_lab), current_loc);
- *else_lab = 0;
- }
- void process_elseif(char *exp, int *else_lab, int *fi_lab) {
- process_else(else_lab, fi_lab);
- *else_lab = gen_id();
- compile_expression(exp);
- pop_var();
- emit(0x58); // pop eax
- emit(0x83); // cmp eax, 0
- emit(0xf8);
- emit(0x00);
- emit(0x0f); // je ...
- emit(0x84);
- int disp = get_symbol(gen_lab(*else_lab)) - 4 - current_loc;
- emit32(disp);
- }
- void process_fi(int *else_lab, int *fi_lab) {
- if (*else_lab != 0) {
- add_symbol(gen_lab(*else_lab), current_loc);
- *else_lab = 0;
- }
- if (*fi_lab != 0) {
- add_symbol(gen_lab(*fi_lab), current_loc);
- *fi_lab = 0;
- }
- }
- void compile_block_with_head(char *def_begin, char *block_begin, char *block_end, int *else_lab, int *fi_lab);
- void compile_block(char *begin, char *end) {
- block_depth++;
- int saved_stack_depth = stack_depth;
- int else_lab = 0;
- int fi_lab = 0;
- while (1) {
- int semicolon_pos = find_char(begin, end, ';');
- int brace_pos = find_char(begin, end, '{');
- if (semicolon_pos == -1 && brace_pos == -1) {
- *end = '\0';
- trimstr(begin);
- assert(*begin == '\0');
- break;
- } else {
- if (semicolon_pos == -1) {
- semicolon_pos = brace_pos + 1;
- }
- if (brace_pos == -1) {
- brace_pos = semicolon_pos + 1;
- }
- if (semicolon_pos < brace_pos) {
- process_fi(&else_lab, &fi_lab);
- compile_statement(begin, begin + semicolon_pos);
- begin = begin + semicolon_pos + 1;
- } else {
- char *res = find_matching('{', '}', begin+brace_pos, end);
- compile_block_with_head(begin, begin+brace_pos, res, &else_lab, &fi_lab);
- begin = res + 1;
- }
- }
- }
- process_fi(&else_lab, &fi_lab);
- if (block_depth != 1) {
- int exit_stack_depth = stack_depth;
- pop_to_depth(saved_stack_depth);
- emit(0x81); // add esp, ..
- emit(0xc4);
- emit32(4 * (exit_stack_depth - saved_stack_depth));
- }
- block_depth--;
- }
- void compile_block_with_head(char *def_begin, char *block_begin, char *block_end, int *else_lab, int *fi_lab) {
- *block_begin = '\0';
- int param_num = 0;
- remove_spaces(def_begin, 0);
- int open_pos = find_char(def_begin, block_begin, '(');
- int closed_pos;
- if (open_pos != -1) {
- char *closed = find_matching('(', ')', def_begin + open_pos, block_begin);
- assert(closed != 0);
- closed_pos = closed - def_begin;
- assert(def_begin[closed_pos+1] == '\0');
- def_begin[open_pos] = '\0';
- def_begin[closed_pos] = '\0';
- }
- int must_process_if_end = 0;
- if (block_depth == 1) {
- // It must be a function
- assert(*else_lab == 0);
- assert(*fi_lab == 0);
- assert(open_pos != -1);
- fprintf(stderr, "Beginning of a function with name %s\n", def_begin);
- add_symbol(def_begin, current_loc);
- assert(stack_depth == 0);
- char *params_begin = def_begin + open_pos + 1;
- char *params_end = def_begin + closed_pos;
- while (1) {
- if (params_begin == params_end) {
- break;
- }
- int pos = find_char_back(params_begin, params_end, ',');
- if (pos != -1) {
- params_begin[pos] = '\0';
- push_var(params_begin + pos + 1);
- param_num++;
- fprintf(stderr, " with parameter %s\n", params_begin + pos + 1);
- params_end = params_begin + pos;
- } else {
- push_var(params_begin);
- param_num++;
- fprintf(stderr, " with parameter %s\n", params_begin);
- break;
- }
- }
- push_var("__ret");
- ret_depth = stack_depth;
- } else if (open_pos != -1 && strncmp2(def_begin, def_begin + open_pos, "if")) {
- fprintf(stderr, "Begin of an if block: %s\n", def_begin + open_pos + 1);
- must_process_if_end = 1;
- process_if(def_begin + open_pos + 1, else_lab, fi_lab);
- } else if (open_pos != -1 && strncmp2(def_begin, def_begin + open_pos, "elseif")) {
- fprintf(stderr, "Begin of an else-if block: %s\n", def_begin + open_pos + 1);
- must_process_if_end = 1;
- process_elseif(def_begin + open_pos + 1, else_lab, fi_lab);
- } else if (open_pos == -1 && strcmp(def_begin, "else") == 0) {
- fprintf(stderr, "Begin of an else block\n");
- must_process_if_end = 1;
- process_else(else_lab, fi_lab);
- } else {
- assert(0);
- }
- compile_block(block_begin+1, block_end-1);
- if (block_depth == 1) {
- pop_var();
- emit(0xc3); // ret
- int i;
- for (i = 0; i < param_num; i++) {
- pop_var();
- }
- assert(stack_depth == 0);
- }
- if (must_process_if_end) {
- process_if_end(else_lab, fi_lab);
- }
- fprintf(stderr, "End of block\n");
- }
- void create_strings() {
- for (int i = 0; i < string_num; i++) {
- add_symbol(gen_lab(string_ids[i]), current_loc);
- char *begin = string_begin[i];
- char *end = string_end[i];
- assert(*begin == '"');
- assert(*(end-1) == '"');
- assert(end - begin >= 2);
- begin++;
- end--;
- while (begin < end) {
- if (*begin == '"') {
- begin++;
- assert(*begin == '"');
- begin++;
- } else {
- assert(*begin < 0);
- emit(*begin + 0x80);
- begin++;
- }
- }
- }
- }
- int main() {
- symbol_num = 0;
- for (stage = 0; stage < 2; stage++) {
- lab_id = 1;
- string_num = 0;
- int fd = open("test.c", O_RDONLY);
- int len = lseek(fd, 0, SEEK_END);
- lseek(fd, 0, SEEK_SET);
- char *src = mmap(0, len, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
- fix_strings(src, src+len);
- //remove_spaces(src, src+len);
- //print(src, src+len);
- block_depth = 0;
- stack_depth = 0;
- current_loc = 0x100000;
- compile_block(src, src+len);
- create_strings();
- assert(block_depth == 0);
- assert(stack_depth == 0);
- }
- return 0;
- }
|