123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612 |
- /* This file is part of asmc, a bootstrapping OS with minimal seed
- Copyright (C) 2018 Giovanni Mascellani <gio@debian.org>
- https://gitlab.com/giomasce/asmc
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>. */
- #include <stdio.h>
- #include "platform.h"
/* Size limits. MAX_TOKEN_LEN is also the size of one slot of the variable
   stack (see push_var2); STACK_LEN bounds the number of such slots. */
#define MAX_TOKEN_LEN 128
#define STACK_LEN 1024
#define SYMBOL_TABLE_LEN 1024
#define WRITE_LABEL_BUF_LEN 128

/* Name under which temporaries are recorded on the variable stack. */
#define TEMP_VAR "__temp"

/* Buffer that write_label2() formats label names into. */
char write_label_buf[WRITE_LABEL_BUF_LEN];

/* String and number helpers, declared here instead of via library headers. */
int atoi(char*);
int sprintf(char *str, const char *format, ...);
int strcmp(const char *s1, const char *s2);
void strcpy(char *d, const char *s);
int strlen(const char *s);
int decode_number(const char *operand, unsigned int *num);

/* Symbol table interface. */
int find_symbol(const char *name, int *loc, int *arity);
void add_symbol(const char *name, int loc, int arity);
void add_symbol_wrapper(const char *name, int loc, int arity);
void add_symbol_placeholder(const char *name, int arity);
void fix_symbol_placeholder(const char *name, int loc, int arity);
int *get_symbol_num();

/* Accessors returning pointers to the compiler's state variables. */
int *get_stage();
int *get_label_num();
char *get_stack_vars();
int *get_block_depth();
int *get_stack_depth();
int *get_temp_depth();
int *get_token_given_back();
int *get_token_len();
char *get_token_buf();
char *get_buf2();
int *get_read_fd();
int *get_emit_fd();
int *get_current_loc();

/* Initialisation routines called from main(). */
void init_symbols();
void init_g_compiler();
void assert(int cond);

/* Call platform_panic() when cond is zero; do nothing otherwise. */
void assert(int cond) {
    if (cond) {
        return;
    }
    platform_panic();
}
- void emit(char c);
- void emit2(char c) {
- if (*get_stage() == 1) {
- platform_write_char(*get_emit_fd(), c);
- }
- (*get_current_loc())++;
- }
void emit32(int x);

/* Emit x as four bytes through emit(), least significant byte first. */
void emit322(int x) {
    int shift;
    for (shift = 0; shift < 32; shift += 8) {
        emit(x >> shift);
    }
}
void emit_str(char *x, int len);

/* Emit the first len bytes starting at x; emits nothing when len <= 0. */
void emit_str2(char *x, int len) {
    int i;
    for (i = 0; i < len; i++) {
        emit(x[i]);
    }
}
- int gen_label();
- int gen_label2() {
- return (*get_label_num())++;
- }
- char *write_label(int id);
- char *write_label2(int id) {
- sprintf(write_label_buf, ".%d", id);
- return write_label_buf;
- }
- int get_symbol(char *name, int *arity);
- int get_symbol2(char *name, int *arity) {
- if (*get_stage() == 1 || arity != 0) {
- int loc;
- int res = find_symbol(name, &loc, arity);
- assert(res);
- return loc;
- } else {
- return 0;
- }
- }
- void push_var(char *var_name, int temp);
- void push_var2(char *var_name, int temp) {
- int len = strlen(var_name);
- assert(len > 0);
- assert(len < MAX_TOKEN_LEN);
- assert(*get_stack_depth() < STACK_LEN);
- strcpy(get_stack_vars() + *get_stack_depth() * MAX_TOKEN_LEN, var_name);
- (*get_stack_depth())++;
- if (temp) {
- (*get_temp_depth())++;
- } else {
- assert(*get_temp_depth() == 0);
- }
- }
- void pop_var(int temp);
- void pop_var2(int temp) {
- assert(*get_stack_depth() > 0);
- (*get_stack_depth())--;
- if (temp) {
- assert(*get_temp_depth() > 0);
- (*get_temp_depth())--;
- }
- }
- int pop_temps();
- int pop_temps2() {
- while (*get_temp_depth() > 0) {
- pop_var(1);
- }
- }
- int find_in_stack(char *var_name);
- int find_in_stack2(char *var_name) {
- int i;
- for (i = 0; i < *get_stack_depth(); i++) {
- if (strcmp(var_name, get_stack_vars() + (*get_stack_depth() - 1 - i) * MAX_TOKEN_LEN) == 0) {
- return i;
- }
- }
- return -1;
- }
int is_whitespace(char x);

/* Return 1 when x is a space, a tab or a newline, and 0 otherwise. */
int is_whitespace2(char x) {
    switch (x) {
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
- char *get_token();
- char *get_token2() {
- if (*get_token_given_back()) {
- *get_token_given_back() = 0;
- return get_token_buf();
- }
- int x;
- *get_token_len() = 0;
- int state = 0;
- while (1) {
- x = platform_read_char(*get_read_fd());
- if (x == -1) {
- break;
- }
- int save_char = 0;
- if (state == 0) {
- if (is_whitespace(x)) {
- if (*get_token_len() > 0) {
- break;
- }
- } else if ((char) x == '#') {
- state = 1;
- } else {
- if ((char) x == '"') {
- state = 2;
- } else if ((char) x == '\'') {
- state = 4;
- }
- save_char = 1;
- }
- } else if (state == 1) {
- if ((char) x == '\n') {
- state = 0;
- if (*get_token_len() > 0) {
- break;
- }
- }
- } else if (state == 2) {
- if ((char) x == '"') {
- state = 0;
- } else if ((char) x == '\\') {
- state = 3;
- }
- save_char = 1;
- } else if (state == 3) {
- state = 2;
- save_char = 1;
- } else if (state == 4) {
- if ((char) x == '\'') {
- state = 0;
- } else if ((char) x == '\\') {
- state = 5;
- }
- save_char = 1;
- } else if (state == 5) {
- state = 4;
- save_char = 1;
- } else {
- assert(0);
- }
- if (save_char) {
- get_token_buf()[(*get_token_len())++] = (char) x;
- }
- }
- get_token_buf()[*get_token_len()] = '\0';
- return get_token_buf();
- }
- void give_back_token();
- void give_back_token2() {
- assert(!*get_token_given_back());
- *get_token_given_back() = 1;
- }
char escaped(char x);

/*
 * Translate the character following a backslash into the character it
 * denotes. Recognised: n, t, 0, backslash, single quote and double quote.
 * Any other character yields 0.
 */
char escaped2(char x) {
    switch (x) {
    case 'n':
        return '\n';
    case 't':
        return '\t';
    case '0':
        return '\0';
    case '\\':
        return '\\';
    case '\'':
        return '\'';
    case '"':
        return '"';
    default:
        return 0;
    }
}
void emit_escaped_string(char *s);

/*
 * Emit the contents of the double-quoted string token s.
 *
 * s must start with '"' and end with a closing '"' immediately followed by
 * the terminator. The quotes are not emitted. A backslash causes the next
 * character to be translated through escaped() before being emitted.
 */
void emit_escaped_string2(char *s) {
    char *p = s;
    assert(*p == '"');
    for (p++; ; p++) {
        assert(*p != 0);
        if (*p == '"') {
            break;
        }
        char out;
        if (*p != '\\') {
            out = *p;
        } else {
            p++;
            assert(*p != 0);
            out = escaped(*p);
        }
        emit(out);
    }
    // Nothing may follow the closing quote
    assert(p[1] == 0);
}
int decode_number_or_char(const char *operand, unsigned int *num);

/*
 * Decode operand either as a character literal or as a number.
 *
 * When operand starts with a single quote it must have the exact form 'c' or
 * '\c'; the character value (translated through escaped() in the second
 * form) is stored in *num and 1 is returned. Malformed literals trip
 * assert(). Any other operand is handed to decode_number(), whose result is
 * returned as is.
 *
 * Each character is checked for the terminator before the following one is
 * inspected, so a truncated literal such as "'" or "'\" fails the assertion
 * instead of being read past its end.
 */
int decode_number_or_char2(const char *operand, unsigned int *num) {
    if (*operand != '\'') {
        return decode_number(operand, num);
    }

    assert(operand[1] != 0);
    if (operand[1] == '\\') {
        assert(operand[2] != 0);
        *num = escaped(operand[2]);
        assert(operand[3] == '\'');
        assert(operand[4] == 0);
    } else {
        *num = operand[1];
        assert(operand[2] == '\'');
        assert(operand[3] == 0);
    }
    return 1;
}
- int compute_rel(int addr);
- int compute_rel2(int addr) {
- return addr - *get_current_loc() - 4;
- }
- void push_expr(char *tok, int want_addr);
- void push_expr2(char *tok, int want_addr) {
- // Try to interpret as a number
- int val;
- if (decode_number_or_char(tok, &val)) {
- assert(!want_addr);
- push_var(TEMP_VAR, 1);
- emit(0x68); // push val
- emit32(val);
- return;
- }
- // Look for the name in the stack
- int pos = find_in_stack(tok);
- if (pos != -1) {
- if (want_addr) {
- push_var(TEMP_VAR, 1);
- emit_str("\x8d\x84\x24", 3); // lea eax, [esp+pos]
- emit32(4 * pos);
- emit(0x50); // push eax
- } else {
- push_var(TEMP_VAR, 1);
- emit_str("\xff\xb4\x24", 3); // push [esp+pos]
- emit32(4 * pos);
- }
- } else {
- int arity;
- int loc = get_symbol(tok, &arity);
- if (arity == -2) {
- assert(!want_addr);
- }
- if (want_addr || arity == -2) {
- push_var(TEMP_VAR, 1);
- emit(0x68); // push loc
- emit32(loc);
- } else {
- if (arity == -1) {
- push_var(TEMP_VAR, 1);
- emit(0xb8); // mov eax, loc
- emit32(loc);
- emit_str("\xff\x30", 2); // push [eax]
- } else {
- emit(0xe8); // call rel
- emit32(compute_rel(loc));
- emit_str("\x81\xc4", 2); // add esp, ...
- emit32(4 * arity);
- while (arity > 0) {
- pop_var(1);
- arity--;
- }
- push_var(TEMP_VAR, 1);
- emit(0x50); // push eax
- }
- }
- }
- }
- void push_expr_until_brace();
- void push_expr_until_brace2() {
- while (1) {
- char *tok = get_token();
- if (strcmp(tok, "{") == 0) {
- give_back_token();
- break;
- } else if (*tok == '"') {
- int str_lab = gen_label();
- int jmp_lab = gen_label();
- emit(0xe9); // jmp rel
- emit32(compute_rel(get_symbol(write_label(jmp_lab), 0)));
- add_symbol_wrapper(write_label(str_lab), *get_current_loc(), -1);
- emit_escaped_string(tok);
- emit(0);
- add_symbol_wrapper(write_label(jmp_lab), *get_current_loc(), -1);
- push_var(TEMP_VAR, 1);
- emit(0x68); // push val
- emit32(get_symbol(write_label(str_lab), 0));
- } else {
- // Check if we want the address
- int want_addr = 0;
- if (*tok == '@') {
- tok++;
- want_addr = 1;
- }
- push_expr(tok, want_addr);
- }
- }
- assert(*get_temp_depth() > 0);
- }
int decode_number_or_symbol(char *str);

/*
 * Return the value of str: the decoded number or character literal when
 * decode_number_or_char() accepts it, otherwise the location of the symbol
 * with that name as returned by get_symbol().
 */
int decode_number_or_symbol2(char *str) {
    // decode_number_or_char() is declared to take unsigned int *, so the
    // variable it fills in must be unsigned too.
    unsigned int val;
    if (decode_number_or_char(str, &val)) {
        return (int) val;
    }

    int arity;
    return get_symbol(str, &arity);
}
- void parse_block();
- void parse_block2() {
- (*get_block_depth())++;
- int saved_stack_depth = *get_stack_depth();
- char *tok = get_token();
- assert(strcmp(tok, "{") == 0);
- while (1) {
- char *tok = get_token();
- //fprintf(stderr, "Parsing token %s\n", tok);
- assert(*tok != '\0');
- if (strcmp(tok, "}") == 0) {
- break;
- } else if (strcmp(tok, ";") == 0) {
- emit_str("\x81\xc4", 2); // add esp, ...
- emit32(4 * *get_temp_depth());
- pop_temps();
- } else if (strcmp(tok, "ret") == 0) {
- if (*get_temp_depth() > 0) {
- emit(0x58); // pop eax
- pop_var(1);
- }
- emit_str("\x81\xc4", 2); // add esp, ..
- emit32(4 * *get_stack_depth());
- emit_str("\x5d\xc3", 2); // pop ebp; ret
- } else if (strcmp(tok, "if") == 0) {
- push_expr_until_brace();
- int else_lab = gen_label();
- pop_var(1);
- emit_str("\x58\x83\xF8\x00\x0F\x84", 6); // pop eax; cmp eax, 0; je rel
- emit32(compute_rel(get_symbol(write_label(else_lab), 0)));
- parse_block();
- char *else_tok = get_token();
- if (strcmp(else_tok, "else") == 0) {
- int fi_lab = gen_label();
- emit(0xe9); // jmp rel
- emit32(compute_rel(get_symbol(write_label(fi_lab), 0)));
- add_symbol_wrapper(write_label(else_lab), *get_current_loc(), -1);
- parse_block();
- add_symbol_wrapper(write_label(fi_lab), *get_current_loc(), -1);
- } else {
- add_symbol_wrapper(write_label(else_lab), *get_current_loc(), -1);
- give_back_token();
- }
- } else if (strcmp(tok, "while") == 0) {
- int restart_lab = gen_label();
- int end_lab = gen_label();
- add_symbol_wrapper(write_label(restart_lab), *get_current_loc(), -1);
- push_expr_until_brace();
- pop_var(1);
- emit_str("\x58\x83\xF8\x00\x0F\x84", 6); // pop eax; cmp eax, 0; je rel
- emit32(compute_rel(get_symbol(write_label(end_lab), 0)));
- parse_block();
- emit(0xe9); // jmp rel
- emit32(compute_rel(get_symbol(write_label(restart_lab), 0)));
- add_symbol_wrapper(write_label(end_lab), *get_current_loc(), -1);
- } else if (*tok == '$') {
- char *name = tok + 1;
- assert(*name != '\0');
- push_var(name, 0);
- emit_str("\x83\xec\x04", 3); // sub esp, 4
- } else if (*tok == '\\') {
- char *arity_str = tok + 1;
- assert(*arity_str != '\0');
- int arity = decode_number_or_symbol(arity_str);
- pop_var(1);
- emit_str("\x58\xff\xd0", 3); // pop eax; call eax
- emit_str("\x81\xc4", 2); // add esp, ...
- emit32(4 * arity);
- while (arity > 0) {
- pop_var(1);
- arity--;
- }
- push_var(TEMP_VAR, 1);
- emit(0x50); // push eax
- } else if (*tok == '"') {
- int str_lab = gen_label();
- int jmp_lab = gen_label();
- emit(0xe9); // jmp rel
- emit32(compute_rel(get_symbol(write_label(jmp_lab), 0)));
- add_symbol_wrapper(write_label(str_lab), *get_current_loc(), -1);
- emit_escaped_string(tok);
- emit(0);
- add_symbol_wrapper(write_label(jmp_lab), *get_current_loc(), -1);
- push_var(TEMP_VAR, 1);
- emit(0x68); // push val
- emit32(get_symbol(write_label(str_lab), 0));
- } else {
- // Check if we want the address
- int want_addr = 0;
- if (*tok == '@') {
- tok++;
- want_addr = 1;
- }
- push_expr(tok, want_addr);
- }
- //fprintf(stderr, "Stack depth: %d; temp depth: %d; block depth: %d\n", *get_stack_depth(), *get_temp_depth(), *get_block_depth());
- }
- emit_str("\x81\xc4", 2); // add esp, ..
- assert(*get_stack_depth() >= saved_stack_depth);
- emit32(4 * (*get_stack_depth() - saved_stack_depth));
- *get_stack_depth() = saved_stack_depth;
- (*get_block_depth())--;
- }
- void parse();
- void parse2() {
- while (1) {
- char *tok = get_token();
- if (*tok == 0) {
- break;
- }
- if (strcmp(tok, "fun") == 0) {
- char *name = get_token();
- strcpy(get_buf2(), name);
- name = get_buf2();
- char *arity_str = get_token();
- int arity = atoi(arity_str);
- fix_symbol_placeholder(name, *get_current_loc(), arity);
- emit_str("\x55\x89\xe5", 3); // push ebp; mov ebp, esp
- parse_block();
- emit_str("\x5d\xc3", 2); // pop ebp; ret
- } else if (strcmp(tok, "ifun") == 0) {
- char *name = get_token();
- strcpy(get_buf2(), name);
- name = get_buf2();
- char *arity_str = get_token();
- int arity = atoi(arity_str);
- add_symbol_placeholder(name, arity);
- } else if (strcmp(tok, "const") == 0) {
- char *name = get_token();
- strcpy(get_buf2(), name);
- name = get_buf2();
- char *val_str = get_token();
- int val = decode_number_or_symbol(val_str);
- add_symbol_wrapper(name, val, -2);
- } else if (*tok == '$') {
- char *name = tok + 1;
- assert(*name != '\0');
- add_symbol_wrapper(name, *get_current_loc(), -1);
- emit32(0);
- } else if (*tok == '%') {
- char *name = tok + 1;
- assert(*name != '\0');
- add_symbol_wrapper(name, *get_current_loc(), -2);
- char *len_str = get_token();
- int len = decode_number_or_symbol(len_str);
- while (len > 0) {
- emit(0);
- len--;
- }
- } else {
- assert(0);
- }
- }
- }
- void emit_preamble() {
- /*
- 0: 8b 44 24 04 mov eax,DWORD PTR [esp+0x4]
- 4: 8b 4c 24 08 mov ecx,DWORD PTR [esp+0x8]
- 8: 89 01 mov DWORD PTR [ecx],eax
- a: c3 ret
- */
- add_symbol_wrapper("=", *get_current_loc(), 2);
- emit_str("\x8B\x44\x24\x04\x8B\x4C\x24\x08\x89\x01\xC3", 11);
- /*
- 0: 8b 44 24 04 mov eax,DWORD PTR [esp+0x4]
- 4: 8b 44 85 08 mov eax,DWORD PTR [ebp+eax*4+0x8]
- 8: c3 ret
- */
- add_symbol_wrapper("param", *get_current_loc(), 1);
- emit_str("\x8B\x44\x24\x04\x8B\x44\x85\x08\xC3", 9);
- /*
- 0: 8b 44 24 04 mov eax,DWORD PTR [esp+0x4]
- 4: 03 44 24 08 add eax,DWORD PTR [esp+0x8]
- 8: c3 ret
- */
- add_symbol_wrapper("+", *get_current_loc(), 2);
- emit_str("\x8B\x44\x24\x04\x03\x44\x24\x08\xC3", 9);
- /*
- 0: 8b 44 24 04 mov eax,DWORD PTR [esp+0x4]
- 4: 2b 44 24 08 sub eax,DWORD PTR [esp+0x8]
- 8: c3 ret
- */
- add_symbol_wrapper("-", *get_current_loc(), 2);
- emit_str("\x8B\x44\x24\x04\x2b\x44\x24\x08\xC3", 9);
- }
- void compile(int fd_in, int fd_out, int start_loc, int with_preamble);
- void compile2(int fd_in, int fd_out, int start_loc, int with_preamble) {
- *get_emit_fd() = fd_out;
- *get_read_fd() = fd_in;
- *get_block_depth() = 0;
- *get_stack_depth() = 0;
- *get_temp_depth() = 0;
- for (*get_stage() = 0; *get_stage() < 2; (*get_stage())++) {
- platform_reset_file(fd_in);
- *get_label_num() = 0;
- *get_current_loc() = start_loc;
- if (with_preamble) {
- emit_preamble();
- }
- parse();
- assert(*get_block_depth() == 0);
- assert(*get_stack_depth() == 0);
- assert(*get_temp_depth() == 0);
- }
- }
/*
 * Entry point: compile the source file named by the first command-line
 * argument, writing the output to file descriptor 1, with code located at
 * 0x100000 and the preamble included.
 *
 * Returns 0 on success and 1 when no source file was given.
 */
int main(int argc, char **argv) {
    // argv[1] is only valid when at least one argument was passed
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <source-file>\n", argc > 0 ? argv[0] : "g");
        return 1;
    }

    init_symbols();
    init_g_compiler();

    int fd_in = platform_open_file(argv[1]);
    int fd_out = 1;
    compile(fd_in, fd_out, 0x100000, 1);
    return 0;
}
|