reader.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. #include "reader.h"
  2. #include "alloc.h"
  3. #include <string.h>
  4. #include <stdint.h>
  5. Cell* reader_next_list_cell(Cell* cell, ReaderState* rs) {
  6. cell->next = alloc_nil();
  7. cell = cell->next;
  8. rs->state = PST_ATOM;
  9. return cell;
  10. }
  11. Cell* reader_end_list(Cell* cell, ReaderState* rs) {
  12. if (rs->level<1) {
  13. rs->state = PST_ERR_UNEXP_CLOSING_BRACE;
  14. return cell;
  15. }
  16. rs->level--;
  17. rs->stack--;
  18. if (cell->addr) cell->next = alloc_nil();
  19. cell = *rs->stack;
  20. Cell* tmpc = cell;
  21. cell = reader_next_list_cell(cell, rs);
  22. rs->state = PST_ATOM;
  23. return cell;
  24. }
  25. ReaderState* read_char(char c, ReaderState* rs) {
  26. Cell* cell = rs->cell;
  27. Cell* new_cell;
  28. if (!cell) {
  29. // make a root
  30. cell = alloc_nil();
  31. cell->next = alloc_nil();
  32. *rs->stack = cell;
  33. }
  34. if (rs->state == PST_ATOM) {
  35. if (c==' ' || c==13 || c==10) {
  36. // skip whitespace
  37. } else if (c==';') {
  38. // comment
  39. rs->state = PST_COMMENT;
  40. } else if (c>='0' && c<='9') {
  41. rs->state = PST_NUM;
  42. rs->valuestate = VST_DEFAULT;
  43. new_cell = alloc_int(0);
  44. new_cell->value = c-'0';
  45. cell->addr = new_cell;
  46. } else if (c=='(') {
  47. // start list
  48. new_cell = alloc_nil();
  49. cell->addr = new_cell;
  50. *rs->stack = cell;
  51. cell = new_cell;
  52. rs->stack++;
  53. rs->level++;
  54. rs->state = PST_ATOM;
  55. } else if (c==')') {
  56. // end of list
  57. cell = reader_end_list(cell, rs);
  58. } else if (c=='[') {
  59. // bytes (hex notation)
  60. rs->state = PST_BYTES;
  61. rs->sym_len = 0;
  62. new_cell = alloc_bytes();
  63. cell->addr = new_cell;
  64. } else if (c=='"') {
  65. // string
  66. rs->state = PST_STR;
  67. rs->sym_len = 0;
  68. new_cell = alloc_string();
  69. cell->addr = new_cell;
  70. } else {
  71. // symbol
  72. rs->state = PST_SYM;
  73. rs->sym_len = 1;
  74. new_cell = alloc_num_bytes(SYM_INIT_BUFFER_SIZE);
  75. new_cell->tag = TAG_SYM;
  76. memset(new_cell->addr, 0, SYM_INIT_BUFFER_SIZE);
  77. ((char*)new_cell->addr)[0] = c;
  78. new_cell->size = SYM_INIT_BUFFER_SIZE; // buffer space
  79. cell->addr = new_cell;
  80. }
  81. } else if (rs->state == PST_COMMENT) {
  82. if (c=='\n' || c==0) {
  83. rs->state = PST_ATOM;
  84. }
  85. } else if (rs->state == PST_NUM || rs->state == PST_NUM_NEG) {
  86. if (c>='0' && c<='9' || ((rs->valuestate == VST_HEX && c>='a' && c<='f'))) {
  87. // build number
  88. Cell* vcell = (Cell*)cell->addr;
  89. int mul = 10;
  90. if (rs->valuestate == VST_HEX) mul = 16;
  91. int d = 0;
  92. if (c>='a') {
  93. d = 10+(c-'a');
  94. } else {
  95. d = c-'0';
  96. }
  97. if (rs->state == PST_NUM_NEG) {
  98. vcell->value = vcell->value*mul - d;
  99. } else {
  100. vcell->value = vcell->value*mul + d;
  101. }
  102. } else if (c==' ' || c==13 || c==10) {
  103. cell = reader_next_list_cell(cell, rs);
  104. } else if (c==')') {
  105. cell = reader_end_list(cell, rs);
  106. } else if (c=='x') {
  107. rs->valuestate = VST_HEX;
  108. } else {
  109. rs->state = PST_ERR_UNEXP_JUNK_IN_NUMBER;
  110. }
  111. } else if (rs->state == PST_SYM || rs->state == PST_STR) {
  112. int append = 0;
  113. if (rs->state == PST_STR) {
  114. if (c=='"') {
  115. // string is over
  116. Cell* vcell = (Cell*)cell->addr;
  117. vcell->size = (rs->sym_len);
  118. cell = reader_next_list_cell(cell, rs);
  119. } else {
  120. append = 1;
  121. }
  122. }
  123. else {
  124. if (c==')') {
  125. cell = reader_end_list(cell, rs);
  126. } else if (c==' ' || c==13 || c==10) {
  127. cell = reader_next_list_cell(cell, rs);
  128. } else if (rs->state == PST_SYM && (c>='0' && c<='9')) {
  129. Cell* vcell = (Cell*)cell->addr;
  130. // detect negative number
  131. if (((char*)vcell->addr)[0] == '-') {
  132. // we're actually not a symbol, correct the cell.
  133. rs->state = PST_NUM_NEG;
  134. vcell->tag = TAG_INT;
  135. vcell->value = -(c-'0');
  136. } else {
  137. append = 1;
  138. }
  139. } else {
  140. append = 1;
  141. }
  142. }
  143. if (append) {
  144. // build symbol/string
  145. Cell* vcell = (Cell*)cell->addr;
  146. int idx = rs->sym_len;
  147. rs->sym_len++;
  148. if (rs->sym_len>=vcell->size-1) {
  149. // grow buffer
  150. vcell->addr = cell_realloc(vcell->addr, vcell->size, 2*vcell->size);
  151. memset(vcell->addr+vcell->size, 0, vcell->size);
  152. vcell->size = 2*vcell->size;
  153. }
  154. ((char*)vcell->addr)[idx] = c;
  155. }
  156. } else if (rs->state == PST_BYTES) {
  157. if (c==']') {
  158. Cell* vcell = (Cell*)cell->addr;
  159. vcell->size = (rs->sym_len)/2;
  160. cell = reader_next_list_cell(cell, rs);
  161. } else if ((c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F')) {
  162. int n = c;
  163. if (n>='a') n-=('a'-'9'-1); // hex 'a' to 10 offset
  164. if (n>='A') n-=('A'-'9'-1); // hex 'a' to 10 offset
  165. n-='0'; // char to value
  166. Cell* vcell = (Cell*)cell->addr;
  167. int idx = rs->sym_len;
  168. rs->sym_len++;
  169. if (rs->sym_len>=(vcell->size/2)-1) {
  170. // grow buffer
  171. vcell->addr = cell_realloc(vcell->addr, vcell->size, 2*vcell->size); // TODO: check the math
  172. memset(vcell->addr+vcell->size, 0, vcell->size);
  173. vcell->size = 2*vcell->size;
  174. }
  175. if (idx%2==0) { // even digit
  176. ((uint8_t*)vcell->addr)[idx/2] = n<<4; // high nybble
  177. } else { // odd digit
  178. ((uint8_t*)vcell->addr)[idx/2] |= n;
  179. }
  180. } else if (c==' ' || c==13 || c==10) {
  181. // skip
  182. } else {
  183. rs->state = PST_ERR_UNEXP_JUNK_IN_BYTES;
  184. }
  185. }
  186. rs->cell = cell;
  187. return rs;
  188. }
  189. Cell* read_string(char* in) {
  190. ReaderState rs;
  191. Cell stack_root[100];
  192. rs.state = PST_ATOM;
  193. rs.cell = 0;
  194. rs.level = 0;
  195. rs.stack = (void*)&stack_root;
  196. int i=0;
  197. int len = strlen(in);
  198. for (i=0; i<len; i++) {
  199. read_char(in[i], &rs);
  200. if (rs.state>=10) {
  201. //print("<read error %d at %d.>\n",rs.state,i);
  202. return alloc_error(ERR_SYNTAX);
  203. }
  204. //printf("rs %c: %d\n", in[i], rs.state);
  205. }
  206. if (rs.level!=0) {
  207. //print("<missing %d closing parens.>\r\n",rs.level);
  208. return alloc_error(ERR_SYNTAX);
  209. }
  210. if (rs.state!=PST_ATOM) {
  211. //printf("<read error: unexpected end of input.>\n");
  212. //return alloc_error(ERR_SYNTAX);
  213. }
  214. Cell* root = *rs.stack;
  215. if (root) {
  216. Cell* ret = car(root);
  217. //if (root->next) free(root->next);
  218. //free(root);
  219. return ret;
  220. }
  221. return alloc_error(ERR_SYNTAX);
  222. }
  223. Cell* read_string_cell(Cell* in) {
  224. if (!in) return alloc_nil();
  225. if (!in->size) return alloc_nil();
  226. char* str = (char*)in->addr;
  227. str[in->size]=0;
  228. //printf("read[%s]\r\n",str);
  229. return read_string(str);
  230. }