/* reader.c */

  1. #include "reader.h"
  2. #include "alloc.h"
  3. #include <string.h>
  4. #include <stdint.h>
  5. Cell* reader_next_list_cell(Cell* cell, ReaderState* rs) {
  6. cell->dr.next = alloc_nil();
  7. cell = cell->dr.next;
  8. rs->state = PST_ATOM;
  9. return cell;
  10. }
  11. Cell* reader_end_list(Cell* cell, ReaderState* rs) {
  12. Cell* tmpc;
  13. if (rs->level<1) {
  14. rs->state = PST_ERR_UNEXP_CLOSING_BRACE;
  15. return cell;
  16. }
  17. rs->level--;
  18. rs->stack--;
  19. if (cell->ar.addr) cell->dr.next = alloc_nil();
  20. cell = *rs->stack;
  21. tmpc = cell;
  22. cell = reader_next_list_cell(cell, rs);
  23. rs->state = PST_ATOM;
  24. return cell;
  25. }
  26. ReaderState* read_char(char c, ReaderState* rs) {
  27. Cell* cell = rs->cell;
  28. Cell* new_cell;
  29. if (!cell) {
  30. // make a root
  31. cell = alloc_nil();
  32. cell->dr.next = alloc_nil();
  33. *rs->stack = cell;
  34. }
  35. if (rs->state == PST_ATOM) {
  36. if (c==' ' || c==13 || c==10) {
  37. // skip whitespace
  38. } else if (c==';') {
  39. // comment
  40. rs->state = PST_COMMENT;
  41. } else if (c>='0' && c<='9') {
  42. rs->state = PST_NUM;
  43. rs->valuestate = VST_DEFAULT;
  44. new_cell = alloc_int(0);
  45. new_cell->ar.value = c-'0';
  46. cell->ar.addr = new_cell;
  47. } else if (c=='(') {
  48. // start list
  49. new_cell = alloc_nil();
  50. cell->ar.addr = new_cell;
  51. *rs->stack = cell;
  52. cell = new_cell;
  53. rs->stack++;
  54. rs->level++;
  55. rs->state = PST_ATOM;
  56. } else if (c==')') {
  57. // end of list
  58. cell = reader_end_list(cell, rs);
  59. } else if (c=='[') {
  60. // bytes (hex notation)
  61. rs->state = PST_BYTES;
  62. rs->sym_len = 0;
  63. new_cell = alloc_bytes();
  64. cell->ar.addr = new_cell;
  65. } else if (c=='"') {
  66. // string
  67. rs->state = PST_STR;
  68. rs->sym_len = 0;
  69. new_cell = alloc_string();
  70. cell->ar.addr = new_cell;
  71. } else {
  72. // symbol
  73. rs->state = PST_SYM;
  74. rs->sym_len = 1;
  75. new_cell = alloc_num_bytes(SYM_INIT_BUFFER_SIZE);
  76. new_cell->tag = TAG_SYM;
  77. memset(new_cell->ar.addr, 0, SYM_INIT_BUFFER_SIZE);
  78. ((char*)new_cell->ar.addr)[0] = c;
  79. new_cell->dr.size = SYM_INIT_BUFFER_SIZE; // buffer space
  80. cell->ar.addr = new_cell;
  81. }
  82. } else if (rs->state == PST_COMMENT) {
  83. if (c=='\n' || c==0) {
  84. rs->state = PST_ATOM;
  85. }
  86. } else if (rs->state == PST_NUM || rs->state == PST_NUM_NEG) {
  87. if (c>='0' && c<='9' || ((rs->valuestate == VST_HEX && c>='a' && c<='f'))) {
  88. // build number
  89. Cell* vcell = (Cell*)cell->ar.addr;
  90. int mul = 10, d = 0;
  91. if (rs->valuestate == VST_HEX) mul = 16;
  92. if (c>='a') {
  93. d = 10+(c-'a');
  94. } else {
  95. d = c-'0';
  96. }
  97. if (rs->state == PST_NUM_NEG) {
  98. vcell->ar.value = vcell->ar.value*mul - d;
  99. } else {
  100. vcell->ar.value = vcell->ar.value*mul + d;
  101. }
  102. } else if (c==' ' || c==13 || c==10) {
  103. cell = reader_next_list_cell(cell, rs);
  104. } else if (c==')') {
  105. cell = reader_end_list(cell, rs);
  106. } else if (c=='x') {
  107. rs->valuestate = VST_HEX;
  108. } else {
  109. rs->state = PST_ERR_UNEXP_JUNK_IN_NUMBER;
  110. }
  111. } else if (rs->state == PST_SYM || rs->state == PST_STR) {
  112. int append = 0;
  113. if (rs->state == PST_STR) {
  114. if (c=='"') {
  115. // string is over
  116. Cell* vcell = (Cell*)cell->ar.addr;
  117. vcell->dr.size = (rs->sym_len);
  118. cell = reader_next_list_cell(cell, rs);
  119. } else {
  120. append = 1;
  121. }
  122. }
  123. else {
  124. if (c==')') {
  125. cell = reader_end_list(cell, rs);
  126. } else if (c==' ' || c==13 || c==10) {
  127. cell = reader_next_list_cell(cell, rs);
  128. } else if (rs->state == PST_SYM && (c>='0' && c<='9')) {
  129. Cell* vcell = (Cell*)cell->ar.addr;
  130. // detect negative number
  131. if (((char*)vcell->ar.addr)[0] == '-') {
  132. // we're actually not a symbol, correct the cell.
  133. rs->state = PST_NUM_NEG;
  134. vcell->tag = TAG_INT;
  135. vcell->ar.value = -(c-'0');
  136. } else {
  137. append = 1;
  138. }
  139. } else {
  140. append = 1;
  141. }
  142. }
  143. if (append) {
  144. // build symbol/string
  145. Cell* vcell = (Cell*)cell->ar.addr;
  146. int idx = rs->sym_len;
  147. rs->sym_len++;
  148. if (rs->sym_len>=vcell->dr.size-1) {
  149. // grow buffer
  150. vcell->ar.addr = cell_realloc(vcell->ar.addr, vcell->dr.size, 2*vcell->dr.size);
  151. memset((char*)vcell->ar.addr+vcell->dr.size, 0, vcell->dr.size);
  152. vcell->dr.size = 2*vcell->dr.size;
  153. }
  154. ((char*)vcell->ar.addr)[idx] = c;
  155. }
  156. } else if (rs->state == PST_BYTES) {
  157. if (c==']') {
  158. Cell* vcell = (Cell*)cell->ar.addr;
  159. vcell->dr.size = (rs->sym_len)/2;
  160. cell = reader_next_list_cell(cell, rs);
  161. } else if ((c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F')) {
  162. int n = c, idx;
  163. Cell* vcell;
  164. if (n>='a') n-=('a'-'9'-1); // hex 'a' to 10 offset
  165. if (n>='A') n-=('A'-'9'-1); // hex 'a' to 10 offset
  166. n-='0'; // char to value
  167. vcell = (Cell*)cell->ar.addr;
  168. idx = rs->sym_len;
  169. rs->sym_len++;
  170. if (rs->sym_len>=(vcell->dr.size/2)-1) {
  171. // grow buffer
  172. vcell->ar.addr = cell_realloc(vcell->ar.addr, vcell->dr.size, 2*vcell->dr.size); // TODO: check the math
  173. memset((char*)vcell->ar.addr+vcell->dr.size, 0, vcell->dr.size);
  174. vcell->dr.size = 2*vcell->dr.size;
  175. }
  176. if (idx%2==0) { // even digit
  177. ((uint8_t*)vcell->ar.addr)[idx/2] = n<<4; // high nybble
  178. } else { // odd digit
  179. ((uint8_t*)vcell->ar.addr)[idx/2] |= n;
  180. }
  181. } else if (c==' ' || c==13 || c==10) {
  182. // skip
  183. } else {
  184. rs->state = PST_ERR_UNEXP_JUNK_IN_BYTES;
  185. }
  186. }
  187. rs->cell = cell;
  188. return rs;
  189. }
  190. Cell* read_string(char* in) {
  191. ReaderState rs;
  192. int i, len;
  193. Cell stack_root[32];
  194. Cell* root;
  195. Cell* ret_cell;
  196. rs.state = PST_ATOM;
  197. rs.cell = 0;
  198. rs.level = 0;
  199. rs.stack = (void*)&stack_root;
  200. i=0;
  201. len = strlen(in);
  202. for (i=0; i<len; i++) {
  203. read_char(in[i], &rs);
  204. if (rs.state>=10) {
  205. //print("<read error %d at %d.>\n",rs.state,i);
  206. return alloc_error(ERR_SYNTAX);
  207. }
  208. //printf("rs %c: %d\n", in[i], rs.state);
  209. }
  210. if (rs.level!=0) {
  211. //print("<missing %d closing parens.>\r\n",rs.level);
  212. return alloc_error(ERR_SYNTAX);
  213. }
  214. if (rs.state!=PST_ATOM) {
  215. //printf("<read error: unexpected end of input.>\n");
  216. //return alloc_error(ERR_SYNTAX);
  217. }
  218. root = *rs.stack;
  219. if (root) {
  220. ret_cell = car(root);
  221. //if (root->dr.next) free(root->dr.next);
  222. //free(root);
  223. return ret_cell;
  224. }
  225. return alloc_error(ERR_SYNTAX);
  226. }
  227. Cell* read_string_cell(Cell* in) {
  228. char* str;
  229. if (!in) return alloc_nil();
  230. if (!in->dr.size) return alloc_nil();
  231. str = (char*)in->ar.addr;
  232. str[in->dr.size]=0;
  233. //printf("read[%s]\r\n",str);
  234. return read_string(str);
  235. }