// reader.c: incremental S-expression reader (lists, integers, symbols, strings, hex byte literals).
  1. #include "reader.h"
  2. #include "alloc.h"
  3. #include <string.h>
  4. Cell* reader_next_list_cell(Cell* cell, ReaderState* rs) {
  5. cell->dr.next = alloc_nil();
  6. cell = cell->dr.next;
  7. rs->state = PST_ATOM;
  8. return cell;
  9. }
  10. Cell* reader_end_list(Cell* cell, ReaderState* rs) {
  11. if (rs->level<1) {
  12. rs->state = PST_ERR_UNEXP_CLOSING_BRACE;
  13. return cell;
  14. }
  15. rs->level--;
  16. rs->stack--;
  17. if (cell->ar.addr) cell->dr.next = alloc_nil();
  18. cell = *rs->stack;
  19. cell = reader_next_list_cell(cell, rs);
  20. rs->state = PST_ATOM;
  21. return cell;
  22. }
  23. ReaderState* read_char(char c, ReaderState* rs) {
  24. Cell* cell = rs->cell;
  25. Cell* new_cell;
  26. if (!cell) {
  27. // make a root
  28. cell = alloc_nil();
  29. cell->dr.next = alloc_nil();
  30. *rs->stack = cell;
  31. }
  32. if (rs->state == PST_ATOM) {
  33. if (c==' ' || c==13 || c==10) {
  34. // skip whitespace
  35. } else if (c==';') {
  36. // comment
  37. rs->state = PST_COMMENT;
  38. } else if (c>='0' && c<='9') {
  39. rs->state = PST_NUM;
  40. rs->valuestate = VST_DEFAULT;
  41. new_cell = alloc_int(0);
  42. new_cell->ar.value = c-'0';
  43. cell->ar.addr = new_cell;
  44. } else if (c=='(') {
  45. // start list
  46. new_cell = alloc_nil();
  47. cell->ar.addr = new_cell;
  48. *rs->stack = cell;
  49. cell = new_cell;
  50. rs->stack++;
  51. rs->level++;
  52. rs->state = PST_ATOM;
  53. } else if (c==')') {
  54. // end of list
  55. cell = reader_end_list(cell, rs);
  56. } else if (c=='[') {
  57. // bytes (hex notation)
  58. rs->state = PST_BYTES;
  59. rs->sym_len = 0;
  60. new_cell = alloc_bytes();
  61. cell->ar.addr = new_cell;
  62. } else if (c=='"') {
  63. // string
  64. rs->state = PST_STR;
  65. rs->sym_len = 0;
  66. new_cell = alloc_string();
  67. cell->ar.addr = new_cell;
  68. } else {
  69. // symbol
  70. rs->state = PST_SYM;
  71. rs->sym_len = 1;
  72. new_cell = alloc_num_bytes(SYM_INIT_BUFFER_SIZE);
  73. new_cell->tag = TAG_SYM;
  74. memset(new_cell->ar.addr, 0, SYM_INIT_BUFFER_SIZE);
  75. ((char*)new_cell->ar.addr)[0] = c;
  76. new_cell->dr.size = SYM_INIT_BUFFER_SIZE; // buffer space
  77. cell->ar.addr = new_cell;
  78. }
  79. } else if (rs->state == PST_COMMENT) {
  80. if (c=='\n' || c==0) {
  81. rs->state = PST_ATOM;
  82. }
  83. } else if (rs->state == PST_NUM || rs->state == PST_NUM_NEG) {
  84. if ((c>='0' && c<='9') || ((rs->valuestate == VST_HEX && c>='a' && c<='f'))) {
  85. // build number
  86. Cell* vcell = (Cell*)cell->ar.addr;
  87. int mul = 10, d = 0;
  88. if (rs->valuestate == VST_HEX) mul = 16;
  89. if (c>='a') {
  90. d = 10+(c-'a');
  91. } else {
  92. d = c-'0';
  93. }
  94. if (rs->state == PST_NUM_NEG) {
  95. vcell->ar.value = vcell->ar.value*mul - d;
  96. } else {
  97. vcell->ar.value = vcell->ar.value*mul + d;
  98. }
  99. } else if (c==' ' || c==13 || c==10) {
  100. cell = reader_next_list_cell(cell, rs);
  101. } else if (c==')') {
  102. cell = reader_end_list(cell, rs);
  103. } else if (c=='x') {
  104. rs->valuestate = VST_HEX;
  105. } else {
  106. rs->state = PST_ERR_UNEXP_JUNK_IN_NUMBER;
  107. }
  108. } else if (rs->state == PST_SYM || rs->state == PST_STR) {
  109. int append = 0;
  110. if (rs->state == PST_STR) {
  111. if (c=='"') {
  112. // string is over
  113. Cell* vcell = (Cell*)cell->ar.addr;
  114. vcell->dr.size = (rs->sym_len);
  115. cell = reader_next_list_cell(cell, rs);
  116. } else {
  117. append = 1;
  118. }
  119. }
  120. else {
  121. if (c==')') {
  122. cell = reader_end_list(cell, rs);
  123. } else if (c==' ' || c==13 || c==10) {
  124. cell = reader_next_list_cell(cell, rs);
  125. } else if (rs->state == PST_SYM && (c>='0' && c<='9')) {
  126. Cell* vcell = (Cell*)cell->ar.addr;
  127. // detect negative number
  128. if (((char*)vcell->ar.addr)[0] == '-') {
  129. // we're actually not a symbol, correct the cell.
  130. rs->state = PST_NUM_NEG;
  131. vcell->tag = TAG_INT;
  132. vcell->ar.value = -(c-'0');
  133. } else {
  134. append = 1;
  135. }
  136. } else {
  137. append = 1;
  138. }
  139. }
  140. if (append) {
  141. // build symbol/string
  142. Cell* vcell = (Cell*)cell->ar.addr;
  143. int idx = rs->sym_len;
  144. rs->sym_len++;
  145. if (rs->sym_len>=vcell->dr.size-1) {
  146. // grow buffer
  147. vcell->ar.addr = cell_realloc(vcell->ar.addr, vcell->dr.size, 2*vcell->dr.size);
  148. memset((char*)vcell->ar.addr+vcell->dr.size, 0, vcell->dr.size);
  149. vcell->dr.size = 2*vcell->dr.size;
  150. }
  151. ((char*)vcell->ar.addr)[idx] = c;
  152. }
  153. } else if (rs->state == PST_BYTES) {
  154. if (c==']') {
  155. Cell* vcell = (Cell*)cell->ar.addr;
  156. vcell->dr.size = (rs->sym_len)/2;
  157. cell = reader_next_list_cell(cell, rs);
  158. } else if ((c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F')) {
  159. int n = c, idx;
  160. Cell* vcell;
  161. if (n>='a') n-=('a'-'9'-1); // hex 'a' to 10 offset
  162. if (n>='A') n-=('A'-'9'-1); // hex 'a' to 10 offset
  163. n-='0'; // char to value
  164. vcell = (Cell*)cell->ar.addr;
  165. idx = rs->sym_len;
  166. rs->sym_len++;
  167. if (rs->sym_len>=(vcell->dr.size/2)-1) {
  168. // grow buffer
  169. vcell->ar.addr = cell_realloc(vcell->ar.addr, vcell->dr.size, 2*vcell->dr.size); // TODO: check the math
  170. memset((char*)vcell->ar.addr+vcell->dr.size, 0, vcell->dr.size);
  171. vcell->dr.size = 2*vcell->dr.size;
  172. }
  173. if (idx%2==0) { // even digit
  174. ((uint8_t*)vcell->ar.addr)[idx/2] = n<<4; // high nybble
  175. } else { // odd digit
  176. ((uint8_t*)vcell->ar.addr)[idx/2] |= n;
  177. }
  178. } else if (c==' ' || c==13 || c==10) {
  179. // skip
  180. } else {
  181. rs->state = PST_ERR_UNEXP_JUNK_IN_BYTES;
  182. }
  183. }
  184. rs->cell = cell;
  185. return rs;
  186. }
  187. Cell* read_string(char* in) {
  188. ReaderState rs;
  189. int i, len;
  190. Cell stack_root[32];
  191. Cell* root;
  192. Cell* ret_cell;
  193. rs.state = PST_ATOM;
  194. rs.cell = 0;
  195. rs.level = 0;
  196. rs.stack = (void*)&stack_root;
  197. i=0;
  198. len = strlen(in);
  199. for (i=0; i<len; i++) {
  200. read_char(in[i], &rs);
  201. if (rs.state>=10) {
  202. //print("<read error %d at %d.>\n",rs.state,i);
  203. return alloc_error(ERR_SYNTAX);
  204. }
  205. //printf("rs %c: %d\n", in[i], rs.state);
  206. }
  207. if (rs.level!=0) {
  208. //print("<missing %d closing parens.>\r\n",rs.level);
  209. return alloc_error(ERR_SYNTAX);
  210. }
  211. if (rs.state!=PST_ATOM) {
  212. //printf("<read error: unexpected end of input.>\n");
  213. //return alloc_error(ERR_SYNTAX);
  214. }
  215. root = *rs.stack;
  216. if (root) {
  217. ret_cell = car(root);
  218. //if (root->dr.next) free(root->dr.next);
  219. //free(root);
  220. return ret_cell;
  221. }
  222. return alloc_error(ERR_SYNTAX);
  223. }
  224. Cell* read_string_cell(Cell* in) {
  225. char* str;
  226. if (!in) return alloc_nil();
  227. if (!in->dr.size) return alloc_nil();
  228. str = (char*)in->ar.addr;
  229. str[in->dr.size]=0;
  230. //printf("read[%s]\r\n",str);
  231. return read_string(str);
  232. }