/* JsonBencSerializer.c (18 KB) */
  1. /* vim: set expandtab ts=4 sw=4: */
  2. /*
  3. * You may redistribute this program and/or modify it under the terms of
  4. * the GNU General Public License as published by the Free Software Foundation,
  5. * either version 3 of the License, or (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. *
  12. * You should have received a copy of the GNU General Public License
  13. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  15. #include "memory/Allocator.h"
  16. #include "io/Reader.h"
  17. #include "io/Writer.h"
  18. #include "benc/Dict.h"
  19. #include "benc/List.h"
  20. #include "benc/String.h"
  21. #include "benc/serialization/BencSerializer.h"
  22. #include "util/Bits.h"
  23. #include "util/Hex.h"
  24. #include <stdio.h>
  25. #include <inttypes.h>
  26. #include <stdbool.h>
  27. #include <stdlib.h>
  28. #include <errno.h>
  29. static int32_t parseGeneric(struct Reader* reader,
  30. struct Allocator* allocator,
  31. Object** output);
  32. static int32_t serializeGenericWithPadding(struct Writer* writer,
  33. size_t padSpaceCount,
  34. const Object* obj);
  /**
   * Exactly thirty-two space characters.
   *
   * PAD writes up to 32 bytes at a time from this buffer, so the literal must
   * really be 32 characters long; anything shorter makes those writes read
   * past the end of the string. It is spelled as four groups of eight spaces
   * so the length is easy to verify by eye.
   */
  static const char* thirtyTwoSpaces = "        " "        " "        " "        ";
  /**
   * Write some number of spaces for padding.
   *
   * Spaces are emitted in chunks of at most 32 bytes taken from
   * thirtyTwoSpaces. The loop compares against the number of spaces still
   * owed (padSpaces - padCounter) so that it terminates for any pad width.
   *
   * The expansion is a single statement (do/while(0)), so it is safe inside
   * unbraced if/else bodies. padSpaces is evaluated more than once; do not
   * pass an expression with side effects.
   *
   * @param padSpaces the number of spaces to pad.
   * @param padCounter an integer lvalue which is used for internal bookkeeping;
   *                   it is reset to 0 on entry.
   * @param writer where to write the padding.
   */
  #define PAD(padSpaces, padCounter, writer) \
      do { \
          (padCounter) = 0; \
          while (32 < (padSpaces) - (padCounter)) { \
              Writer_write((writer), thirtyTwoSpaces, 32); \
              (padCounter) += 32; \
          } \
          Writer_write((writer), thirtyTwoSpaces, (padSpaces) - (padCounter)); \
      } while (0)
  /**
   * Status code reported when the reader runs out of input before a value
   * has been read completely.
   *
   * @return always -2.
   */
  static inline int outOfContent(void)
  {
      return -2;
  }
  #define OUT_OF_CONTENT_TO_READ outOfContent()
  /**
   * Status code reported when the input is present but cannot be parsed.
   *
   * @return always -3.
   */
  static inline int unparsable(void)
  {
      return -3;
  }
  #define UNPARSABLE unparsable()
  61. /** @see BencSerializer.h */
  62. static int32_t serializeString(struct Writer* writer,
  63. const String* string)
  64. {
  65. Writer_write(writer, "\"", 1);
  66. size_t i;
  67. uint8_t chr;
  68. char buffer[4];
  69. for (i = 0; i < string->len; i++) {
  70. chr = (uint8_t) string->bytes[i] & 0xFF;
  71. /* Nonprinting chars, \ and " are hex'd */
  72. if (chr < 126 && chr > 31 && chr != '\\' && chr != '"') {
  73. snprintf(buffer, 4, "%c", chr);
  74. Writer_write(writer, buffer, 1);
  75. } else {
  76. snprintf(buffer, 4, "\\x%.2X", chr);
  77. Writer_write(writer, buffer, 4);
  78. }
  79. }
  80. return Writer_write(writer, "\"", 1);
  81. }
  82. /**
  83. * Read until 1 char after the target character.
  84. */
  85. static inline int readUntil(uint8_t target, struct Reader* reader)
  86. {
  87. uint8_t nextChar;
  88. do {
  89. if (Reader_read(reader, (char*)&nextChar, 1)) {
  90. printf("Unexpected end of input while looking for '%c'\n",target);
  91. return OUT_OF_CONTENT_TO_READ;
  92. }
  93. } while (nextChar != target);
  94. return 0;
  95. }
  96. static inline int parseString(struct Reader* reader,
  97. struct Allocator* allocator,
  98. String** output)
  99. {
  100. #define BUFF_SZ (1<<8)
  101. #define BUFF_MAX (1<<20)
  102. int curSize = BUFF_SZ;
  103. struct Allocator* localAllocator = Allocator_child(allocator);
  104. uint8_t* buffer = Allocator_malloc(localAllocator, curSize);
  105. if (readUntil('"', reader) || Reader_read(reader, buffer, 1)) {
  106. printf("Unterminated string\n");
  107. Allocator_free(localAllocator);
  108. return OUT_OF_CONTENT_TO_READ;
  109. }
  110. for (int i = 0; i < BUFF_MAX - 1; i++) {
  111. if (buffer[i] == '\\') {
  112. // \x01 (skip the x)
  113. Reader_skip(reader, 1);
  114. uint8_t hex[2];
  115. if (Reader_read(reader, (char*)hex, 2)) {
  116. printf("Unexpected end of input parsing escape sequence\n");
  117. Allocator_free(localAllocator);
  118. return OUT_OF_CONTENT_TO_READ;
  119. }
  120. int byte = Hex_decodeByte(hex[0], hex[1]);
  121. if (byte == -1) {
  122. printf("Invalid escape \"%c%c\" after \"%.*s\"\n",hex[0],hex[1],i+1,buffer);
  123. Allocator_free(localAllocator);
  124. return UNPARSABLE;
  125. }
  126. buffer[i] = (uint8_t) byte;
  127. } else if (buffer[i] == '"') {
  128. *output = String_newBinary((char*)buffer, i, allocator);
  129. Allocator_free(localAllocator);
  130. return 0;
  131. }
  132. if (i == curSize - 1) {
  133. curSize <<= 1;
  134. buffer = Allocator_realloc(localAllocator, buffer, curSize);
  135. }
  136. if (Reader_read(reader, buffer + i + 1, 1)) {
  137. if (i+1 <= 20) {
  138. printf("Unterminated string \"%.*s\"\n", i+1, buffer);
  139. } else {
  140. printf("Unterminated string starting with \"%.*s...\"\n", 20, buffer);
  141. }
  142. Allocator_free(localAllocator);
  143. return OUT_OF_CONTENT_TO_READ;
  144. }
  145. }
  146. printf("Maximum string length of %d bytes exceeded.\n",BUFF_SZ);
  147. Allocator_free(localAllocator);
  148. return UNPARSABLE;
  149. #undef BUFF_SZ
  150. #undef BUFF_MAX
  151. }
  /**
   * Serialize a signed 64 bit integer as decimal text.
   *
   * @param writer where to write the digits.
   * @param integer the value to write.
   * @return whatever Writer_write returns.
   * @see BencSerializer.h
   */
  static int32_t serializeint64_t(struct Writer* writer,
                                  int64_t integer)
  {
      /* A 64 bit value needs at most 20 characters, so 32 is ample and the
       * length reported by snprintf is the length of the text in the buffer. */
      char digits[32] = { 0 };
      const int length = snprintf(digits, sizeof(digits), "%" PRId64, integer);
      return Writer_write(writer, digits, (size_t) length);
  }
  161. /** @see BencSerializer.h */
  162. static int32_t parseint64_t(struct Reader* reader,
  163. int64_t* output)
  164. {
  165. uint8_t buffer[32];
  166. for (int i = 0; i < 21; i++) {
  167. int32_t status = Reader_read(reader, buffer + i, 0);
  168. if (i == 0 && buffer[i] == '-' && status == 0) {
  169. // It's just a negative number, no need to fail it.
  170. continue;
  171. }
  172. if (buffer[i] < '0' || buffer[i] > '9' || status != 0 /* end of input */) {
  173. buffer[i] = '\0';
  174. int64_t out = strtol((char*)buffer, NULL, 10);
  175. // Failed parse causes 0 to be set.
  176. if (out == 0 && buffer[0] != '0' && (buffer[0] != '-' || buffer[1] != '0')) {
  177. printf("Failed to parse \"%s\": not a number\n",buffer);
  178. return UNPARSABLE;
  179. }
  180. if ((out == INT64_MAX || out == INT64_MIN) && errno == ERANGE) {
  181. printf("Failed to parse \"%s\": number too large/small\n",buffer);
  182. return UNPARSABLE;
  183. }
  184. *output = out;
  185. return 0;
  186. }
  187. Reader_skip(reader, 1);
  188. }
  189. // Larger than the max possible int64.
  190. buffer[22] = '\0';
  191. printf("Failed to parse \"%s\": number too large\n",buffer);
  192. return UNPARSABLE;
  193. }
  194. /**
  195. * Serialize a bencoded list with padding at the beginning of each line.
  196. *
  197. * @param writer the place to write the output to.
  198. * @param padSpaceCount the number of spaces to place at the beginning of each line.
  199. * @param list the list to serialize
  200. */
  201. static int32_t serializeListWithPadding(struct Writer* writer,
  202. const size_t padSpaceCount,
  203. const List* list)
  204. {
  205. int padCounter;
  206. Writer_write(writer, "[\n", 2);
  207. const struct List_Item* entry = *list;
  208. while (entry != NULL) {
  209. PAD(padSpaceCount + 2, padCounter, writer);
  210. serializeGenericWithPadding(writer, padSpaceCount + 2, entry->elem);
  211. entry = entry->next;
  212. if (entry != NULL) {
  213. Writer_write(writer, ",\n", 2);
  214. }
  215. }
  216. Writer_write(writer, "\n", 1);
  217. PAD(padSpaceCount, padCounter, writer);
  218. return Writer_write(writer, "]", 1);
  219. }
  220. /** @see BencSerializer.h */
  221. static int32_t serializeList(struct Writer* writer,
  222. const List* list)
  223. {
  224. return serializeListWithPadding(writer, 0, list);
  225. }
  226. /**
  227. * Parse a comment in with "slash splat" or double slash notation,
  228. * leave the reader on the first character after the last end of comment mark.
  229. */
  230. static inline int parseComment(struct Reader* reader)
  231. {
  232. char chars[2];
  233. int ret = Reader_read(reader, &chars, 2);
  234. if (ret) {
  235. printf("Warning: expected comment\n");
  236. return OUT_OF_CONTENT_TO_READ;
  237. }
  238. if (chars[0] != '/') {
  239. printf("Warning: expected a comment starting with '/', instead found '%c'\n",chars[0]);
  240. return UNPARSABLE;
  241. }
  242. switch (chars[1]) {
  243. case '*':;
  244. do {
  245. readUntil('*', reader);
  246. } while (!(ret = Reader_read(reader, &chars, 1)) && chars[0] != '/');
  247. if (ret) {
  248. printf("Unterminated multiline comment\n");
  249. return OUT_OF_CONTENT_TO_READ;
  250. }
  251. return 0;
  252. case '/':;
  253. return readUntil('\n', reader);
  254. default:
  255. printf("Warning: expected a comment starting with \"//\" or \"/*\", "
  256. "instead found \"/%c\"\n",chars[1]);
  257. return UNPARSABLE;
  258. }
  259. }
  260. /** @see BencSerializer.h */
  261. static int32_t parseList(struct Reader* reader,
  262. struct Allocator* allocator,
  263. List* output)
  264. {
  265. char nextChar;
  266. readUntil('[', reader);
  267. Object* element;
  268. struct List_Item* thisEntry = NULL;
  269. struct List_Item** lastEntryPointer = output;
  270. int ret;
  271. for (;;) {
  272. for (;;) {
  273. if (Reader_read(reader, &nextChar, 0) != 0) {
  274. printf("Unterminated list\n");
  275. return OUT_OF_CONTENT_TO_READ;
  276. }
  277. if (nextChar == '/') {
  278. if ((ret = parseComment(reader)) != 0) {
  279. return ret;
  280. }
  281. continue;
  282. }
  283. switch (nextChar) {
  284. case '0':
  285. case '1':
  286. case '2':
  287. case '3':
  288. case '4':
  289. case '5':
  290. case '6':
  291. case '7':
  292. case '8':
  293. case '9':
  294. case '[':
  295. case '{':
  296. case '"':
  297. break;
  298. case ']':
  299. Reader_skip(reader, 1);
  300. return 0;
  301. default:
  302. // FIXME(gerard): silently skipping anything we don't understand
  303. // might not be the best idea
  304. Reader_skip(reader, 1);
  305. continue;
  306. }
  307. break;
  308. }
  309. if ((ret = parseGeneric(reader, allocator, &element)) != 0) {
  310. return ret;
  311. }
  312. thisEntry = Allocator_malloc(allocator, sizeof(struct List_Item));
  313. thisEntry->elem = element;
  314. thisEntry->next = NULL;
  315. // Read backwards so that the list reads out forward.
  316. *lastEntryPointer = thisEntry;
  317. lastEntryPointer = &(thisEntry->next);
  318. }
  319. }
  320. /**
  321. * Serialize a bencoded dictionary with padding before each line.
  322. *
  323. * @param writer the place to write the output to.
  324. * @param padSpaceCount the number of spaces to place at the beginning of each line.
  325. * @param dictionary the dictionary to serialize.
  326. */
  327. static int32_t serializeDictionaryWithPadding(struct Writer* writer,
  328. size_t padSpaceCount,
  329. const Dict* dictionary)
  330. {
  331. int padCounter = 0;
  332. Writer_write(writer, "{\n", 2);
  333. const struct Dict_Entry* entry = *dictionary;
  334. while (entry != NULL) {
  335. PAD(padSpaceCount + 2, padCounter, writer);
  336. serializeString(writer, entry->key);
  337. Writer_write(writer, " : ", 3);
  338. serializeGenericWithPadding(writer, padSpaceCount + 2, entry->val);
  339. entry = entry->next;
  340. if (entry != NULL) {
  341. Writer_write(writer, ",\n", 2);
  342. }
  343. }
  344. Writer_write(writer, "\n", 1);
  345. PAD(padSpaceCount, padCounter, writer);
  346. return Writer_write(writer, "}", 1);
  347. }
  348. /** @see BencSerializer.h */
  349. static int32_t serializeDictionary(struct Writer* writer,
  350. const Dict* dictionary)
  351. {
  352. return serializeDictionaryWithPadding(writer, 0, dictionary);
  353. }
  354. /** @see BencSerializer.h */
  355. static int32_t parseDictionary(struct Reader* reader,
  356. struct Allocator* allocator,
  357. Dict* output)
  358. {
  359. uint8_t nextChar;
  360. readUntil('{', reader);
  361. String* key;
  362. Object* value;
  363. struct Dict_Entry* entryPointer;
  364. struct Dict_Entry* lastEntryPointer = NULL;
  365. int ret = 0;
  366. for (;;) {
  367. while (!ret) {
  368. ret = Reader_read(reader, &nextChar, 0);
  369. switch (nextChar) {
  370. case '"':
  371. break;
  372. case '}':
  373. Reader_skip(reader, 1);
  374. *output = lastEntryPointer;
  375. return 0;
  376. case '/':
  377. parseComment(reader);
  378. continue;
  379. default:
  380. Reader_skip(reader, 1);
  381. continue;
  382. }
  383. break;
  384. }
  385. if (ret) {
  386. printf("Unterminated dictionary\n");
  387. return OUT_OF_CONTENT_TO_READ;
  388. }
  389. // Get key and value.
  390. if ((ret = parseString(reader, allocator, &key)) != 0) {
  391. return ret;
  392. }
  393. readUntil(':', reader);
  394. if ((ret = parseGeneric(reader, allocator, &value)) != 0) {
  395. return ret;
  396. }
  397. /* Allocate the entry. */
  398. entryPointer = Allocator_malloc(allocator, sizeof(struct Dict_Entry));
  399. entryPointer->next = lastEntryPointer;
  400. entryPointer->key = key;
  401. entryPointer->val = value;
  402. lastEntryPointer = entryPointer;
  403. }
  404. }
  405. static int32_t parseGeneric(struct Reader* reader,
  406. struct Allocator* allocator,
  407. Object** output)
  408. {
  409. int ret = 0;
  410. char firstChar;
  411. for (;;) {
  412. ret = Reader_read(reader, &firstChar, 0);
  413. switch (firstChar) {
  414. case ' ':
  415. case '\r':
  416. case '\n':
  417. case '\t':
  418. Reader_skip(reader, 1);
  419. continue;
  420. case '/':;
  421. if ((ret = parseComment(reader)) != 0) {
  422. return ret;
  423. }
  424. continue;
  425. default:
  426. break;
  427. }
  428. if (ret) {
  429. printf("Unexpected end of input\n");
  430. return OUT_OF_CONTENT_TO_READ;
  431. }
  432. break;
  433. }
  434. Object* out = Allocator_malloc(allocator, sizeof(Object));
  435. switch (firstChar) {
  436. case '0':
  437. case '1':
  438. case '2':
  439. case '3':
  440. case '4':
  441. case '5':
  442. case '6':
  443. case '7':
  444. case '8':
  445. case '9':;
  446. // int64_t. Int is special because it is not a pointer but a int64_t.
  447. int64_t bint;
  448. if ((ret = parseint64_t(reader, &bint)) == UNPARSABLE) {
  449. break;
  450. }
  451. out->type = Object_INTEGER;
  452. out->as.number = bint;
  453. break;
  454. case '[':;
  455. // List.
  456. List* list = Allocator_calloc(allocator, sizeof(List), 1);
  457. ret = parseList(reader, allocator, list);
  458. out->type = Object_LIST;
  459. out->as.list = list;
  460. break;
  461. case '{':;
  462. // Dictionary
  463. Dict* dict = Allocator_calloc(allocator, sizeof(Dict), 1);
  464. ret = parseDictionary(reader, allocator, dict);
  465. out->type = Object_DICT;
  466. out->as.dictionary = dict;
  467. break;
  468. case '"':;
  469. // String
  470. String* string = NULL;
  471. ret = parseString(reader, allocator, &string);
  472. out->type = Object_STRING;
  473. out->as.string = string;
  474. break;
  475. default:
  476. printf("While looking for something to parse: "
  477. "expected one of 0 1 2 3 4 5 6 7 8 9 [ { \", found '%c'\n", firstChar);
  478. return UNPARSABLE;
  479. }
  480. if (ret != 0) {
  481. // Something went wrong while parsing.
  482. return ret;
  483. }
  484. *output = out;
  485. return 0;
  486. }
  487. /**
  488. * Serialize a benc object into a json string with padding before each line.
  489. *
  490. * @param writer a Writer which to write the output to.
  491. * @param number of pad spaces to place before each line.
  492. * @param obj the object to serialize.
  493. * @return -2 if the type of object cannot be determined, otherwise
  494. * whatever is returned by the Writer.
  495. */
  496. static int32_t serializeGenericWithPadding(struct Writer* writer,
  497. size_t padSpaceCount,
  498. const Object* obj)
  499. {
  500. switch (obj->type)
  501. {
  502. case Object_STRING:
  503. return serializeString(writer, obj->as.string);
  504. case Object_DICT:
  505. return serializeDictionaryWithPadding(writer, padSpaceCount, obj->as.dictionary);
  506. case Object_LIST:
  507. return serializeListWithPadding(writer, padSpaceCount, obj->as.list);
  508. case Object_INTEGER:
  509. return serializeint64_t(writer, obj->as.number);
  510. default:
  511. return -2;
  512. }
  513. }
  514. static const struct BencSerializer SERIALIZER =
  515. {
  516. .serializeString = serializeString,
  517. .parseString = parseString,
  518. .serializeint64_t = serializeint64_t,
  519. .parseint64_t = parseint64_t,
  520. .serializeList = serializeList,
  521. .parseList = parseList,
  522. .serializeDictionary = serializeDictionary,
  523. .parseDictionary = parseDictionary
  524. };
  525. const struct BencSerializer* JsonBencSerializer_get()
  526. {
  527. return &SERIALIZER;
  528. }
  529. #undef PAD