JsonBencSerializer.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
  1. /* vim: set expandtab ts=4 sw=4: */
  2. /*
  3. * You may redistribute this program and/or modify it under the terms of
  4. * the GNU General Public License as published by the Free Software Foundation,
  5. * either version 3 of the License, or (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. *
  12. * You should have received a copy of the GNU General Public License
  13. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  14. */
  15. #include "memory/Allocator.h"
  16. #include "io/Reader.h"
  17. #include "io/Writer.h"
  18. #include "benc/Dict.h"
  19. #include "benc/List.h"
  20. #include "benc/String.h"
  21. #include "benc/serialization/BencSerializer.h"
  22. #include "benc/serialization/json/JsonBencSerializer.h"
  23. #include "util/Bits.h"
  24. #include "util/Hex.h"
  25. #include <stdio.h>
  26. #include <inttypes.h>
  27. #include <stdbool.h>
  28. #include <stdlib.h>
  29. #include <errno.h>
  30. static int32_t parseGeneric(struct Reader* reader,
  31. struct Allocator* allocator,
  32. Object** output);
  33. static int32_t serializeGenericWithPadding(struct Writer* writer,
  34. size_t padSpaceCount,
  35. const Object* obj);
  /**
   * Exactly thirty-two space characters.
   * PAD() writes up to 32 bytes at a time out of this buffer, so it must
   * really contain 32 spaces (4 groups of 8 below).
   */
  static const char* thirtyTwoSpaces =
      "        "
      "        "
      "        "
      "        ";
  /**
   * Write some number of spaces for padding.
   *
   * The padding is emitted in chunks of at most 32 bytes taken from
   * thirtyTwoSpaces. The loop runs while more than 32 spaces remain to be
   * written (padSpaces - padCounter), then the remainder is written in one
   * final call. The macro is wrapped in do/while(0) so it behaves as a single
   * statement.
   *
   * @param padSpaces the number of spaces to pad.
   * @param padCounter an integer which is used for internal bookkeeping.
   * @param writer where to write the padding.
   */
  #define PAD(padSpaces, padCounter, writer) \
      do { \
          (padCounter) = 0; \
          while (32 < (padSpaces) - (padCounter)) { \
              Writer_write((writer), thirtyTwoSpaces, 32); \
              (padCounter) += 32; \
          } \
          Writer_write((writer), thirtyTwoSpaces, (padSpaces) - (padCounter)); \
      } while (0)
  /**
   * Status code returned by the parsers when the input ends before a
   * complete value has been read.
   *
   * @return always -2.
   */
  static inline int outOfContent(void)
  {
      return -2;
  }
  #define OUT_OF_CONTENT_TO_READ outOfContent()
  /**
   * Status code returned by the parsers when the input is present but
   * malformed.
   *
   * @return always -3.
   */
  static inline int unparsable(void)
  {
      return -3;
  }
  #define UNPARSABLE unparsable()
  62. /** @see BencSerializer.h */
  63. static int32_t serializeString(struct Writer* writer,
  64. const String* string)
  65. {
  66. Writer_write(writer, "\"", 1);
  67. size_t i;
  68. uint8_t chr;
  69. char buffer[5];
  70. for (i = 0; i < string->len; i++) {
  71. chr = (uint8_t) string->bytes[i] & 0xFF;
  72. /* Nonprinting chars, \ and " are hex'd */
  73. if (chr < 126 && chr > 31 && chr != '\\' && chr != '"') {
  74. snprintf(buffer, 5, "%c", chr);
  75. Writer_write(writer, buffer, 1);
  76. } else {
  77. snprintf(buffer, 5, "\\x%.2X", chr);
  78. Writer_write(writer, buffer, 4);
  79. }
  80. }
  81. return Writer_write(writer, "\"", 1);
  82. }
  83. /**
  84. * Read until 1 char after the target character.
  85. */
  86. static inline int readUntil(uint8_t target, struct Reader* reader)
  87. {
  88. uint8_t nextChar;
  89. do {
  90. if (Reader_read(reader, (char*)&nextChar, 1)) {
  91. printf("Unexpected end of input while looking for '%c'\n",target);
  92. return OUT_OF_CONTENT_TO_READ;
  93. }
  94. } while (nextChar != target);
  95. return 0;
  96. }
  97. static inline int parseString(struct Reader* reader,
  98. struct Allocator* allocator,
  99. String** output)
  100. {
  101. #define BUFF_SZ (1<<8)
  102. #define BUFF_MAX (1<<20)
  103. int curSize = BUFF_SZ;
  104. struct Allocator* localAllocator = Allocator_child(allocator);
  105. uint8_t* buffer = Allocator_malloc(localAllocator, curSize);
  106. if (readUntil('"', reader) || Reader_read(reader, buffer, 1)) {
  107. printf("Unterminated string\n");
  108. Allocator_free(localAllocator);
  109. return OUT_OF_CONTENT_TO_READ;
  110. }
  111. for (int i = 0; i < BUFF_MAX - 1; i++) {
  112. if (buffer[i] == '\\') {
  113. // \x01 (skip the x)
  114. Reader_skip(reader, 1);
  115. uint8_t hex[2];
  116. if (Reader_read(reader, (char*)hex, 2)) {
  117. printf("Unexpected end of input parsing escape sequence\n");
  118. Allocator_free(localAllocator);
  119. return OUT_OF_CONTENT_TO_READ;
  120. }
  121. int byte = Hex_decodeByte(hex[0], hex[1]);
  122. if (byte == -1) {
  123. printf("Invalid escape \"%c%c\" after \"%.*s\"\n",hex[0],hex[1],i+1,buffer);
  124. Allocator_free(localAllocator);
  125. return UNPARSABLE;
  126. }
  127. buffer[i] = (uint8_t) byte;
  128. } else if (buffer[i] == '"') {
  129. *output = String_newBinary((char*)buffer, i, allocator);
  130. Allocator_free(localAllocator);
  131. return 0;
  132. }
  133. if (i == curSize - 1) {
  134. curSize <<= 1;
  135. buffer = Allocator_realloc(localAllocator, buffer, curSize);
  136. }
  137. if (Reader_read(reader, buffer + i + 1, 1)) {
  138. if (i+1 <= 20) {
  139. printf("Unterminated string \"%.*s\"\n", i+1, buffer);
  140. } else {
  141. printf("Unterminated string starting with \"%.*s...\"\n", 20, buffer);
  142. }
  143. Allocator_free(localAllocator);
  144. return OUT_OF_CONTENT_TO_READ;
  145. }
  146. }
  147. printf("Maximum string length of %d bytes exceeded.\n",BUFF_SZ);
  148. Allocator_free(localAllocator);
  149. return UNPARSABLE;
  150. #undef BUFF_SZ
  151. #undef BUFF_MAX
  152. }
  /**
   * Write a signed 64 bit integer in decimal.
   *
   * @see BencSerializer.h
   * @param writer where the digits are written.
   * @param integer the value to serialize.
   * @return whatever the Writer returns.
   */
  static int32_t serializeint64_t(struct Writer* writer,
                                  int64_t integer)
  {
      // Zero-filled so the text is always terminated; the longest int64_t
      // ("-9223372036854775808") needs 20 characters.
      char digits[32] = { 0 };
      snprintf(digits, sizeof(digits), "%" PRId64, integer);
      return Writer_write(writer, digits, CString_strlen(digits));
  }
  162. /** @see BencSerializer.h */
  163. static int32_t parseint64_t(struct Reader* reader,
  164. int64_t* output)
  165. {
  166. uint8_t buffer[32];
  167. for (int i = 0; i < 21; i++) {
  168. int32_t status = Reader_read(reader, buffer + i, 0);
  169. if (i == 0 && buffer[i] == '-' && status == 0) {
  170. // It's just a negative number, no need to fail it.
  171. continue;
  172. }
  173. if (buffer[i] < '0' || buffer[i] > '9' || status != 0 /* end of input */) {
  174. buffer[i] = '\0';
  175. int64_t out = strtol((char*)buffer, NULL, 10);
  176. // Failed parse causes 0 to be set.
  177. if (out == 0 && buffer[0] != '0' && (buffer[0] != '-' || buffer[1] != '0')) {
  178. printf("Failed to parse \"%s\": not a number\n",buffer);
  179. return UNPARSABLE;
  180. }
  181. if ((out == INT64_MAX || out == INT64_MIN) && errno == ERANGE) {
  182. printf("Failed to parse \"%s\": number too large/small\n",buffer);
  183. return UNPARSABLE;
  184. }
  185. *output = out;
  186. return 0;
  187. }
  188. Reader_skip(reader, 1);
  189. }
  190. // Larger than the max possible int64.
  191. buffer[22] = '\0';
  192. printf("Failed to parse \"%s\": number too large\n",buffer);
  193. return UNPARSABLE;
  194. }
  195. /**
  196. * Serialize a bencoded list with padding at the beginning of each line.
  197. *
  198. * @param writer the place to write the output to.
  199. * @param padSpaceCount the number of spaces to place at the beginning of each line.
  200. * @param list the list to serialize
  201. */
  202. static int32_t serializeListWithPadding(struct Writer* writer,
  203. const size_t padSpaceCount,
  204. const List* list)
  205. {
  206. int padCounter;
  207. Writer_write(writer, "[\n", 2);
  208. const struct List_Item* entry = *list;
  209. while (entry != NULL) {
  210. PAD(padSpaceCount + 2, padCounter, writer);
  211. serializeGenericWithPadding(writer, padSpaceCount + 2, entry->elem);
  212. entry = entry->next;
  213. if (entry != NULL) {
  214. Writer_write(writer, ",\n", 2);
  215. }
  216. }
  217. Writer_write(writer, "\n", 1);
  218. PAD(padSpaceCount, padCounter, writer);
  219. return Writer_write(writer, "]", 1);
  220. }
  221. /** @see BencSerializer.h */
  222. static int32_t serializeList(struct Writer* writer,
  223. const List* list)
  224. {
  225. return serializeListWithPadding(writer, 0, list);
  226. }
  227. /**
  228. * Parse a comment in with "slash splat" or double slash notation,
  229. * leave the reader on the first character after the last end of comment mark.
  230. */
  231. static inline int parseComment(struct Reader* reader)
  232. {
  233. char chars[2];
  234. int ret = Reader_read(reader, &chars, 2);
  235. if (ret) {
  236. printf("Warning: expected comment\n");
  237. return OUT_OF_CONTENT_TO_READ;
  238. }
  239. if (chars[0] != '/') {
  240. printf("Warning: expected a comment starting with '/', instead found '%c'\n",chars[0]);
  241. return UNPARSABLE;
  242. }
  243. switch (chars[1]) {
  244. case '*':;
  245. do {
  246. readUntil('*', reader);
  247. } while (!(ret = Reader_read(reader, &chars, 1)) && chars[0] != '/');
  248. if (ret) {
  249. printf("Unterminated multiline comment\n");
  250. return OUT_OF_CONTENT_TO_READ;
  251. }
  252. return 0;
  253. case '/':;
  254. return readUntil('\n', reader);
  255. default:
  256. printf("Warning: expected a comment starting with \"//\" or \"/*\", "
  257. "instead found \"/%c\"\n",chars[1]);
  258. return UNPARSABLE;
  259. }
  260. }
  261. /** @see BencSerializer.h */
  262. static int32_t parseList(struct Reader* reader,
  263. struct Allocator* allocator,
  264. List* output)
  265. {
  266. char nextChar;
  267. readUntil('[', reader);
  268. Object* element;
  269. struct List_Item* thisEntry = NULL;
  270. struct List_Item** lastEntryPointer = output;
  271. int ret;
  272. for (;;) {
  273. for (;;) {
  274. if (Reader_read(reader, &nextChar, 0) != 0) {
  275. printf("Unterminated list\n");
  276. return OUT_OF_CONTENT_TO_READ;
  277. }
  278. if (nextChar == '/') {
  279. if ((ret = parseComment(reader)) != 0) {
  280. return ret;
  281. }
  282. continue;
  283. }
  284. switch (nextChar) {
  285. case '0':
  286. case '1':
  287. case '2':
  288. case '3':
  289. case '4':
  290. case '5':
  291. case '6':
  292. case '7':
  293. case '8':
  294. case '9':
  295. case '[':
  296. case '{':
  297. case '"':
  298. break;
  299. case ']':
  300. Reader_skip(reader, 1);
  301. return 0;
  302. default:
  303. // FIXME(gerard): silently skipping anything we don't understand
  304. // might not be the best idea
  305. Reader_skip(reader, 1);
  306. continue;
  307. }
  308. break;
  309. }
  310. if ((ret = parseGeneric(reader, allocator, &element)) != 0) {
  311. return ret;
  312. }
  313. thisEntry = Allocator_malloc(allocator, sizeof(struct List_Item));
  314. thisEntry->elem = element;
  315. thisEntry->next = NULL;
  316. // Read backwards so that the list reads out forward.
  317. *lastEntryPointer = thisEntry;
  318. lastEntryPointer = &(thisEntry->next);
  319. }
  320. }
  321. /**
  322. * Serialize a bencoded dictionary with padding before each line.
  323. *
  324. * @param writer the place to write the output to.
  325. * @param padSpaceCount the number of spaces to place at the beginning of each line.
  326. * @param dictionary the dictionary to serialize.
  327. */
  328. static int32_t serializeDictionaryWithPadding(struct Writer* writer,
  329. size_t padSpaceCount,
  330. const Dict* dictionary)
  331. {
  332. int padCounter = 0;
  333. Writer_write(writer, "{\n", 2);
  334. const struct Dict_Entry* entry = *dictionary;
  335. while (entry != NULL) {
  336. PAD(padSpaceCount + 2, padCounter, writer);
  337. serializeString(writer, entry->key);
  338. Writer_write(writer, " : ", 3);
  339. serializeGenericWithPadding(writer, padSpaceCount + 2, entry->val);
  340. entry = entry->next;
  341. if (entry != NULL) {
  342. Writer_write(writer, ",\n", 2);
  343. }
  344. }
  345. Writer_write(writer, "\n", 1);
  346. PAD(padSpaceCount, padCounter, writer);
  347. return Writer_write(writer, "}", 1);
  348. }
  349. /** @see BencSerializer.h */
  350. static int32_t serializeDictionary(struct Writer* writer,
  351. const Dict* dictionary)
  352. {
  353. return serializeDictionaryWithPadding(writer, 0, dictionary);
  354. }
  355. /** @see BencSerializer.h */
  356. static int32_t parseDictionary(struct Reader* reader,
  357. struct Allocator* allocator,
  358. Dict* output)
  359. {
  360. uint8_t nextChar;
  361. readUntil('{', reader);
  362. String* key;
  363. Object* value;
  364. struct Dict_Entry* first = NULL;
  365. struct Dict_Entry* last = NULL;
  366. int ret = 0;
  367. for (;;) {
  368. while (!ret) {
  369. ret = Reader_read(reader, &nextChar, 0);
  370. switch (nextChar) {
  371. case '"':
  372. break;
  373. case '}':
  374. Reader_skip(reader, 1);
  375. *output = first;
  376. return 0;
  377. case '/':
  378. parseComment(reader);
  379. continue;
  380. default:
  381. Reader_skip(reader, 1);
  382. continue;
  383. }
  384. break;
  385. }
  386. if (ret) {
  387. printf("Unterminated dictionary\n");
  388. return OUT_OF_CONTENT_TO_READ;
  389. }
  390. // Get key and value.
  391. if ((ret = parseString(reader, allocator, &key)) != 0) {
  392. return ret;
  393. }
  394. readUntil(':', reader);
  395. if ((ret = parseGeneric(reader, allocator, &value)) != 0) {
  396. return ret;
  397. }
  398. /* Allocate the entry. */
  399. struct Dict_Entry* entry = Allocator_calloc(allocator, sizeof(struct Dict_Entry), 1);
  400. entry->key = key;
  401. entry->val = value;
  402. if (last) {
  403. last->next = entry;
  404. } else {
  405. first = entry;
  406. }
  407. last = entry;
  408. }
  409. }
  410. static int32_t parseGeneric(struct Reader* reader,
  411. struct Allocator* allocator,
  412. Object** output)
  413. {
  414. int ret = 0;
  415. char firstChar;
  416. for (;;) {
  417. ret = Reader_read(reader, &firstChar, 0);
  418. switch (firstChar) {
  419. case ' ':
  420. case '\r':
  421. case '\n':
  422. case '\t':
  423. Reader_skip(reader, 1);
  424. continue;
  425. case '/':;
  426. if ((ret = parseComment(reader)) != 0) {
  427. return ret;
  428. }
  429. continue;
  430. default:
  431. break;
  432. }
  433. if (ret) {
  434. printf("Unexpected end of input\n");
  435. return OUT_OF_CONTENT_TO_READ;
  436. }
  437. break;
  438. }
  439. Object* out = Allocator_malloc(allocator, sizeof(Object));
  440. switch (firstChar) {
  441. case '0':
  442. case '1':
  443. case '2':
  444. case '3':
  445. case '4':
  446. case '5':
  447. case '6':
  448. case '7':
  449. case '8':
  450. case '9':;
  451. // int64_t. Int is special because it is not a pointer but a int64_t.
  452. int64_t bint;
  453. if ((ret = parseint64_t(reader, &bint)) == UNPARSABLE) {
  454. break;
  455. }
  456. out->type = Object_INTEGER;
  457. out->as.number = bint;
  458. break;
  459. case '[':;
  460. // List.
  461. List* list = Allocator_calloc(allocator, sizeof(List), 1);
  462. ret = parseList(reader, allocator, list);
  463. out->type = Object_LIST;
  464. out->as.list = list;
  465. break;
  466. case '{':;
  467. // Dictionary
  468. Dict* dict = Allocator_calloc(allocator, sizeof(Dict), 1);
  469. ret = parseDictionary(reader, allocator, dict);
  470. out->type = Object_DICT;
  471. out->as.dictionary = dict;
  472. break;
  473. case '"':;
  474. // String
  475. String* string = NULL;
  476. ret = parseString(reader, allocator, &string);
  477. out->type = Object_STRING;
  478. out->as.string = string;
  479. break;
  480. default:
  481. printf("While looking for something to parse: "
  482. "expected one of 0 1 2 3 4 5 6 7 8 9 [ { \", found '%c'\n", firstChar);
  483. return UNPARSABLE;
  484. }
  485. if (ret != 0) {
  486. // Something went wrong while parsing.
  487. return ret;
  488. }
  489. *output = out;
  490. return 0;
  491. }
  492. /**
  493. * Serialize a benc object into a json string with padding before each line.
  494. *
  495. * @param writer a Writer which to write the output to.
  496. * @param number of pad spaces to place before each line.
  497. * @param obj the object to serialize.
  498. * @return -2 if the type of object cannot be determined, otherwise
  499. * whatever is returned by the Writer.
  500. */
  501. static int32_t serializeGenericWithPadding(struct Writer* writer,
  502. size_t padSpaceCount,
  503. const Object* obj)
  504. {
  505. switch (obj->type)
  506. {
  507. case Object_STRING:
  508. return serializeString(writer, obj->as.string);
  509. case Object_DICT:
  510. return serializeDictionaryWithPadding(writer, padSpaceCount, obj->as.dictionary);
  511. case Object_LIST:
  512. return serializeListWithPadding(writer, padSpaceCount, obj->as.list);
  513. case Object_INTEGER:
  514. return serializeint64_t(writer, obj->as.number);
  515. default:
  516. return -2;
  517. }
  518. }
  519. static const struct BencSerializer SERIALIZER =
  520. {
  521. .serializeString = serializeString,
  522. .parseString = parseString,
  523. .serializeint64_t = serializeint64_t,
  524. .parseint64_t = parseint64_t,
  525. .serializeList = serializeList,
  526. .parseList = parseList,
  527. .serializeDictionary = serializeDictionary,
  528. .parseDictionary = parseDictionary
  529. };
  530. const struct BencSerializer* JsonBencSerializer_get(void)
  531. {
  532. return &SERIALIZER;
  533. }
  534. #undef PAD