JsonBencSerializer.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591
  1. /* vim: set expandtab ts=4 sw=4: */
  2. /*
  3. * You may redistribute this program and/or modify it under the terms of
  4. * the GNU General Public License as published by the Free Software Foundation,
  5. * either version 3 of the License, or (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. *
  12. * You should have received a copy of the GNU General Public License
  13. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  15. #include "memory/Allocator.h"
  16. #include "io/Reader.h"
  17. #include "io/Writer.h"
  18. #include "benc/Dict.h"
  19. #include "benc/List.h"
  20. #include "benc/String.h"
  21. #include "benc/serialization/BencSerializer.h"
  22. #include "util/platform/libc/strlen.h"
  23. #include "util/Bits.h"
  24. #include "util/Hex.h"
  25. #include <stdio.h>
  26. #include <inttypes.h>
  27. #include <stdbool.h>
  28. #include <stdlib.h>
  29. #include <errno.h>
  30. static int32_t parseGeneric(struct Reader* reader,
  31. struct Allocator* allocator,
  32. Object** output);
  33. static int32_t serializeGenericWithPadding(struct Writer* writer,
  34. size_t padSpaceCount,
  35. const Object* obj);
  36. /** What the name says. */
  37. static const char* thirtyTwoSpaces = " ";
  38. /**
  39. * Write some number of spaces for padding.
  40. *
  41. * @param padSpaces the number of spaces to pad.
  42. * @param padCounter an integer which is used for internal bookkeeping.
  43. * @param writer where to write the padding.
  44. */
  45. #define PAD(padSpaces, padCounter, writer) \
  46. padCounter = 0; \
  47. while (32 < padSpaces + padCounter) { \
  48. Writer_write(writer, thirtyTwoSpaces, 32); \
  49. padCounter += 32; \
  50. } \
  51. Writer_write(writer, thirtyTwoSpaces, padSpaces - padCounter)
  52. static inline int outOfContent()
  53. {
  54. return -2;
  55. }
  56. #define OUT_OF_CONTENT_TO_READ outOfContent()
  57. static inline int unparsable()
  58. {
  59. return -3;
  60. }
  61. #define UNPARSABLE unparsable()
  62. /** @see BencSerializer.h */
  63. static int32_t serializeString(struct Writer* writer,
  64. const String* string)
  65. {
  66. Writer_write(writer, "\"", 1);
  67. size_t i;
  68. uint8_t chr;
  69. char buffer[4];
  70. for (i = 0; i < string->len; i++) {
  71. chr = (uint8_t) string->bytes[i] & 0xFF;
  72. /* Nonprinting chars, \ and " are hex'd */
  73. if (chr < 126 && chr > 31 && chr != '\\' && chr != '"') {
  74. snprintf(buffer, 4, "%c", chr);
  75. Writer_write(writer, buffer, 1);
  76. } else {
  77. snprintf(buffer, 4, "\\x%.2X", chr);
  78. Writer_write(writer, buffer, 4);
  79. }
  80. }
  81. return Writer_write(writer, "\"", 1);
  82. }
  83. /**
  84. * Read until 1 char after the target character.
  85. */
  86. static inline int readUntil(uint8_t target, struct Reader* reader)
  87. {
  88. uint8_t nextChar;
  89. do {
  90. if (Reader_read(reader, (char*)&nextChar, 1)) {
  91. printf("Unexpected end of input while looking for '%c'\n",target);
  92. return OUT_OF_CONTENT_TO_READ;
  93. }
  94. } while (nextChar != target);
  95. return 0;
  96. }
  97. static inline int parseString(struct Reader* reader,
  98. struct Allocator* allocator,
  99. String** output)
  100. {
  101. #define BUFF_SZ (1<<8)
  102. #define BUFF_MAX (1<<20)
  103. int curSize = BUFF_SZ;
  104. struct Allocator* localAllocator = Allocator_child(allocator);
  105. uint8_t* buffer = Allocator_malloc(localAllocator, curSize);
  106. if (readUntil('"', reader) || Reader_read(reader, buffer, 1)) {
  107. printf("Unterminated string\n");
  108. Allocator_free(localAllocator);
  109. return OUT_OF_CONTENT_TO_READ;
  110. }
  111. for (int i = 0; i < BUFF_MAX - 1; i++) {
  112. if (buffer[i] == '\\') {
  113. // \x01 (skip the x)
  114. Reader_skip(reader, 1);
  115. uint8_t hex[2];
  116. if (Reader_read(reader, (char*)hex, 2)) {
  117. printf("Unexpected end of input parsing escape sequence\n");
  118. Allocator_free(localAllocator);
  119. return OUT_OF_CONTENT_TO_READ;
  120. }
  121. int byte = Hex_decodeByte(hex[0], hex[1]);
  122. if (byte == -1) {
  123. printf("Invalid escape \"%c%c\" after \"%.*s\"\n",hex[0],hex[1],i+1,buffer);
  124. Allocator_free(localAllocator);
  125. return UNPARSABLE;
  126. }
  127. buffer[i] = (uint8_t) byte;
  128. } else if (buffer[i] == '"') {
  129. *output = String_newBinary((char*)buffer, i, allocator);
  130. Allocator_free(localAllocator);
  131. return 0;
  132. }
  133. if (i == curSize - 1) {
  134. curSize <<= 1;
  135. buffer = Allocator_realloc(localAllocator, buffer, curSize);
  136. }
  137. if (Reader_read(reader, buffer + i + 1, 1)) {
  138. if (i+1 <= 20) {
  139. printf("Unterminated string \"%.*s\"\n", i+1, buffer);
  140. } else {
  141. printf("Unterminated string starting with \"%.*s...\"\n", 20, buffer);
  142. }
  143. Allocator_free(localAllocator);
  144. return OUT_OF_CONTENT_TO_READ;
  145. }
  146. }
  147. printf("Maximum string length of %d bytes exceeded.\n",BUFF_SZ);
  148. Allocator_free(localAllocator);
  149. return UNPARSABLE;
  150. #undef BUFF_SZ
  151. #undef BUFF_MAX
  152. }
  153. /** @see BencSerializer.h */
  154. static int32_t serializeint64_t(struct Writer* writer,
  155. int64_t integer)
  156. {
  157. char buffer[32];
  158. Bits_memset(buffer, 0, 32);
  159. snprintf(buffer, 32, "%" PRId64, integer);
  160. return Writer_write(writer, buffer, strlen(buffer));
  161. }
  162. /** @see BencSerializer.h */
  163. static int32_t parseint64_t(struct Reader* reader,
  164. int64_t* output)
  165. {
  166. uint8_t buffer[32];
  167. for (int i = 0; i < 21; i++) {
  168. int32_t status = Reader_read(reader, buffer + i, 0);
  169. if (i == 0 && buffer[i] == '-' && status == 0) {
  170. // It's just a negative number, no need to fail it.
  171. continue;
  172. }
  173. if (buffer[i] < '0' || buffer[i] > '9' || status != 0 /* end of input */) {
  174. buffer[i] = '\0';
  175. int64_t out = strtol((char*)buffer, NULL, 10);
  176. // Failed parse causes 0 to be set.
  177. if (out == 0 && buffer[0] != '0' && (buffer[0] != '-' || buffer[1] != '0')) {
  178. printf("Failed to parse \"%s\": not a number\n",buffer);
  179. return UNPARSABLE;
  180. }
  181. if ((out == INT64_MAX || out == INT64_MIN) && errno == ERANGE) {
  182. printf("Failed to parse \"%s\": number too large/small\n",buffer);
  183. return UNPARSABLE;
  184. }
  185. *output = out;
  186. return 0;
  187. }
  188. Reader_skip(reader, 1);
  189. }
  190. // Larger than the max possible int64.
  191. buffer[22] = '\0';
  192. printf("Failed to parse \"%s\": number too large\n",buffer);
  193. return UNPARSABLE;
  194. }
  195. /**
  196. * Serialize a bencoded list with padding at the beginning of each line.
  197. *
  198. * @param writer the place to write the output to.
  199. * @param padSpaceCount the number of spaces to place at the beginning of each line.
  200. * @param list the list to serialize
  201. */
  202. static int32_t serializeListWithPadding(struct Writer* writer,
  203. const size_t padSpaceCount,
  204. const List* list)
  205. {
  206. int padCounter;
  207. Writer_write(writer, "[\n", 2);
  208. const struct List_Item* entry = *list;
  209. while (entry != NULL) {
  210. PAD(padSpaceCount + 2, padCounter, writer);
  211. serializeGenericWithPadding(writer, padSpaceCount + 2, entry->elem);
  212. entry = entry->next;
  213. if (entry != NULL) {
  214. Writer_write(writer, ",\n", 2);
  215. }
  216. }
  217. Writer_write(writer, "\n", 1);
  218. PAD(padSpaceCount, padCounter, writer);
  219. return Writer_write(writer, "]", 1);
  220. }
  221. /** @see BencSerializer.h */
  222. static int32_t serializeList(struct Writer* writer,
  223. const List* list)
  224. {
  225. return serializeListWithPadding(writer, 0, list);
  226. }
  227. /**
  228. * Parse a comment in with "slash splat" or double slash notation,
  229. * leave the reader on the first character after the last end of comment mark.
  230. */
  231. static inline int parseComment(struct Reader* reader)
  232. {
  233. char chars[2];
  234. int ret = Reader_read(reader, &chars, 2);
  235. if (ret) {
  236. printf("Warning: expected comment\n");
  237. return OUT_OF_CONTENT_TO_READ;
  238. }
  239. if (chars[0] != '/') {
  240. printf("Warning: expected a comment starting with '/', instead found '%c'\n",chars[0]);
  241. return UNPARSABLE;
  242. }
  243. switch (chars[1]) {
  244. case '*':;
  245. do {
  246. readUntil('*', reader);
  247. } while (!(ret = Reader_read(reader, &chars, 1)) && chars[0] != '/');
  248. if (ret) {
  249. printf("Unterminated multiline comment\n");
  250. return OUT_OF_CONTENT_TO_READ;
  251. }
  252. return 0;
  253. case '/':;
  254. return readUntil('\n', reader);
  255. default:
  256. printf("Warning: expected a comment starting with \"//\" or \"/*\", "
  257. "instead found \"/%c\"\n",chars[1]);
  258. return UNPARSABLE;
  259. }
  260. }
  261. /** @see BencSerializer.h */
  262. static int32_t parseList(struct Reader* reader,
  263. struct Allocator* allocator,
  264. List* output)
  265. {
  266. char nextChar;
  267. readUntil('[', reader);
  268. Object* element;
  269. struct List_Item* thisEntry = NULL;
  270. struct List_Item** lastEntryPointer = output;
  271. int ret;
  272. for (;;) {
  273. for (;;) {
  274. if (Reader_read(reader, &nextChar, 0) != 0) {
  275. printf("Unterminated list\n");
  276. return OUT_OF_CONTENT_TO_READ;
  277. }
  278. if (nextChar == '/') {
  279. if ((ret = parseComment(reader)) != 0) {
  280. return ret;
  281. }
  282. continue;
  283. }
  284. switch (nextChar) {
  285. case '0':
  286. case '1':
  287. case '2':
  288. case '3':
  289. case '4':
  290. case '5':
  291. case '6':
  292. case '7':
  293. case '8':
  294. case '9':
  295. case '[':
  296. case '{':
  297. case '"':
  298. break;
  299. case ']':
  300. Reader_skip(reader, 1);
  301. return 0;
  302. default:
  303. // FIXME(gerard): silently skipping anything we don't understand
  304. // might not be the best idea
  305. Reader_skip(reader, 1);
  306. continue;
  307. }
  308. break;
  309. }
  310. if ((ret = parseGeneric(reader, allocator, &element)) != 0) {
  311. return ret;
  312. }
  313. thisEntry = Allocator_malloc(allocator, sizeof(struct List_Item));
  314. thisEntry->elem = element;
  315. thisEntry->next = NULL;
  316. // Read backwards so that the list reads out forward.
  317. *lastEntryPointer = thisEntry;
  318. lastEntryPointer = &(thisEntry->next);
  319. }
  320. }
  321. /**
  322. * Serialize a bencoded dictionary with padding before each line.
  323. *
  324. * @param writer the place to write the output to.
  325. * @param padSpaceCount the number of spaces to place at the beginning of each line.
  326. * @param dictionary the dictionary to serialize.
  327. */
  328. static int32_t serializeDictionaryWithPadding(struct Writer* writer,
  329. size_t padSpaceCount,
  330. const Dict* dictionary)
  331. {
  332. int padCounter = 0;
  333. Writer_write(writer, "{\n", 2);
  334. const struct Dict_Entry* entry = *dictionary;
  335. while (entry != NULL) {
  336. PAD(padSpaceCount + 2, padCounter, writer);
  337. serializeString(writer, entry->key);
  338. Writer_write(writer, " : ", 3);
  339. serializeGenericWithPadding(writer, padSpaceCount + 2, entry->val);
  340. entry = entry->next;
  341. if (entry != NULL) {
  342. Writer_write(writer, ",\n", 2);
  343. }
  344. }
  345. Writer_write(writer, "\n", 1);
  346. PAD(padSpaceCount, padCounter, writer);
  347. return Writer_write(writer, "}", 1);
  348. }
  349. /** @see BencSerializer.h */
  350. static int32_t serializeDictionary(struct Writer* writer,
  351. const Dict* dictionary)
  352. {
  353. return serializeDictionaryWithPadding(writer, 0, dictionary);
  354. }
  355. /** @see BencSerializer.h */
  356. static int32_t parseDictionary(struct Reader* reader,
  357. struct Allocator* allocator,
  358. Dict* output)
  359. {
  360. uint8_t nextChar;
  361. readUntil('{', reader);
  362. String* key;
  363. Object* value;
  364. struct Dict_Entry* entryPointer;
  365. struct Dict_Entry* lastEntryPointer = NULL;
  366. int ret = 0;
  367. for (;;) {
  368. while (!ret) {
  369. ret = Reader_read(reader, &nextChar, 0);
  370. switch (nextChar) {
  371. case '"':
  372. break;
  373. case '}':
  374. Reader_skip(reader, 1);
  375. *output = lastEntryPointer;
  376. return 0;
  377. case '/':
  378. parseComment(reader);
  379. continue;
  380. default:
  381. Reader_skip(reader, 1);
  382. continue;
  383. }
  384. break;
  385. }
  386. if (ret) {
  387. printf("Unterminated dictionary\n");
  388. return OUT_OF_CONTENT_TO_READ;
  389. }
  390. // Get key and value.
  391. if ((ret = parseString(reader, allocator, &key)) != 0) {
  392. return ret;
  393. }
  394. readUntil(':', reader);
  395. if ((ret = parseGeneric(reader, allocator, &value)) != 0) {
  396. return ret;
  397. }
  398. /* Allocate the entry. */
  399. entryPointer = Allocator_malloc(allocator, sizeof(struct Dict_Entry));
  400. entryPointer->next = lastEntryPointer;
  401. entryPointer->key = key;
  402. entryPointer->val = value;
  403. lastEntryPointer = entryPointer;
  404. }
  405. }
  406. static int32_t parseGeneric(struct Reader* reader,
  407. struct Allocator* allocator,
  408. Object** output)
  409. {
  410. int ret = 0;
  411. char firstChar;
  412. for (;;) {
  413. ret = Reader_read(reader, &firstChar, 0);
  414. switch (firstChar) {
  415. case ' ':
  416. case '\r':
  417. case '\n':
  418. case '\t':
  419. Reader_skip(reader, 1);
  420. continue;
  421. case '/':;
  422. if ((ret = parseComment(reader)) != 0) {
  423. return ret;
  424. }
  425. continue;
  426. default:
  427. break;
  428. }
  429. if (ret) {
  430. printf("Unexpected end of input\n");
  431. return OUT_OF_CONTENT_TO_READ;
  432. }
  433. break;
  434. }
  435. Object* out = Allocator_malloc(allocator, sizeof(Object));
  436. switch (firstChar) {
  437. case '0':
  438. case '1':
  439. case '2':
  440. case '3':
  441. case '4':
  442. case '5':
  443. case '6':
  444. case '7':
  445. case '8':
  446. case '9':;
  447. // int64_t. Int is special because it is not a pointer but a int64_t.
  448. int64_t bint;
  449. if ((ret = parseint64_t(reader, &bint)) == UNPARSABLE) {
  450. break;
  451. }
  452. out->type = Object_INTEGER;
  453. out->as.number = bint;
  454. break;
  455. case '[':;
  456. // List.
  457. List* list = Allocator_calloc(allocator, sizeof(List), 1);
  458. ret = parseList(reader, allocator, list);
  459. out->type = Object_LIST;
  460. out->as.list = list;
  461. break;
  462. case '{':;
  463. // Dictionary
  464. Dict* dict = Allocator_calloc(allocator, sizeof(Dict), 1);
  465. ret = parseDictionary(reader, allocator, dict);
  466. out->type = Object_DICT;
  467. out->as.dictionary = dict;
  468. break;
  469. case '"':;
  470. // String
  471. String* string = NULL;
  472. ret = parseString(reader, allocator, &string);
  473. out->type = Object_STRING;
  474. out->as.string = string;
  475. break;
  476. default:
  477. printf("While looking for something to parse: "
  478. "expected one of 0 1 2 3 4 5 6 7 8 9 [ { \", found '%c'\n", firstChar);
  479. return UNPARSABLE;
  480. }
  481. if (ret != 0) {
  482. // Something went wrong while parsing.
  483. return ret;
  484. }
  485. *output = out;
  486. return 0;
  487. }
  488. /**
  489. * Serialize a benc object into a json string with padding before each line.
  490. *
  491. * @param writer a Writer which to write the output to.
  492. * @param number of pad spaces to place before each line.
  493. * @param obj the object to serialize.
  494. * @return -2 if the type of object cannot be determined, otherwise
  495. * whatever is returned by the Writer.
  496. */
  497. static int32_t serializeGenericWithPadding(struct Writer* writer,
  498. size_t padSpaceCount,
  499. const Object* obj)
  500. {
  501. switch (obj->type)
  502. {
  503. case Object_STRING:
  504. return serializeString(writer, obj->as.string);
  505. case Object_DICT:
  506. return serializeDictionaryWithPadding(writer, padSpaceCount, obj->as.dictionary);
  507. case Object_LIST:
  508. return serializeListWithPadding(writer, padSpaceCount, obj->as.list);
  509. case Object_INTEGER:
  510. return serializeint64_t(writer, obj->as.number);
  511. default:
  512. return -2;
  513. }
  514. }
  515. static const struct BencSerializer SERIALIZER =
  516. {
  517. .serializeString = serializeString,
  518. .parseString = parseString,
  519. .serializeint64_t = serializeint64_t,
  520. .parseint64_t = parseint64_t,
  521. .serializeList = serializeList,
  522. .parseList = parseList,
  523. .serializeDictionary = serializeDictionary,
  524. .parseDictionary = parseDictionary
  525. };
  526. const struct BencSerializer* JsonBencSerializer_get()
  527. {
  528. return &SERIALIZER;
  529. }
  530. #undef PAD