StandardBencSerializer.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454
  1. /* vim: set expandtab ts=4 sw=4: */
  2. /*
  3. * You may redistribute this program and/or modify it under the terms of
  4. * the GNU General Public License as published by the Free Software Foundation,
  5. * either version 3 of the License, or (at your option) any later version.
  6. *
  7. * This program is distributed in the hope that it will be useful,
  8. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. * GNU General Public License for more details.
  11. *
  12. * You should have received a copy of the GNU General Public License
  13. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  14. */
  15. #include "util/Bits.h"
  16. #include "memory/Allocator.h"
  17. #include "io/Reader.h"
  18. #include "io/Writer.h"
  19. #include "benc/Dict.h"
  20. #include "benc/List.h"
  21. #include "benc/serialization/BencSerializer.h"
  22. #define string_strlen
  23. #include "util/platform/libc/string.h"
  24. #include <stdio.h>
  25. /* for parseint64_t */
  26. #include <limits.h>
  27. #include <stdlib.h> // strtol()
  28. #include <errno.h>
  29. static int32_t parseGeneric(struct Reader* reader,
  30. struct Allocator* allocator,
  31. Object** output);
  32. static int32_t serializeGeneric(struct Writer* writer,
  33. const Object* obj);
  /**
   * Format a signed 64-bit integer as base-10 text and pass it to a writer.
   *
   * @param writer the destination for the formatted digits.
   * @param integer the value to format.
   * @return whatever Writer_write() returns for the formatted text.
   */
  static int32_t writeint64_t(struct Writer* writer, int64_t integer)
  {
      /* Large enough for any int64_t: a sign, up to 19 digits and the terminator. */
      char digits[32] = {0};
      snprintf(digits, sizeof(digits), "%" PRId64, integer);

      const size_t digitCount = strlen(digits);
      return Writer_write(writer, digits, digitCount);
  }
  47. /** @see BencSerializer.h */
  48. static int32_t serializeString(struct Writer* writer,
  49. const String* string)
  50. {
  51. writeint64_t(writer, string->len);
  52. Writer_write(writer, ":", 1);
  53. return Writer_write(writer, string->bytes, string->len);
  54. }
  55. /** @see BencSerializer.h */
  56. static int32_t parseString(struct Reader* reader,
  57. struct Allocator* allocator,
  58. String** output)
  59. {
  60. #define OUT_OF_CONTENT_TO_READ -2
  61. #define UNPARSABLE -3
  62. /* Strings longer than 1*10^21-1 represent numbers definitly larger than uint64. */
  63. #define NUMBER_MAXLENGTH 21
  64. char number[32];
  65. char nextChar;
  66. int ret;
  67. /* Parse the size of the string. */
  68. size_t i = 0;
  69. for (i = 0; ; i++) {
  70. ret = Reader_read(reader, &nextChar, 1);
  71. if (ret != 0) {
  72. return OUT_OF_CONTENT_TO_READ;
  73. }
  74. if (nextChar == ':') {
  75. /* Found the separator. */
  76. break;
  77. }
  78. if (nextChar < '0' || nextChar > '9') {
  79. /* Invalid character. */
  80. return UNPARSABLE;
  81. }
  82. if (i >= NUMBER_MAXLENGTH) {
  83. /* Massive huge number. */
  84. return UNPARSABLE;
  85. }
  86. number[i] = nextChar;
  87. }
  88. number[i] = '\0';
  89. size_t length = strtoul(number, NULL, 10);
  90. char* bytes = Allocator_malloc(allocator, length + 1);
  91. String* string = Allocator_malloc(allocator, sizeof(String));
  92. /* Put a null terminator after the end so it can be treated as a normal string. */
  93. bytes[length] = '\0';
  94. if (Reader_read(reader, bytes, length) != 0) {
  95. return OUT_OF_CONTENT_TO_READ;
  96. }
  97. string->bytes = bytes;
  98. string->len = length;
  99. *output = string;
  100. return 0;
  101. #undef OUT_OF_CONTENT_TO_READ
  102. #undef UNPARSABLE
  103. #undef NUMBER_MAXLENGTH
  104. }
  /**
   * Write an integer in bencoded form: 'i', the base-10 digits, then 'e'.
   * @see BencSerializer.h
   *
   * @param writer the place to write the encoded integer to.
   * @param integer the number to encode.
   * @return the first nonzero result reported while writing, otherwise the
   *         result of the final Writer_write() call.
   */
  static int32_t serializeint64_t(struct Writer* writer,
                                  int64_t integer)
  {
      /* Stop at the first failed write so a later write cannot mask it. */
      int32_t ret = Writer_write(writer, "i", 1);
      if (ret != 0) {
          return ret;
      }

      ret = writeint64_t(writer, integer);
      if (ret != 0) {
          return ret;
      }

      return Writer_write(writer, "e", 1);
  }
  /**
   * Parse a bencoded integer of the form "i<optional '-'><digits>e".
   * @see BencSerializer.h
   *
   * @param reader the reader to take the encoded integer from.
   * @param output set to the parsed value on success; not written on failure.
   * @return 0 on success,
   *         -2 if the reader ran out of content,
   *         -3 if the input does not start with 'i', contains an invalid
   *            character, has no digits, is too long, or is out of range.
   */
  static int32_t parseint64_t(struct Reader* reader,
                              int64_t* output)
  {
      #define OUT_OF_CONTENT_TO_READ -2
      #define UNPARSABLE -3

      char buffer[32];
      int i;
      for (i = 0; ; i++) {
          if (Reader_read(reader, buffer + i, 1) != 0) {
              return OUT_OF_CONTENT_TO_READ;
          }
          if (i == 0) {
              if (buffer[i] != 'i') {
                  /* Not an int. */
                  return UNPARSABLE;
              }
              continue;
          }
          if (buffer[i] == 'e') {
              break;
          }
          if (i == 1 && buffer[i] == '-') {
              /* It's just a negative number, no need to fail it. */
              continue;
          }
          if (buffer[i] < '0' || buffer[i] > '9') {
              return UNPARSABLE;
          }
          if (i > 21) {
              /* Larger than the max possible int64; also keeps i inside buffer. */
              return UNPARSABLE;
          }
      }

      /*
       * Convert starting after the 'i'. The trailing 'e' stops the conversion,
       * so the buffer does not need a null terminator. strtoll() is used
       * because long is only 32 bits wide on some platforms.
       */
      char* end = NULL;
      errno = 0;
      const long long out = strtoll(buffer + 1, &end, 10);

      if (end == buffer + 1) {
          /* Nothing was converted: "ie" or "i-e". */
          return UNPARSABLE;
      }
      if (errno == ERANGE) {
          /* errno was cleared above, so this can only come from this conversion. */
          return UNPARSABLE;
      }

      *output = out;
      return 0;

      #undef OUT_OF_CONTENT_TO_READ
      #undef UNPARSABLE
  }
  163. /** @see BencSerializer.h */
  164. static int32_t serializeList(struct Writer* writer,
  165. const List* list)
  166. {
  167. int ret = Writer_write(writer, "l", 1);
  168. if (list) {
  169. const struct List_Item* entry = *list;
  170. while (ret == 0 && entry != NULL) {
  171. ret = serializeGeneric(writer, entry->elem);
  172. entry = entry->next;
  173. }
  174. }
  175. if (ret == 0) {
  176. ret = Writer_write(writer, "e", 1);
  177. }
  178. return ret;
  179. }
  180. /** @see BencSerializer.h */
  181. static int32_t parseList(struct Reader* reader,
  182. struct Allocator* allocator,
  183. List* output)
  184. {
  185. #define OUT_OF_CONTENT_TO_READ -2
  186. #define UNPARSABLE -3
  187. char nextChar;
  188. if (Reader_read(reader, &nextChar, 1) != 0) {
  189. return OUT_OF_CONTENT_TO_READ;
  190. }
  191. if (nextChar != 'l') {
  192. return UNPARSABLE;
  193. }
  194. Object* element;
  195. struct List_Item* thisEntry = NULL;
  196. struct List_Item** lastEntryPointer = output;
  197. int ret;
  198. if (Reader_read(reader, &nextChar, 0) != 0) {
  199. return OUT_OF_CONTENT_TO_READ;
  200. }
  201. *lastEntryPointer = NULL;
  202. while (nextChar != 'e') {
  203. ret = parseGeneric(reader, allocator, &element);
  204. if (ret != 0) {
  205. return ret;
  206. }
  207. thisEntry = Allocator_malloc(allocator, sizeof(struct List_Item));
  208. thisEntry->elem = element;
  209. /* Read backwards so that the list reads out forward. */
  210. *lastEntryPointer = thisEntry;
  211. lastEntryPointer = &(thisEntry->next);
  212. if (Reader_read(reader, &nextChar, 0) != 0) {
  213. return OUT_OF_CONTENT_TO_READ;
  214. }
  215. }
  216. if (thisEntry) {
  217. thisEntry->next = NULL;
  218. }
  219. /* move the pointer to after the 'e' at the end of the list. */
  220. Reader_skip(reader, 1);
  221. return 0;
  222. #undef OUT_OF_CONTENT_TO_READ
  223. #undef UNPARSABLE
  224. }
  225. /** @see BencSerializer.h */
  226. static int32_t serializeDictionary(struct Writer* writer,
  227. const Dict* dictionary)
  228. {
  229. const struct Dict_Entry* entry = *dictionary;
  230. Writer_write(writer, "d", 1);
  231. while (entry != NULL) {
  232. serializeString(writer, entry->key);
  233. serializeGeneric(writer, entry->val);
  234. entry = entry->next;
  235. }
  236. return Writer_write(writer, "e", 1);
  237. }
  238. /** @see BencSerializer.h */
  239. static int32_t parseDictionary(struct Reader* reader,
  240. struct Allocator* allocator,
  241. Dict* output)
  242. {
  243. #define OUT_OF_CONTENT_TO_READ -2
  244. #define UNPARSABLE -3
  245. char nextChar;
  246. if (Reader_read(reader, &nextChar, 1) < 0) {
  247. return OUT_OF_CONTENT_TO_READ;
  248. }
  249. if (nextChar != 'd') {
  250. /* Not a dictionary. */
  251. return UNPARSABLE;
  252. }
  253. String* key;
  254. Object* value;
  255. struct Dict_Entry* entryPointer;
  256. struct Dict_Entry* lastEntryPointer = NULL;
  257. int ret;
  258. for (;;) {
  259. /* Peek at the next char. */
  260. if (Reader_read(reader, &nextChar, 0) < 0) {
  261. /* Ran over read buffer. */
  262. return OUT_OF_CONTENT_TO_READ;
  263. }
  264. if (nextChar == 'e') {
  265. /* Got to the end. */
  266. break;
  267. }
  268. /* Get key and value. */
  269. ret = parseString(reader, allocator, &key);
  270. if (ret != 0) {
  271. return ret;
  272. }
  273. ret = parseGeneric(reader, allocator, &value);
  274. if (ret != 0) {
  275. return ret;
  276. }
  277. /* Allocate the entry. */
  278. entryPointer = Allocator_malloc(allocator, sizeof(struct Dict_Entry));
  279. entryPointer->next = lastEntryPointer;
  280. entryPointer->key = key;
  281. entryPointer->val = value;
  282. lastEntryPointer = entryPointer;
  283. }
  284. /* We got an 'e', leave the pointer on the next char after it. */
  285. Reader_skip(reader, 1);
  286. *output = lastEntryPointer;
  287. return 0;
  288. #undef OUT_OF_CONTENT_TO_READ
  289. #undef UNPARSABLE
  290. }
  291. /**
  292. * Parse an unknown data type.
  293. * This is not exposed to the world because it is expected that one would
  294. * know what type they are parsing to begin with. This is used by parseDictionary
  295. * and parseList to grab pieces of data which are of unknown type and parse them.
  296. *
  297. * @param reader the reader to get the stream of data from.
  298. * @param allocator the means of storing the parsed data.
  299. * @param output a pointer which will be pointed to the output.
  300. */
  301. static int32_t parseGeneric(struct Reader* reader,
  302. struct Allocator* allocator,
  303. Object** output)
  304. {
  305. #define OUT_OF_CONTENT_TO_READ -2
  306. #define UNPARSABLE -3
  307. int ret;
  308. char firstChar;
  309. ret = Reader_read(reader, &firstChar, 0);
  310. if (ret != 0) {
  311. return OUT_OF_CONTENT_TO_READ;
  312. }
  313. Object* out = Allocator_malloc(allocator, sizeof(Object));
  314. if (firstChar <= '9' && firstChar >= '0') {
  315. /* It's a string! */
  316. String* string = NULL;
  317. ret = parseString(reader, allocator, &string);
  318. out->type = Object_STRING;
  319. out->as.string = string;
  320. } else {
  321. switch (firstChar) {
  322. case 'i':;
  323. /* int64_t. Int is special because it is not a pointer but a int64_t. */
  324. int64_t bint = 0;
  325. ret = parseint64_t(reader, &bint);
  326. out->type = Object_INTEGER;
  327. out->as.number = bint;
  328. break;
  329. case 'l':;
  330. /* List. */
  331. List* list = Allocator_calloc(allocator, sizeof(List), 1);
  332. ret = parseList(reader, allocator, list);
  333. out->type = Object_LIST;
  334. out->as.list = list;
  335. break;
  336. case 'd':;
  337. /* Dictionary. */
  338. Dict* dict = Allocator_calloc(allocator, sizeof(Dict), 1);
  339. ret = parseDictionary(reader, allocator, dict);
  340. out->type = Object_DICT;
  341. out->as.dictionary = dict;
  342. break;
  343. default:
  344. return UNPARSABLE;
  345. }
  346. }
  347. if (ret != 0) {
  348. /* Something went wrong while parsing. */
  349. return ret;
  350. }
  351. *output = out;
  352. return 0;
  353. #undef OUT_OF_CONTENT_TO_READ
  354. #undef UNPARSABLE
  355. }
  356. /**
  357. * Serialize a benc object into a bencoded string.
  358. * This is not exposed to the world because it is expected that one would
  359. * know what type they are serializing to begin with.
  360. *
  361. * @param writer a Writer which to write the output to.
  362. * @param obj the object to serialize.
  363. * @return -2 if the type of object cannot be determined, otherwise
  364. * whatever is returned by the Writer.
  365. */
  366. static int32_t serializeGeneric(struct Writer* writer,
  367. const Object* obj)
  368. {
  369. switch (obj->type)
  370. {
  371. case Object_STRING:
  372. return serializeString(writer, obj->as.string);
  373. break;
  374. case Object_DICT:
  375. return serializeDictionary(writer, obj->as.dictionary);
  376. break;
  377. case Object_LIST:
  378. return serializeList(writer, obj->as.list);
  379. break;
  380. case Object_INTEGER:
  381. return serializeint64_t(writer, obj->as.number);
  382. break;
  383. default:
  384. return -2;
  385. }
  386. }
  387. static const struct BencSerializer SERIALIZER =
  388. {
  389. .serializeString = serializeString,
  390. .parseString = parseString,
  391. .serializeint64_t = serializeint64_t,
  392. .parseint64_t = parseint64_t,
  393. .serializeList = serializeList,
  394. .parseList = parseList,
  395. .serializeDictionary = serializeDictionary,
  396. .parseDictionary = parseDictionary
  397. };
  398. const struct BencSerializer* StandardBencSerializer_get()
  399. {
  400. return &SERIALIZER;
  401. }