/* vim: set expandtab ts=4 sw=4: */
/*
 * You may redistribute this program and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation,
 * either version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
#include "memory/Allocator.h"
#include "io/Reader.h"
#include "io/Writer.h"
#include "benc/Dict.h"
#include "benc/List.h"
#include "benc/String.h"
#include "benc/serialization/BencSerializer.h"
#include "benc/serialization/json/JsonBencSerializer.h"
#include "util/Bits.h"
#include "util/Hex.h"

/*
 * NOTE(review): the targets of the five system includes were missing; these are
 * the standard headers required by the names this file uses (errno/ERANGE,
 * PRId64, int64_t, printf/snprintf, strtol/strtoll). Confirm against history.
 */
#include <errno.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static int32_t parseGeneric(struct Reader* reader,
                            struct Allocator* allocator,
                            Object** output);
static int32_t serializeGenericWithPadding(struct Writer* writer,
                                           size_t padSpaceCount,
                                           const Object* obj);

/**
 * Exactly 32 space characters. PAD() writes indentation out of this buffer,
 * up to 32 bytes per call to Writer_write(), so it must never be shorter.
 */
static const char* thirtyTwoSpaces =
    "        "
    "        "
    "        "
    "        ";

/**
 * Write some number of spaces for padding.
 *
 * @param padSpaces the number of spaces to pad.
 * @param padCounter an integer which is used for internal bookkeeping.
 * @param writer where to write the padding.
*/ #define PAD(padSpaces, padCounter, writer) \ padCounter = 0; \ while (32 < padSpaces + padCounter) { \ Writer_write(writer, thirtyTwoSpaces, 32); \ padCounter += 32; \ } \ Writer_write(writer, thirtyTwoSpaces, padSpaces - padCounter) static inline int outOfContent() { return -2; } #define OUT_OF_CONTENT_TO_READ outOfContent() static inline int unparsable() { return -3; } #define UNPARSABLE unparsable() /** @see BencSerializer.h */ static int32_t serializeString(struct Writer* writer, const String* string) { Writer_write(writer, "\"", 1); size_t i; uint8_t chr; char buffer[5]; for (i = 0; i < string->len; i++) { chr = (uint8_t) string->bytes[i] & 0xFF; /* Nonprinting chars, \ and " are hex'd */ if (chr < 126 && chr > 31 && chr != '\\' && chr != '"') { snprintf(buffer, 5, "%c", chr); Writer_write(writer, buffer, 1); } else { snprintf(buffer, 5, "\\x%.2X", chr); Writer_write(writer, buffer, 4); } } return Writer_write(writer, "\"", 1); } /** * Read until 1 char after the target character. 
*/ static inline int readUntil(uint8_t target, struct Reader* reader) { uint8_t nextChar; do { if (Reader_read(reader, (char*)&nextChar, 1)) { printf("Unexpected end of input while looking for '%c'\n",target); return OUT_OF_CONTENT_TO_READ; } } while (nextChar != target); return 0; } static inline int parseString(struct Reader* reader, struct Allocator* allocator, String** output) { #define BUFF_SZ (1<<8) #define BUFF_MAX (1<<20) int curSize = BUFF_SZ; struct Allocator* localAllocator = Allocator_child(allocator); uint8_t* buffer = Allocator_malloc(localAllocator, curSize); if (readUntil('"', reader) || Reader_read(reader, buffer, 1)) { printf("Unterminated string\n"); Allocator_free(localAllocator); return OUT_OF_CONTENT_TO_READ; } for (int i = 0; i < BUFF_MAX - 1; i++) { if (buffer[i] == '\\') { // \x01 (skip the x) Reader_skip(reader, 1); uint8_t hex[2]; if (Reader_read(reader, (char*)hex, 2)) { printf("Unexpected end of input parsing escape sequence\n"); Allocator_free(localAllocator); return OUT_OF_CONTENT_TO_READ; } int byte = Hex_decodeByte(hex[0], hex[1]); if (byte == -1) { printf("Invalid escape \"%c%c\" after \"%.*s\"\n",hex[0],hex[1],i+1,buffer); Allocator_free(localAllocator); return UNPARSABLE; } buffer[i] = (uint8_t) byte; } else if (buffer[i] == '"') { *output = String_newBinary((char*)buffer, i, allocator); Allocator_free(localAllocator); return 0; } if (i == curSize - 1) { curSize <<= 1; buffer = Allocator_realloc(localAllocator, buffer, curSize); } if (Reader_read(reader, buffer + i + 1, 1)) { if (i+1 <= 20) { printf("Unterminated string \"%.*s\"\n", i+1, buffer); } else { printf("Unterminated string starting with \"%.*s...\"\n", 20, buffer); } Allocator_free(localAllocator); return OUT_OF_CONTENT_TO_READ; } } printf("Maximum string length of %d bytes exceeded.\n",BUFF_SZ); Allocator_free(localAllocator); return UNPARSABLE; #undef BUFF_SZ #undef BUFF_MAX } /** @see BencSerializer.h */ static int32_t serializeint64_t(struct Writer* writer, int64_t 
integer) { char buffer[32]; Bits_memset(buffer, 0, 32); snprintf(buffer, 32, "%" PRId64, integer); return Writer_write(writer, buffer, CString_strlen(buffer)); } /** @see BencSerializer.h */ static int32_t parseint64_t(struct Reader* reader, int64_t* output) { uint8_t buffer[32]; for (int i = 0; i < 21; i++) { int32_t status = Reader_read(reader, buffer + i, 0); if (i == 0 && buffer[i] == '-' && status == 0) { // It's just a negative number, no need to fail it. continue; } if (buffer[i] < '0' || buffer[i] > '9' || status != 0 /* end of input */) { buffer[i] = '\0'; int64_t out = strtol((char*)buffer, NULL, 10); // Failed parse causes 0 to be set. if (out == 0 && buffer[0] != '0' && (buffer[0] != '-' || buffer[1] != '0')) { printf("Failed to parse \"%s\": not a number\n",buffer); return UNPARSABLE; } if ((out == INT64_MAX || out == INT64_MIN) && errno == ERANGE) { printf("Failed to parse \"%s\": number too large/small\n",buffer); return UNPARSABLE; } *output = out; return 0; } Reader_skip(reader, 1); } // Larger than the max possible int64. buffer[22] = '\0'; printf("Failed to parse \"%s\": number too large\n",buffer); return UNPARSABLE; } /** * Serialize a bencoded list with padding at the beginning of each line. * * @param writer the place to write the output to. * @param padSpaceCount the number of spaces to place at the beginning of each line. 
* @param list the list to serialize */ static int32_t serializeListWithPadding(struct Writer* writer, const size_t padSpaceCount, const List* list) { int padCounter; Writer_write(writer, "[\n", 2); const struct List_Item* entry = *list; while (entry != NULL) { PAD(padSpaceCount + 2, padCounter, writer); serializeGenericWithPadding(writer, padSpaceCount + 2, entry->elem); entry = entry->next; if (entry != NULL) { Writer_write(writer, ",\n", 2); } } Writer_write(writer, "\n", 1); PAD(padSpaceCount, padCounter, writer); return Writer_write(writer, "]", 1); } /** @see BencSerializer.h */ static int32_t serializeList(struct Writer* writer, const List* list) { return serializeListWithPadding(writer, 0, list); } /** * Parse a comment in with "slash splat" or double slash notation, * leave the reader on the first character after the last end of comment mark. */ static inline int parseComment(struct Reader* reader) { char chars[2]; int ret = Reader_read(reader, &chars, 2); if (ret) { printf("Warning: expected comment\n"); return OUT_OF_CONTENT_TO_READ; } if (chars[0] != '/') { printf("Warning: expected a comment starting with '/', instead found '%c'\n",chars[0]); return UNPARSABLE; } switch (chars[1]) { case '*':; do { readUntil('*', reader); } while (!(ret = Reader_read(reader, &chars, 1)) && chars[0] != '/'); if (ret) { printf("Unterminated multiline comment\n"); return OUT_OF_CONTENT_TO_READ; } return 0; case '/':; return readUntil('\n', reader); default: printf("Warning: expected a comment starting with \"//\" or \"/*\", " "instead found \"/%c\"\n",chars[1]); return UNPARSABLE; } } /** @see BencSerializer.h */ static int32_t parseList(struct Reader* reader, struct Allocator* allocator, List* output) { char nextChar; readUntil('[', reader); Object* element; struct List_Item* thisEntry = NULL; struct List_Item** lastEntryPointer = output; int ret; for (;;) { for (;;) { if (Reader_read(reader, &nextChar, 0) != 0) { printf("Unterminated list\n"); return 
OUT_OF_CONTENT_TO_READ; } if (nextChar == '/') { if ((ret = parseComment(reader)) != 0) { return ret; } continue; } switch (nextChar) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '[': case '{': case '"': break; case ']': Reader_skip(reader, 1); return 0; default: // FIXME(gerard): silently skipping anything we don't understand // might not be the best idea Reader_skip(reader, 1); continue; } break; } if ((ret = parseGeneric(reader, allocator, &element)) != 0) { return ret; } thisEntry = Allocator_malloc(allocator, sizeof(struct List_Item)); thisEntry->elem = element; thisEntry->next = NULL; // Read backwards so that the list reads out forward. *lastEntryPointer = thisEntry; lastEntryPointer = &(thisEntry->next); } } /** * Serialize a bencoded dictionary with padding before each line. * * @param writer the place to write the output to. * @param padSpaceCount the number of spaces to place at the beginning of each line. * @param dictionary the dictionary to serialize. 
*/ static int32_t serializeDictionaryWithPadding(struct Writer* writer, size_t padSpaceCount, const Dict* dictionary) { int padCounter = 0; Writer_write(writer, "{\n", 2); const struct Dict_Entry* entry = *dictionary; while (entry != NULL) { PAD(padSpaceCount + 2, padCounter, writer); serializeString(writer, entry->key); Writer_write(writer, " : ", 3); serializeGenericWithPadding(writer, padSpaceCount + 2, entry->val); entry = entry->next; if (entry != NULL) { Writer_write(writer, ",\n", 2); } } Writer_write(writer, "\n", 1); PAD(padSpaceCount, padCounter, writer); return Writer_write(writer, "}", 1); } /** @see BencSerializer.h */ static int32_t serializeDictionary(struct Writer* writer, const Dict* dictionary) { return serializeDictionaryWithPadding(writer, 0, dictionary); } /** @see BencSerializer.h */ static int32_t parseDictionary(struct Reader* reader, struct Allocator* allocator, Dict* output) { uint8_t nextChar; readUntil('{', reader); String* key; Object* value; struct Dict_Entry* first = NULL; struct Dict_Entry* last = NULL; int ret = 0; for (;;) { while (!ret) { ret = Reader_read(reader, &nextChar, 0); switch (nextChar) { case '"': break; case '}': Reader_skip(reader, 1); *output = first; return 0; case '/': parseComment(reader); continue; default: Reader_skip(reader, 1); continue; } break; } if (ret) { printf("Unterminated dictionary\n"); return OUT_OF_CONTENT_TO_READ; } // Get key and value. if ((ret = parseString(reader, allocator, &key)) != 0) { return ret; } readUntil(':', reader); if ((ret = parseGeneric(reader, allocator, &value)) != 0) { return ret; } /* Allocate the entry. 
*/ struct Dict_Entry* entry = Allocator_calloc(allocator, sizeof(struct Dict_Entry), 1); entry->key = key; entry->val = value; if (last) { last->next = entry; } else { first = entry; } last = entry; } } static int32_t parseGeneric(struct Reader* reader, struct Allocator* allocator, Object** output) { int ret = 0; char firstChar; for (;;) { ret = Reader_read(reader, &firstChar, 0); switch (firstChar) { case ' ': case '\r': case '\n': case '\t': Reader_skip(reader, 1); continue; case '/':; if ((ret = parseComment(reader)) != 0) { return ret; } continue; default: break; } if (ret) { printf("Unexpected end of input\n"); return OUT_OF_CONTENT_TO_READ; } break; } Object* out = Allocator_malloc(allocator, sizeof(Object)); switch (firstChar) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':; // int64_t. Int is special because it is not a pointer but a int64_t. int64_t bint; if ((ret = parseint64_t(reader, &bint)) == UNPARSABLE) { break; } out->type = Object_INTEGER; out->as.number = bint; break; case '[':; // List. List* list = Allocator_calloc(allocator, sizeof(List), 1); ret = parseList(reader, allocator, list); out->type = Object_LIST; out->as.list = list; break; case '{':; // Dictionary Dict* dict = Allocator_calloc(allocator, sizeof(Dict), 1); ret = parseDictionary(reader, allocator, dict); out->type = Object_DICT; out->as.dictionary = dict; break; case '"':; // String String* string = NULL; ret = parseString(reader, allocator, &string); out->type = Object_STRING; out->as.string = string; break; default: printf("While looking for something to parse: " "expected one of 0 1 2 3 4 5 6 7 8 9 [ { \", found '%c'\n", firstChar); return UNPARSABLE; } if (ret != 0) { // Something went wrong while parsing. return ret; } *output = out; return 0; } /** * Serialize a benc object into a json string with padding before each line. * * @param writer a Writer which to write the output to. 
* @param number of pad spaces to place before each line. * @param obj the object to serialize. * @return -2 if the type of object cannot be determined, otherwise * whatever is returned by the Writer. */ static int32_t serializeGenericWithPadding(struct Writer* writer, size_t padSpaceCount, const Object* obj) { switch (obj->type) { case Object_STRING: return serializeString(writer, obj->as.string); case Object_DICT: return serializeDictionaryWithPadding(writer, padSpaceCount, obj->as.dictionary); case Object_LIST: return serializeListWithPadding(writer, padSpaceCount, obj->as.list); case Object_INTEGER: return serializeint64_t(writer, obj->as.number); default: return -2; } } static const struct BencSerializer SERIALIZER = { .serializeString = serializeString, .parseString = parseString, .serializeint64_t = serializeint64_t, .parseint64_t = parseint64_t, .serializeList = serializeList, .parseList = parseList, .serializeDictionary = serializeDictionary, .parseDictionary = parseDictionary }; const struct BencSerializer* JsonBencSerializer_get(void) { return &SERIALIZER; } #undef PAD