RISCI_ATOM
/
cjdns
mirror of https://github.com/cjdelisle/cjdns.git


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594
							/* vim: set expandtab ts=4 sw=4: */
/*
 * You may redistribute this program and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation,
 * either version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
#include "memory/Allocator.h"
#include "io/Reader.h"
#include "io/Writer.h"
#include "benc/Dict.h"
#include "benc/List.h"
#include "benc/String.h"
#include "benc/serialization/BencSerializer.h"
#include "benc/serialization/json/JsonBencSerializer.h"
#include "util/Bits.h"
#include "util/Hex.h"

#include <stdio.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdlib.h>
#include <errno.h>

/*
 * Forward declarations.
 * parseGeneric() and serializeGenericWithPadding() are defined near the end of
 * this file but are called by the list and dictionary routines above them,
 * which they in turn call, so they must be declared first.
 */
static int32_t parseGeneric(struct Reader* reader,
                            struct Allocator* allocator,
                            Object** output);
static int32_t serializeGenericWithPadding(struct Writer* writer,
                                           size_t padSpaceCount,
                                           const Object* obj);

/** A run of exactly 32 space characters, the source for all padding output. */
static const char* thirtyTwoSpaces = "                                ";

/**
 * Write some number of spaces for padding.
 *
 * The padding is emitted in chunks of at most 32 spaces, each taken from
 * thirtyTwoSpaces. The loop condition compares against the number of spaces
 * still to be written (padSpaces - padCounter), so it terminates for any
 * amount of padding.
 *
 * The expansion is a single statement, so it is safe inside unbraced if/else.
 * padSpaces is evaluated more than once and must have no side effects.
 *
 * @param padSpaces the number of spaces to pad.
 * @param padCounter a modifiable integer which is used for internal bookkeeping,
 *                   on completion it holds the number of spaces written by
 *                   full 32 space chunks.
 * @param writer where to write the padding.
 */
#define PAD(padSpaces, padCounter, writer)                                      \
    do {                                                                        \
        (padCounter) = 0;                                                       \
        while (32 < (padSpaces) - (padCounter)) {                               \
            Writer_write((writer), thirtyTwoSpaces, 32);                        \
            (padCounter) += 32;                                                 \
        }                                                                       \
        Writer_write((writer), thirtyTwoSpaces, (padSpaces) - (padCounter));    \
    } while (0)

/**
 * Status code returned by the parsers in this file when the input ends
 * before the value being parsed is complete.
 *
 * @return always -2.
 */
static inline int outOfContent(void)
{
    return -2;
}
#define OUT_OF_CONTENT_TO_READ outOfContent()

/**
 * Status code returned by the parsers in this file when the input is present
 * but cannot be interpreted.
 *
 * @return always -3.
 */
static inline int unparsable(void)
{
    return -3;
}
#define UNPARSABLE unparsable()

/**
 * Write a string surrounded by double quotes.
 *
 * Bytes with a value from 32 to 125, other than backslash and double quote,
 * are written unchanged. Every other byte is written as a backslash, an 'x'
 * and two uppercase hexadecimal digits.
 *
 * @param writer where to write the output.
 * @param string the string to serialize.
 * @return the result of writing the closing quote, results of the earlier
 *         writes are not examined.
 * @see BencSerializer.h
 */
static int32_t serializeString(struct Writer* writer,
                               const String* string)
{
    static const char hexDigits[] = "0123456789ABCDEF";

    Writer_write(writer, "\"", 1);

    for (size_t pos = 0; pos < string->len; pos++) {
        const uint8_t byte = (uint8_t) string->bytes[pos];
        const bool needsEscape = byte >= 126 || byte <= 31 || byte == '\\' || byte == '"';

        if (needsEscape) {
            char escaped[4];
            escaped[0] = '\\';
            escaped[1] = 'x';
            escaped[2] = hexDigits[byte >> 4];
            escaped[3] = hexDigits[byte & 0x0F];
            Writer_write(writer, escaped, 4);
        } else {
            char plain[1];
            plain[0] = (char) byte;
            Writer_write(writer, plain, 1);
        }
    }

    return Writer_write(writer, "\"", 1);
}

/**
 * Consume input one byte at a time up to and including the first occurrence
 * of the target byte, leaving the reader on the byte which follows it.
 *
 * @param target the byte to look for.
 * @param reader the input to consume.
 * @return 0 once the target has been consumed, OUT_OF_CONTENT_TO_READ if a
 *         read fails first (a message is printed in that case).
 */
static inline int readUntil(uint8_t target, struct Reader* reader)
{
    for (;;) {
        uint8_t current;
        if (Reader_read(reader, (char*)&current, 1)) {
            printf("Unexpected end of input while looking for '%c'\n",target);
            return OUT_OF_CONTENT_TO_READ;
        }
        if (current == target) {
            return 0;
        }
    }
}

/**
 * Parse a double quoted string.
 *
 * Input is discarded up to and including the opening quote. The content is
 * then collected byte by byte into a scratch buffer until an unescaped
 * closing quote is found. A backslash introduces an escape of the form
 * backslash, one character (expected to be 'x', it is skipped without being
 * checked), then two hexadecimal digits which give the value of the byte.
 *
 * The scratch buffer lives in a child of the given allocator and is released
 * on every exit path; the resulting String is created with the given
 * allocator.
 *
 * @param reader the input to parse from.
 * @param allocator used to create the resulting String.
 * @param output set to the new String on success, untouched on failure.
 * @return 0 on success, OUT_OF_CONTENT_TO_READ if the input ends before the
 *         closing quote, UNPARSABLE if an escape is not valid hexadecimal or
 *         the string reaches the BUFF_MAX limit.
 * @see BencSerializer.h
 */
static inline int parseString(struct Reader* reader,
                              struct Allocator* allocator,
                              String** output)
{
    #define BUFF_SZ (1<<8)
    #define BUFF_MAX (1<<20)

    int curSize = BUFF_SZ;
    struct Allocator* localAllocator = Allocator_child(allocator);
    uint8_t* buffer = Allocator_malloc(localAllocator, curSize);
    if (readUntil('"', reader) || Reader_read(reader, buffer, 1)) {
        printf("Unterminated string\n");
        Allocator_free(localAllocator);
        return OUT_OF_CONTENT_TO_READ;
    }
    for (int i = 0; i < BUFF_MAX - 1; i++) {
        if (buffer[i] == '\\') {
            // \x01 (skip the x)
            Reader_skip(reader, 1);
            uint8_t hex[2];
            if (Reader_read(reader, (char*)hex, 2)) {
                printf("Unexpected end of input parsing escape sequence\n");
                Allocator_free(localAllocator);
                return OUT_OF_CONTENT_TO_READ;
            }
            int byte = Hex_decodeByte(hex[0], hex[1]);
            if (byte == -1) {
                printf("Invalid escape \"%c%c\" after \"%.*s\"\n",
                       hex[0], hex[1], i+1, (char*)buffer);
                Allocator_free(localAllocator);
                return UNPARSABLE;
            }
            // The decoded byte replaces the backslash. Because of the else below,
            // an escaped quote is kept as content rather than ending the string.
            buffer[i] = (uint8_t) byte;
        } else if (buffer[i] == '"') {
            // Unescaped quote: bytes 0 to i-1 are the content.
            *output = String_newBinary((char*)buffer, i, allocator);
            Allocator_free(localAllocator);
            return 0;
        }
        if (i == curSize - 1) {
            // The next byte would land past the end of the buffer, double it.
            curSize <<= 1;
            buffer = Allocator_realloc(localAllocator, buffer, curSize);
        }
        if (Reader_read(reader, buffer + i + 1, 1)) {
            if (i+1 <= 20) {
                printf("Unterminated string \"%.*s\"\n", i+1, (char*)buffer);
            } else {
                printf("Unterminated string starting with \"%.*s...\"\n", 20, (char*)buffer);
            }
            Allocator_free(localAllocator);
            return OUT_OF_CONTENT_TO_READ;
        }
    }

    // Report the limit which is actually enforced, not the initial buffer size.
    printf("Maximum string length of %d bytes exceeded.\n",BUFF_MAX);
    Allocator_free(localAllocator);
    return UNPARSABLE;

    #undef BUFF_SZ
    #undef BUFF_MAX
}

/**
 * Write an integer as signed decimal text.
 *
 * @param writer where to write the output.
 * @param integer the number to serialize.
 * @return whatever the write returns.
 * @see BencSerializer.h
 */
static int32_t serializeint64_t(struct Writer* writer,
                                int64_t integer)
{
    // Zero filled so the text is always terminated.
    char digits[32] = { 0 };

    snprintf(digits, sizeof(digits), "%" PRId64, integer);

    return Writer_write(writer, digits, CString_strlen(digits));
}

/**
 * Parse a decimal integer, optionally preceded by a minus sign.
 *
 * Characters are examined with a zero length read and consumed only once
 * they are known to belong to the number (NOTE(review): a zero length read
 * is assumed to peek without consuming, as the read/skip pairing in this
 * file implies). The first character which is not part of the number is
 * left in the reader.
 *
 * @param reader the input to parse from.
 * @param output set to the parsed value on success, untouched on failure.
 * @return 0 on success, UNPARSABLE if there is no number at the current
 *         position or it does not fit in an int64_t.
 * @see BencSerializer.h
 */
static int32_t parseint64_t(struct Reader* reader,
                            int64_t* output)
{
    // "-9223372036854775808" is 20 characters long, anything which reaches
    // 21 characters cannot be a valid int64_t.
    enum { MAX_CHARS = 21 };
    uint8_t buffer[32];

    for (int i = 0; i < MAX_CHARS; i++) {
        int32_t status = Reader_read(reader, buffer + i, 0);

        // Only look at buffer[i] if the read succeeded, otherwise it holds garbage.
        if (status == 0) {
            bool isSign = (i == 0 && buffer[i] == '-');
            bool isDigit = (buffer[i] >= '0' && buffer[i] <= '9');
            if (isSign || isDigit) {
                // Part of the number (the sign included), consume it.
                Reader_skip(reader, 1);
                continue;
            }
        }

        // End of the number or end of input.
        buffer[i] = '\0';

        // strtoll rather than strtol: long may be narrower than 64 bits.
        // errno is cleared first so that a stale ERANGE is not misread.
        errno = 0;
        long long parsed = strtoll((char*)buffer, NULL, 10);

        // Failed parse causes 0 to be set.
        if (parsed == 0 && buffer[0] != '0' && (buffer[0] != '-' || buffer[1] != '0')) {
            printf("Failed to parse \"%s\": not a number\n",(char*)buffer);
            return UNPARSABLE;
        }
        if (errno == ERANGE) {
            printf("Failed to parse \"%s\": number too large/small\n",(char*)buffer);
            return UNPARSABLE;
        }
        *output = (int64_t) parsed;
        return 0;
    }

    // Larger than the max possible int64.
    // buffer[0] to buffer[MAX_CHARS - 1] were filled in, terminate right after them.
    buffer[MAX_CHARS] = '\0';
    printf("Failed to parse \"%s\": number too large\n",(char*)buffer);
    return UNPARSABLE;
}

/**
 * Serialize a bencoded list with padding at the beginning of each line.
 *
 * @param writer the place to write the output to.
 * @param padSpaceCount the number of spaces to place at the beginning of each line.
 * @param list the list to serialize
 */
static int32_t serializeListWithPadding(struct Writer* writer,
                                        const size_t padSpaceCount,
                                        const List* list)
{
    int padCounter;

    Writer_write(writer, "[\n", 2);

    const struct List_Item* entry = *list;
    while (entry != NULL) {
        PAD(padSpaceCount + 2, padCounter, writer);
        serializeGenericWithPadding(writer, padSpaceCount + 2, entry->elem);
        entry = entry->next;
        if (entry != NULL) {
            Writer_write(writer, ",\n", 2);
        }
    }

    Writer_write(writer, "\n", 1);
    PAD(padSpaceCount, padCounter, writer);
    return Writer_write(writer, "]", 1);
}

/**
 * Serialize a list with no indentation on the outermost level.
 *
 * @param writer the place to write the output to.
 * @param list the list to serialize.
 * @return whatever serializeListWithPadding() returns.
 * @see BencSerializer.h
 */
static int32_t serializeList(struct Writer* writer,
                             const List* list)
{
    const size_t topLevelPadding = 0;
    return serializeListWithPadding(writer, topLevelPadding, list);
}

/**
 * Parse a comment in "slash splat" or double slash notation,
 * leave the reader on the first character after the end of comment mark.
 *
 * The first two characters are always consumed. A double slash comment runs
 * to the end of the line. A slash splat comment ends at the first splat
 * which is directly followed by a slash; a run of several splats before the
 * slash (for example a comment closed with two splats and a slash) also
 * closes it.
 *
 * @param reader the input, positioned on the slash which opens the comment.
 * @return 0 if a whole comment was consumed, OUT_OF_CONTENT_TO_READ if the
 *         input ends before the comment does, UNPARSABLE if the input at the
 *         current position is not the beginning of a comment.
 */
static inline int parseComment(struct Reader* reader)
{
    char chars[2];
    if (Reader_read(reader, chars, 2)) {
        printf("Warning: expected comment\n");
        return OUT_OF_CONTENT_TO_READ;
    }
    if (chars[0] != '/') {
        printf("Warning: expected a comment starting with '/', instead found '%c'\n",chars[0]);
        return UNPARSABLE;
    }
    switch (chars[1]) {
        case '*':
            for (;;) {
                if (readUntil('*', reader)) {
                    printf("Unterminated multiline comment\n");
                    return OUT_OF_CONTENT_TO_READ;
                }
                // A splat has been consumed. Walk over any further splats so
                // that the character deciding whether the comment is closed
                // is the first one which is not a splat.
                do {
                    if (Reader_read(reader, chars, 1)) {
                        printf("Unterminated multiline comment\n");
                        return OUT_OF_CONTENT_TO_READ;
                    }
                } while (chars[0] == '*');
                if (chars[0] == '/') {
                    return 0;
                }
            }
        case '/':
            return readUntil('\n', reader);
        default:
            printf("Warning: expected a comment starting with \"//\" or \"/*\", "
                   "instead found \"/%c\"\n",chars[1]);
            return UNPARSABLE;
    }
}

/**
 * Parse a list of values enclosed in square brackets.
 *
 * Input is discarded up to and including the opening bracket. After that,
 * comments are consumed, a closing bracket ends the list, and a digit, an
 * opening bracket, an opening brace or a double quote starts an element which
 * is handed to parseGeneric(). Any other character (separators, whitespace
 * and anything unrecognized) is skipped without complaint.
 *
 * @param reader the input to parse from.
 * @param allocator used to allocate the list items and their content.
 * @param output the list to append the parsed elements to, in input order.
 * @return 0 on success, OUT_OF_CONTENT_TO_READ if the input ends before the
 *         closing bracket, otherwise the error returned by parseComment() or
 *         parseGeneric().
 * @see BencSerializer.h
 */
static int32_t parseList(struct Reader* reader,
                         struct Allocator* allocator,
                         List* output)
{
    readUntil('[', reader);

    // Always points at the link which the next item must be stored in.
    struct List_Item** tail = output;

    for (;;) {
        char peeked;
        if (Reader_read(reader, &peeked, 0) != 0) {
            printf("Unterminated list\n");
            return OUT_OF_CONTENT_TO_READ;
        }

        if (peeked == '/') {
            int commentStatus = parseComment(reader);
            if (commentStatus != 0) {
                return commentStatus;
            }
            continue;
        }

        if (peeked == ']') {
            Reader_skip(reader, 1);
            return 0;
        }

        const bool startsElement = (peeked >= '0' && peeked <= '9')
            || peeked == '['
            || peeked == '{'
            || peeked == '"';
        if (!startsElement) {
            // FIXME(gerard): silently skipping anything we don't understand
            // might not be the best idea
            Reader_skip(reader, 1);
            continue;
        }

        Object* element;
        int elementStatus = parseGeneric(reader, allocator, &element);
        if (elementStatus != 0) {
            return elementStatus;
        }

        struct List_Item* item = Allocator_malloc(allocator, sizeof(struct List_Item));
        item->elem = element;
        item->next = NULL;

        // Append at the tail so that the list keeps the order of the input.
        *tail = item;
        tail = &(item->next);
    }
}

/**
 * Serialize a bencoded dictionary with padding before each line.
 *
 * @param writer the place to write the output to.
 * @param padSpaceCount the number of spaces to place at the beginning of each line.
 * @param dictionary the dictionary to serialize.
 */
static int32_t serializeDictionaryWithPadding(struct Writer* writer,
                                              size_t padSpaceCount,
                                              const Dict* dictionary)
{
    int padCounter = 0;
    Writer_write(writer, "{\n", 2);
    const struct Dict_Entry* entry = *dictionary;
    while (entry != NULL) {
        PAD(padSpaceCount + 2, padCounter, writer);
        serializeString(writer, entry->key);
        Writer_write(writer, " : ", 3);
        serializeGenericWithPadding(writer, padSpaceCount + 2, entry->val);
        entry = entry->next;
        if (entry != NULL) {
            Writer_write(writer, ",\n", 2);
        }
    }

    Writer_write(writer, "\n", 1);
    PAD(padSpaceCount, padCounter, writer);
    return Writer_write(writer, "}", 1);
}

/**
 * Serialize a dictionary with no indentation on the outermost level.
 *
 * @param writer the place to write the output to.
 * @param dictionary the dictionary to serialize.
 * @return whatever serializeDictionaryWithPadding() returns.
 * @see BencSerializer.h
 */
static int32_t serializeDictionary(struct Writer* writer,
                                   const Dict* dictionary)
{
    const size_t topLevelPadding = 0;
    return serializeDictionaryWithPadding(writer, topLevelPadding, dictionary);
}

/**
 * Parse a dictionary enclosed in braces.
 *
 * Input is discarded up to and including the opening brace. Each entry is a
 * double quoted key, a colon and a value parsed by parseGeneric(). Between
 * entries, comments are consumed and any character which is not a double
 * quote, a closing brace or a slash is skipped.
 *
 * @param reader the input to parse from.
 * @param allocator used to allocate the entries and their content.
 * @param output set to the first entry (NULL for an empty dictionary) once the
 *               closing brace has been reached, untouched on failure. Entries
 *               are linked in input order.
 * @return 0 on success, OUT_OF_CONTENT_TO_READ if the input ends before the
 *         closing brace, otherwise the error returned while parsing a
 *         comment, a key or a value.
 * @see BencSerializer.h
 */
static int32_t parseDictionary(struct Reader* reader,
                               struct Allocator* allocator,
                               Dict* output)
{
    uint8_t nextChar;
    // A failure here is picked up by the first read in the loop below.
    readUntil('{', reader);

    String* key;
    Object* value;
    struct Dict_Entry* first = NULL;
    struct Dict_Entry* last = NULL;
    int ret = 0;

    for (;;) {
        // Advance to the beginning of the next key or to the closing brace.
        for (;;) {
            // Check the read before looking at nextChar, if the read failed
            // then nextChar is either stale or was never set.
            if (Reader_read(reader, &nextChar, 0) != 0) {
                printf("Unterminated dictionary\n");
                return OUT_OF_CONTENT_TO_READ;
            }
            if (nextChar == '"') {
                break;
            }
            if (nextChar == '}') {
                Reader_skip(reader, 1);
                *output = first;
                return 0;
            }
            if (nextChar == '/') {
                // A malformed comment is an error here just as it is in
                // parseList() and parseGeneric().
                if ((ret = parseComment(reader)) != 0) {
                    return ret;
                }
                continue;
            }
            Reader_skip(reader, 1);
        }

        // Get key and value.
        if ((ret = parseString(reader, allocator, &key)) != 0) {
            return ret;
        }

        if ((ret = readUntil(':', reader)) != 0) {
            return ret;
        }

        if ((ret = parseGeneric(reader, allocator, &value)) != 0) {
            return ret;
        }

        /* Allocate the entry. */
        struct Dict_Entry* entry = Allocator_calloc(allocator, sizeof(struct Dict_Entry), 1);
        entry->key = key;
        entry->val = value;
        if (last) {
            last->next = entry;
        } else {
            first = entry;
        }
        last = entry;
    }
}

/**
 * Parse whichever kind of value comes next in the input.
 *
 * Spaces, tabs, carriage returns, newlines and comments before the value are
 * consumed. The first character of the value selects the parser: a digit for
 * an integer, an opening bracket for a list, an opening brace for a
 * dictionary and a double quote for a string.
 *
 * @param reader the input to parse from.
 * @param allocator used to allocate the object and its content.
 * @param output set to the parsed object on success, untouched on failure.
 * @return 0 on success, OUT_OF_CONTENT_TO_READ if the input ends before a
 *         value begins, UNPARSABLE if the next character cannot begin a
 *         value, otherwise the error returned by the selected parser or by
 *         parseComment().
 */
static int32_t parseGeneric(struct Reader* reader,
                            struct Allocator* allocator,
                            Object** output)
{
    int ret = 0;
    char firstChar;

    for (;;) {
        // The read is checked before firstChar is examined. When the read
        // fails firstChar keeps its previous content, and acting on a stale
        // whitespace character would repeat this loop forever at end of input.
        if (Reader_read(reader, &firstChar, 0) != 0) {
            printf("Unexpected end of input\n");
            return OUT_OF_CONTENT_TO_READ;
        }
        if (firstChar == ' ' || firstChar == '\r' || firstChar == '\n' || firstChar == '\t') {
            Reader_skip(reader, 1);
            continue;
        }
        if (firstChar == '/') {
            if ((ret = parseComment(reader)) != 0) {
                return ret;
            }
            continue;
        }
        break;
    }

    Object* out = Allocator_malloc(allocator, sizeof(Object));

    switch (firstChar) {
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9': {
            // int64_t. Int is special because it is not a pointer but a int64_t.
            int64_t bint;
            ret = parseint64_t(reader, &bint);
            if (ret != 0) {
                // bint was not set, do not copy it.
                break;
            }
            out->type = Object_INTEGER;
            out->as.number = bint;
            break;
        }

        case '[': {
            // List.
            List* list = Allocator_calloc(allocator, sizeof(List), 1);
            ret = parseList(reader, allocator, list);
            out->type = Object_LIST;
            out->as.list = list;
            break;
        }

        case '{': {
            // Dictionary
            Dict* dict = Allocator_calloc(allocator, sizeof(Dict), 1);
            ret = parseDictionary(reader, allocator, dict);
            out->type = Object_DICT;
            out->as.dictionary = dict;
            break;
        }

        case '"': {
            // String
            String* string = NULL;
            ret = parseString(reader, allocator, &string);
            out->type = Object_STRING;
            out->as.string = string;
            break;
        }

        default:
            printf("While looking for something to parse: "
                   "expected one of 0 1 2 3 4 5 6 7 8 9 [ { \", found '%c'\n", firstChar);
            return UNPARSABLE;
    }

    if (ret != 0) {
        // Something went wrong while parsing.
        return ret;
    }

    *output = out;
    return 0;
}

/**
 * Serialize any object as json text, dispatching on the type of the object.
 *
 * The padding is passed on to the list and dictionary serializers, strings
 * and integers are written without any.
 *
 * @param writer a Writer which to write the output to.
 * @param padSpaceCount number of pad spaces to place before each line.
 * @param obj the object to serialize.
 * @return -2 if the type of object cannot be determined, otherwise
 *            whatever is returned by the serializer for that type.
 */
static int32_t serializeGenericWithPadding(struct Writer* writer,
                                           size_t padSpaceCount,
                                           const Object* obj)
{
    if (obj->type == Object_STRING) {
        return serializeString(writer, obj->as.string);
    }
    if (obj->type == Object_DICT) {
        return serializeDictionaryWithPadding(writer, padSpaceCount, obj->as.dictionary);
    }
    if (obj->type == Object_LIST) {
        return serializeListWithPadding(writer, padSpaceCount, obj->as.list);
    }
    if (obj->type == Object_INTEGER) {
        return serializeint64_t(writer, obj->as.number);
    }
    return -2;
}

/**
 * The json serializer: the BencSerializer interface filled in with the
 * functions defined in this file.
 */
static const struct BencSerializer SERIALIZER =
{
    /* Writing. */
    .serializeString = serializeString,
    .serializeint64_t = serializeint64_t,
    .serializeList = serializeList,
    .serializeDictionary = serializeDictionary,

    /* Reading. */
    .parseString = parseString,
    .parseint64_t = parseint64_t,
    .parseList = parseList,
    .parseDictionary = parseDictionary
};

/**
 * Get the json serializer.
 *
 * @return a pointer to the single, statically allocated serializer defined
 *         in this file.
 */
const struct BencSerializer* JsonBencSerializer_get(void)
{
    const struct BencSerializer* const jsonSerializer = &SERIALIZER;
    return jsonSerializer;
}

/* PAD is a helper for this file only, remove it once it is no longer needed. */
#undef PAD