123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412 |
- /*
- * Copyright (C) 2013-2014 Jo-Philipp Wich <jow@openwrt.org>
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#include "ast.h"
#include "lexer.h"
#include "parser.h"
/*
 * Signature of a token parse callback: receives the input positioned at the
 * first character of the token and the opcode to fill in.
 */
typedef int token_parse_fn(const char *buf, struct jp_opcode *op);

/*
 * One entry of the lexer's token table.
 *
 * The member order is significant, the table uses positional initializers.
 */
struct token {
	/* token type stored into the opcode when this entry matches */
	int type;

	/*
	 * Literal prefix to compare against the input or, when plen is 0,
	 * a two character string holding the inclusive lower and upper
	 * bound of a character range.
	 */
	const char *pat;

	/* length of the literal prefix, 0 selects range matching */
	int plen;

	/* optional callback, may be NULL for fixed tokens */
	token_parse_fn *parse;
};
/*
 * Digit conversion helpers.
 *
 * dec() maps a decimal (or octal) digit character to its value, hex() does
 * the same for a hexadecimal digit in either case. Neither macro validates
 * its argument and both may evaluate it more than once, so only pass
 * side-effect free expressions that are already known to be digits.
 */
#define dec(o) ((o) - '0')

#define hex(x) \
	(((x) < 'A') ? dec(x) : \
		(((x) < 'a') ? ((x) - 'A' + 10) : ((x) - 'a' + 10)))
/*
 * Stores the given codepoint as a UTF-8 multibyte sequence into the given
 * output buffer, advances the output pointer past the written bytes and
 * subtracts the number of written bytes from the given length pointer.
 *
 * Codepoints that are zero, negative or larger than 0x10FFFF are skipped:
 * nothing is written, the pointer and the length stay untouched and true
 * is returned.
 *
 * Returns false if the multibyte sequence would not fit into the buffer,
 * otherwise true.
 */
static bool
utf8enc(char **out, int *rem, int code)
{
	char *p;
	int len;

	if (code <= 0 || code > 0x10FFFF)
		return true;

	if (code <= 0x7F)
		len = 1;
	else if (code <= 0x7FF)
		len = 2;
	else if (code <= 0xFFFF)
		len = 3;
	else
		len = 4;

	if (*rem < len)
		return false;

	/*
	 * Work on a copy of the caller's write cursor and store it back at
	 * the end, so that every branch advances the cursor itself and not
	 * the pointer to it.
	 */
	p = *out;

	switch (len)
	{
	case 1:
		*p++ = (char)code;
		break;

	case 2:
		*p++ = (char)(((code >> 6) & 0x1F) | 0xC0);
		*p++ = (char)(( code       & 0x3F) | 0x80);
		break;

	case 3:
		*p++ = (char)(((code >> 12) & 0x0F) | 0xE0);
		*p++ = (char)(((code >>  6) & 0x3F) | 0x80);
		*p++ = (char)(( code        & 0x3F) | 0x80);
		break;

	default:
		*p++ = (char)(((code >> 18) & 0x07) | 0xF0);
		*p++ = (char)(((code >> 12) & 0x3F) | 0x80);
		*p++ = (char)(((code >>  6) & 0x3F) | 0x80);
		*p++ = (char)(( code        & 0x3F) | 0x80);
		break;
	}

	*out = p;
	*rem -= len;

	return true;
}
- /*
- * Parses a string literal from the given buffer.
- *
- * Returns a negative value on error, otherwise the amount of consumed
- * characters from the given buffer.
- *
- * Error values:
- * -1 Unterminated string
- * -2 Invalid escape sequence
- * -3 String literal too long
- */
- static int
- parse_string(const char *buf, struct jp_opcode *op)
- {
- char q = *(buf++);
- char str[128] = { 0 };
- char *out = str;
- const char *in = buf;
- bool esc = false;
- int rem = sizeof(str) - 1;
- int code;
- while (*in)
- {
- /* continuation of escape sequence */
- if (esc)
- {
- /* \uFFFF */
- if (in[0] == 'u')
- {
- if (isxdigit(in[1]) && isxdigit(in[2]) &&
- isxdigit(in[3]) && isxdigit(in[4]))
- {
- if (!utf8enc(&out, &rem,
- hex(in[1]) * 16 * 16 * 16 +
- hex(in[2]) * 16 * 16 +
- hex(in[3]) * 16 +
- hex(in[4])))
- return -3;
- in += 5;
- }
- else
- {
- return -2;
- }
- }
- /* \xFF */
- else if (in[0] == 'x')
- {
- if (isxdigit(in[1]) && isxdigit(in[2]))
- {
- if (!utf8enc(&out, &rem, hex(in[1]) * 16 + hex(in[2])))
- return -3;
- in += 3;
- }
- else
- {
- return -2;
- }
- }
- /* \377, \77 or \7 */
- else if (in[0] >= '0' && in[0] <= '7')
- {
- /* \377 */
- if (in[1] >= '0' && in[1] <= '7' &&
- in[2] >= '0' && in[2] <= '7')
- {
- code = dec(in[0]) * 8 * 8 +
- dec(in[1]) * 8 +
- dec(in[2]);
- if (code > 255)
- return -2;
- if (!utf8enc(&out, &rem, code))
- return -3;
- in += 3;
- }
- /* \77 */
- else if (in[1] >= '0' && in[1] <= '7')
- {
- if (!utf8enc(&out, &rem, dec(in[0]) * 8 + dec(in[1])))
- return -3;
- in += 2;
- }
- /* \7 */
- else
- {
- if (!utf8enc(&out, &rem, dec(in[0])))
- return -3;
- in += 1;
- }
- }
- /* single character escape */
- else
- {
- if (rem-- < 1)
- return -3;
- switch (in[0])
- {
- case 'a': *out = '\a'; break;
- case 'b': *out = '\b'; break;
- case 'e': *out = '\e'; break;
- case 'f': *out = '\f'; break;
- case 'n': *out = '\n'; break;
- case 'r': *out = '\r'; break;
- case 't': *out = '\t'; break;
- case 'v': *out = '\v'; break;
- default: *out = *in; break;
- }
- in++;
- out++;
- }
- esc = false;
- }
- /* begin of escape sequence */
- else if (*in == '\\')
- {
- in++;
- esc = true;
- }
- /* terminating quote */
- else if (*in == q)
- {
- op->str = strdup(str);
- return (in - buf) + 2;
- }
- /* ordinary char */
- else
- {
- if (rem-- < 1)
- return -3;
- *out++ = *in++;
- }
- }
- return -1;
- }
- /*
- * Parses a label from the given buffer.
- *
- * Returns a negative value on error, otherwise the amount of consumed
- * characters from the given buffer.
- *
- * Error values:
- * -3 Label too long
- */
- static int
- parse_label(const char *buf, struct jp_opcode *op)
- {
- char str[128] = { 0 };
- char *out = str;
- const char *in = buf;
- int rem = sizeof(str) - 1;
- while (*in == '_' || isalnum(*in))
- {
- if (rem-- < 1)
- return -3;
- *out++ = *in++;
- }
- if (!strcmp(str, "true") || !strcmp(str, "false"))
- {
- op->num = (str[0] == 't');
- op->type = T_BOOL;
- }
- else
- {
- op->str = strdup(str);
- }
- return (in - buf);
- }
- /*
- * Parses a number literal from the given buffer.
- *
- * Returns a negative value on error, otherwise the amount of consumed
- * characters from the given buffer.
- *
- * Error values:
- * -2 Invalid number character
- */
- static int
- parse_number(const char *buf, struct jp_opcode *op)
- {
- char *e;
- int n = strtol(buf, &e, 10);
- if (e == buf)
- return -2;
- op->num = n;
- return (e - buf);
- }
- static const struct token tokens[] = {
- { 0, " ", 1 },
- { 0, "\t", 1 },
- { 0, "\n", 1 },
- { T_LE, "<=", 2 },
- { T_GE, ">=", 2 },
- { T_NE, "!=", 2 },
- { T_AND, "&&", 2 },
- { T_OR, "||", 2 },
- { T_DOT, ".", 1 },
- { T_BROPEN, "[", 1 },
- { T_BRCLOSE, "]", 1 },
- { T_POPEN, "(", 1 },
- { T_PCLOSE, ")", 1 },
- { T_UNION, ",", 1 },
- { T_ROOT, "$", 1 },
- { T_THIS, "@", 1 },
- { T_LT, "<", 1 },
- { T_GT, ">", 1 },
- { T_EQ, "=", 1 },
- { T_NOT, "!", 1 },
- { T_WILDCARD, "*", 1 },
- { T_STRING, "'", 1, parse_string },
- { T_STRING, "\"", 1, parse_string },
- { T_LABEL, "_", 1, parse_label },
- { T_LABEL, "az", 0, parse_label },
- { T_LABEL, "AZ", 0, parse_label },
- { T_NUMBER, "-", 1, parse_number },
- { T_NUMBER, "09", 0, parse_number },
- };
- const char *tokennames[23] = {
- [0] = "End of file",
- [T_AND] = "'&&'",
- [T_OR] = "'||'",
- [T_UNION] = "','",
- [T_EQ] = "'='",
- [T_NE] = "'!='",
- [T_GT] = "'>'",
- [T_GE] = "'>='",
- [T_LT] = "'<'",
- [T_LE] = "'<='",
- [T_NOT] = "'!'",
- [T_LABEL] = "Label",
- [T_ROOT] = "'$'",
- [T_THIS] = "'@'",
- [T_DOT] = "'.'",
- [T_WILDCARD] = "'*'",
- [T_BROPEN] = "'['",
- [T_BRCLOSE] = "']'",
- [T_BOOL] = "Bool",
- [T_NUMBER] = "Number",
- [T_STRING] = "String",
- [T_POPEN] = "'('",
- [T_PCLOSE] = "')'",
- };
- static int
- match_token(const char *ptr, struct jp_opcode *op)
- {
- int i;
- const struct token *tok;
- for (i = 0, tok = &tokens[0];
- i < sizeof(tokens) / sizeof(tokens[0]);
- i++, tok = &tokens[i])
- {
- if ((tok->plen > 0 && !strncmp(ptr, tok->pat, tok->plen)) ||
- (tok->plen == 0 && *ptr >= tok->pat[0] && *ptr <= tok->pat[1]))
- {
- op->type = tok->type;
- if (tok->parse)
- return tok->parse(ptr, op);
- return tok->plen;
- }
- }
- return -1;
- }
- struct jp_opcode *
- jp_get_token(struct jp_state *s, const char *input, int *mlen)
- {
- struct jp_opcode op = { 0 };
- *mlen = match_token(input, &op);
- if (*mlen < 0 || op.type == 0)
- return NULL;
- return jp_alloc_op(s, op.type, op.num, op.str, NULL);
- }
|