# This file is part of asmc, a bootstrapping OS with minimal seed # Copyright (C) 2019 Giovanni Mascellani # https://gitlab.com/giomasce/asmc # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program. If not, see . fun mm0_is_digit 1 { $c @c 0 param = ; '0' c <= c '9' <= && ret ; } fun mm0_is_alpha 1 { $c @c 0 param = ; 'a' c <= c 'z' <= && 'A' c <= c 'Z' <= && || ret ; } fun mm0_is_white 1 { $c @c 0 param = ; c ' ' == c '\n' == || c '\r' == || c '\t' == || ret ; } const MM0TOK_TYPE 0 const MM0TOK_VALUE 4 const SIZEOF_MM0TOK 8 const MM0TOK_TYPE_SYMBOL 1 const MM0TOK_TYPE_IDENT 2 const MM0TOK_TYPE_NUMBER 3 const MM0TOK_TYPE_MATH 4 const MM0TOK_SYMB_STAR 1 const MM0TOK_SYMB_DOT 2 const MM0TOK_SYMB_COLON 3 const MM0TOK_SYMB_SEMICOLON 4 const MM0TOK_SYMB_OPEN 5 const MM0TOK_SYMB_CLOSED 6 const MM0TOK_SYMB_ARROW 7 const MM0TOK_SYMB_OPENBR 8 const MM0TOK_SYMB_CLOSEDBR 9 const MM0TOK_SYMB_ASSIGN 10 fun mm0tok_init 2 { $type $value @type 1 param = ; @value 0 param = ; $tok @tok SIZEOF_MM0TOK malloc = ; tok MM0TOK_TYPE take_addr type = ; tok MM0TOK_VALUE take_addr value = ; tok ret ; } fun mm0tok_destroy 1 { $tok @tok 0 param = ; $type @type tok MM0TOK_TYPE take = ; if type MM0TOK_TYPE_IDENT == type MM0TOK_TYPE_MATH == || { tok MM0TOK_VALUE take free ; } tok free ; } fun mm0tok_cmp_value 3 { $tok $type $value @tok 2 param = ; @type 1 param = ; @value 0 param = ; if tok MM0TOK_TYPE take type != { 0 ret ; } if type MM0TOK_TYPE_SYMBOL == { tok MM0TOK_VALUE take value == ret ; } if type MM0TOK_TYPE_IDENT == { tok MM0TOK_VALUE take value strcmp 0 == ret ; } if type MM0TOK_TYPE_NUMBER == { tok MM0TOK_VALUE take value == ret ; } if type MM0TOK_TYPE_MATH == { tok MM0TOK_VALUE take value strcmp 0 == ret ; } 0 "mm0tok_expect_type_value: illegal token type" assert_msg ; } fun mm0tok_dump 1 { $tok @tok 0 param = ; $type $value @type tok MM0TOK_TYPE take = ; @value tok MM0TOK_VALUE take = ; if type MM0TOK_TYPE_SYMBOL == { if value MM0TOK_SYMB_STAR == { "STAR" log ; ret ; } if value MM0TOK_SYMB_DOT == { "DOT" log ; ret ; } if value MM0TOK_SYMB_COLON == { "COLON" log ; ret ; } if value MM0TOK_SYMB_SEMICOLON == { "SEMICOLON" log ; ret ; } if value MM0TOK_SYMB_OPEN == { "OPEN" log ; ret ; } if value MM0TOK_SYMB_CLOSED == { "CLOSED" log ; ret ; } if value MM0TOK_SYMB_ARROW == { "ARROW" log ; ret ; } if value MM0TOK_SYMB_OPENBR == { "OPENBR" log ; ret ; } if value MM0TOK_SYMB_CLOSEDBR == { "CLOSEDBR" log ; ret ; } if value MM0TOK_SYMB_ASSIGN == { "ASSIGN" log ; ret ; } } if type MM0TOK_TYPE_IDENT == { value log ; ret ; } if type MM0TOK_TYPE_NUMBER == { value itoa log ; ret ; } if type MM0TOK_TYPE_MATH == { "$" log ; value log ; "$" log ; ret ; } } const MM0LEXER_FILENAME 0 const MM0LEXER_FD 4 const MM0LEXER_UNREAD 8 const SIZEOF_MM0LEXER 12 fun mm0lexer_init 1 { $filename @filename 0 param = ; $fd @fd filename vfs_open = ; if fd ! { 0 ret ; } $lexer @lexer SIZEOF_MM0LEXER malloc = ; lexer MM0LEXER_FILENAME take_addr filename = ; lexer MM0LEXER_FD take_addr fd = ; lexer MM0LEXER_UNREAD take_addr 0 = ; lexer ret ; } fun mm0lexer_destroy 1 { $lexer @lexer 0 param = ; lexer MM0LEXER_FD take vfs_close ; lexer free ; } fun mm0lexer_read 1 { $lexer @lexer 0 param = ; $c @c lexer MM0LEXER_UNREAD take = ; if c { lexer MM0LEXER_UNREAD take_addr 0 = ; c ret ; } else { lexer MM0LEXER_FD take vfs_read ret ; } } fun mm0lexer_unread 2 { $lexer $c @lexer 1 param = ; @c 0 param = ; lexer MM0LEXER_UNREAD take ! "mm0lexer_unread: unread buffer already used" assert_msg ; lexer MM0LEXER_UNREAD take_addr c = ; } fun mm0lexer_read_skip 1 { $lexer @lexer 0 param = ; while 1 { $c @c lexer mm0lexer_read = ; # Check comment if c '-' == { @c lexer mm0lexer_read = ; c '-' == "mm0lexer_read_skip: invalid single dash" assert_msg ; while c '\n' != { @c lexer mm0lexer_read = ; } } else { if c mm0_is_white ! { c ret ; } } } } fun mm0lexer_get_token_or_eof_ 1 { $lexer @lexer 0 param = ; $c @c lexer mm0lexer_read_skip = ; # End of file if c 0xffffffff == { 0 ret ; } # Symbol token if c '*' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_STAR mm0tok_init ret ; } if c '.' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_DOT mm0tok_init ret ; } if c ':' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_COLON mm0tok_init ret ; } if c ';' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_SEMICOLON mm0tok_init ret ; } if c '(' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_OPEN mm0tok_init ret ; } if c ')' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_CLOSED mm0tok_init ret ; } if c '>' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_ARROW mm0tok_init ret ; } if c '{' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_OPENBR mm0tok_init ret ; } if c '}' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_CLOSEDBR mm0tok_init ret ; } if c '=' == { MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_ASSIGN mm0tok_init ret ; } # Number token if c mm0_is_digit { $zero @zero c '0' == = ; $value @value c '0' - = ; @c lexer mm0lexer_read = ; while c mm0_is_digit { zero ! "mm0lexer_get_token_or_eof_: invalid 0 prefix in number token" assert_msg ; @value value 10 * c '0' - + = ; @c lexer mm0lexer_read = ; } c mm0_is_alpha ! "mm0lexer_get_token_or_eof_: unexpected alphabetic character in number token" assert_msg ; lexer c mm0lexer_unread ; MM0TOK_TYPE_NUMBER value mm0tok_init ret ; } # Identifier token if c mm0_is_alpha c '_' == || { $size $cap $value @size 0 = ; @cap 4 = ; @value cap malloc = ; while c mm0_is_digit c mm0_is_alpha || c '_' == || c '-' == || { # +1 to be sure there is space for the terminator if size 1 + cap >= { @cap cap 2 * = ; @value cap value realloc = ; } value size + c =c ; @size size 1 + = ; @c lexer mm0lexer_read = ; } lexer c mm0lexer_unread ; value size + '\0' =c ; MM0TOK_TYPE_IDENT value mm0tok_init ret ; } # Math token if c '$' == { $size $cap $value @size 0 = ; @cap 4 = ; @value cap malloc = ; @c lexer mm0lexer_read = ; $cont @cont 1 = ; while c '$' != { # +1 to be sure there is space for the terminator if size 1 + cap >= { @cap cap 2 * = ; @value cap value realloc = ; } if c mm0_is_white { @c ' ' = ; } value size + c =c ; @size size 1 + = ; @c lexer mm0lexer_read = ; } value size + '\0' =c ; MM0TOK_TYPE_MATH value mm0tok_init ret ; } 0 "mm0lexer_get_token_or_eof_: invalid character in input file" assert_msg ; } fun mm0lexer_get_token_or_eof 1 { $lexer @lexer 0 param = ; $token @token lexer mm0lexer_get_token_or_eof_ = ; if token { token mm0tok_dump ; " " log ; } else { "EOF" log ; } token ret ; } fun mm0lexer_get_token 1 { $lexer @lexer 0 param = ; $token @token lexer mm0lexer_get_token_or_eof = ; token "mm0lexer_get_token: unexpected end of file" assert_msg ; token ret ; } fun mm0lexer_get_token_type 2 { $lexer $type @lexer 1 param = ; @type 0 param = ; $tok @tok lexer mm0lexer_get_token = ; tok MM0TOK_TYPE take type == "mm0lexer_get_token_type: wrong token type" assert_msg ; tok ret ; } fun mm0lexer_expect 3 { $lexer $type $value @lexer 2 param = ; @type 1 param = ; @value 0 param = ; $tok @tok lexer mm0lexer_get_token = ; tok type value mm0tok_cmp_value "mm0lexer_expect: illegal token" assert_msg ; tok mm0tok_destroy ; } const MM0SORT_PURE 0 const MM0SORT_STRICT 4 const MM0SORT_PROVABLE 8 const MM0SORT_NONEMPTY 12 const SIZEOF_MM0SORT 16 fun mm0sort_init 0 { $sort @sort SIZEOF_MM0SORT malloc = ; sort MM0SORT_PURE take_addr 0 = ; sort MM0SORT_STRICT take_addr 0 = ; sort MM0SORT_PROVABLE take_addr 0 = ; sort MM0SORT_NONEMPTY take_addr 0 = ; sort ret ; } fun mm0sort_destroy 1 { $sort @sort 0 param = ; sort free ; } const MM0TH_LEXER 0 const MM0TH_LEVEL 4 const MM0TH_SORTS 8 const SIZEOF_MM0TH 12 fun mm0th_init 0 { $theory @theory SIZEOF_MM0TH malloc = ; theory MM0TH_LEVEL take_addr 0 = ; theory MM0TH_SORTS take_addr map_init = ; theory ret ; } fun mm0th_sorts_destroy_closure 3 { $ctx $key $value @ctx 2 param = ; @key 1 param = ; @value 0 param = ; value mm0sort_destroy ; } fun mm0th_destroy 1 { $theory @theory 0 param = ; theory MM0TH_SORTS take @mm0th_sorts_destroy_closure 0 map_foreach ; theory MM0TH_SORTS take map_destroy ; theory free ; } fun mm0_parse_sort 2 { $theory $tok @theory 1 param = ; @tok 0 param = ; $lexer @lexer theory MM0TH_LEXER take = ; "(sort-stmt) " log ; $sort @sort mm0sort_init = ; # Read sort properties $cont @cont 1 = ; while cont { $ok @ok 0 = ; if tok MM0TOK_TYPE_IDENT "pure" mm0tok_cmp_value { @ok 1 = ; sort MM0SORT_PURE take_addr 1 = ; } if tok MM0TOK_TYPE_IDENT "strict" mm0tok_cmp_value { @ok 1 = ; sort MM0SORT_STRICT take_addr 1 = ; } if tok MM0TOK_TYPE_IDENT "provable" mm0tok_cmp_value { @ok 1 = ; sort MM0SORT_PROVABLE take_addr 1 = ; } if tok MM0TOK_TYPE_IDENT "nonempty" mm0tok_cmp_value { @ok 1 = ; sort MM0SORT_NONEMPTY take_addr 1 = ; } if tok MM0TOK_TYPE_IDENT "sort" mm0tok_cmp_value { @ok 1 = ; @cont 0 = ; } ok "mm0_parse_sort: parsing failed" assert_msg ; tok mm0tok_destroy ; @tok lexer MM0TOK_TYPE_IDENT mm0lexer_get_token_type = ; } # Register the sort $sorts @sorts theory MM0TH_SORTS take = ; $value @value tok MM0TOK_VALUE take = ; sorts value map_has ! "mm0_parse_sort: sort already exists" assert_msg ; sorts value sort map_set ; tok mm0tok_destroy ; # Expect semicolon lexer MM0TOK_TYPE_SYMBOL MM0TOK_SYMB_SEMICOLON mm0lexer_expect ; } fun mm0_parse_var 2 { $theory $tok @theory 1 param = ; @tok 0 param = ; $lexer @lexer theory MM0TH_LEXER take = ; "(var-stmt) " log ; # Discard tokens up to the semicolon $cont @cont 1 = ; tok mm0tok_destroy ; while cont { @tok lexer mm0lexer_get_token = ; if tok MM0TOK_TYPE take MM0TOK_TYPE_SYMBOL == tok MM0TOK_VALUE take MM0TOK_SYMB_SEMICOLON == && { @cont 0 = ; } tok mm0tok_destroy ; } } fun mm0_parse_term 2 { $theory $tok @theory 1 param = ; @tok 0 param = ; $lexer @lexer theory MM0TH_LEXER take = ; "(term-stmt) " log ; # Discard tokens up to the semicolon $cont @cont 1 = ; tok mm0tok_destroy ; while cont { @tok lexer mm0lexer_get_token = ; if tok MM0TOK_TYPE take MM0TOK_TYPE_SYMBOL == tok MM0TOK_VALUE take MM0TOK_SYMB_SEMICOLON == && { @cont 0 = ; } tok mm0tok_destroy ; } } fun mm0_parse_assert 2 { $theory $tok @theory 1 param = ; @tok 0 param = ; $lexer @lexer theory MM0TH_LEXER take = ; "(assert-stmt) " log ; # Discard tokens up to the semicolon $cont @cont 1 = ; tok mm0tok_destroy ; while cont { @tok lexer mm0lexer_get_token = ; if tok MM0TOK_TYPE take MM0TOK_TYPE_SYMBOL == tok MM0TOK_VALUE take MM0TOK_SYMB_SEMICOLON == && { @cont 0 = ; } tok mm0tok_destroy ; } } fun mm0_parse_def 2 { $theory $tok @theory 1 param = ; @tok 0 param = ; $lexer @lexer theory MM0TH_LEXER take = ; "(def-stmt) " log ; # Discard tokens up to the open brace $cont @cont 1 = ; tok mm0tok_destroy ; while cont { @tok lexer mm0lexer_get_token = ; if tok MM0TOK_TYPE take MM0TOK_TYPE_SYMBOL == tok MM0TOK_VALUE take MM0TOK_SYMB_OPENBR == && { theory MM0TH_LEVEL take_addr theory MM0TH_LEVEL take 1 + = ; @cont 0 = ; } tok mm0tok_destroy ; } } fun mm0_parse_notation 2 { $theory $tok @theory 1 param = ; @tok 0 param = ; $lexer @lexer theory MM0TH_LEXER take = ; "(notation-stmt) " log ; # Discard tokens up to the semicolon $cont @cont 1 = ; tok mm0tok_destroy ; while cont { @tok lexer mm0lexer_get_token = ; if tok MM0TOK_TYPE take MM0TOK_TYPE_SYMBOL == tok MM0TOK_VALUE take MM0TOK_SYMB_SEMICOLON == && { @cont 0 = ; } tok mm0tok_destroy ; } } fun mm0_parse_output 2 { $theory $tok @theory 1 param = ; @tok 0 param = ; $lexer @lexer theory MM0TH_LEXER take = ; "(output-stmt) " log ; # Discard tokens up to the semicolon $cont @cont 1 = ; tok mm0tok_destroy ; while cont { @tok lexer mm0lexer_get_token = ; if tok MM0TOK_TYPE take MM0TOK_TYPE_SYMBOL == tok MM0TOK_VALUE take MM0TOK_SYMB_SEMICOLON == && { @cont 0 = ; } tok mm0tok_destroy ; } } fun mm0_parse_statement 2 { $theory $tok @theory 1 param = ; @tok 0 param = ; $value @value tok MM0TOK_VALUE take = ; if value "pure" strcmp 0 == value "strict" strcmp 0 == || value "provable" strcmp 0 == || value "nonempty" strcmp 0 == || value "sort" strcmp 0 == || { theory tok mm0_parse_sort ; ret ; } if value "var" strcmp 0 == { theory tok mm0_parse_var ; ret ; } if value "term" strcmp 0 == { theory tok mm0_parse_term ; ret ; } if value "axiom" strcmp 0 == value "theorem" strcmp 0 == || { theory tok mm0_parse_assert ; ret ; } if value "def" strcmp 0 == { theory tok mm0_parse_def ; ret ; } if value "infixl" strcmp 0 == value "infixr" strcmp 0 == || value "prefix" strcmp 0 == || value "coercion" strcmp 0 == || value "notation" strcmp 0 == || { theory tok mm0_parse_notation ; ret ; } if value "output" strcmp 0 == { theory tok mm0_parse_output ; ret ; } 0 "mm0_parse_statement: invalid statement" assert_msg ; } fun mm0_parse 1 { $lexer @lexer 0 param = ; $theory @theory mm0th_init = ; theory MM0TH_LEXER take_addr lexer = ; while 1 { $tok @tok lexer mm0lexer_get_token_or_eof = ; if tok ! { theory MM0TH_LEVEL take 0 == "mm0_parse: unmatched braces" assert_msg ; theory ret ; } $type $value @type tok MM0TOK_TYPE take = ; @value tok MM0TOK_VALUE take = ; if type MM0TOK_TYPE_SYMBOL == { if value MM0TOK_SYMB_OPENBR == { theory MM0TH_LEVEL take_addr theory MM0TH_LEVEL take 1 + = ; } else { value MM0TOK_SYMB_CLOSEDBR == "mm0_parse: invalid symbol" assert_msg ; theory MM0TH_LEVEL take 0 > "mm0_parse: invalid block closing" assert_msg ; theory MM0TH_LEVEL take_addr theory MM0TH_LEVEL take 1 - = ; } tok mm0tok_destroy ; } else { type MM0TOK_TYPE_IDENT == "mm0_parse: invalid token type" assert_msg ; theory tok mm0_parse_statement ; } } } fun mm0_process 1 { $filename @filename 0 param = ; $lexer @lexer filename mm0lexer_init = ; lexer "mm0_process: cannot open file" assert_msg ; $theory "Parsing MM0 theory: " log ; @theory lexer mm0_parse = ; "\n" log ; # TODO: verify theory # Free resources theory mm0th_destroy ; lexer mm0lexer_destroy ; }