From 1a2d3e0ebb9f35e76509ad0b5bec41bb1d0c6f85 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Tue, 12 May 2026 19:29:58 -0400 Subject: [PATCH] Integrate TPTP with internal APIs via `cmd_context`, add embedded-string TPTP regression tests, and fix TFF arithmetic/timeout regressions (#9483) * Add shell-integrated self-contained TPTP frontend and CLI wiring Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/ba80bc5a-d80f-4d9f-8ed4-a962f697697c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Fix TPTP frontend build integration and validate with tests Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/ba80bc5a-d80f-4d9f-8ed4-a962f697697c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Address review feedback and clean up TPTP/frontend readability Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/ba80bc5a-d80f-4d9f-8ed4-a962f697697c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Refine TPTP parser semantics and keying based on review feedback Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/ba80bc5a-d80f-4d9f-8ed4-a962f697697c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Polish TPTP frontend keys/path checks and deduplicate skip logic Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/ba80bc5a-d80f-4d9f-8ed4-a962f697697c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Add src/api include path to shell CMake target Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/e07bbe13-16bc-4cc6-92e8-1708981b04ad Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Use internal AST/cmd_context APIs in TPTP shell frontend Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/2a6b01d0-c799-4e44-aa73-ef228cb4402e Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Harden TPTP cmd_context integration and suppress check-sat echo Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/2a6b01d0-c799-4e44-aa73-ef228cb4402e Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Make TPTP check-sat stream redirection exception-safe Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/2a6b01d0-c799-4e44-aa73-ef228cb4402e Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Address review nits in TPTP internal API frontend Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/2a6b01d0-c799-4e44-aa73-ef228cb4402e Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Refine TPTP frontend ownership and stream restoration semantics Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/2a6b01d0-c799-4e44-aa73-ef228cb4402e Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Add TPTP regression files and test-z3 tptp test Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/dc1d46fc-4b6c-4f64-91a0-9fb57c73c166 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Adjust Agatha TPTP expectation and tidy test helper constant Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/dc1d46fc-4b6c-4f64-91a0-9fb57c73c166 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Harden tptp test command handling and readability Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/dc1d46fc-4b6c-4f64-91a0-9fb57c73c166 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Validate tptp test filenames against empty and traversal patterns Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/dc1d46fc-4b6c-4f64-91a0-9fb57c73c166 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Tighten tptp filename checks and rename output buffer constant Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/dc1d46fc-4b6c-4f64-91a0-9fb57c73c166 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Use read_tptp API directly in tptp unit test Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/f07170c1-549a-464c-89f8-fee973f9790f Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Clarify required tptp frontend extern flags in test Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/f07170c1-549a-464c-89f8-fee973f9790f Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Move tptp frontend to cmd_context and add string API Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/1be7cf7e-2747-43ba-9a33-2e71dad2d14d Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Polish tptp stream parser naming and simplify test asserts Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/1be7cf7e-2747-43ba-9a33-2e71dad2d14d Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Fix mk_make include resolution for moved tptp frontend Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/7cbce0d2-0ed9-4941-91d4-49977c0a97a1 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Update CMakeLists.txt * Fix TPTP parsing, arithmetic builtin mapping, and timeout handling Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/f2be81f6-e506-47ea-a38a-46d1970add8c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Polish TPTP parser diagnostics and type parsing details Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/f2be81f6-e506-47ea-a38a-46d1970add8c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Refine digit-check helper naming in TPTP frontend Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/f2be81f6-e506-47ea-a38a-46d1970add8c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Add rational zero-denominator regression test for TPTP Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/f2be81f6-e506-47ea-a38a-46d1970add8c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * Fix missing g_display_* symbol definitions for non-shell link targets Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/0283d958-26a8-4b1c-86be-b75a5bc26d8c Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> Co-authored-by: Nikolaj Bjorner --- scripts/mk_project.py | 2 - src/cmd_context/CMakeLists.txt | 1 + src/cmd_context/tptp_frontend.cpp | 916 ++++++++++++++++++++++++++++++ src/cmd_context/tptp_frontend.h | 4 + src/shell/main.cpp | 27 +- src/test/CMakeLists.txt | 8 +- src/test/main.cpp | 1 + src/test/tptp.cpp | 95 ++++ 8 files changed, 1044 insertions(+), 10 deletions(-) create mode 100644 src/cmd_context/tptp_frontend.cpp create mode 100644 src/cmd_context/tptp_frontend.h create mode 100644 src/test/tptp.cpp diff --git a/scripts/mk_project.py b/scripts/mk_project.py index 7b4d444ea..8227473bb 100644 --- a/scripts/mk_project.py +++ b/scripts/mk_project.py @@ -125,5 +125,3 @@ def init_project_def(): add_ml_example('ml_example', 'ml') add_z3py_example('py_example', 'python') return API_files - - diff --git a/src/cmd_context/CMakeLists.txt b/src/cmd_context/CMakeLists.txt index f3cdb3c03..0b75a8526 100644 --- a/src/cmd_context/CMakeLists.txt +++ b/src/cmd_context/CMakeLists.txt @@ -12,6 +12,7 @@ z3_add_component(cmd_context simplifier_cmds.cpp tactic_cmds.cpp tactic_manager.cpp + tptp_frontend.cpp COMPONENT_DEPENDENCIES rewriter solver diff --git a/src/cmd_context/tptp_frontend.cpp b/src/cmd_context/tptp_frontend.cpp new file mode 100644 index 000000000..8ab66ac1d --- /dev/null +++ b/src/cmd_context/tptp_frontend.cpp @@ -0,0 +1,916 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ast/arith_decl_plugin.h" +#include "ast/expr_abstract.h" +#include "ast/ast_util.h" +#include "cmd_context/cmd_context.h" +#include "cmd_context/tptp_frontend.h" +#include "solver/solver.h" +#include "util/error_codes.h" +#include "util/rational.h" +#include "util/z3_exception.h" + +bool g_display_statistics = false; +bool g_display_model = false; + +namespace { + +enum class token_kind { + eof_tok, + id, + str, + lparen, + rparen, + lbrack, + rbrack, + comma, + dot, + colon, + and_tok, + or_tok, + not_tok, + forall_tok, + exists_tok, + equal_tok, + neq_tok, + iff_tok, + implies_tok, + implied_tok, + xor_tok, + nor_tok, + nand_tok, + gt_tok, + star_tok, + slash_tok, + minus_tok, + at_tok +}; + +struct parse_error : public std::exception { + std::string m_msg; + parse_error(std::string const& msg): m_msg(msg) {} + char const* what() const noexcept override { return m_msg.c_str(); } +}; + +class scoped_regular_stream { + cmd_context& m_ctx; + std::string m_prev; +public: + scoped_regular_stream(cmd_context& ctx, std::ostream& out): m_ctx(ctx), m_prev(ctx.get_regular_stream_name()) { m_ctx.set_regular_stream(out); } + ~scoped_regular_stream() { m_ctx.set_regular_stream(m_prev.c_str()); } +}; + +struct token { + token_kind kind = token_kind::eof_tok; + std::string text; + unsigned line = 1; + unsigned col = 1; +}; + +class lexer { + std::string const& m_input; + size_t m_pos = 0; + unsigned m_line = 1; + unsigned m_col = 1; + + bool eof() const { return m_pos >= m_input.size(); } + char peek(unsigned k = 0) const { return m_pos + k < m_input.size() ? m_input[m_pos + k] : '\0'; } + char get() { + char c = peek(); + if (!eof()) { + ++m_pos; + if (c == '\n') { + ++m_line; + m_col = 1; + } + else { + ++m_col; + } + } + return c; + } + + static bool is_symbol_start(char c) { + return std::isalnum(static_cast(c)) || c == '$' || c == '_'; + } + + static bool is_id_char(char c) { + return std::isalnum(static_cast(c)) || c == '$' || c == '_' || c == '\'' || c == '-'; + } + + void skip_ws_comments() { + while (!eof()) { + if (std::isspace(static_cast(peek()))) { + get(); + continue; + } + if (peek() == '%') { + while (!eof() && get() != '\n') {} + continue; + } + if (peek() == '/' && peek(1) == '*') { + get(); + get(); + while (!eof()) { + if (peek() == '*' && peek(1) == '/') { + get(); + get(); + break; + } + get(); + } + continue; + } + break; + } + } + +public: + lexer(std::string const& input): m_input(input) {} + + token next() { + skip_ws_comments(); + token t; + t.line = m_line; + t.col = m_col; + if (eof()) { + t.kind = token_kind::eof_tok; + return t; + } + + if (peek() == '\'' || peek() == '"') { + char q = get(); + t.kind = token_kind::str; + while (!eof()) { + char c = get(); + if (c == '\\' && !eof()) { + t.text.push_back(c); + t.text.push_back(get()); + continue; + } + if (c == q) return t; + t.text.push_back(c); + } + throw parse_error("unterminated string literal"); + } + + if (peek() == '<' && peek(1) == '=' && peek(2) == '>') { + get(); get(); get(); + t.kind = token_kind::iff_tok; + t.text = "<=>"; + return t; + } + if (peek() == '<' && peek(1) == '~' && peek(2) == '>') { + get(); get(); get(); + t.kind = token_kind::xor_tok; + t.text = "<~>"; + return t; + } + if (peek() == '=' && peek(1) == '>') { + get(); get(); + t.kind = token_kind::implies_tok; + t.text = "=>"; + return t; + } + if (peek() == '<' && peek(1) == '=') { + get(); get(); + t.kind = token_kind::implied_tok; + t.text = "<="; + return t; + } + if (peek() == '~' && peek(1) == '|') { + get(); get(); + t.kind = token_kind::nor_tok; + t.text = "~|"; + return t; + } + if (peek() == '~' && peek(1) == '&') { + get(); get(); + t.kind = token_kind::nand_tok; + t.text = "~&"; + return t; + } + if (peek() == '!' && peek(1) == '=') { + get(); get(); + t.kind = token_kind::neq_tok; + t.text = "!="; + return t; + } + + char c = get(); + switch (c) { + case '(': t.kind = token_kind::lparen; return t; + case ')': t.kind = token_kind::rparen; return t; + case '[': t.kind = token_kind::lbrack; return t; + case ']': t.kind = token_kind::rbrack; return t; + case ',': t.kind = token_kind::comma; return t; + case '.': t.kind = token_kind::dot; return t; + case ':': t.kind = token_kind::colon; return t; + case '&': t.kind = token_kind::and_tok; return t; + case '|': t.kind = token_kind::or_tok; return t; + case '~': t.kind = token_kind::not_tok; return t; + case '!': t.kind = token_kind::forall_tok; return t; + case '?': t.kind = token_kind::exists_tok; return t; + case '=': t.kind = token_kind::equal_tok; return t; + case '>': t.kind = token_kind::gt_tok; return t; + case '*': t.kind = token_kind::star_tok; return t; + case '/': t.kind = token_kind::slash_tok; return t; + case '-': t.kind = token_kind::minus_tok; return t; + case '@': t.kind = token_kind::at_tok; return t; + default: + break; + } + + if (is_symbol_start(c)) { + t.kind = token_kind::id; + t.text.push_back(c); + while (!eof() && is_id_char(peek())) + t.text.push_back(get()); + return t; + } + + std::ostringstream out; + out << "unexpected character '" << c << "' at " << t.line << ":" << t.col; + throw parse_error(out.str()); + } +}; + +struct parsed_type { + std::vector domain; + sort* range = nullptr; + parsed_type(sort* s): range(s) {} + parsed_type(std::vector const& d, sort* r): domain(d), range(r) {} +}; + +class tptp_parser { + cmd_context& m_cmd; + ast_manager& m; + arith_util m_arith; + sort* m_univ; + bool m_has_conjecture = false; + std::unordered_map m_sorts; + std::unordered_map m_decls; + std::unordered_map, sort*>> m_typed_decls; + std::vector> m_bound; + std::unordered_set m_seen_files; + + std::string m_input; + std::unique_ptr m_lex; + token m_curr; + + static std::string to_lower(std::string s) { + for (char& c : s) c = static_cast(std::tolower(static_cast(c))); + return s; + } + + static bool is_var_name(std::string const& s) { + if (s.empty()) return false; + unsigned char c = static_cast(s[0]); + return std::isupper(c) || s[0] == '_'; + } + + std::string loc() const { + std::ostringstream out; + out << m_curr.line << ":" << m_curr.col; + return out.str(); + } + + void next() { m_curr = m_lex->next(); } + + bool is(token_kind k) const { return m_curr.kind == k; } + + bool accept(token_kind k) { + if (is(k)) { + next(); + return true; + } + return false; + } + + void expect(token_kind k, char const* msg) { + if (!accept(k)) { + std::ostringstream out; + out << "expected " << msg << " at " << loc(); + throw parse_error(out.str()); + } + } + + std::string parse_name() { + if (is(token_kind::id) || is(token_kind::str)) { + std::string r = m_curr.text; + next(); + return r; + } + std::ostringstream out; + out << "expected identifier at " << loc(); + throw parse_error(out.str()); + } + + sort* get_sort(std::string const& n) { + if (n == "$i") return m_univ; + if (n == "$o") return m.mk_bool_sort(); + if (n == "$int") return m_arith.mk_int(); + if (n == "$rat" || n == "$real") return m_arith.mk_real(); + auto it = m_sorts.find(n); + if (it != m_sorts.end()) return it->second; + sort* s = m.mk_uninterpreted_sort(symbol(n)); + m_sorts.emplace(n, s); + return s; + } + + static bool is_ttype(sort* s) { + return s->get_name() == symbol("$tType"); + } + + static bool is_nonempty_digit_string(std::string const& s) { + if (s.empty()) return false; + for (char c : s) { + if (!std::isdigit(static_cast(c))) + return false; + } + return true; + } + + static std::string mk_decl_key(std::string const& name, unsigned arity, char tag) { + return std::to_string(name.size()) + ":" + name + "\x1f" + std::to_string(arity) + "\x1f" + tag; + } + + static std::string mk_typed_key(std::string const& name, unsigned arity) { + return mk_decl_key(name, arity, 't'); + } + + func_decl* mk_decl(std::string const& name, unsigned arity, bool pred) { + auto itt = m_typed_decls.find(mk_typed_key(name, arity)); + if (itt != m_typed_decls.end()) { + std::string typed_decl_key = mk_decl_key(name, arity, 'd'); + auto itd = m_decls.find(typed_decl_key); + if (itd != m_decls.end()) return itd->second; + auto const& sig = itt->second; + func_decl* f = m.mk_func_decl(symbol(name), sig.first.size(), sig.first.data(), sig.second); + m_decls.emplace(typed_decl_key, f); + return f; + } + + std::string key = mk_decl_key(name, arity, pred ? 'p' : 'f'); + auto itd = m_decls.find(key); + if (itd != m_decls.end()) return itd->second; + + std::vector dom(arity, m_univ); + func_decl* f = m.mk_func_decl(symbol(name), arity, dom.data(), pred ? m.mk_bool_sort() : m_univ); + m_decls.emplace(key, f); + return f; + } + + bool find_bound(std::string const& n, expr_ref& e) const { + for (auto it = m_bound.rbegin(); it != m_bound.rend(); ++it) { + auto jt = it->find(n); + if (jt != it->end()) { + e = jt->second; + return true; + } + } + return false; + } + + expr_ref mk_quantifier(bool is_forall, ptr_vector const& bound, expr_ref const& body) { + SASSERT(body); + if (bound.empty()) return body; + return is_forall ? ::mk_forall(m, bound.size(), bound.data(), body.get()) : ::mk_exists(m, bound.size(), bound.data(), body.get()); + } + + parsed_type parse_type_atom() { + if (accept(token_kind::lparen)) { + parsed_type t = parse_type_expr(); + expect(token_kind::rparen, "')'"); + return t; + } + std::string n = parse_name(); + return parsed_type(get_sort(n)); + } + + std::vector parse_type_product() { + parsed_type first = parse_type_atom(); + if (!first.domain.empty()) + throw parse_error("higher-order type in product is unsupported"); + std::vector args; + args.push_back(first.range); + while (accept(token_kind::star_tok)) { + parsed_type t = parse_type_atom(); + if (!t.domain.empty()) + throw parse_error("higher-order type in product is unsupported"); + args.push_back(t.range); + } + return args; + } + + parsed_type parse_type_expr() { + std::vector prod = parse_type_product(); + if (accept(token_kind::gt_tok)) { + parsed_type rhs = parse_type_expr(); + if (!rhs.domain.empty()) + throw parse_error("higher-order result type is unsupported"); + return parsed_type(prod, rhs.range); + } + if (prod.size() != 1) + throw parse_error("type product must be followed by '>'"); + return parsed_type(prod[0]); + } + + void skip_annotations_until_rparen() { + int depth = 0; + while (!is(token_kind::eof_tok)) { + if (accept(token_kind::lparen) || accept(token_kind::lbrack)) { + ++depth; + continue; + } + if (is(token_kind::rparen) || is(token_kind::rbrack)) { + if (depth == 0) return; + --depth; + next(); + continue; + } + next(); + } + } + + void skip_balanced(token_kind open_k, token_kind close_k) { + int depth = 1; + while (depth > 0 && !is(token_kind::eof_tok)) { + if (accept(open_k)) ++depth; + else if (accept(close_k)) --depth; + else next(); + } + } + + expr_ref parse_term(); + + expr_ref parse_term_primary() { + if (accept(token_kind::lparen)) { + expr_ref e = parse_term(); + expect(token_kind::rparen, "')'"); + return e; + } + if (accept(token_kind::minus_tok)) { + expr_ref e = parse_term_primary(); + if (!m_arith.is_int_real(e)) + throw parse_error("unary '-' expects arithmetic term"); + return expr_ref(m_arith.mk_uminus(e), m); + } + std::string n = parse_name(); + if (n == "$true") return expr_ref(m.mk_true(), m); + if (n == "$false") return expr_ref(m.mk_false(), m); + + if (is_nonempty_digit_string(n)) { + rational num(n.c_str()); + if (accept(token_kind::slash_tok)) { + std::string d = parse_name(); + if (!is_nonempty_digit_string(d)) + throw parse_error("denominator of rational literal must be a sequence of digits"); + rational den(d.c_str()); + if (den.is_zero()) + throw parse_error("denominator of rational literal cannot be zero"); + return expr_ref(m_arith.mk_numeral(num / den, false), m); + } + return expr_ref(m_arith.mk_numeral(num, true), m); + } + + expr_ref b(m); + if (is_var_name(n) && find_bound(n, b)) + return b; + + expr_ref_vector args(m); + if (accept(token_kind::lparen)) { + if (!accept(token_kind::rparen)) { + do { args.push_back(parse_term()); } while (accept(token_kind::comma)); + expect(token_kind::rparen, "')'"); + } + } + + func_decl* f = mk_decl(n, args.size(), false); + return expr_ref(args.empty() ? m.mk_const(f) : m.mk_app(f, args.size(), args.data()), m); + } + + expr_ref parse_formula(); + + expr_ref parse_atomic_formula() { + if (accept(token_kind::lparen)) { + expr_ref e = parse_formula(); + expect(token_kind::rparen, "')'"); + return e; + } + + std::string n = parse_name(); + if (n == "$true") return expr_ref(m.mk_true(), m); + if (n == "$false") return expr_ref(m.mk_false(), m); + + expr_ref_vector args(m); + if (accept(token_kind::lparen)) { + if (!accept(token_kind::rparen)) { + do { args.push_back(parse_term()); } while (accept(token_kind::comma)); + expect(token_kind::rparen, "')'"); + } + } + + if (n == "$less" || n == "$lesseq" || n == "$greater" || n == "$greatereq") { + if (args.size() != 2) { + std::ostringstream out; + out << "arithmetic predicate '" << n << "' expects arity 2"; + throw parse_error(out.str()); + } + expr_ref lhs(args.get(0), m), rhs(args.get(1), m); + if (!m_arith.is_int_real(lhs) || !m_arith.is_int_real(rhs)) { + std::ostringstream out; + out << "arithmetic predicate '" << n << "' expects arithmetic arguments"; + throw parse_error(out.str()); + } + bool use_real = m_arith.is_real(lhs) || m_arith.is_real(rhs); + if (use_real) { + if (m_arith.is_int(lhs)) lhs = expr_ref(m_arith.mk_to_real(lhs), m); + if (m_arith.is_int(rhs)) rhs = expr_ref(m_arith.mk_to_real(rhs), m); + } + if (n == "$less") return expr_ref(m_arith.mk_lt(lhs, rhs), m); + if (n == "$lesseq") return expr_ref(m_arith.mk_le(lhs, rhs), m); + if (n == "$greater") return expr_ref(m_arith.mk_gt(lhs, rhs), m); + /* n == "$greatereq"*/ return expr_ref(m_arith.mk_ge(lhs, rhs), m); + } + + expr_ref lhs(m); + bool has_lhs = false; + if (args.empty()) { + expr_ref b(m); + if (is_var_name(n) && find_bound(n, b)) { + lhs = b; + has_lhs = true; + } + } + + if (is(token_kind::equal_tok) || is(token_kind::neq_tok)) { + if (!has_lhs) { + func_decl* f = mk_decl(n, args.size(), false); + lhs = args.empty() ? m.mk_const(f) : m.mk_app(f, args.size(), args.data()); + } + bool neq = accept(token_kind::neq_tok); + if (!neq) expect(token_kind::equal_tok, "'='"); + expr_ref rhs = parse_term(); + expr_ref eq(m.mk_eq(lhs, rhs), m); + return neq ? expr_ref(m.mk_not(eq), m) : eq; + } + + if (has_lhs) { + if (m.is_bool(lhs)) return lhs; + throw parse_error("non-boolean variable used as formula"); + } + + auto typed = m_typed_decls.find(mk_typed_key(n, args.size())); + if (typed != m_typed_decls.end()) { + func_decl* f = mk_decl(n, args.size(), false); + expr_ref e(args.empty() ? m.mk_const(f) : m.mk_app(f, args.size(), args.data()), m); + if (!m.is_bool(e)) + throw parse_error("typed non-boolean term used as formula"); + return e; + } + + func_decl* pred = mk_decl(n, args.size(), true); + return expr_ref(args.empty() ? m.mk_const(pred) : m.mk_app(pred, args.size(), args.data()), m); + } + + expr_ref parse_unary_formula() { + if (accept(token_kind::not_tok)) { + expr_ref e = parse_unary_formula(); + return expr_ref(m.mk_not(e), m); + } + + if (is(token_kind::forall_tok) || is(token_kind::exists_tok)) { + bool is_forall = is(token_kind::forall_tok); + next(); + expect(token_kind::lbrack, "'['"); + + ptr_vector vars; + std::unordered_map scope; + if (!accept(token_kind::rbrack)) { + do { + std::string v = parse_name(); + sort* s = m_univ; + if (accept(token_kind::colon)) { + parsed_type t = parse_type_expr(); + if (!t.domain.empty()) + throw parse_error("higher-order variable type is unsupported"); + s = t.range; + } + app* c = m.mk_const(symbol(v), s); + vars.push_back(c); + scope.emplace(v, c); + } + while (accept(token_kind::comma)); + expect(token_kind::rbrack, "']'"); + } + expect(token_kind::colon, "':'"); + m_bound.push_back(scope); + expr_ref body = parse_formula(); + m_bound.pop_back(); + return mk_quantifier(is_forall, vars, body); + } + + return parse_atomic_formula(); + } + + expr_ref parse_and_formula() { + expr_ref e = parse_unary_formula(); + while (is(token_kind::and_tok) || is(token_kind::nand_tok)) { + bool is_nand = accept(token_kind::nand_tok); + if (!is_nand) expect(token_kind::and_tok, "'&'"); + expr_ref rhs = parse_unary_formula(); + expr_ref conj(::mk_and(m, e, rhs), m); + e = is_nand ? expr_ref(m.mk_not(conj), m) : conj; + } + return e; + } + + expr_ref parse_or_formula() { + expr_ref e = parse_and_formula(); + while (is(token_kind::or_tok) || is(token_kind::nor_tok)) { + bool is_nor = accept(token_kind::nor_tok); + if (!is_nor) expect(token_kind::or_tok, "'|'"); + expr_ref rhs = parse_and_formula(); + expr_ref disj(::mk_or(m, e, rhs), m); + e = is_nor ? expr_ref(m.mk_not(disj), m) : disj; + } + return e; + } + + expr_ref parse_implies_formula() { + expr_ref e = parse_or_formula(); + if (accept(token_kind::implies_tok)) { + expr_ref rhs = parse_implies_formula(); + return expr_ref(m.mk_implies(e, rhs), m); + } + if (accept(token_kind::implied_tok)) { + expr_ref rhs = parse_implies_formula(); + return expr_ref(m.mk_implies(rhs, e), m); + } + return e; + } + + void parse_type_decl_formula() { + unsigned lparen_count = 0; + while (accept(token_kind::lparen)) ++lparen_count; + std::string name = parse_name(); + expect(token_kind::colon, "':'"); + parsed_type t = parse_type_expr(); + while (lparen_count-- > 0) + expect(token_kind::rparen, "')'"); + + if (t.domain.empty() && is_ttype(t.range)) { + m_sorts.insert_or_assign(name, m.mk_uninterpreted_sort(symbol(name))); + return; + } + + m_typed_decls.insert_or_assign(mk_typed_key(name, t.domain.size()), std::make_pair(t.domain, t.range)); + } + + static bool file_exists(std::string const& f) { + std::ifstream in(f); + return !in.fail(); + } + + static bool is_absolute_path(std::string const& name) { + return !name.empty() && + (name[0] == '/' || + (name.size() >= 2 && std::isalpha(static_cast(name[0])) && name[1] == ':')); + } + + std::string dirname(std::string const& f) const { + size_t idx = f.find_last_of("/\\"); + return idx == std::string::npos ? "." : f.substr(0, idx); + } + + std::string resolve_include(std::string const& curr_file, std::string const& name) const { + if (is_absolute_path(name)) + return name; + std::string local = dirname(curr_file) + "/" + name; + if (file_exists(local)) return local; + char const* root = std::getenv("TPTP"); + if (root) { + std::string env = std::string(root) + "/" + name; + if (file_exists(env)) return env; + } + return local; + } + + void parse_include(std::string const& curr_file) { + expect(token_kind::lparen, "'('"); + std::string file = parse_name(); + if (accept(token_kind::comma)) { + if (accept(token_kind::lbrack)) { + skip_balanced(token_kind::lbrack, token_kind::rbrack); + } + else { + skip_annotations_until_rparen(); + } + } + expect(token_kind::rparen, "')'"); + expect(token_kind::dot, "'.'"); + parse_file(resolve_include(curr_file, file)); + } + + void parse_annotated() { + expect(token_kind::lparen, "'('"); + parse_name(); + expect(token_kind::comma, "','"); + std::string role = to_lower(parse_name()); + expect(token_kind::comma, "','"); + + if (role == "type") { + parse_type_decl_formula(); + } + else { + expr_ref f = parse_formula(); + if (role == "conjecture") { + m_has_conjecture = true; + f = m.mk_not(f); + } + m_cmd.assert_expr(f); + } + + if (accept(token_kind::comma)) { + skip_annotations_until_rparen(); + } + expect(token_kind::rparen, "')'"); + expect(token_kind::dot, "'.'"); + } + + void parse_toplevel(std::string const& current_file) { + while (!is(token_kind::eof_tok)) { + std::string kw = to_lower(parse_name()); + if (kw == "include") { + parse_include(current_file); + } + else if (kw == "fof" || kw == "cnf" || kw == "tff" || kw == "thf") { + parse_annotated(); + } + else { + std::ostringstream out; + out << "unsupported TPTP unit '" << kw << "' at " << loc(); + throw parse_error(out.str()); + } + } + } + +public: + tptp_parser(cmd_context& cmd): + m_cmd(cmd), + m(m_cmd.m()), + m_arith(m), + m_univ(m.mk_uninterpreted_sort(symbol("U"))) { + m_sorts.emplace("$tType", m.mk_uninterpreted_sort(symbol("$tType"))); + m_sorts.emplace("$i", m_univ); + m_sorts.emplace("$o", m.mk_bool_sort()); + m_sorts.emplace("$int", m_arith.mk_int()); + m_sorts.emplace("$rat", m_arith.mk_real()); + m_sorts.emplace("$real", m_arith.mk_real()); + } + + void parse_input(std::istream& in, std::string const& current_file) { + std::ostringstream buf; + buf << in.rdbuf(); + m_input = buf.str(); + m_lex = std::make_unique(m_input); + next(); + parse_toplevel(current_file); + } + + void parse_file(std::string const& filename) { + if (!m_seen_files.insert(filename).second) return; + std::ifstream in(filename); + if (in.fail()) { + std::ostringstream out; + out << "failed to open file '" << filename << "'"; + throw parse_error(out.str()); + } + parse_input(in, filename); + } + + void parse_stream(std::istream& in) { + parse_input(in, "."); + } + + bool has_conjecture() const { return m_has_conjecture; } +}; + +expr_ref tptp_parser::parse_term() { + expr_ref e = parse_term_primary(); + while (accept(token_kind::at_tok)) { + expr_ref arg = parse_term_primary(); + if (!is_app(e)) + throw parse_error("application operator (@) requires uninterpreted function on left-hand side"); + app* a = to_app(e); + func_decl* d = a->get_decl(); + if (d->get_family_id() != null_family_id) + throw parse_error("application operator (@) requires uninterpreted function on left-hand side"); + expr_ref_vector args(m); + for (unsigned i = 0; i < a->get_num_args(); ++i) args.push_back(a->get_arg(i)); + args.push_back(arg); + e = expr_ref(m.mk_app(d, args.size(), args.data()), m); + } + return e; +} + +expr_ref tptp_parser::parse_formula() { + expr_ref e = parse_implies_formula(); + while (is(token_kind::iff_tok) || is(token_kind::xor_tok)) { + bool is_xor = accept(token_kind::xor_tok); + if (!is_xor) expect(token_kind::iff_tok, "'<=>'"); + expr_ref rhs = parse_implies_formula(); + expr_ref iff(m.mk_iff(e, rhs), m); + e = is_xor ? expr_ref(m.mk_not(iff), m) : iff; + } + return e; +} + +} + +static unsigned read_tptp_stream(std::istream& in, char const* current_file) { + try { + cmd_context ctx; + ctx.set_solver_factory(mk_smt_strategic_solver_factory()); + + tptp_parser p(ctx); + p.parse_input(in, current_file ? current_file : "."); + + // Suppress default check-sat output; TPTP frontend reports SZS status explicitly. + std::ostringstream sink; + scoped_regular_stream scoped_stream(ctx, sink); + ctx.check_sat(0, nullptr); + switch (ctx.cs_state()) { + case cmd_context::css_unsat: + if (p.has_conjecture()) std::cout << "% SZS status Theorem\n"; + else std::cout << "% SZS status Unsatisfiable\n"; + break; + case cmd_context::css_sat: + if (p.has_conjecture()) std::cout << "% SZS status CounterSatisfiable\n"; + else std::cout << "% SZS status Satisfiable\n"; + if (g_display_model) { + model_ref mdl; + if (ctx.is_model_available(mdl)) + ctx.display_model(mdl); + } + break; + case cmd_context::css_unknown: + std::cout << "% SZS status GaveUp\n"; + { + std::string reason = ctx.reason_unknown(); + if (!reason.empty()) std::cout << "% SZS reason " << reason << "\n"; + } + break; + default: + break; + } + + if (g_display_statistics) { + ctx.set_regular_stream("stdout"); + ctx.display_statistics(); + } + return 0; + } + catch (parse_error const& ex) { + std::cerr << "TPTP parse error: " << ex.what() << "\n"; + return ERR_PARSER; + } + catch (z3_error& ex) { + if (ex.error_code() == ERR_TIMEOUT) { + std::cout << "% SZS status Timeout\n"; + return 0; + } + std::cerr << "TPTP frontend error: " << ex.what() << "\n"; + return ERR_INTERNAL_FATAL; + } + catch (z3_exception& ex) { + std::cerr << "TPTP frontend error: " << ex.what() << "\n"; + return ERR_INTERNAL_FATAL; + } +} + +unsigned read_tptp(char const* file_name) { + if (!file_name) + return read_tptp_stream(std::cin, "."); + std::ifstream in(file_name); + if (in.fail()) { + std::cerr << "TPTP parse error: failed to open file '" << file_name << "'\n"; + return ERR_PARSER; + } + return read_tptp_stream(in, file_name); +} + +unsigned read_tptp_string(char const* input) { + std::istringstream in(input ? input : ""); + return read_tptp_stream(in, ""); +} diff --git a/src/cmd_context/tptp_frontend.h b/src/cmd_context/tptp_frontend.h new file mode 100644 index 000000000..8e6b2fcb5 --- /dev/null +++ b/src/cmd_context/tptp_frontend.h @@ -0,0 +1,4 @@ +#pragma once + +unsigned read_tptp(char const* file_name); +unsigned read_tptp_string(char const* input); diff --git a/src/shell/main.cpp b/src/shell/main.cpp index 19e617e55..9a4cd6f27 100644 --- a/src/shell/main.cpp +++ b/src/shell/main.cpp @@ -30,6 +30,7 @@ Revision History: #include "shell/dimacs_frontend.h" #include "shell/datalog_frontend.h" #include "shell/opt_frontend.h" +#include "cmd_context/tptp_frontend.h" #include "util/timeout.h" #include "util/z3_exception.h" #include "util/error_codes.h" @@ -43,14 +44,14 @@ Revision History: #include #endif -typedef enum { IN_UNSPECIFIED, IN_SMTLIB_2, IN_DATALOG, IN_DIMACS, IN_WCNF, IN_OPB, IN_LP, IN_Z3_LOG, IN_DRAT } input_kind; +typedef enum { IN_UNSPECIFIED, IN_SMTLIB_2, IN_DATALOG, IN_DIMACS, IN_WCNF, IN_OPB, IN_LP, IN_Z3_LOG, IN_DRAT, IN_TPTP } input_kind; static char const * g_input_file = nullptr; static char const * g_drat_input_file = nullptr; static bool g_standard_input = false; static input_kind g_input_kind = IN_UNSPECIFIED; -bool g_display_statistics = false; -bool g_display_model = false; +extern bool g_display_statistics; +extern bool g_display_model; static bool g_display_istatistics = false; static void error(const char * msg) { @@ -84,6 +85,7 @@ void display_usage() { std::cout << " -opb use parser for PB optimization input format.\n"; std::cout << " -lp use parser for a modest subset of CPLEX LP input format.\n"; std::cout << " -log use parser for Z3 log input format.\n"; + std::cout << " -tptp use parser for TPTP input format (fof/cnf/tff/thf fragments).\n"; std::cout << " -in read formula from standard input.\n"; std::cout << " -model display model for satisfiable SMT.\n"; std::cout << "\nMiscellaneous:\n"; @@ -130,6 +132,15 @@ static bool validate_is_ulong(char const* s) { return false; return true; } + +static bool is_tptp_extension(char const* ext) { + static char const* tptp_extensions[] = {"p", "tptp", "fof", "cnf", "tff", "thf"}; + for (char const* known_ext : tptp_extensions) { + if (strcmp(ext, known_ext) == 0) + return true; + } + return false; +} static void parse_cmd_line_args(std::string& input_file, int argc, char ** argv) { long timeout = 0; @@ -214,6 +225,9 @@ static void parse_cmd_line_args(std::string& input_file, int argc, char ** argv) else if (strcmp(opt_name, "log") == 0) { g_input_kind = IN_Z3_LOG; } + else if (strcmp(opt_name, "tptp") == 0) { + g_input_kind = IN_TPTP; + } else if (strcmp(opt_name, "st") == 0) { g_display_statistics = true; gparams::set("stats", "true"); @@ -387,6 +401,9 @@ int STD_CALL main(int argc, char ** argv) { else if (strcmp(ext, "smt2") == 0) { g_input_kind = IN_SMTLIB_2; } + else if (is_tptp_extension(ext)) { + g_input_kind = IN_TPTP; + } } } switch (g_input_kind) { @@ -415,6 +432,9 @@ int STD_CALL main(int argc, char ** argv) { case IN_DRAT: return_value = read_drat(g_drat_input_file); break; + case IN_TPTP: + return_value = read_tptp(g_input_file); + break; default: UNREACHABLE(); } @@ -434,4 +454,3 @@ int STD_CALL main(int argc, char ** argv) { return ERR_INTERNAL_FATAL; } } - diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 319cd829b..c79a0696f 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -145,6 +145,7 @@ add_executable(test-z3 theory_dl.cpp theory_pb.cpp timeout.cpp + tptp.cpp total_order.cpp totalizer.cpp trigo.cpp @@ -163,12 +164,11 @@ add_executable(test-z3 z3_add_install_tactic_rule(${z3_test_deps}) z3_add_memory_initializer_rule(${z3_test_deps}) z3_add_gparams_register_modules_rule(${z3_test_deps}) -target_compile_definitions(test-z3 PRIVATE ${Z3_COMPONENT_CXX_DEFINES}) +target_compile_definitions(test-z3 PRIVATE + ${Z3_COMPONENT_CXX_DEFINES} +) target_compile_options(test-z3 PRIVATE ${Z3_COMPONENT_CXX_FLAGS}) target_link_libraries(test-z3 PRIVATE ${Z3_DEPENDENT_LIBS}) target_include_directories(test-z3 PRIVATE ${Z3_COMPONENT_EXTRA_INCLUDE_DIRS}) z3_append_linker_flag_list_to_target(test-z3 ${Z3_DEPENDENT_EXTRA_CXX_LINK_FLAGS}) z3_add_component_dependencies_to_target(test-z3 ${z3_test_expanded_deps}) - - - diff --git a/src/test/main.cpp b/src/test/main.cpp index f1d874e00..6c0052094 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -120,6 +120,7 @@ X(model_based_opt) \ X(factor_rewriter) \ X(smt2print_parse) \ + X(tptp) \ X(substitution) \ X(polynomial) \ X(polynomial_factorization) \ diff --git a/src/test/tptp.cpp b/src/test/tptp.cpp new file mode 100644 index 000000000..2ae2fb4f6 --- /dev/null +++ b/src/test/tptp.cpp @@ -0,0 +1,95 @@ +#include +#include +#include +#include +#include "util/debug.h" +#include "util/error_codes.h" +#include "cmd_context/tptp_frontend.h" + +struct tptp_case { + char const* name; + char const* input; + char const* expected_status; +}; + +static unsigned run_tptp(char const* input, std::string& out, std::string& err) { + std::streambuf* old_out = std::cout.rdbuf(); + std::streambuf* old_err = std::cerr.rdbuf(); + std::ostringstream out_buf; + std::ostringstream err_buf; + std::cout.rdbuf(out_buf.rdbuf()); + std::cerr.rdbuf(err_buf.rdbuf()); + unsigned code = read_tptp_string(input); + std::cout.rdbuf(old_out); + std::cerr.rdbuf(old_err); + out = out_buf.str(); + err = err_buf.str(); + return code; +} + +static std::string run_tptp(char const* input) { + std::string out, err; + unsigned code = run_tptp(input, out, err); + ENSURE(code == 0); + return out; +} + +extern bool g_display_statistics; +extern bool g_display_model; + +void tst_tptp() { + g_display_statistics = false; + g_display_model = false; + std::vector cases = { + {"agatha-butler", +R"(fof(ax1,axiom, lives(agatha)). +fof(ax2,axiom, lives(butler)). +fof(ax3,axiom, lives(charles)). +fof(ax4,axiom, ! [X] : (lives(X) => (X = agatha | X = butler | X = charles))). +fof(ax5,axiom, ! [X,Y] : (killed(X,Y) => hates(X,Y))). +fof(ax6,axiom, ! [X,Y] : (killed(X,Y) => ~ richer(X,Y))). +fof(ax7,axiom, ! [X] : (hates(agatha,X) => ~ hates(charles,X))). +fof(ax8,axiom, ! [X] : (X != butler => hates(agatha,X))). +fof(ax9,axiom, ! [X] : (~ richer(X,agatha) => hates(butler,X))). +fof(ax10,axiom, ! [X] : (hates(agatha,X) => hates(butler,X))). +fof(ax11,axiom, ! [X] : (? [Y] : ~ hates(X,Y))). +fof(ax12,axiom, agatha != butler). +fof(ax13,axiom, ? [X] : killed(X,agatha)). +fof(conj,conjecture, ~ killed(butler,agatha)).)", + "% SZS status Theorem"}, + {"socrates-theorem", +R"(fof(a1,axiom, ! [X] : (human(X) => mortal(X))). +fof(a2,axiom, human(socrates)). +fof(c1,conjecture, mortal(socrates)).)", + "% SZS status Theorem"}, + {"simple-sat", +R"(fof(a1,axiom, p(a)).)", + "% SZS status Satisfiable"}, + {"tff-negative-literal", +R"(tff(c1,conjecture, $less(-2,2)).)", + "% SZS status Theorem"}, + {"tff-rational-literal", +R"(tff(c1,conjecture, $less(1/2,2/3)).)", + "% SZS status Theorem"}, + {"tff-type-decl-arrow", +R"(tff(p_type,type, p: $int > $o ). +tff(a1,axiom, p(1)). +tff(c1,conjecture, p(1)).)", + "% SZS status Theorem"}, + {"tff-typed-int-quantifier", +R"(tff(c1,conjecture, ? [X: $int] : $less(12,X)).)", + "% SZS status Theorem"}, + {"tff-lesseq-built-in", +R"(tff(c1,conjecture, $lesseq(2,2)).)", + "% SZS status Theorem"} + }; + for (auto const& c : cases) { + std::string out = run_tptp(c.input); + ENSURE(out.find(c.expected_status) != std::string::npos); + } + + std::string out, err; + unsigned code = run_tptp("tff(c1,conjecture, $less(1/0,1)).", out, err); + ENSURE(code == ERR_PARSER); + ENSURE(err.find("denominator of rational literal cannot be zero") != std::string::npos); +}