mirror of
https://github.com/Z3Prover/z3
synced 2026-07-02 13:26:10 +00:00
add verdicts
Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
This commit is contained in:
parent
4fd22680b5
commit
14d24e2304
3 changed files with 138 additions and 4 deletions
|
|
@ -291,6 +291,7 @@ class tptp_parser {
|
||||||
sort* m_univ;
|
sort* m_univ;
|
||||||
bool m_has_conjecture = false;
|
bool m_has_conjecture = false;
|
||||||
bool m_last_name_quoted = false;
|
bool m_last_name_quoted = false;
|
||||||
|
std::string m_expected_status; // SZS status from the input annotation, if any
|
||||||
std::unordered_map<std::string, sort*> m_sorts;
|
std::unordered_map<std::string, sort*> m_sorts;
|
||||||
sort_ref_vector m_pinned_sorts; // prevents cached sorts from being freed
|
sort_ref_vector m_pinned_sorts; // prevents cached sorts from being freed
|
||||||
std::unordered_map<std::string, func_decl*> m_decls;
|
std::unordered_map<std::string, func_decl*> m_decls;
|
||||||
|
|
@ -2135,6 +2136,7 @@ public:
|
||||||
std::ostringstream buf;
|
std::ostringstream buf;
|
||||||
buf << in.rdbuf();
|
buf << in.rdbuf();
|
||||||
m_input = buf.str();
|
m_input = buf.str();
|
||||||
|
extract_expected_status(m_input);
|
||||||
m_lex = std::make_unique<lexer>(m_input);
|
m_lex = std::make_unique<lexer>(m_input);
|
||||||
next();
|
next();
|
||||||
parse_toplevel(current_file);
|
parse_toplevel(current_file);
|
||||||
|
|
@ -2161,6 +2163,61 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_conjecture() const { return m_has_conjecture; }
|
bool has_conjecture() const { return m_has_conjecture; }
|
||||||
|
|
||||||
|
std::string const& expected_status() const { return m_expected_status; }
|
||||||
|
|
||||||
|
// Scan TPTP comments for an SZS/Status annotation, e.g.
|
||||||
|
// % Status : Unsatisfiable
|
||||||
|
// % SZS status Theorem
|
||||||
|
// Only the first annotation found (the top-level file's) is recorded.
|
||||||
|
void extract_expected_status(std::string const& text) {
|
||||||
|
if (!m_expected_status.empty())
|
||||||
|
return;
|
||||||
|
std::istringstream in(text);
|
||||||
|
std::string line;
|
||||||
|
while (std::getline(in, line)) {
|
||||||
|
// TPTP comment lines start with '%'.
|
||||||
|
size_t i = line.find_first_not_of(" \t");
|
||||||
|
if (i == std::string::npos || line[i] != '%')
|
||||||
|
continue;
|
||||||
|
++i; // skip '%'
|
||||||
|
// Skip leading '%' and spaces.
|
||||||
|
i = line.find_first_not_of("% \t", i);
|
||||||
|
if (i == std::string::npos)
|
||||||
|
continue;
|
||||||
|
std::string rest = line.substr(i);
|
||||||
|
std::string status;
|
||||||
|
// Form 1: "SZS status <Word>"
|
||||||
|
if (rest.compare(0, 4, "SZS ") == 0) {
|
||||||
|
size_t p = rest.find("status");
|
||||||
|
if (p == std::string::npos)
|
||||||
|
continue;
|
||||||
|
p += 6; // length of "status"
|
||||||
|
status = next_status_word(rest, p);
|
||||||
|
}
|
||||||
|
// Form 2: "Status : <Word>" / "Status : <Word>"
|
||||||
|
else if (rest.compare(0, 6, "Status") == 0) {
|
||||||
|
size_t p = rest.find(':', 6);
|
||||||
|
if (p == std::string::npos)
|
||||||
|
continue;
|
||||||
|
status = next_status_word(rest, p + 1);
|
||||||
|
}
|
||||||
|
if (!status.empty()) {
|
||||||
|
m_expected_status = status;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string next_status_word(std::string const& s, size_t p) {
|
||||||
|
size_t a = s.find_first_not_of(" \t", p);
|
||||||
|
if (a == std::string::npos)
|
||||||
|
return "";
|
||||||
|
size_t b = a;
|
||||||
|
while (b < s.size() && (isalnum((unsigned char)s[b]) || s[b] == '_'))
|
||||||
|
++b;
|
||||||
|
return s.substr(a, b - a);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
expr_ref tptp_parser::parse_term() {
|
expr_ref tptp_parser::parse_term() {
|
||||||
|
|
@ -2190,6 +2247,35 @@ expr_ref tptp_parser::parse_formula() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Classify an SZS status into the coarse verdict used for cross-checking.
|
||||||
|
// unsat: a refutation/proof exists (Theorem, Unsatisfiable, ContradictoryAxioms, ...)
|
||||||
|
// sat: a model exists (Satisfiable, CounterSatisfiable, ...)
|
||||||
|
// other: no comparable verdict (Open, Unknown, Timeout, GaveUp, empty, ...)
|
||||||
|
enum class szs_verdict { unsat, sat, other };
|
||||||
|
|
||||||
|
static szs_verdict classify_szs(std::string const& s) {
|
||||||
|
if (s == "Theorem" || s == "Unsatisfiable" || s == "ContradictoryAxioms" || s == "Unsat")
|
||||||
|
return szs_verdict::unsat;
|
||||||
|
if (s == "Satisfiable" || s == "CounterSatisfiable" || s == "CounterTheorem" || s == "Sat")
|
||||||
|
return szs_verdict::sat;
|
||||||
|
return szs_verdict::other;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Emit the SZS status produced by z3. If the input carries an annotated status
|
||||||
|
// that contradicts the produced verdict, prepend "BUG" to flag the mismatch.
|
||||||
|
static void report_szs_status(char const* produced, std::string const& expected) {
|
||||||
|
szs_verdict pv = classify_szs(produced);
|
||||||
|
szs_verdict ev = classify_szs(expected);
|
||||||
|
bool is_bug = !expected.empty() &&
|
||||||
|
(pv == szs_verdict::unsat || pv == szs_verdict::sat) &&
|
||||||
|
(ev == szs_verdict::unsat || ev == szs_verdict::sat) &&
|
||||||
|
pv != ev;
|
||||||
|
if (is_bug)
|
||||||
|
std::cout << "% SZS status BUG " << produced << " (expected " << expected << ")\n";
|
||||||
|
else
|
||||||
|
std::cout << "% SZS status " << produced << "\n";
|
||||||
|
}
|
||||||
|
|
||||||
static unsigned read_tptp_stream(std::istream& in, char const* current_file) {
|
static unsigned read_tptp_stream(std::istream& in, char const* current_file) {
|
||||||
register_on_timeout_proc(on_timeout);
|
register_on_timeout_proc(on_timeout);
|
||||||
try {
|
try {
|
||||||
|
|
@ -2206,12 +2292,12 @@ static unsigned read_tptp_stream(std::istream& in, char const* current_file) {
|
||||||
ctx.check_sat(0, nullptr);
|
ctx.check_sat(0, nullptr);
|
||||||
switch (ctx.cs_state()) {
|
switch (ctx.cs_state()) {
|
||||||
case cmd_context::css_unsat:
|
case cmd_context::css_unsat:
|
||||||
if (p.has_conjecture()) std::cout << "% SZS status Theorem\n";
|
if (p.has_conjecture()) report_szs_status("Theorem", p.expected_status());
|
||||||
else std::cout << "% SZS status Unsatisfiable\n";
|
else report_szs_status("Unsatisfiable", p.expected_status());
|
||||||
break;
|
break;
|
||||||
case cmd_context::css_sat:
|
case cmd_context::css_sat:
|
||||||
if (p.has_conjecture()) std::cout << "% SZS status CounterSatisfiable\n";
|
if (p.has_conjecture()) report_szs_status("CounterSatisfiable", p.expected_status());
|
||||||
else std::cout << "% SZS status Satisfiable\n";
|
else report_szs_status("Satisfiable", p.expected_status());
|
||||||
if (g_display_model) {
|
if (g_display_model) {
|
||||||
model_ref mdl;
|
model_ref mdl;
|
||||||
if (ctx.is_model_available(mdl))
|
if (ctx.is_model_available(mdl))
|
||||||
|
|
|
||||||
|
|
@ -140,6 +140,7 @@
|
||||||
X(zstring)
|
X(zstring)
|
||||||
|
|
||||||
#define FOR_EACH_EXTRA_TEST(X, X_ARGV) \
|
#define FOR_EACH_EXTRA_TEST(X, X_ARGV) \
|
||||||
|
X(tptp) \
|
||||||
X(ext_numeral) \
|
X(ext_numeral) \
|
||||||
X(interval) \
|
X(interval) \
|
||||||
X(value_generator) \
|
X(value_generator) \
|
||||||
|
|
|
||||||
|
|
@ -123,4 +123,51 @@ R"(tff(c1,conjecture, $let(a: $int, a := 5, $let(b: $int, b := 3, $less(b,a)))).
|
||||||
unsigned code = run_tptp("tff(c1,conjecture, $less(1/0,1)).", out, err);
|
unsigned code = run_tptp("tff(c1,conjecture, $less(1/0,1)).", out, err);
|
||||||
ENSURE(code == ERR_PARSER);
|
ENSURE(code == ERR_PARSER);
|
||||||
ENSURE(err.find("denominator of rational literal cannot be zero") != std::string::npos);
|
ENSURE(err.find("denominator of rational literal cannot be zero") != std::string::npos);
|
||||||
|
|
||||||
|
// SZS status cross-checking against the annotated input status.
|
||||||
|
|
||||||
|
// Matching annotation: no BUG flag.
|
||||||
|
{
|
||||||
|
std::string o = run_tptp(
|
||||||
|
R"(% Status : Unsatisfiable
|
||||||
|
cnf(c1,axiom, p(X)).
|
||||||
|
cnf(c2,axiom, ~ p(a)).)");
|
||||||
|
ENSURE(o.find("% SZS status Unsatisfiable") != std::string::npos);
|
||||||
|
ENSURE(o.find("BUG") == std::string::npos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Contradicting annotation (says Satisfiable, z3 finds Unsatisfiable): BUG.
|
||||||
|
{
|
||||||
|
std::string o = run_tptp(
|
||||||
|
R"(% SZS status Satisfiable
|
||||||
|
cnf(c1,axiom, p(X)).
|
||||||
|
cnf(c2,axiom, ~ p(a)).)");
|
||||||
|
ENSURE(o.find("BUG") != std::string::npos);
|
||||||
|
ENSURE(o.find("expected Satisfiable") != std::string::npos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Contradicting annotation (says Unsatisfiable, z3 finds Satisfiable): BUG.
|
||||||
|
{
|
||||||
|
std::string o = run_tptp(
|
||||||
|
R"(% Status : Unsatisfiable
|
||||||
|
fof(a1,axiom, p(a)).)");
|
||||||
|
ENSURE(o.find("BUG") != std::string::npos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Theorem annotation matches z3's Theorem verdict for conjectures: no BUG.
|
||||||
|
{
|
||||||
|
std::string o = run_tptp(
|
||||||
|
R"(% SZS status Theorem
|
||||||
|
fof(a1,axiom, ! [X] : (human(X) => mortal(X))).
|
||||||
|
fof(a2,axiom, human(socrates)).
|
||||||
|
fof(c1,conjecture, mortal(socrates)).)");
|
||||||
|
ENSURE(o.find("% SZS status Theorem") != std::string::npos);
|
||||||
|
ENSURE(o.find("BUG") == std::string::npos);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unannotated input: nothing to check, no BUG.
|
||||||
|
{
|
||||||
|
std::string o = run_tptp("fof(a1,axiom, p(a)).");
|
||||||
|
ENSURE(o.find("BUG") == std::string::npos);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue