3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-06-21 13:23:39 +00:00

Unit testing fixes

This commit is contained in:
Jakob Rath 2022-11-28 18:05:25 +01:00
parent 3d79cddf33
commit c488a766b5

View file

@ -78,83 +78,140 @@ namespace polysat {
error, error,
}; };
template <typename T>
T* with_default(T* value, T* default_value) {
return value ? value : default_value;
}
struct test_record { struct test_record {
std::string m_name; std::string m_name;
unsigned m_index = 0; ///< m_index-th check_sat() call in this unit test. unsigned m_index = 0; ///< m_index-th check_sat() call in this unit test.
lbool m_answer; ///< what the solver returned lbool m_answer = l_undef; ///< what the solver returned
lbool m_expected = l_undef; ///< the answer we expect (l_undef if unspecified) lbool m_expected = l_undef; ///< the answer we expect (l_undef if unspecified)
test_result m_result = test_result::undefined; test_result m_result = test_result::undefined;
std::string m_error_message; std::string m_error_message;
bool m_finished = false;
using clock_t = std::chrono::steady_clock; using clock_t = std::chrono::steady_clock;
clock_t::time_point start; clock_t::time_point m_start;
clock_t::time_point end; clock_t::time_point m_end;
void set_error(char const* msg) {
m_result = test_result::error;
m_error_message = msg;
m_finished = true;
}
std::ostream& display(std::ostream& out, unsigned name_width = 0) const;
}; };
vector<test_record> test_records; std::ostream& operator<<(std::ostream& out, test_record const& r) { return r.display(out); }
bool collect_test_records = true;
void display_test_records(vector<test_record> const& recs, std::ostream& out) { class test_record_manager {
scoped_ptr_vector<test_record> m_records;
std::string m_name;
unsigned m_index = 0;
public:
void begin_batch(std::string name);
void end_batch();
test_record* new_record();
test_record* active_or_new_record();
void display(std::ostream& out) const;
};
void test_record_manager::begin_batch(std::string name) {
end_batch();
if (m_name != name) {
m_name = name;
m_index = 0;
}
}
void test_record_manager::end_batch() {
// Kick out unfinished records (they have the wrong name)
while (!m_records.empty() && !m_records.back()->m_finished)
m_records.pop_back();
}
test_record* test_record_manager::new_record() {
auto* rec = alloc(test_record);
rec->m_name = m_name;
rec->m_index = ++m_index;
m_records.push_back(rec);
return rec;
}
test_record* test_record_manager::active_or_new_record() {
if (m_records.empty() || m_records.back()->m_finished)
return new_record();
else
return m_records.back();
}
std::ostream& test_record::display(std::ostream& out, unsigned name_width) const {
if (!m_finished)
out << "UNFINISHED ";
out << m_name;
if (m_index != 1)
out << " (" << m_index << ") ";
else
out << " ";
for (size_t i = m_name.length(); i < name_width; ++i)
out << ' ';
std::chrono::duration<double> d = m_end - m_start;
if (d.count() >= 0.15) {
out << std::fixed << std::setprecision(1);
out << std::setw(4) << d.count() << "s ";
}
else
out << " ";
switch (m_answer) {
case l_undef: out << " "; break;
case l_true: out << "SAT "; break;
case l_false: out << "UNSAT "; break;
}
switch (m_result) {
case test_result::undefined: out << "???"; break;
case test_result::ok: out << "ok"; break;
case test_result::wrong_answer: out << color_red() << "wrong answer, expected " << m_expected << color_reset(); break;
case test_result::wrong_model: out << color_red() << "wrong model" << color_reset(); break;
case test_result::resource_out: out << color_yellow() << "resource out" << color_reset(); break;
case test_result::error: out << color_red() << "error: " << m_error_message << color_reset(); break;
}
return out;
}
void test_record_manager::display(std::ostream& out) const {
out << "\n\nTest Results:\n"; out << "\n\nTest Results:\n";
size_t max_name_len = 0; size_t max_name_len = 0;
for (test_record const& r : recs) for (test_record const* r : m_records) {
max_name_len = std::max(max_name_len, r.m_name.length()); if (!r->m_finished)
continue;
max_name_len = std::max(max_name_len, r->m_name.length());
}
size_t n_total = recs.size(); size_t n_total = m_records.size();
size_t n_sat = 0; size_t n_sat = 0;
size_t n_unsat = 0; size_t n_unsat = 0;
size_t n_wrong = 0; size_t n_wrong = 0;
size_t n_error = 0; size_t n_error = 0;
for (test_record const& r : recs) { for (test_record const* r : m_records) {
out << r.m_name; if (!r->m_finished)
if (r.m_index != 1) continue;
out << " (" << r.m_index << ") "; r->display(out, max_name_len);
else
out << " ";
for (size_t i = r.m_name.length(); i < max_name_len; ++i)
out << ' ';
std::chrono::duration<double> d = r.end - r.start;
if (d.count() >= 0.15) {
out << std::fixed << std::setprecision(1);
out << std::setw(4) << d.count() << "s ";
}
else
out << " ";
switch (r.m_answer) {
case l_undef: out << " "; break;
case l_true: out << "SAT "; break;
case l_false: out << "UNSAT "; break;
}
switch (r.m_result) {
case test_result::undefined:
out << "???";
break;
case test_result::ok:
out << "ok";
if (r.m_answer == l_true)
n_sat++;
if (r.m_answer == l_false)
n_unsat++;
break;
case test_result::wrong_answer:
out << color_red() << "wrong answer, expected " << r.m_expected << color_reset();
n_wrong++;
break;
case test_result::wrong_model:
out << color_red() << "wrong model" << color_reset();
n_wrong++;
break;
case test_result::resource_out:
out << color_yellow() << "resource out" << color_reset();
break;
case test_result::error:
out << color_red() << "error: " << r.m_error_message << color_reset();
n_error++;
break;
}
out << std::endl; out << std::endl;
if (r->m_result == test_result::ok && r->m_answer == l_true)
n_sat++;
if (r->m_result == test_result::ok && r->m_answer == l_false)
n_unsat++;
if (r->m_result == test_result::wrong_answer || r->m_result == test_result::wrong_model)
n_wrong++;
if (r->m_result == test_result::error)
n_error++;
} }
out << n_total << " tests, " << (n_sat + n_unsat) << " ok (" << n_sat << " sat, " << n_unsat << " unsat)"; out << n_total << " tests, " << (n_sat + n_unsat) << " ok (" << n_sat << " sat, " << n_unsat << " unsat)";
if (n_wrong) if (n_wrong)
@ -164,6 +221,9 @@ namespace polysat {
out << std::endl; out << std::endl;
} }
test_record_manager test_records;
bool collect_test_records = true;
// test resolve, factoring routines // test resolve, factoring routines
// auxiliary // auxiliary
@ -174,13 +234,19 @@ namespace polysat {
class scoped_solver : public solver_scope, public solver { class scoped_solver : public solver_scope, public solver {
std::string m_name; std::string m_name;
lbool m_last_result = l_undef; lbool m_last_result = l_undef;
test_record* m_record = nullptr; test_record* m_last_finished = nullptr;
unsigned m_record_idx = 0;
public: public:
scoped_solver(std::string name): solver(lim), m_name(name) { scoped_solver(std::string name): solver(lim), m_name(name) {
std::cout << std::string(78, '#') << "\n\nSTART: " << m_name << "\n"; std::cout << std::string(78, '#') << "\n\n";
std::cout << "START: " << m_name << "\n";
set_max_conflicts(10); set_max_conflicts(10);
test_records.begin_batch(name);
}
~scoped_solver() {
test_records.end_batch();
} }
void set_max_conflicts(unsigned c) { void set_max_conflicts(unsigned c) {
@ -190,69 +256,39 @@ namespace polysat {
} }
void check() { void check() {
test_records.push_back({}); auto* rec = test_records.active_or_new_record();
m_record = &test_records.back(); rec->m_finished = true;
m_record->m_name = m_name; m_last_finished = rec;
m_record->m_index = ++m_record_idx; SASSERT(rec->m_answer == l_undef);
m_record->m_answer = l_undef; SASSERT(rec->m_expected == l_undef);
m_record->m_expected = l_undef; SASSERT(rec->m_result == test_result::undefined);
m_record->m_result = test_result::undefined; SASSERT(rec->m_error_message == "");
m_record->m_error_message = ""; {
try { rec->m_start = test_record::clock_t::now();
m_record->start = test_record::clock_t::now(); on_scope_exit end_timer([rec]() {
rec->m_end = test_record::clock_t::now();
});
m_last_result = check_sat(); m_last_result = check_sat();
m_record->end = test_record::clock_t::now(); }
std::cout << "DONE: " << m_name << ": " << m_last_result << "\n"; std::cout << "DONE: " << m_name << ": " << m_last_result << "\n";
statistics st; statistics st;
collect_statistics(st); collect_statistics(st);
std::cout << st << "\n"; std::cout << st << "\n";
m_record->m_answer = m_last_result; rec->m_answer = m_last_result;
m_record->m_result = (m_last_result == l_undef) ? test_result::resource_out : test_result::ok; rec->m_result = (m_last_result == l_undef) ? test_result::resource_out : test_result::ok;
}
catch(z3_exception const& e) {
m_record->end = test_record::clock_t::now();
char const* msg = e.msg();
if (!msg)
msg = "(unknown z3_exception)";
std::cout << "\n\nEXCEPTION (z3_exception) during test: " << m_name << ": " << msg << "\n\n";
m_record->m_result = test_result::error;
m_record->m_error_message = msg;
if (!collect_test_records)
throw;
}
catch(std::exception const& e) {
m_record->end = test_record::clock_t::now();
char const* msg = e.what();
if (!msg)
msg = "(unknown std::exception)";
std::cout << "\n\nEXCEPTION (std::exception) during test: " << m_name << ": " << msg << "\n\n";
m_record->m_result = test_result::error;
m_record->m_error_message = msg;
if (!collect_test_records)
throw;
}
catch(...) {
m_record->end = test_record::clock_t::now();
char const* msg = "(unknown throwable)";
std::cout << "\n\nEXCEPTION (unknown throwable) during test: " << m_name << ": " << msg << "\n\n";
m_record->m_result = test_result::error;
m_record->m_error_message = msg;
if (!collect_test_records)
throw;
}
} }
// TODO: all unit tests should save result somewhere, and then at the end we print a table
// Highlight undef results
// If low conflict limit doesn't work, increase automatically and try again?
void expect_unsat() { void expect_unsat() {
SASSERT_EQ(m_record->m_expected, l_undef); auto* rec = m_last_finished;
m_record->m_expected = l_false; SASSERT(rec);
if (m_record->m_result == test_result::error) SASSERT_EQ(rec->m_expected, l_undef);
SASSERT_EQ(rec->m_answer, m_last_result);
rec->m_expected = l_false;
if (rec->m_result == test_result::error)
return; return;
if (m_last_result != l_false && m_last_result != l_undef) { if (m_last_result != l_false && m_last_result != l_undef) {
m_record->m_result = test_result::wrong_answer; rec->m_result = test_result::wrong_answer;
LOG_H1("FAIL: " << m_name << ": expected UNSAT, got " << m_last_result << "!"); LOG_H1("FAIL: " << m_name << ": expected UNSAT, got " << m_last_result << "!");
if (!collect_test_records) if (!collect_test_records)
VERIFY(false); VERIFY(false);
@ -260,9 +296,12 @@ namespace polysat {
} }
void expect_sat(std::vector<std::pair<dd::pdd, unsigned>> const& expected_assignment = {}) { void expect_sat(std::vector<std::pair<dd::pdd, unsigned>> const& expected_assignment = {}) {
SASSERT_EQ(m_record->m_expected, l_undef); auto* rec = m_last_finished;
m_record->m_expected = l_true; SASSERT(rec);
if (m_record->m_result == test_result::error) SASSERT_EQ(rec->m_expected, l_undef);
SASSERT_EQ(rec->m_answer, m_last_result);
rec->m_expected = l_true;
if (rec->m_result == test_result::error)
return; return;
if (m_last_result == l_true) { if (m_last_result == l_true) {
for (auto const& p : expected_assignment) { for (auto const& p : expected_assignment) {
@ -271,7 +310,7 @@ namespace polysat {
SASSERT(v_pdd.is_monomial() && !v_pdd.is_val()); SASSERT(v_pdd.is_monomial() && !v_pdd.is_val());
auto const v = v_pdd.var(); auto const v = v_pdd.var();
if (get_value(v) != expected_value) { if (get_value(v) != expected_value) {
m_record->m_result = test_result::wrong_model; rec->m_result = test_result::wrong_model;
LOG_H1("FAIL: " << m_name << ": expected assignment v" << v << " := " << expected_value << ", got value " << get_value(v) << "!"); LOG_H1("FAIL: " << m_name << ": expected assignment v" << v << " := " << expected_value << ", got value " << get_value(v) << "!");
if (!collect_test_records) if (!collect_test_records)
VERIFY(false); VERIFY(false);
@ -279,7 +318,7 @@ namespace polysat {
} }
} }
else if (m_last_result == l_false) { else if (m_last_result == l_false) {
m_record->m_result = test_result::wrong_answer; rec->m_result = test_result::wrong_answer;
LOG_H1("FAIL: " << m_name << ": expected SAT, got " << m_last_result << "!"); LOG_H1("FAIL: " << m_name << ": expected SAT, got " << m_last_result << "!");
if (!collect_test_records) if (!collect_test_records)
VERIFY(false); VERIFY(false);
@ -290,23 +329,24 @@ namespace polysat {
template <typename Test, typename... Args> template <typename Test, typename... Args>
void run(Test tst, Args... args) void run(Test tst, Args... args)
{ {
bool got_exception = false;
try { try {
tst(args...); tst(args...);
} }
catch(z3_exception const& e) { catch(z3_exception const& e) {
// TODO: collect in record test_records.active_or_new_record()->set_error(with_default(e.msg(), "unknown z3_exception"));
got_exception = true; if (!collect_test_records)
throw;
} }
catch(std::exception const& e) { catch(std::exception const& e) {
// TODO: collect in record test_records.active_or_new_record()->set_error(with_default(e.what(), "unknown std::exception"));
got_exception = true; if (!collect_test_records)
throw;
} }
catch(...) { catch(...) {
got_exception = true; test_records.active_or_new_record()->set_error("unknown throwable");
if (!collect_test_records)
throw;
} }
if (got_exception && !collect_test_records)
throw;
} }
#define RUN(tst) run([]() { tst; }) #define RUN(tst) run([]() { tst; })
@ -702,13 +742,12 @@ namespace polysat {
* *
* We do overflow checks by doubling the base bitwidth here. * We do overflow checks by doubling the base bitwidth here.
*/ */
static void test_fixed_point_arith_mul_div_inverse() { static void test_fixed_point_arith_mul_div_inverse(unsigned base_bw = 5) {
scoped_solver s(__func__); scoped_solver s(__func__);
auto baseBw = 5; auto max_int_const = rational::power_of_two(base_bw) - 1;
auto max_int_const = 31; // (2^5 - 1) -- change this when you change baseBw
auto bw = 2 * baseBw; auto bw = 2 * base_bw;
auto max_int = s.var(s.add_var(bw)); auto max_int = s.var(s.add_var(bw));
s.add_eq(max_int - max_int_const); s.add_eq(max_int - max_int_const);
@ -720,8 +759,9 @@ namespace polysat {
s.add_ult(0, b); // b > 0 s.add_ult(0, b); // b > 0
// scaling factor (setting it, somewhat arbitrarily, to max_int/3) // scaling factor (setting it, somewhat arbitrarily, to max_int/3)
auto sf_val = div(max_int_const, rational(3));
auto sf = s.var(s.add_var(bw)); auto sf = s.var(s.add_var(bw));
s.add_eq(sf - (max_int_const/3)); s.add_eq(sf - sf_val);
// (a * b) / sf = quot1 <=> quot1 * sf + rem1 - (a * b) = 0 // (a * b) / sf = quot1 <=> quot1 * sf + rem1 - (a * b) = 0
auto quot1 = s.var(s.add_var(bw)); auto quot1 = s.var(s.add_var(bw));
@ -753,15 +793,14 @@ namespace polysat {
s.push(); s.push();
s.add_ult(a, quot3); s.add_ult(a, quot3);
s.check(); s.check();
s.expect_unsat(); // s.expect_unsat();
s.pop(); s.pop();
s.push();
// s.push(); s.add_ult(quot3 + em, a);
// s.add_ult(quot3 + em, a); s.check();
// s.check();
// s.expect_unsat(); // s.expect_unsat();
// s.pop(); s.pop();
} }
/* /*
@ -832,10 +871,6 @@ namespace polysat {
s.check(); s.check();
// s.expect_unsat(); // s.expect_unsat();
s.pop(); s.pop();
//exit(0);
} }
/** Monotonicity under bounds, /** Monotonicity under bounds,
@ -1500,9 +1535,7 @@ namespace polysat {
static void STD_CALL polysat_on_ctrl_c(int) { static void STD_CALL polysat_on_ctrl_c(int) {
signal(SIGINT, SIG_DFL); signal(SIGINT, SIG_DFL);
using namespace polysat; using namespace polysat;
if (!test_records.empty()) test_records.display(std::cout);
test_records.pop_back(); // last record is likely incomplete
display_test_records(test_records, std::cout);
raise(SIGINT); raise(SIGINT);
} }
@ -1511,6 +1544,7 @@ void tst_polysat() {
#if 0 // Enable this block to run a single unit test with detailed output. #if 0 // Enable this block to run a single unit test with detailed output.
collect_test_records = false; collect_test_records = false;
test_polysat::test_pop_conflict();
// test_polysat::test_l2(); // test_polysat::test_l2();
// test_polysat::test_ineq1(); // test_polysat::test_ineq1();
// test_polysat::test_quot_rem(); // test_polysat::test_quot_rem();
@ -1519,7 +1553,7 @@ void tst_polysat() {
// test_polysat::test_band2(); // test_polysat::test_band2();
// test_polysat::test_quot_rem_incomplete(); // test_polysat::test_quot_rem_incomplete();
// test_polysat::test_monot(); // test_polysat::test_monot();
test_polysat::test_fixed_point_arith_div_mul_inverse(); // test_polysat::test_fixed_point_arith_div_mul_inverse();
return; return;
#endif #endif
@ -1569,6 +1603,7 @@ void tst_polysat() {
RUN(test_polysat::test_monot_bounds()); RUN(test_polysat::test_monot_bounds());
RUN(test_polysat::test_monot_bounds_full()); RUN(test_polysat::test_monot_bounds_full());
RUN(test_polysat::test_monot_bounds_simple(8)); RUN(test_polysat::test_monot_bounds_simple(8));
RUN(test_polysat::test_fixed_point_arith_mul_div_inverse());
RUN(test_polysat::test_fixed_point_arith_div_mul_inverse()); RUN(test_polysat::test_fixed_point_arith_div_mul_inverse());
RUN(test_polysat::test_ineq_axiom1()); RUN(test_polysat::test_ineq_axiom1());
@ -1598,5 +1633,5 @@ void tst_polysat() {
// test_fi::randomized(); // test_fi::randomized();
if (collect_test_records) if (collect_test_records)
display_test_records(test_records, std::cout); test_records.display(std::cout);
} }