3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2025-04-23 17:15:31 +00:00

Unit testing fixes

This commit is contained in:
Jakob Rath 2022-11-28 18:05:25 +01:00
parent 3d79cddf33
commit c488a766b5

View file

@ -78,83 +78,140 @@ namespace polysat {
error,
};
template <typename T>
T* with_default(T* value, T* default_value) {
return value ? value : default_value;
}
struct test_record {
std::string m_name;
unsigned m_index = 0; ///< m_index-th check_sat() call in this unit test.
lbool m_answer; ///< what the solver returned
lbool m_answer = l_undef; ///< what the solver returned
lbool m_expected = l_undef; ///< the answer we expect (l_undef if unspecified)
test_result m_result = test_result::undefined;
std::string m_error_message;
bool m_finished = false;
using clock_t = std::chrono::steady_clock;
clock_t::time_point start;
clock_t::time_point end;
clock_t::time_point m_start;
clock_t::time_point m_end;
void set_error(char const* msg) {
m_result = test_result::error;
m_error_message = msg;
m_finished = true;
}
std::ostream& display(std::ostream& out, unsigned name_width = 0) const;
};
vector<test_record> test_records;
bool collect_test_records = true;
std::ostream& operator<<(std::ostream& out, test_record const& r) { return r.display(out); }
void display_test_records(vector<test_record> const& recs, std::ostream& out) {
class test_record_manager {
scoped_ptr_vector<test_record> m_records;
std::string m_name;
unsigned m_index = 0;
public:
void begin_batch(std::string name);
void end_batch();
test_record* new_record();
test_record* active_or_new_record();
void display(std::ostream& out) const;
};
void test_record_manager::begin_batch(std::string name) {
end_batch();
if (m_name != name) {
m_name = name;
m_index = 0;
}
}
void test_record_manager::end_batch() {
// Kick out unfinished records (they have the wrong name)
while (!m_records.empty() && !m_records.back()->m_finished)
m_records.pop_back();
}
test_record* test_record_manager::new_record() {
auto* rec = alloc(test_record);
rec->m_name = m_name;
rec->m_index = ++m_index;
m_records.push_back(rec);
return rec;
}
test_record* test_record_manager::active_or_new_record() {
if (m_records.empty() || m_records.back()->m_finished)
return new_record();
else
return m_records.back();
}
std::ostream& test_record::display(std::ostream& out, unsigned name_width) const {
if (!m_finished)
out << "UNFINISHED ";
out << m_name;
if (m_index != 1)
out << " (" << m_index << ") ";
else
out << " ";
for (size_t i = m_name.length(); i < name_width; ++i)
out << ' ';
std::chrono::duration<double> d = m_end - m_start;
if (d.count() >= 0.15) {
out << std::fixed << std::setprecision(1);
out << std::setw(4) << d.count() << "s ";
}
else
out << " ";
switch (m_answer) {
case l_undef: out << " "; break;
case l_true: out << "SAT "; break;
case l_false: out << "UNSAT "; break;
}
switch (m_result) {
case test_result::undefined: out << "???"; break;
case test_result::ok: out << "ok"; break;
case test_result::wrong_answer: out << color_red() << "wrong answer, expected " << m_expected << color_reset(); break;
case test_result::wrong_model: out << color_red() << "wrong model" << color_reset(); break;
case test_result::resource_out: out << color_yellow() << "resource out" << color_reset(); break;
case test_result::error: out << color_red() << "error: " << m_error_message << color_reset(); break;
}
return out;
}
void test_record_manager::display(std::ostream& out) const {
out << "\n\nTest Results:\n";
size_t max_name_len = 0;
for (test_record const& r : recs)
max_name_len = std::max(max_name_len, r.m_name.length());
for (test_record const* r : m_records) {
if (!r->m_finished)
continue;
max_name_len = std::max(max_name_len, r->m_name.length());
}
size_t n_total = recs.size();
size_t n_total = m_records.size();
size_t n_sat = 0;
size_t n_unsat = 0;
size_t n_wrong = 0;
size_t n_error = 0;
for (test_record const& r : recs) {
out << r.m_name;
if (r.m_index != 1)
out << " (" << r.m_index << ") ";
else
out << " ";
for (size_t i = r.m_name.length(); i < max_name_len; ++i)
out << ' ';
std::chrono::duration<double> d = r.end - r.start;
if (d.count() >= 0.15) {
out << std::fixed << std::setprecision(1);
out << std::setw(4) << d.count() << "s ";
}
else
out << " ";
switch (r.m_answer) {
case l_undef: out << " "; break;
case l_true: out << "SAT "; break;
case l_false: out << "UNSAT "; break;
}
switch (r.m_result) {
case test_result::undefined:
out << "???";
break;
case test_result::ok:
out << "ok";
if (r.m_answer == l_true)
n_sat++;
if (r.m_answer == l_false)
n_unsat++;
break;
case test_result::wrong_answer:
out << color_red() << "wrong answer, expected " << r.m_expected << color_reset();
n_wrong++;
break;
case test_result::wrong_model:
out << color_red() << "wrong model" << color_reset();
n_wrong++;
break;
case test_result::resource_out:
out << color_yellow() << "resource out" << color_reset();
break;
case test_result::error:
out << color_red() << "error: " << r.m_error_message << color_reset();
n_error++;
break;
}
for (test_record const* r : m_records) {
if (!r->m_finished)
continue;
r->display(out, max_name_len);
out << std::endl;
if (r->m_result == test_result::ok && r->m_answer == l_true)
n_sat++;
if (r->m_result == test_result::ok && r->m_answer == l_false)
n_unsat++;
if (r->m_result == test_result::wrong_answer || r->m_result == test_result::wrong_model)
n_wrong++;
if (r->m_result == test_result::error)
n_error++;
}
out << n_total << " tests, " << (n_sat + n_unsat) << " ok (" << n_sat << " sat, " << n_unsat << " unsat)";
if (n_wrong)
@ -164,6 +221,9 @@ namespace polysat {
out << std::endl;
}
test_record_manager test_records;
bool collect_test_records = true;
// test resolve, factoring routines
// auxiliary
@ -174,13 +234,19 @@ namespace polysat {
class scoped_solver : public solver_scope, public solver {
std::string m_name;
lbool m_last_result = l_undef;
test_record* m_record = nullptr;
unsigned m_record_idx = 0;
test_record* m_last_finished = nullptr;
public:
scoped_solver(std::string name): solver(lim), m_name(name) {
std::cout << std::string(78, '#') << "\n\nSTART: " << m_name << "\n";
std::cout << std::string(78, '#') << "\n\n";
std::cout << "START: " << m_name << "\n";
set_max_conflicts(10);
test_records.begin_batch(name);
}
~scoped_solver() {
test_records.end_batch();
}
void set_max_conflicts(unsigned c) {
@ -190,69 +256,39 @@ namespace polysat {
}
void check() {
test_records.push_back({});
m_record = &test_records.back();
m_record->m_name = m_name;
m_record->m_index = ++m_record_idx;
m_record->m_answer = l_undef;
m_record->m_expected = l_undef;
m_record->m_result = test_result::undefined;
m_record->m_error_message = "";
try {
m_record->start = test_record::clock_t::now();
auto* rec = test_records.active_or_new_record();
rec->m_finished = true;
m_last_finished = rec;
SASSERT(rec->m_answer == l_undef);
SASSERT(rec->m_expected == l_undef);
SASSERT(rec->m_result == test_result::undefined);
SASSERT(rec->m_error_message == "");
{
rec->m_start = test_record::clock_t::now();
on_scope_exit end_timer([rec]() {
rec->m_end = test_record::clock_t::now();
});
m_last_result = check_sat();
m_record->end = test_record::clock_t::now();
std::cout << "DONE: " << m_name << ": " << m_last_result << "\n";
statistics st;
collect_statistics(st);
std::cout << st << "\n";
}
std::cout << "DONE: " << m_name << ": " << m_last_result << "\n";
statistics st;
collect_statistics(st);
std::cout << st << "\n";
m_record->m_answer = m_last_result;
m_record->m_result = (m_last_result == l_undef) ? test_result::resource_out : test_result::ok;
}
catch(z3_exception const& e) {
m_record->end = test_record::clock_t::now();
char const* msg = e.msg();
if (!msg)
msg = "(unknown z3_exception)";
std::cout << "\n\nEXCEPTION (z3_exception) during test: " << m_name << ": " << msg << "\n\n";
m_record->m_result = test_result::error;
m_record->m_error_message = msg;
if (!collect_test_records)
throw;
}
catch(std::exception const& e) {
m_record->end = test_record::clock_t::now();
char const* msg = e.what();
if (!msg)
msg = "(unknown std::exception)";
std::cout << "\n\nEXCEPTION (std::exception) during test: " << m_name << ": " << msg << "\n\n";
m_record->m_result = test_result::error;
m_record->m_error_message = msg;
if (!collect_test_records)
throw;
}
catch(...) {
m_record->end = test_record::clock_t::now();
char const* msg = "(unknown throwable)";
std::cout << "\n\nEXCEPTION (unknown throwable) during test: " << m_name << ": " << msg << "\n\n";
m_record->m_result = test_result::error;
m_record->m_error_message = msg;
if (!collect_test_records)
throw;
}
rec->m_answer = m_last_result;
rec->m_result = (m_last_result == l_undef) ? test_result::resource_out : test_result::ok;
}
// TODO: all unit tests should save result somewhere, and then at the end we print a table
// Highlight undef results
// If low conflict limit doesn't work, increase automatically and try again?
void expect_unsat() {
SASSERT_EQ(m_record->m_expected, l_undef);
m_record->m_expected = l_false;
if (m_record->m_result == test_result::error)
auto* rec = m_last_finished;
SASSERT(rec);
SASSERT_EQ(rec->m_expected, l_undef);
SASSERT_EQ(rec->m_answer, m_last_result);
rec->m_expected = l_false;
if (rec->m_result == test_result::error)
return;
if (m_last_result != l_false && m_last_result != l_undef) {
m_record->m_result = test_result::wrong_answer;
rec->m_result = test_result::wrong_answer;
LOG_H1("FAIL: " << m_name << ": expected UNSAT, got " << m_last_result << "!");
if (!collect_test_records)
VERIFY(false);
@ -260,9 +296,12 @@ namespace polysat {
}
void expect_sat(std::vector<std::pair<dd::pdd, unsigned>> const& expected_assignment = {}) {
SASSERT_EQ(m_record->m_expected, l_undef);
m_record->m_expected = l_true;
if (m_record->m_result == test_result::error)
auto* rec = m_last_finished;
SASSERT(rec);
SASSERT_EQ(rec->m_expected, l_undef);
SASSERT_EQ(rec->m_answer, m_last_result);
rec->m_expected = l_true;
if (rec->m_result == test_result::error)
return;
if (m_last_result == l_true) {
for (auto const& p : expected_assignment) {
@ -271,7 +310,7 @@ namespace polysat {
SASSERT(v_pdd.is_monomial() && !v_pdd.is_val());
auto const v = v_pdd.var();
if (get_value(v) != expected_value) {
m_record->m_result = test_result::wrong_model;
rec->m_result = test_result::wrong_model;
LOG_H1("FAIL: " << m_name << ": expected assignment v" << v << " := " << expected_value << ", got value " << get_value(v) << "!");
if (!collect_test_records)
VERIFY(false);
@ -279,7 +318,7 @@ namespace polysat {
}
}
else if (m_last_result == l_false) {
m_record->m_result = test_result::wrong_answer;
rec->m_result = test_result::wrong_answer;
LOG_H1("FAIL: " << m_name << ": expected SAT, got " << m_last_result << "!");
if (!collect_test_records)
VERIFY(false);
@ -290,23 +329,24 @@ namespace polysat {
template <typename Test, typename... Args>
void run(Test tst, Args... args)
{
bool got_exception = false;
try {
tst(args...);
}
catch(z3_exception const& e) {
// TODO: collect in record
got_exception = true;
test_records.active_or_new_record()->set_error(with_default(e.msg(), "unknown z3_exception"));
if (!collect_test_records)
throw;
}
catch(std::exception const& e) {
// TODO: collect in record
got_exception = true;
test_records.active_or_new_record()->set_error(with_default(e.what(), "unknown std::exception"));
if (!collect_test_records)
throw;
}
catch(...) {
got_exception = true;
test_records.active_or_new_record()->set_error("unknown throwable");
if (!collect_test_records)
throw;
}
if (got_exception && !collect_test_records)
throw;
}
#define RUN(tst) run([]() { tst; })
@ -702,13 +742,12 @@ namespace polysat {
*
* We do overflow checks by doubling the base bitwidth here.
*/
static void test_fixed_point_arith_mul_div_inverse() {
static void test_fixed_point_arith_mul_div_inverse(unsigned base_bw = 5) {
scoped_solver s(__func__);
auto baseBw = 5;
auto max_int_const = 31; // (2^5 - 1) -- change this when you change baseBw
auto max_int_const = rational::power_of_two(base_bw) - 1;
auto bw = 2 * baseBw;
auto bw = 2 * base_bw;
auto max_int = s.var(s.add_var(bw));
s.add_eq(max_int - max_int_const);
@ -720,8 +759,9 @@ namespace polysat {
s.add_ult(0, b); // b > 0
// scaling factor (setting it, somewhat arbitrarily, to max_int/3)
auto sf_val = div(max_int_const, rational(3));
auto sf = s.var(s.add_var(bw));
s.add_eq(sf - (max_int_const/3));
s.add_eq(sf - sf_val);
// (a * b) / sf = quot1 <=> quot1 * sf + rem1 - (a * b) = 0
auto quot1 = s.var(s.add_var(bw));
@ -753,15 +793,14 @@ namespace polysat {
s.push();
s.add_ult(a, quot3);
s.check();
s.expect_unsat();
// s.expect_unsat();
s.pop();
// s.push();
// s.add_ult(quot3 + em, a);
// s.check();
s.push();
s.add_ult(quot3 + em, a);
s.check();
// s.expect_unsat();
// s.pop();
s.pop();
}
/*
@ -832,10 +871,6 @@ namespace polysat {
s.check();
// s.expect_unsat();
s.pop();
//exit(0);
}
/** Monotonicity under bounds,
@ -1500,9 +1535,7 @@ namespace polysat {
static void STD_CALL polysat_on_ctrl_c(int) {
signal(SIGINT, SIG_DFL);
using namespace polysat;
if (!test_records.empty())
test_records.pop_back(); // last record is likely incomplete
display_test_records(test_records, std::cout);
test_records.display(std::cout);
raise(SIGINT);
}
@ -1511,6 +1544,7 @@ void tst_polysat() {
#if 0 // Enable this block to run a single unit test with detailed output.
collect_test_records = false;
test_polysat::test_pop_conflict();
// test_polysat::test_l2();
// test_polysat::test_ineq1();
// test_polysat::test_quot_rem();
@ -1519,7 +1553,7 @@ void tst_polysat() {
// test_polysat::test_band2();
// test_polysat::test_quot_rem_incomplete();
// test_polysat::test_monot();
test_polysat::test_fixed_point_arith_div_mul_inverse();
// test_polysat::test_fixed_point_arith_div_mul_inverse();
return;
#endif
@ -1569,6 +1603,7 @@ void tst_polysat() {
RUN(test_polysat::test_monot_bounds());
RUN(test_polysat::test_monot_bounds_full());
RUN(test_polysat::test_monot_bounds_simple(8));
RUN(test_polysat::test_fixed_point_arith_mul_div_inverse());
RUN(test_polysat::test_fixed_point_arith_div_mul_inverse());
RUN(test_polysat::test_ineq_axiom1());
@ -1598,5 +1633,5 @@ void tst_polysat() {
// test_fi::randomized();
if (collect_test_records)
display_test_records(test_records, std::cout);
test_records.display(std::cout);
}