Unit testing fixes

2025-04-23 17:15:31 +00:00 · 2022-11-28 18:05:25 +01:00 · 2022-11-28 18:05:25 +01:00 · c488a766b5
commit c488a766b5
parent 3d79cddf33
1 changed files with 183 additions and 148 deletions
--- a/src/test/polysat.cpp
+++ b/src/test/polysat.cpp
@ -78,83 +78,140 @@ namespace polysat {
        error,
    };

+    template <typename T>
+    T* with_default(T* value, T* default_value) {
+        return value ? value : default_value;
+    }
+
    struct test_record {
        std::string m_name;
        unsigned m_index = 0;  ///< m_index-th check_sat() call in this unit test.
-        lbool m_answer;  ///< what the solver returned
+        lbool m_answer = l_undef;  ///< what the solver returned
        lbool m_expected = l_undef;  ///< the answer we expect (l_undef if unspecified)
        test_result m_result = test_result::undefined;
        std::string m_error_message;
+        bool m_finished = false;

        using clock_t = std::chrono::steady_clock;
-        clock_t::time_point start;
-        clock_t::time_point end;
+        clock_t::time_point m_start;
+        clock_t::time_point m_end;
+
+        void set_error(char const* msg) {
+            m_result = test_result::error;
+            m_error_message = msg;
+            m_finished = true;
+        }
+
+        std::ostream& display(std::ostream& out, unsigned name_width = 0) const;
    };

-    vector<test_record> test_records;
-    bool collect_test_records = true;
+    std::ostream& operator<<(std::ostream& out, test_record const& r) { return r.display(out); }

-    void display_test_records(vector<test_record> const& recs, std::ostream& out) {
+    class test_record_manager {
+        scoped_ptr_vector<test_record> m_records;
+
+        std::string m_name;
+        unsigned m_index = 0;
+
+    public:
+        void begin_batch(std::string name);
+        void end_batch();
+        test_record* new_record();
+        test_record* active_or_new_record();
+        void display(std::ostream& out) const;
+    };
+
+    void test_record_manager::begin_batch(std::string name) {
+        end_batch();
+        if (m_name != name) {
+            m_name = name;
+            m_index = 0;
+        }
+    }
+
+    void test_record_manager::end_batch() {
+        // Kick out unfinished records (they have the wrong name)
+        while (!m_records.empty() && !m_records.back()->m_finished)
+            m_records.pop_back();
+    }
+
+    test_record* test_record_manager::new_record() {
+        auto* rec = alloc(test_record);
+        rec->m_name = m_name;
+        rec->m_index = ++m_index;
+        m_records.push_back(rec);
+        return rec;
+    }
+
+    test_record* test_record_manager::active_or_new_record() {
+        if (m_records.empty() || m_records.back()->m_finished)
+            return new_record();
+        else
+            return m_records.back();
+    }
+
+    std::ostream& test_record::display(std::ostream& out, unsigned name_width) const {
+        if (!m_finished)
+            out << "UNFINISHED ";
+        out << m_name;
+        if (m_index != 1)
+            out << " (" << m_index << ") ";
+        else
+            out << "     ";
+        for (size_t i = m_name.length(); i < name_width; ++i)
+            out << ' ';
+        std::chrono::duration<double> d = m_end - m_start;
+        if (d.count() >= 0.15) {
+            out << std::fixed << std::setprecision(1);
+            out << std::setw(4) << d.count() << "s ";
+        }
+        else
+            out << "      ";
+        switch (m_answer) {
+        case l_undef: out << "      "; break;
+        case l_true:  out << "SAT   "; break;
+        case l_false: out << "UNSAT "; break;
+        }
+        switch (m_result) {
+        case test_result::undefined: out << "???"; break;
+        case test_result::ok: out << "ok"; break;
+        case test_result::wrong_answer: out << color_red() << "wrong answer, expected " << m_expected << color_reset(); break;
+        case test_result::wrong_model: out << color_red() << "wrong model" << color_reset(); break;
+        case test_result::resource_out: out << color_yellow() << "resource out" << color_reset(); break;
+        case test_result::error: out << color_red() << "error: " << m_error_message << color_reset(); break;
+        }
+        return out;
+    }
+
+    void test_record_manager::display(std::ostream& out) const {
        out << "\n\nTest Results:\n";

        size_t max_name_len = 0;
-        for (test_record const& r : recs)
-            max_name_len = std::max(max_name_len, r.m_name.length());
+        for (test_record const* r : m_records) {
+            if (!r->m_finished)
+                continue;
+            max_name_len = std::max(max_name_len, r->m_name.length());
+        }

-        size_t n_total = recs.size();
+        size_t n_total = m_records.size();
        size_t n_sat = 0;
        size_t n_unsat = 0;
        size_t n_wrong = 0;
        size_t n_error = 0;

-        for (test_record const& r : recs) {
-            out << r.m_name;
-            if (r.m_index != 1)
-                out << " (" << r.m_index << ") ";
-            else
-                out << "     ";
-            for (size_t i = r.m_name.length(); i < max_name_len; ++i)
-                out << ' ';
-            std::chrono::duration<double> d = r.end - r.start;
-            if (d.count() >= 0.15) {
-                out << std::fixed << std::setprecision(1);
-                out << std::setw(4) << d.count() << "s ";
-            }
-            else
-                out << "      ";
-            switch (r.m_answer) {
-            case l_undef: out << "      "; break;
-            case l_true:  out << "SAT   "; break;
-            case l_false: out << "UNSAT "; break;
-            }
-            switch (r.m_result) {
-            case test_result::undefined:
-                out << "???";
-                break;
-            case test_result::ok:
-                out << "ok";
-                if (r.m_answer == l_true)
-                    n_sat++;
-                if (r.m_answer == l_false)
-                    n_unsat++;
-                break;
-            case test_result::wrong_answer:
-                out << color_red() << "wrong answer, expected " << r.m_expected << color_reset();
-                n_wrong++;
-                break;
-            case test_result::wrong_model:
-                out << color_red() << "wrong model" << color_reset();
-                n_wrong++;
-                break;
-            case test_result::resource_out:
-                out << color_yellow() << "resource out" << color_reset();
-                break;
-            case test_result::error:
-                out << color_red() << "error: " << r.m_error_message << color_reset();
-                n_error++;
-                break;
-            }
+        for (test_record const* r : m_records) {
+            if (!r->m_finished)
+                continue;
+            r->display(out, max_name_len);
            out << std::endl;
+            if (r->m_result == test_result::ok && r->m_answer == l_true)
+                n_sat++;
+            if (r->m_result == test_result::ok && r->m_answer == l_false)
+                n_unsat++;
+            if (r->m_result == test_result::wrong_answer || r->m_result == test_result::wrong_model)
+                n_wrong++;
+            if (r->m_result == test_result::error)
+                n_error++;
        }
        out << n_total << " tests, " << (n_sat + n_unsat) << " ok (" << n_sat << " sat, " << n_unsat << " unsat)";
        if (n_wrong)
@ -164,6 +221,9 @@ namespace polysat {
        out << std::endl;
    }

+    test_record_manager test_records;
+    bool collect_test_records = true;
+
    // test resolve, factoring routines
    // auxiliary

@ -174,13 +234,19 @@ namespace polysat {
    class scoped_solver : public solver_scope, public solver {
        std::string m_name;
        lbool m_last_result = l_undef;
-        test_record* m_record = nullptr;
-        unsigned m_record_idx = 0;
+        test_record* m_last_finished = nullptr;

    public:
        scoped_solver(std::string name): solver(lim), m_name(name) {
-            std::cout << std::string(78, '#') << "\n\nSTART: " << m_name << "\n";
+            std::cout << std::string(78, '#') << "\n\n";
+            std::cout << "START: " << m_name << "\n";
            set_max_conflicts(10);
+
+            test_records.begin_batch(name);
+        }
+
+        ~scoped_solver() {
+            test_records.end_batch();
        }

        void set_max_conflicts(unsigned c) {
@ -190,69 +256,39 @@ namespace polysat {
        }

        void check() {
-            test_records.push_back({});
-            m_record = &test_records.back();
-            m_record->m_name = m_name;
-            m_record->m_index = ++m_record_idx;
-            m_record->m_answer = l_undef;
-            m_record->m_expected = l_undef;
-            m_record->m_result = test_result::undefined;
-            m_record->m_error_message = "";
-            try {
-                m_record->start = test_record::clock_t::now();
+            auto* rec = test_records.active_or_new_record();
+            rec->m_finished = true;
+            m_last_finished = rec;
+            SASSERT(rec->m_answer == l_undef);
+            SASSERT(rec->m_expected == l_undef);
+            SASSERT(rec->m_result == test_result::undefined);
+            SASSERT(rec->m_error_message == "");
+            {
+                rec->m_start = test_record::clock_t::now();
+                on_scope_exit end_timer([rec]() {
+                    rec->m_end = test_record::clock_t::now();
+                });
                m_last_result = check_sat();
-                m_record->end = test_record::clock_t::now();
-                std::cout << "DONE: " << m_name << ": " << m_last_result << "\n";
-                statistics st;
-                collect_statistics(st);
-                std::cout << st << "\n";
+            }
+            std::cout << "DONE: " << m_name << ": " << m_last_result << "\n";
+            statistics st;
+            collect_statistics(st);
+            std::cout << st << "\n";

-                m_record->m_answer = m_last_result;
-                m_record->m_result = (m_last_result == l_undef) ? test_result::resource_out : test_result::ok;
-            }
-            catch(z3_exception const& e) {
-                m_record->end = test_record::clock_t::now();
-                char const* msg = e.msg();
-                if (!msg)
-                    msg = "(unknown z3_exception)";
-                std::cout << "\n\nEXCEPTION (z3_exception) during test: " << m_name << ": " << msg << "\n\n";
-                m_record->m_result = test_result::error;
-                m_record->m_error_message = msg;
-                if (!collect_test_records)
-                    throw;
-            }
-            catch(std::exception const& e) {
-                m_record->end = test_record::clock_t::now();
-                char const* msg = e.what();
-                if (!msg)
-                    msg = "(unknown std::exception)";
-                std::cout << "\n\nEXCEPTION (std::exception) during test: " << m_name << ": " << msg << "\n\n";
-                m_record->m_result = test_result::error;
-                m_record->m_error_message = msg;
-                if (!collect_test_records)
-                    throw;
-            }
-            catch(...) {
-                m_record->end = test_record::clock_t::now();
-                char const* msg = "(unknown throwable)";
-                std::cout << "\n\nEXCEPTION (unknown throwable) during test: " << m_name << ": " << msg << "\n\n";
-                m_record->m_result = test_result::error;
-                m_record->m_error_message = msg;
-                if (!collect_test_records)
-                    throw;
-            }
+            rec->m_answer = m_last_result;
+            rec->m_result = (m_last_result == l_undef) ? test_result::resource_out : test_result::ok;
        }
-        // TODO: all unit tests should save result somewhere, and then at the end we print a table
-        //       Highlight undef results
-        //       If low conflict limit doesn't work, increase automatically and try again?

        void expect_unsat() {
-            SASSERT_EQ(m_record->m_expected, l_undef);
-            m_record->m_expected = l_false;
-            if (m_record->m_result == test_result::error)
+            auto* rec = m_last_finished;
+            SASSERT(rec);
+            SASSERT_EQ(rec->m_expected, l_undef);
+            SASSERT_EQ(rec->m_answer, m_last_result);
+            rec->m_expected = l_false;
+            if (rec->m_result == test_result::error)
                return;
            if (m_last_result != l_false && m_last_result != l_undef) {
-                m_record->m_result = test_result::wrong_answer;
+                rec->m_result = test_result::wrong_answer;
                LOG_H1("FAIL: " << m_name << ": expected UNSAT, got " << m_last_result << "!");
                if (!collect_test_records)
                    VERIFY(false);
@ -260,9 +296,12 @@ namespace polysat {
        }

        void expect_sat(std::vector<std::pair<dd::pdd, unsigned>> const& expected_assignment = {}) {
-            SASSERT_EQ(m_record->m_expected, l_undef);
-            m_record->m_expected = l_true;
-            if (m_record->m_result == test_result::error)
+            auto* rec = m_last_finished;
+            SASSERT(rec);
+            SASSERT_EQ(rec->m_expected, l_undef);
+            SASSERT_EQ(rec->m_answer, m_last_result);
+            rec->m_expected = l_true;
+            if (rec->m_result == test_result::error)
                return;
            if (m_last_result == l_true) {
                for (auto const& p : expected_assignment) {
@ -271,7 +310,7 @@ namespace polysat {
                    SASSERT(v_pdd.is_monomial() && !v_pdd.is_val());
                    auto const v = v_pdd.var();
                    if (get_value(v) != expected_value) {
-                        m_record->m_result = test_result::wrong_model;
+                        rec->m_result = test_result::wrong_model;
                        LOG_H1("FAIL: " << m_name << ": expected assignment v" << v << " := " << expected_value << ", got value " << get_value(v) << "!");
                        if (!collect_test_records)
                            VERIFY(false);
@ -279,7 +318,7 @@ namespace polysat {
                }
            }
            else if (m_last_result == l_false) {
-                m_record->m_result = test_result::wrong_answer;
+                rec->m_result = test_result::wrong_answer;
                LOG_H1("FAIL: " << m_name << ": expected SAT, got " << m_last_result << "!");
                if (!collect_test_records)
                    VERIFY(false);
@ -290,23 +329,24 @@ namespace polysat {
    template <typename Test, typename... Args>
    void run(Test tst, Args... args)
    {
-        bool got_exception = false;
        try {
            tst(args...);
        }
        catch(z3_exception const& e) {
-            // TODO: collect in record
-            got_exception = true;
+            test_records.active_or_new_record()->set_error(with_default(e.msg(), "unknown z3_exception"));
+            if (!collect_test_records)
+                throw;
        }
        catch(std::exception const& e) {
-            // TODO: collect in record
-            got_exception = true;
+            test_records.active_or_new_record()->set_error(with_default(e.what(), "unknown std::exception"));
+            if (!collect_test_records)
+                throw;
        }
        catch(...) {
-            got_exception = true;
+            test_records.active_or_new_record()->set_error("unknown throwable");
+            if (!collect_test_records)
+                throw;
        }
-        if (got_exception && !collect_test_records)
-            throw;
    }

    #define RUN(tst) run([]() { tst; })
@ -702,13 +742,12 @@ namespace polysat {
         *
         * We do overflow checks by doubling the base bitwidth here.
         */
-        static void test_fixed_point_arith_mul_div_inverse() {
+        static void test_fixed_point_arith_mul_div_inverse(unsigned base_bw = 5) {
            scoped_solver s(__func__);

-            auto baseBw = 5;
-            auto max_int_const = 31; // (2^5 - 1) -- change this when you change baseBw
+            auto max_int_const = rational::power_of_two(base_bw) - 1;

-            auto bw = 2 * baseBw;
+            auto bw = 2 * base_bw;
            auto max_int = s.var(s.add_var(bw));
            s.add_eq(max_int - max_int_const);

@ -720,8 +759,9 @@ namespace polysat {
            s.add_ult(0, b); // b > 0

            // scaling factor (setting it, somewhat arbitrarily, to max_int/3)
+            auto sf_val = div(max_int_const, rational(3));
            auto sf = s.var(s.add_var(bw));
-            s.add_eq(sf - (max_int_const/3));
+            s.add_eq(sf - sf_val);

            // (a * b) / sf = quot1 <=> quot1 * sf + rem1 - (a * b) = 0
            auto quot1 = s.var(s.add_var(bw));
@ -753,15 +793,14 @@ namespace polysat {
            s.push();
            s.add_ult(a, quot3);
            s.check();
-            s.expect_unsat();
+            // s.expect_unsat();
            s.pop();

-
-            // s.push();
-            // s.add_ult(quot3 + em, a);
-            // s.check();
+            s.push();
+            s.add_ult(quot3 + em, a);
+            s.check();
            // s.expect_unsat();
-            // s.pop();
+            s.pop();
        }

        /*
@ -832,10 +871,6 @@ namespace polysat {
            s.check();
            //        s.expect_unsat();
            s.pop();
-
-
-
-            //exit(0);
        }

        /** Monotonicity under bounds,
@ -1500,9 +1535,7 @@ namespace polysat {
 static void STD_CALL polysat_on_ctrl_c(int) {
    signal(SIGINT, SIG_DFL);
    using namespace polysat;
-    if (!test_records.empty())
-        test_records.pop_back();  // last record is likely incomplete
-    display_test_records(test_records, std::cout);
+    test_records.display(std::cout);
    raise(SIGINT);
 }

@ -1511,6 +1544,7 @@ void tst_polysat() {

 #if 0  // Enable this block to run a single unit test with detailed output.
    collect_test_records = false;
+    test_polysat::test_pop_conflict();
    // test_polysat::test_l2();
    // test_polysat::test_ineq1();
    // test_polysat::test_quot_rem();
@ -1519,7 +1553,7 @@ void tst_polysat() {
    // test_polysat::test_band2();
    // test_polysat::test_quot_rem_incomplete();
    // test_polysat::test_monot();
-    test_polysat::test_fixed_point_arith_div_mul_inverse();
+    // test_polysat::test_fixed_point_arith_div_mul_inverse();
    return;
 #endif

@ -1569,6 +1603,7 @@ void tst_polysat() {
    RUN(test_polysat::test_monot_bounds());
    RUN(test_polysat::test_monot_bounds_full());
    RUN(test_polysat::test_monot_bounds_simple(8));
+    RUN(test_polysat::test_fixed_point_arith_mul_div_inverse());
    RUN(test_polysat::test_fixed_point_arith_div_mul_inverse());

    RUN(test_polysat::test_ineq_axiom1());
@ -1598,5 +1633,5 @@ void tst_polysat() {
    // test_fi::randomized();

    if (collect_test_records)
-        display_test_records(test_records, std::cout);
+        test_records.display(std::cout);
 }