Unit cases

2026-07-06 15:26:15 +00:00 · 2026-03-12 11:13:18 +01:00 · 2026-03-12 11:13:18 +01:00 · 1351efe9af
commit 1351efe9af
parent a567a7edfb
3 changed files with 344 additions and 34 deletions
--- a/scripts/compare_seq_solvers.py
+++ b/scripts/compare_seq_solvers.py
@ -14,6 +14,7 @@ and reports:
 """

 import argparse
+import re
 import subprocess
 import sys
 import time
@ -29,6 +30,47 @@ SOLVERS = {
 }


+_STATUS_RE = re.compile(r'\(\s*set-info\s+:status\s+(sat|unsat|unknown)\s*\)')
+
+
+def read_smtlib_status(smt_file: Path) -> str:
+    """Read the expected status from the SMT-LIB (set-info :status ...) directive.
+    Returns 'sat', 'unsat', or 'unknown'.
+    """
+    try:
+        text = smt_file.read_text(encoding="utf-8", errors="replace")
+        m = _STATUS_RE.search(text)
+        if m:
+            return m.group(1)
+    except OSError:
+        pass
+    return "unknown"
+
+
+def determine_status(res_nseq: str, res_seq: str, smtlib_status: str) -> str:
+    """Determine the ground-truth status of a problem.
+    Priority: if both solvers agree on sat/unsat, use that; otherwise if one
+    solver gives sat/unsat, use that; otherwise fall back to the SMT-LIB
+    annotation; otherwise 'unknown'.
+    """
+    definite = {"sat", "unsat"}
+    if res_nseq in definite and res_nseq == res_seq:
+        return res_nseq
+    if res_nseq in definite and res_seq not in definite:
+        return res_nseq
+    if res_seq in definite and res_nseq not in definite:
+        return res_seq
+    # Disagreement (sat vs unsat) — fall back to SMTLIB annotation
+    if res_nseq in definite and res_seq in definite and res_nseq != res_seq:
+        if smtlib_status in definite:
+            return smtlib_status
+        return "unknown"
+    # Neither solver gave a definite answer
+    if smtlib_status in definite:
+        return smtlib_status
+    return "unknown"
+
+
 def run_z3(z3_bin: str, smt_file: Path, solver_arg: str) -> tuple[str, float]:
    """Run z3 on a file with the given solver argument.
    Returns (result, elapsed) where result is 'sat', 'unsat', 'unknown', or 'timeout'/'error'.
@ -81,13 +123,17 @@ def process_file(z3_bin: str, smt_file: Path) -> dict:
    res_nseq, t_nseq = run_z3(z3_bin, smt_file, SOLVERS["nseq"])
    res_seq,  t_seq  = run_z3(z3_bin, smt_file, SOLVERS["seq"])
    cat = classify(res_nseq, res_seq)
+    smtlib_status = read_smtlib_status(smt_file)
+    status = determine_status(res_nseq, res_seq, smtlib_status)
    return {
-        "file":     smt_file,
-        "nseq":     res_nseq,
-        "seq":      res_seq,
-        "t_nseq":   t_nseq,
-        "t_seq":    t_seq,
-        "category": cat,
+        "file":           smt_file,
+        "nseq":           res_nseq,
+        "seq":            res_seq,
+        "t_nseq":         t_nseq,
+        "t_seq":          t_seq,
+        "category":       cat,
+        "smtlib_status":  smtlib_status,
+        "status":         status,
    }


@ -138,24 +184,45 @@ def main():
        categories.setdefault(r["category"], []).append(r)

    print("\n" + "="*70)
-    print("SUMMARY")
-    print("="*70)
-
+    print("TOTALS")
    for cat, items in categories.items():
-        if not items:
-            continue
+        print(f"  {cat:40s}: {len(items)}")
+    print(f"{'='*70}")
+
+    # ── Per-solver timeout & divergence file lists ─────────────────────────
+    nseq_timeouts = [r for r in results if r["nseq"] == "timeout"]
+    seq_timeouts  = [r for r in results if r["seq"]  == "timeout"]
+    both_to       = categories["both_timeout"]
+    diverged      = categories["diverge"]
+
+    def _print_file_list(label: str, items: list[dict]):
        print(f"\n{'─'*70}")
-        print(f"  {cat.upper().replace('_', ' ')} ({len(items)} files)")
+        print(f"  {label} ({len(items)} files)")
        print(f"{'─'*70}")
        for r in sorted(items, key=lambda x: x["file"]):
            print(f"  {r['file']}")
-            if cat not in ("both_timeout", "both_agree"):
-                print(f"    nseq={r['nseq']:8s} ({r['t_nseq']:.1f}s)   seq={r['seq']:8s} ({r['t_seq']:.1f}s)")

-    print(f"\n{'='*70}")
-    print(f"TOTALS")
-    for cat, items in categories.items():
-        print(f"  {cat:40s}: {len(items)}")
+    if nseq_timeouts:
+        _print_file_list("NSEQ TIMES OUT", nseq_timeouts)
+    if seq_timeouts:
+        _print_file_list("SEQ TIMES OUT", seq_timeouts)
+    if both_to:
+        _print_file_list("BOTH TIME OUT", both_to)
+    if diverged:
+        _print_file_list("DIVERGE (sat vs unsat)", diverged)
+
+    print()
+
+    # ── Problem status statistics ────────────────────────────────────────────
+    status_counts = {"sat": 0, "unsat": 0, "unknown": 0}
+    for r in results:
+        status_counts[r["status"]] = status_counts.get(r["status"], 0) + 1
+
+    print(f"\nPROBLEM STATUS  (total {len(results)} files)")
+    print(f"{'─'*40}")
+    print(f"  {'sat':12s}: {status_counts['sat']:5d}  ({100*status_counts['sat']/len(results):.1f}%)")
+    print(f"  {'unsat':12s}: {status_counts['unsat']:5d}  ({100*status_counts['unsat']/len(results):.1f}%)")
+    print(f"  {'unknown':12s}: {status_counts['unknown']:5d}  ({100*status_counts['unknown']/len(results):.1f}%)")
    print(f"{'='*70}\n")

    # ── Optional CSV output ───────────────────────────────────────────────────
@ -163,7 +230,7 @@ def main():
        import csv
        csv_path = Path(args.csv)
        with csv_path.open("w", newline="", encoding="utf-8") as f:
-            writer = csv.DictWriter(f, fieldnames=["file", "nseq", "seq", "t_nseq", "t_seq", "category"])
+            writer = csv.DictWriter(f, fieldnames=["file", "nseq", "seq", "t_nseq", "t_seq", "category", "smtlib_status", "status"])
            writer.writeheader()
            for r in sorted(results, key=lambda x: x["file"]):
                writer.writerow({**r, "file": str(r["file"])})
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@ -1293,6 +1293,9 @@ namespace seq {
    }

    simplify_result nielsen_node::simplify_and_init(nielsen_graph& g, svector<nielsen_edge*> const& cur_path) {
+        if (m_is_extended)
+            return simplify_result::proceed;
+
        euf::sgraph& sg = g.sg();
        ast_manager& m = sg.get_manager();
        arith_util arith(m);
@ -1379,6 +1382,44 @@ namespace seq {
                        changed = true;
                    }
                }
+
+                // pass 2b: power-character prefix inconsistency
+                // (mirrors ZIPT's SimplifyDir power unit case + IsPrefixConsistent)
+                // When one side starts with a power u^n whose base starts with
+                // char a, and the other side starts with a different char b,
+                // the power exponent must be 0.
+                // Creates a single deterministic child with the substitution and
+                // constraint as edge labels so they appear in the graph.
+                // Guard: skip if we already created a child (re-entry via iterative deepening).
+                if (!eq.is_trivial() && eq.m_lhs && eq.m_rhs) {
+                    euf::snode* lh = eq.m_lhs->first();
+                    euf::snode* rh = eq.m_rhs->first();
+                    for (int dir = 0; dir < 2; dir++) {
+                        euf::snode* pow_head = (dir == 0) ? lh : rh;
+                        euf::snode* other_head = (dir == 0) ? rh : lh;
+                        if (!pow_head || !pow_head->is_power() || !other_head || !other_head->is_char())
+                            continue;
+                        euf::snode* base_sn = pow_head->arg(0);
+                        if (!base_sn) continue;
+                        euf::snode* base_first = base_sn->first();
+                        if (!base_first || !base_first->is_char()) continue;
+                        if (base_first->id() == other_head->id()) continue;
+                        // base starts with different char → create child with exp=0 + power→ε
+                        nielsen_node* child = g.mk_child(this);
+                        nielsen_edge* e = g.mk_edge(this, child, true);
+                        nielsen_subst s(pow_head, sg.mk_empty(), eq.m_dep);
+                        e->add_subst(s);
+                        child->apply_subst(sg, s);
+                        expr* pow_exp = get_power_exp_expr(pow_head);
+                        if (pow_exp) {
+                            expr* zero = arith.mk_numeral(rational(0), true);
+                            e->add_side_int(g.mk_int_constraint(
+                                pow_exp, zero, int_constraint_kind::eq, eq.m_dep));
+                        }
+                        set_extended(true);
+                        return simplify_result::proceed;
+                    }
+                }
            }

            // pass 3: power simplification (mirrors ZIPT's LcpCompression +
@ -1455,7 +1496,8 @@ namespace seq {
                // exponent powers (e.g. base^1 → base created by 3c) before
                // 3e attempts LP-based elimination which would introduce a
                // needless fresh variable.
-                if (changed) continue;
+                if (changed)
+                    continue;

                // 3d: power prefix elimination — when both sides start with a
                // power of the same base, cancel the common power prefix.
@ -1576,6 +1618,211 @@ namespace seq {
        // remaining regex memberships and add to m_int_constraints.
        init_var_bounds_from_mems();

+        // pass 5: variable-character look-ahead substitution
+        // (mirrors ZIPT's StrEq.SimplifyFinal)
+        // When one side starts with a variable x and the other with chars,
+        // look ahead to find how many leading chars can be deterministically
+        // assigned to x without splitting, by checking prefix consistency.
+        // Guard: skip if we already created a child (re-entry via iterative deepening).
+        for (str_eq& eq : m_str_eq) {
+            if (eq.is_trivial() || !eq.m_lhs || !eq.m_rhs)
+                continue;
+
+            // Orient: var_side starts with a variable, char_side starts with a char
+            euf::snode* var_side = nullptr;
+            euf::snode* char_side = nullptr;
+            euf::snode* lhead = eq.m_lhs->first();
+            euf::snode* rhead = eq.m_rhs->first();
+            if (!lhead || !rhead) continue;
+
+            if (lhead->is_var() && rhead->is_char()) {
+                var_side = eq.m_lhs;
+                char_side = eq.m_rhs;
+            } else if (rhead->is_var() && lhead->is_char()) {
+                var_side = eq.m_rhs;
+                char_side = eq.m_lhs;
+            } else {
+                continue;
+            }
+
+            euf::snode_vector var_toks, char_toks;
+            var_side->collect_tokens(var_toks);
+            char_side->collect_tokens(char_toks);
+            if (var_toks.size() <= 1 || char_toks.empty())
+                continue;
+
+            euf::snode* var_node = var_toks[0];
+            SASSERT(var_node->is_var());
+
+            // For increasing prefix lengths i (chars from char_side),
+            // check if x → char_toks[0..i-1] · x would be consistent by
+            // comparing tokens after x on the var_side against the shifted
+            // char_side tokens.
+            // Mirrors ZIPT's SimplifyFinal loop: when prefix i is proven
+            // inconsistent (char clash), we continue to i+1. When we reach
+            // a prefix that is consistent or indeterminate, we stop.
+            // The final i is the substitution length: x → char_toks[0..i-1] · x.
+            // If ALL prefixes are inconsistent, i equals the full leading-char
+            // count and we still substitute (x must be at least that long).
+            unsigned i = 0;
+            for (; i < char_toks.size() && char_toks[i]->is_char(); ++i) {
+                unsigned j1 = 1;   // index into var_toks (after the variable)
+                unsigned j2 = i;   // index into char_toks (after copied prefix)
+                bool failed = false;
+
+                while (j1 < var_toks.size() && j2 < char_toks.size()) {
+                    euf::snode* st1 = var_toks[j1];
+                    euf::snode* st2 = char_toks[j2];
+
+                    if (!st2->is_char())
+                        break;  // can't compare against non-char — indeterminate
+
+                    if (st1->is_char()) {
+                        if (st1->id() == st2->id()) {
+                            j1++;
+                            j2++;
+                            continue;
+                        }
+                        failed = true;
+                        break;
+                    }
+
+                    if (st1->id() != var_node->id())
+                        break;  // different variable/power — indeterminate
+
+                    // st1 is the same variable x again — compare against
+                    // the chars we would copy (char_toks[0..i-1])
+                    bool inner_indet = false;
+                    for (unsigned l = 0; j2 < char_toks.size() && l < i; ++l) {
+                        st2 = char_toks[j2];
+                        if (!st2->is_char()) {
+                            inner_indet = true;
+                            break;
+                        }
+                        if (st2->id() == char_toks[l]->id()) {
+                            j2++;
+                            continue;
+                        }
+                        failed = true;
+                        break;
+                    }
+                    if (inner_indet || failed) break;
+                    j1++;
+                }
+
+                if (failed)
+                    continue;  // prefix i is inconsistent — try longer
+                break;         // prefix i is consistent/indeterminate — stop
+            }
+
+            if (i == 0)
+                continue;
+
+            // Divergence guard (mirrors ZIPT's HasDepCycle + power skip):
+            // Check whether the next named variable or power token on the
+            // char_side (past the char prefix) would create a dependency
+            // cycle or involve a power (which would cause infinite unwinding).
+
+            // Step 1: find the first variable or power past the char prefix
+            euf::snode* next_var = nullptr;
+            for (unsigned k = i; k < char_toks.size(); ++k) {
+                euf::snode* t = char_toks[k];
+                if (t->is_power()) {
+                    // Power token → skip this equation (would cause divergence)
+                    next_var = nullptr;
+                    goto skip_eq;
+                }
+                if (t->is_var()) {
+                    next_var = t;
+                    break;
+                }
+            }
+
+            // Step 2: if there is a variable, check for Nielsen dependency cycle
+            if (next_var) {
+                // Build Nielsen dependency graph: for each equation, if one side
+                // starts with variable x, then x depends on the first variable
+                // on the other side. (Mirrors ZIPT's GetNielsenDep.)
+                // Then check if there's a path from next_var back to var_node.
+                // Use a u_map<unsigned> to represent edges: var_id → first_dep_var_id.
+                u_map<unsigned> dep_edges;  // var snode id → first dependent var snode id
+                for (str_eq const& other_eq : m_str_eq) {
+                    if (other_eq.is_trivial()) continue;
+                    if (!other_eq.m_lhs || !other_eq.m_rhs) continue;
+
+                    euf::snode* lh2 = other_eq.m_lhs->first();
+                    euf::snode* rh2 = other_eq.m_rhs->first();
+                    if (!lh2 || !rh2) continue;
+
+                    // Orient: head_var leads one side, scan other side for first var
+                    auto record_dep = [&](euf::snode* head_var, euf::snode* other_side) {
+                        euf::snode_vector other_toks;
+                        other_side->collect_tokens(other_toks);
+                        if (lh2->is_var() && rh2->is_var()) {
+                            // Both sides start with vars: bidirectional dependency
+                            if (!dep_edges.contains(lh2->id()))
+                                dep_edges.insert(lh2->id(), rh2->id());
+                            if (!dep_edges.contains(rh2->id()))
+                                dep_edges.insert(rh2->id(), lh2->id());
+                            return;
+                        }
+                        for (unsigned idx = 0; idx < other_toks.size(); ++idx) {
+                            if (other_toks[idx]->is_var()) {
+                                if (!dep_edges.contains(head_var->id()))
+                                    dep_edges.insert(head_var->id(), other_toks[idx]->id());
+                                return;
+                            }
+                        }
+                    };
+
+                    if (lh2->is_var() && !rh2->is_var())
+                        record_dep(lh2, other_eq.m_rhs);
+                    else if (rh2->is_var() && !lh2->is_var())
+                        record_dep(rh2, other_eq.m_lhs);
+                    else if (lh2->is_var() && rh2->is_var())
+                        record_dep(lh2, other_eq.m_rhs);
+                }
+
+                // DFS from next_var to see if we can reach var_node
+                uint_set visited;
+                svector<unsigned> worklist;
+                worklist.push_back(next_var->id());
+                bool cycle_found = false;
+                while (!worklist.empty() && !cycle_found) {
+                    unsigned cur = worklist.back();
+                    worklist.pop_back();
+                    if (cur == var_node->id()) {
+                        cycle_found = true;
+                        break;
+                    }
+                    if (visited.contains(cur))
+                        continue;
+                    visited.insert(cur);
+                    unsigned dep_id;
+                    if (dep_edges.find(cur, dep_id))
+                        worklist.push_back(dep_id);
+                }
+                if (cycle_found)
+                    continue;
+            }
+
+            {
+            // Create a single deterministic child with the substitution as edge label
+            euf::snode* prefix_sn = char_toks[0];
+            for (unsigned j = 1; j < i; ++j)
+                prefix_sn = sg.mk_concat(prefix_sn, char_toks[j]);
+            euf::snode* replacement = sg.mk_concat(prefix_sn, var_node);
+            nielsen_subst s(var_node, replacement, eq.m_dep);
+            nielsen_node* child = g.mk_child(this);
+            nielsen_edge* e = g.mk_edge(this, child, true);
+            e->add_subst(s);
+            child->apply_subst(sg, s);
+            set_extended(true);
+            return simplify_result::proceed;
+            }
+            skip_eq:;
+        }
+
        if (is_satisfied())
            return simplify_result::satisfied;

@ -1680,7 +1927,7 @@ namespace seq {
            ++m_stats.m_num_unknown;
            return search_result::unknown;
        }
-        catch (...) {
+        catch(const std::exception& ex) {
 #ifdef Z3DEBUG
            std::string dot = to_dot();
 #endif
@ -1763,16 +2010,16 @@ namespace seq {

        // explore children
        bool any_unknown = false;
-        for (nielsen_edge* e : node->outgoing()) {
+        for (nielsen_edge *e : node->outgoing()) {
            cur_path.push_back(e);
            // Push a solver scope for this edge and assert its side integer
            // constraints.  The child's own new int_constraints will be asserted
            // inside the recursive call (above).  On return, pop the scope so
            // that backtracking removes those assertions.
            m_solver.push();
-            for (auto const& ic : e->side_int())
+            for (auto const &ic : e->side_int())
                m_solver.assert_expr(int_constraint_to_expr(ic));
-            search_result r = search_dfs(e->tgt(), depth + 1, cur_path);
+            search_result r = search_dfs(e->tgt(), e->is_progress() ? depth : depth + 1, cur_path);
            m_solver.pop(1);
            if (r == search_result::sat)
                return search_result::sat;
@ -1864,8 +2111,7 @@ namespace seq {
                }
                // branch 2: y → char·fresh (progress)
                {
-                    euf::snode* fresh = mk_fresh_var();
-                    euf::snode* replacement = m_sg.mk_concat(lhead, fresh);
+                    euf::snode* replacement = m_sg.mk_concat(lhead, rhead);
                    nielsen_node* child = mk_child(node);
                    nielsen_edge* e = mk_edge(node, child, true);
                    nielsen_subst s(rhead, replacement, eq.m_dep);
@ -1887,8 +2133,7 @@ namespace seq {
                }
                // branch 2: x → char·fresh (progress)
                {
-                    euf::snode* fresh = mk_fresh_var();
-                    euf::snode* replacement = m_sg.mk_concat(rhead, fresh);
+                    euf::snode* replacement = m_sg.mk_concat(rhead, lhead);
                    nielsen_node* child = mk_child(node);
                    nielsen_edge* e = mk_edge(node, child, true);
                    nielsen_subst s(lhead, replacement, eq.m_dep);
@ -1933,8 +2178,7 @@ namespace seq {
            }
            // child 2: x → y·x' (progress)
            {
-                euf::snode* fresh = mk_fresh_var();
-                euf::snode* replacement = m_sg.mk_concat(rhead, fresh);
+                euf::snode* replacement = m_sg.mk_concat(rhead, lhead);
                nielsen_node* child = mk_child(node);
                nielsen_edge* e = mk_edge(node, child, true);
                nielsen_subst s(lhead, replacement, eq.m_dep);
@ -1943,8 +2187,7 @@ namespace seq {
            }
            // child 3: y → x·y' (progress)
            {
-                euf::snode* fresh = mk_fresh_var();
-                euf::snode* replacement = m_sg.mk_concat(lhead, fresh);
+                euf::snode* replacement = m_sg.mk_concat(lhead, rhead);
                nielsen_node* child = mk_child(node);
                nielsen_edge* e = mk_edge(node, child, true);
                nielsen_subst s(rhead, replacement, eq.m_dep);
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@ -421,8 +421,8 @@ namespace smt {
        // here the actual Nielsen solving happens
        auto result = m_nielsen.solve();
        std::cout << "Result: " << (result == seq::nielsen_graph::search_result::sat ? "SAT" : result == seq::nielsen_graph::search_result::unsat ? "UNSAT" : "UNKNOWN") << "\n";
-        m_nielsen.to_dot(std::cout);
-        std::cout << std::endl;
+        // m_nielsen.to_dot(std::cout);
+        // std::cout << std::endl;

        if (result == seq::nielsen_graph::search_result::sat) {
            IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve SAT, sat_node="