Memorize previous conflicts

2026-07-17 12:35:44 +00:00 · 2026-06-25 15:40:53 +02:00 · 2026-06-25 15:40:53 +02:00 · 2aa1d1ee01
commit 2aa1d1ee01
parent d0c0c1b573
3 changed files with 261 additions and 72 deletions
--- a/scripts/compare_seq_solvers.py
+++ b/scripts/compare_seq_solvers.py
@ -1,14 +1,21 @@
 #!/usr/bin/env python3
 """
-Compare z3 string solver configurations, and optionally against an external ZIPT solver.
+Compare z3 string solver configurations, and optionally against external solvers.

 We always run three z3 configurations:
    nseq_md  monadic decomposition (parikh off, eager regex factorization)
    nseq_pa  parikh (parikh on, no regex factorization)
    seq      the old/baseline string solver

+Optionally compare against external solvers:
+    zipt     ZIPT solver (--zipt /path/to/zipt)
+    ostrich  Ostrich solver (--ostrich /path/to/ostrich)
+    noodler  z3-noodler solver (--noodler /path/to/z3noodler)
+
 Usage:
-    python compare_seq_solvers.py <path-to-smtlib-files> --z3 /path/to/z3 [--zipt /path/to/zipt] [--ext .smt2]
+    python compare_seq_solvers.py <path-to-smtlib-files> --z3 /path/to/z3
+        [--zipt /path/to/zipt] [--ostrich /path/to/ostrich] [--noodler /path/to/noodler]
+        [--ext .smt2]

 Finds all .smt2 files under the given path, runs the solvers with a configurable timeout,
 and reports timeouts, divergences, and summary statistics.
@ -106,17 +113,21 @@ def run_z3(z3_bin: str, smt_file: Path, solver_args: list[str], timeout_s: int =
        return f"error:{e}", elapsed


-def run_zipt(zipt_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT) -> tuple[str, float]:
-    """Run ZIPT on a file. Returns (result, elapsed)."""
+def run_external_solver(binary: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT,
+                        extra_args: list[str] | None = None) -> tuple[str, float]:
+    """Run an external solver on a file. Returns (result, elapsed).
+
+    Passes -t:{timeout_ms} as the first argument, matching the Z3/ZIPT convention.
+    Supply extra_args for any solver-specific flags that precede the file path.
+    """
    timeout_ms = timeout_s * 1000
-    cmd = [zipt_bin, f"-t:{timeout_ms}", str(smt_file)]
+    cmd = [binary, f"-t:{timeout_ms}"] + (extra_args or []) + [str(smt_file)]
    start = time.monotonic()
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True,
                              timeout=timeout_s + 5)
        elapsed = time.monotonic() - start
-        out = proc.stdout.strip()
-        return _parse_result(out), elapsed
+        return _parse_result(proc.stdout.strip()), elapsed
    except subprocess.TimeoutExpired:
        elapsed = time.monotonic() - start
        return "timeout", elapsed
@ -151,7 +162,11 @@ def classify(solver_results: dict[str, str]) -> str:


 def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT,
-                 zipt_bin: str | None = None) -> dict:
+                 external_bins: dict[str, str] | None = None) -> dict:
+    """Run all z3 configurations and any external solvers on smt_file.
+
+    external_bins maps solver label (e.g. 'zipt', 'ostrich', 'noodler') to binary path.
+    """
    solver_results: dict[str, str] = {}
    solver_times: dict[str, float] = {}
    for name in SOLVER_NAMES:
@ -162,21 +177,21 @@ def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT,
    cat = classify(solver_results)
    smtlib_status = read_smtlib_status(smt_file)
    status = determine_status(solver_results, smtlib_status)
-    result = {
-        "file":           smt_file,
-        "category":       cat,
-        "smtlib_status":  smtlib_status,
-        "status":         status,
-        "zipt":           None,
-        "t_zipt":         None,
+    result: dict = {
+        "file":          smt_file,
+        "category":      cat,
+        "smtlib_status": smtlib_status,
+        "status":        status,
    }
    for name in SOLVER_NAMES:
        result[name] = solver_results[name]
        result[f"t_{name}"] = solver_times[name]
-    if zipt_bin:
-        res_zipt, t_zipt = run_zipt(zipt_bin, smt_file, timeout_s)
-        result["zipt"]   = res_zipt
-        result["t_zipt"] = t_zipt
+
+    for label, binary in (external_bins or {}).items():
+        res, t = run_external_solver(binary, smt_file, timeout_s)
+        result[label] = res
+        result[f"t_{label}"] = t
+
    return result


@ -193,14 +208,24 @@ def main():
    parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT, metavar="SEC",
                        help=f"Per-solver timeout in seconds (default: {DEFAULT_TIMEOUT})")
    parser.add_argument("--zipt", metavar="PATH", default=None,
-                        help="Path to ZIPT binary (optional; if omitted, ZIPT is not benchmarked)")
+                        help="Path to ZIPT binary (optional)")
+    parser.add_argument("--ostrich", metavar="PATH", default=None,
+                        help="Path to Ostrich binary (optional)")
+    parser.add_argument("--noodler", metavar="PATH", default=None,
+                        help="Path to z3-noodler binary (optional)")
    parser.add_argument("--csv", metavar="FILE", help="Also write results to a CSV file")
    args = parser.parse_args()

    z3_bin = args.z3
-    zipt_bin = args.zipt
    timeout_s = args.timeout

+    # Collect external solvers in declaration order so output columns are stable.
+    external_bins: dict[str, str] = {}
+    if args.zipt:    external_bins["zipt"]    = args.zipt
+    if args.ostrich: external_bins["ostrich"] = args.ostrich
+    if args.noodler: external_bins["noodler"] = args.noodler
+    ext_names = list(external_bins.keys())
+
    root = Path(args.path)
    if not root.exists():
        print(f"Error: path does not exist: {root}", file=sys.stderr)
@ -225,14 +250,13 @@ def main():
        print(f"Sampling: {len(files)} files selected "
              f"(max {args.max_per_folder} per subfolder, {len(by_folder)} subfolder(s))")

-    solvers_label = ", ".join(SOLVER_NAMES)
-    if zipt_bin: solvers_label += ", zipt"
-    print(f"Found {len(files)} files. Solvers: {solvers_label}. "
+    all_solver_labels = SOLVER_NAMES + ext_names
+    print(f"Found {len(files)} files. Solvers: {', '.join(all_solver_labels)}. "
          f"Workers: {args.jobs}, timeout: {timeout_s}s\n")

    results = []
    pool = ThreadPoolExecutor(max_workers=args.jobs)
-    futures = {pool.submit(process_file, z3_bin, f, timeout_s, zipt_bin): f for f in files}
+    futures = {pool.submit(process_file, z3_bin, f, timeout_s, external_bins): f for f in files}
    done = 0
    try:
        for fut in as_completed(futures):
@ -240,10 +264,8 @@ def main():
            r = fut.result()
            results.append(r)
            solver_part = "  ".join(f"{name}={r[name]:8s} ({r[f't_{name}']:.1f}s)" for name in SOLVER_NAMES)
-            zipt_part = ""
-            if zipt_bin:
-                zipt_part = f"  zipt={r['zipt']:8s} ({r['t_zipt']:.1f}s)"
-            print(f"[{done:4d}/{len(files)}] {r['category']:20s}  {solver_part}{zipt_part}  {r['file'].parent.name}/{r['file'].name}")
+            ext_part = "".join(f"  {label}={r[label]:8s} ({r[f't_{label}']:.1f}s)" for label in ext_names)
+            print(f"[{done:4d}/{len(files)}] {r['category']:20s}  {solver_part}{ext_part}  {r['file'].parent.name}/{r['file'].name}")
    except KeyboardInterrupt:
        print("\nInterrupted — cancelling pending tasks.", file=sys.stderr)
        pool.shutdown(wait=False, cancel_futures=True)
@ -283,10 +305,11 @@ def main():
        timeouts = [r for r in results if r[name] == "timeout"]
        if timeouts:
            _print_file_list(f"{name.upper()} TIMES OUT", timeouts)
-    if zipt_bin:
-        zipt_timeouts = [r for r in results if r["zipt"] == "timeout"]
-        if zipt_timeouts:
-            _print_file_list("ZIPT TIMES OUT", zipt_timeouts)
+
+    for label in ext_names:
+        timeouts = [r for r in results if r[label] == "timeout"]
+        if timeouts:
+            _print_file_list(f"{label.upper()} TIMES OUT", timeouts)

    all_to = categories["all_timeout"]
    if all_to:
@ -294,20 +317,24 @@ def main():
    invalid_models = categories.get("invalid_model", [])
    if invalid_models:
        _print_file_list("INVALID MODEL GENERATED", invalid_models)
-    if zipt_bin:
-        all_three_to = [r for r in results
-                        if all(r[name] == "timeout" for name in SOLVER_NAMES) and r["zipt"] == "timeout"]
-        if all_three_to:
-            _print_file_list("ALL SOLVERS (INCL. ZIPT) TIME OUT", all_three_to)
+
+    if ext_names:
+        all_ext_to = [r for r in results
+                      if all(r[name] == "timeout" for name in SOLVER_NAMES)
+                      and all(r[label] == "timeout" for label in ext_names)]
+        if all_ext_to:
+            _print_file_list("ALL SOLVERS TIME OUT", all_ext_to)
+
    if diverged:
        _print_file_list("DIVERGE among z3 configurations (sat vs unsat)", diverged)
-    if zipt_bin:
-        definite = {"sat", "unsat"}
-        zipt_diverged = [r for r in results
-                         if r["zipt"] in definite
-                         and any(r[name] in definite and r[name] != r["zipt"] for name in SOLVER_NAMES)]
-        if zipt_diverged:
-            _print_file_list("DIVERGE involving ZIPT", zipt_diverged)
+
+    definite = {"sat", "unsat"}
+    for label in ext_names:
+        ext_diverged = [r for r in results
+                        if r[label] in definite
+                        and any(r[name] in definite and r[name] != r[label] for name in SOLVER_NAMES)]
+        if ext_diverged:
+            _print_file_list(f"DIVERGE involving {label.upper()}", ext_diverged)

    print()

@ -331,8 +358,8 @@ def main():
        csv_path = Path(args.csv)
        fieldnames = ["file"] + SOLVER_NAMES + [f"t_{name}" for name in SOLVER_NAMES]
        fieldnames.extend(["category", "smtlib_status", "status"])
-        if zipt_bin:
-            fieldnames.extend(["zipt", "t_zipt"])
+        for label in ext_names:
+            fieldnames.extend([label, f"t_{label}"])
        with csv_path.open("w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
            writer.writeheader()
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@ -684,6 +684,9 @@ namespace seq {
        m_partial_dfa_edge_index.clear();
        m_partial_dfa_pin.reset();
        m_projection_extract_idx = 0;
+        m_explored_automaton.reset();
+        m_unsat_node_cache.clear();
+        m_num_cache_hits = 0;
        // m_length_trail.reset();
        // m_length_info.reset();
        m_dep_mgr.reset();
@ -2003,6 +2006,87 @@ namespace seq {
        }
    }

+    // ---- Transposition-table helpers (node memoization) ----------------------
+
+    static bool reason_is_string_only(backtrack_reason r) {
+        switch (r) {
+        case backtrack_reason::regex:
+        case backtrack_reason::regex_widening:
+        case backtrack_reason::symbol_clash:
+        case backtrack_reason::character_range:
+            return true;
+        default:
+            // arithmetic, parikh_image, external, children_failed, unevaluated
+            return false;
+        }
+    }
+
+    bool nielsen_graph::node_unsat_string_only(nielsen_node const* n) const {
+        if (n->m_reason == backtrack_reason::children_failed)
+            return n->m_unsat_cacheable; // set when the children_failed result was formed
+        return reason_is_string_only(n->m_reason);
+    }
+
+    std::vector<unsigned> nielsen_graph::compute_node_signature(nielsen_node const* n) const {
+        std::vector<unsigned> sig;
+        // string equalities (order-independent)
+        {
+            std::vector<std::pair<unsigned,unsigned>> v;
+            for (auto const& e : n->str_eqs())
+                v.emplace_back(e.m_lhs->id(), e.m_rhs->id());
+            std::sort(v.begin(), v.end());
+            sig.push_back(static_cast<unsigned>(v.size()));
+            for (auto const& [a,b] : v) { sig.push_back(a); sig.push_back(b); }
+        }
+        sig.push_back(UINT_MAX); // section separator
+        // string disequalities
+        {
+            std::vector<std::pair<unsigned,unsigned>> v;
+            for (auto const& d : n->str_deqs())
+                v.emplace_back(d.m_lhs->id(), d.m_rhs->id());
+            std::sort(v.begin(), v.end());
+            sig.push_back(static_cast<unsigned>(v.size()));
+            for (auto const& [a,b] : v) { sig.push_back(a); sig.push_back(b); }
+        }
+        sig.push_back(UINT_MAX);
+        // regex memberships incl. view/guard metadata
+        {
+            std::vector<std::vector<unsigned>> v;
+            for (auto const& mm : n->str_mems())
+                v.push_back({ mm.m_str->id(), mm.m_regex->id(),
+                              static_cast<unsigned>(mm.m_kind),
+                              mm.m_root ? mm.m_root->id() : UINT_MAX,
+                              mm.m_nu, mm.m_discharged ? 1u : 0u });
+            std::sort(v.begin(), v.end());
+            sig.push_back(static_cast<unsigned>(v.size()));
+            for (auto const& a : v) for (unsigned x : a) sig.push_back(x);
+        }
+        sig.push_back(UINT_MAX);
+        // character-range constraints (per symbolic unit)
+        {
+            std::vector<std::vector<unsigned>> v;
+            for (auto const& [uid, cr] : n->char_ranges()) {
+                std::vector<unsigned> entry;
+                entry.push_back(uid);
+                for (auto const& rg : cr.first.ranges()) { entry.push_back(rg.m_lo); entry.push_back(rg.m_hi); }
+                v.push_back(std::move(entry));
+            }
+            std::sort(v.begin(), v.end());
+            sig.push_back(static_cast<unsigned>(v.size()));
+            for (auto const& e : v) { sig.push_back(static_cast<unsigned>(e.size())); for (unsigned x : e) sig.push_back(x); }
+        }
+        return sig;
+    }
+
+    dep_tracker nielsen_graph::node_all_deps(nielsen_node const* n) {
+        dep_tracker d = nullptr;
+        for (auto const& e : n->str_eqs())  d = m_dep_mgr.mk_join(d, e.m_dep);
+        for (auto const& q : n->str_deqs()) d = m_dep_mgr.mk_join(d, q.m_dep);
+        for (auto const& mm : n->str_mems()) d = m_dep_mgr.mk_join(d, mm.m_dep);
+        for (auto const& [uid, cr] : n->char_ranges()) d = m_dep_mgr.mk_join(d, cr.second);
+        return d;
+    }
+
    nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node,
        ptr_vector<nielsen_edge>& cur_path, const unsigned depth) {

@ -2053,6 +2137,23 @@ namespace seq {
            return search_result::unsat;
        }

+        // Transposition-table lookup.  The node is now canonical (post-simplify).
+        // If an equivalent node was already proven UNSAT for string/regex-only
+        // reasons, this node is unsat too — independently of its (integer) side
+        // constraints — so we prune without re-exploring its subtree.  We derive
+        // the conflict from this node's own constraint deps (a sound over-approx).
+        {
+            const std::vector<unsigned> sig = compute_node_signature(node);
+            if (m_unsat_node_cache.contains(sig)) {
+                node->set_conflict(backtrack_reason::regex, node_all_deps(node));
+                node->set_general_conflict();
+                node->m_unsat_cacheable = true;
+                ++m_stats.m_num_simplify_conflict;
+                ++m_num_cache_hits;
+                return search_result::unsat;
+            }
+        }
+
        // Apply Parikh image filter: generate modular length constraints and
        // perform a lightweight feasibility pre-check.  The filter is guarded
        // internally (m_parikh_applied) so it only runs once per node.
@ -2102,6 +2203,9 @@ namespace seq {
            if (!check_leaf_regex(*node, dep)) {
                node->set_general_conflict();
                node->set_conflict(backtrack_reason::regex, dep);
+                // string-only conflict (empty intersection) → memoize.
+                node->m_unsat_cacheable = true;
+                m_unsat_node_cache.insert(compute_node_signature(node));
                return search_result::unsat;
            }
            assert_node_side_constraints(node);
@ -2189,6 +2293,20 @@ namespace seq {
        }
        if (!any_unknown) {
            node->set_child_conflict();
+            // Memoize this internal UNSAT iff its whole subtree closed for
+            // string/regex-only reasons (no child relied on arithmetic / Parikh /
+            // external context).  Then a same-signature node found via any other
+            // path is pruned by the lookup above.
+            bool all_string_only = true;
+            for (nielsen_edge* e : node->outgoing()) {
+                if (!node_unsat_string_only(e->tgt())) {
+                    all_string_only = false;
+                    break;
+                }
+            }
+            node->m_unsat_cacheable = all_string_only;
+            if (all_string_only)
+                m_unsat_node_cache.insert(compute_node_signature(node));
            return search_result::unsat;
        }
        return search_result::unknown;
@ -3368,43 +3486,55 @@ namespace seq {
    }

    // -----------------------------------------------------------------------
-    // Helper: precompute_partial_dfa
-    // BFS of Brzozowski derivatives from root_re up to `depth` steps, recording
-    // all concrete minterm edges in the partial DFA.  Called eagerly from
-    // apply_cycle_decomposition (priority 6) so that SCC detection can fire
-    // before apply_regex_var_split (priority 10) creates a symbolic child whose
-    // ite-derivative would otherwise delay SCC detection by one more level.
+    // Helper: ensure_automaton_explored  (lazy, on-demand, cached)
+    // Records the *complete* reachable Brzozowski automaton of root_re into the
+    // partial DFA — but only once per regex component.  This is the lazy
+    // Q-completion: instead of re-running a fixed-depth BFS on every cycle
+    // decomposition (the old precompute_partial_dfa(R, depth), which re-explored
+    // R's automaton on each of the ~hundreds of decompositions and, with a
+    // shallow depth, left Q incomplete so guards discharged prematurely and laps
+    // never closed), we explore each component once to saturation and remember
+    // every state we visited.  A later decomposition whose head was reached by an
+    // earlier exploration finds it already covered and does no work.
+    //
+    // Completeness matters: the cycle guard / stabilizer view gate on whether the
+    // current state lies in Q (projection_state_in_Q), and that is only sound as
+    // a *bound* when Q contains the whole SCC of the head.  The BFS is bounded by
+    // the finite reachable automaton (Brzozowski states modulo ACI), so it
+    // terminates; m.inc() keeps it responsive to the resource limit.
    // -----------------------------------------------------------------------

-    void nielsen_graph::precompute_partial_dfa(euf::snode const* root_re, const unsigned depth) {
+    void nielsen_graph::ensure_automaton_explored(euf::snode const* root_re) {
        SASSERT(root_re);
        if (!root_re->is_ground())
            return;
+        if (m_explored_automaton.contains(root_re->get_expr()->get_id()))
+            return;

-        struct work_item { euf::snode const* re; unsigned d; };
-        svector<work_item> queue;
-        queue.push_back({root_re, depth});
-        uint_set visited;
-        visited.insert(root_re->id());
+        svector<euf::snode const*> queue;
+        queue.push_back(root_re);

        while (!queue.empty()) {
-            auto [re, d] = queue.back();
+            if (!m.inc())
+                return; // resource limit: leave Q partial (sound under-approx)
+            euf::snode const* re = queue.back();
            queue.pop_back();
+            const unsigned re_eid = re->get_expr()->get_id();
+            if (m_explored_automaton.contains(re_eid))
+                continue; // already explored (here or in a previous component)
+            m_explored_automaton.insert(re_eid);
+
            euf::snode_vector mts;
            m_sg.compute_minterms(re, mts);
            for (euf::snode const* mt : mts) {
-                // std::cout << "minterm: " << mk_pp(mt->get_expr(), m) << std::endl;
                euf::snode const* deriv = m_sg.brzozowski_deriv(re, mt);
                if (!deriv || deriv->is_fail())
                    continue;
                record_partial_derivative_edge(re, mt, deriv);
-                if (d > 0 && deriv->is_ground() && !visited.contains(deriv->id())) {
-                    visited.insert(deriv->id());
-                    queue.push_back({deriv, d - 1});
-                }
+                if (deriv->is_ground() && !m_explored_automaton.contains(deriv->get_expr()->get_id()))
+                    queue.push_back(deriv);
            }
        }
-        //std::string s = partial_dfa_to_dot(root_re, true);
    }

    // -----------------------------------------------------------------------
@ -3500,10 +3630,10 @@ namespace seq {
            euf::snode const* x = first;
            euf::snode const* R = mem.m_regex;

-            // Eagerly precompute partial DFA edges from this regex so that
-            // collect_scc_for_projection can detect cycles without waiting
-            // for apply_regex_var_split to create per-minterm children.
-            precompute_partial_dfa(R, 64);
+            // Lazily explore R's full automaton (once, cached) so that
+            // collect_scc_for_projection sees the complete SCC of R and the
+            // stabilizer view / cycle guard gate on a complete Q.
+            ensure_automaton_explored(R);

            // R must sit on a cycle (an SCC of the partial DFA).
            uint_set scc;
@ -3515,7 +3645,6 @@ namespace seq {
            const unsigned nu = m_projection_extract_idx;
            if (nu == 0)
                continue;
-            fprintf(stderr, "DEC R=%u nu=%u sccsz=%u x=%u nedges=%u\n", R->id(), nu, scc.num_elems(), x->id(), (unsigned)m_partial_dfa_edges.size()); fflush(stderr);

            // Trigger condition: x must not already carry a cycle guard for the
            // current SCC snapshot ν.  All states of one SCC share a single ν, so
@ -5139,6 +5268,8 @@ namespace seq {
        st.update("nseq mod var num unwind (eq)",   m_stats.m_mod_var_num_unwinding_eq);
        st.update("nseq mod var num unwind (mem)",   m_stats.m_mod_var_num_unwinding_mem);
        st.update("nseq mod axiomatized disequalities",   m_stats.m_ax_diseq);
+        st.update("nseq unsat-cache size",                (unsigned) m_unsat_node_cache.size());
+        st.update("nseq unsat-cache hits",                m_num_cache_hits);
    }

 }
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@ -49,6 +49,8 @@ Author:
 #include "util/uint_set.h"
 #include "util/vector.h"
 #include <map>
+#include <set>
+#include <vector>

 namespace smt { class enode; }

@ -512,6 +514,11 @@ namespace seq {
        backtrack_reason        m_reason = backtrack_reason::unevaluated;
        bool                    m_is_progress = false;
        bool                    m_node_len_constraints_generated = false; // true after generate_node_length_constraints runs
+        // true once this node has been proven UNSAT for reasons that depend only
+        // on its string/regex constraints (not on arithmetic / Parikh / external
+        // context).  Such an unsat is a property of the node's string signature
+        // and is safe to memoize in the transposition table (m_unsat_node_cache).
+        bool                    m_unsat_cacheable = false;

        // Parikh filter: set to true once apply_parikh_to_node has been applied
        // to this node. Prevents duplicate constraint generation across DFS runs.
@ -829,6 +836,18 @@ namespace seq {
        // when the explored SCC's edge set actually grows; identifies which
        // partial-DFA edges (m_projection_idx ≤ ν) belong to a projection's Q.
        unsigned                      m_projection_extract_idx = 0;
+        // Expr ids of regex states whose full reachable automaton has already
+        // been recorded into the partial DFA (lazy, once-per-component Q growth;
+        // see ensure_automaton_explored).
+        uint_set                      m_explored_automaton;
+
+        // Transposition table: structural signatures of nodes already proven
+        // UNSAT for string/regex-only reasons.  A node whose signature is present
+        // is unsatisfiable regardless of how it was reached, so the DFS can prune
+        // it without re-exploring its subtree (turns the search tree into the
+        // finite DAG the termination proof bounds).  See compute_node_signature.
+        std::set<std::vector<unsigned>> m_unsat_node_cache;
+        unsigned m_num_cache_hits = 0;


    public:
@ -1010,6 +1029,16 @@ namespace seq {

        search_result search_dfs(nielsen_node *node, ptr_vector<nielsen_edge>& path, unsigned depth = 0);

+        // Transposition table helpers (node memoization of string-only UNSAT).
+        // Canonical structural signature of a node (string equalities,
+        // disequalities, memberships incl. view/guard metadata, char ranges).
+        // Two nodes with equal signatures have identical string constraints.
+        std::vector<unsigned> compute_node_signature(nielsen_node const* n) const;
+        // Union of all constraint deps of a node (sound over-approx conflict).
+        dep_tracker node_all_deps(nielsen_node const* n);
+        // True iff the node's UNSAT depends only on string/regex constraints.
+        bool node_unsat_string_only(nielsen_node const* n) const;
+
        // Regex widening: overapproximate `str` by replacing variables with
        // the intersection of their primitive regex constraints (or Σ* if
        // unconstrained), replacing symbolic chars with their char ranges,
@ -1180,7 +1209,9 @@ namespace seq {
        // eagerly recording concrete minterm edges in the partial DFA so that
        // collect_scc_for_projection can find cycles without first waiting for
        // concrete children to record them one level at a time.
-        void precompute_partial_dfa(euf::snode const* root_re, unsigned depth);
+        // Lazily record the complete reachable automaton of root_re into the
+        // partial DFA, once per regex component (cached in m_explored_automaton).
+        void ensure_automaton_explored(euf::snode const* root_re);

        // generalized power introduction: for an equation where one head is
        // a variable v and the other side has ground prefix + a variable x