theory_nseq: remove seq_state, embed tracked entries directly in prop_queue (#9045)

* Remove seq_state: embed tracked_str_eq/tracked_str_mem directly in prop_queue Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * theory_nseq: use type aliases for eq_item/mem_item instead of wrapper structs Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * seq_model: validate_regex takes single tracked_str_mem, caller loops Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
2026-07-12 01:56:22 +00:00 · 2026-03-19 14:45:53 -07:00 · 2026-03-19 14:45:53 -07:00 · 59bc9b17ea
commit 59bc9b17ea
parent 8795bf06fb
5 changed files with 114 additions and 168 deletions
--- a/src/smt/seq/seq_state.h
+++ b/src/smt/seq/seq_state.h
@ -7,9 +7,9 @@ Module Name:

 Abstract:

-    Constraint store bridging the SMT context to the Nielsen graph.
-    Holds word equations and regex membership constraints with
-    push/pop support for backtracking.
+    Tracked constraint types bridging the SMT context to the Nielsen graph.
+    tracked_str_eq and tracked_str_mem extend the core constraint types with
+    SMT-layer source information (enodes and literals) for conflict reporting.

 Author:

@ -39,71 +39,4 @@ namespace smt {
            : seq::str_mem(str, regex, history, id, dep), lit(lit) {}
    };

-    // source info for a string disequality
-    struct diseq_source {
-        enode* m_n1;
-        enode* m_n2;
-    };
-
-    class seq_state {
-        vector<tracked_str_eq>     m_str_eqs;
-        vector<tracked_str_mem>    m_str_mems;
-        vector<diseq_source>    m_diseqs;
-        unsigned_vector         m_str_eq_lim;
-        unsigned_vector         m_str_mem_lim;
-        unsigned_vector         m_diseq_lim;
-        unsigned                m_next_mem_id = 0;
-
-    public:
-        seq_state() = default;
-
-        void push() {
-            m_str_eq_lim.push_back(m_str_eqs.size());
-            m_str_mem_lim.push_back(m_str_mems.size());
-            m_diseq_lim.push_back(m_diseqs.size());
-        }
-
-        void pop(unsigned n) {
-            for (unsigned i = 0; i < n; ++i) {
-                m_str_eqs.shrink(m_str_eq_lim.back());
-                m_str_eq_lim.pop_back();
-                m_str_mems.shrink(m_str_mem_lim.back());
-                m_str_mem_lim.pop_back();
-                m_diseqs.shrink(m_diseq_lim.back());
-                m_diseq_lim.pop_back();
-            }
-        }
-
-        void add_str_eq(euf::snode* lhs, euf::snode* rhs, enode* n1, enode* n2) {
-            seq::dep_tracker dep = nullptr;
-            m_str_eqs.push_back(tracked_str_eq(lhs, rhs, n1, n2, dep));
-        }
-
-        void add_str_mem(euf::snode* str, euf::snode* regex, sat::literal lit) {
-            seq::dep_tracker dep = nullptr;
-            m_str_mems.push_back(tracked_str_mem(str, regex, lit, nullptr, m_next_mem_id++, dep));
-        }
-
-        void add_diseq(enode* n1, enode* n2) {
-            m_diseqs.push_back({n1, n2});
-        }
-
-        vector<tracked_str_eq> const&  str_eqs()  const { return m_str_eqs; }
-        vector<tracked_str_mem> const& str_mems() const { return m_str_mems; }
-        vector<diseq_source> const& diseqs()   const { return m_diseqs; }
-
-        diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; }
-
-        bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_diseqs.empty(); }
-
-        void reset() {
-            m_str_eqs.reset();
-            m_str_mems.reset();
-            m_diseqs.reset();
-            m_str_eq_lim.reset();
-            m_str_mem_lim.reset();
-            m_diseq_lim.reset();
-        }
-    };
-
 }
--- a/src/smt/seq_model.cpp
+++ b/src/smt/seq_model.cpp
@ -17,7 +17,6 @@ Author:

 --*/
 #include "smt/seq_model.h"
-#include "smt/seq/seq_state.h"
 #include "smt/smt_context.h"
 #include "smt/smt_model_generator.h"
 #include "smt/proto_model/proto_model.h"
@ -347,28 +346,22 @@ namespace smt {
        }
    }

-    bool seq_model::validate_regex(seq_state const& state, ::proto_model& mdl) {
-        bool ok = true;
+    bool seq_model::validate_regex(tracked_str_mem const& mem, ::proto_model& mdl) {
+        if (!mem.m_str || !mem.m_regex)
+            return true;
+        expr* s_expr = mem.m_str->get_expr();
+        expr* r_expr = mem.m_regex->get_expr();
+        if (!s_expr || !r_expr)
+            return true;

-        // validate positive memberships: str ∈ regex
-        for (auto const& mem : state.str_mems()) {
-            if (!mem.m_str || !mem.m_regex)
-                continue;
-            expr* s_expr = mem.m_str->get_expr();
-            expr* r_expr = mem.m_regex->get_expr();
-            if (!s_expr || !r_expr)
-                continue;
-
-            expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m);
-            if (mdl.is_false(in_re)) {
-                IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: "
-                           << mk_bounded_pp(s_expr, m, 3)
-                           << " in " << mk_bounded_pp(r_expr, m, 3) << "\n";);
-                ok = false;
-            }
+        expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m);
+        if (mdl.is_false(in_re)) {
+            IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: "
+                       << mk_bounded_pp(s_expr, m, 3)
+                       << " in " << mk_bounded_pp(r_expr, m, 3) << "\n";);
+            return false;
        }
-
-        return ok;
+        return true;
    }

 }
--- a/src/smt/seq_model.h
+++ b/src/smt/seq_model.h
@ -32,6 +32,7 @@ Author:
 #include "ast/rewriter/seq_rewriter.h"
 #include "ast/euf/euf_sgraph.h"
 #include "smt/seq/seq_nielsen.h"
+#include "smt/seq/seq_state.h"   // tracked_str_mem
 #include "model/seq_factory.h"

 class proto_model;
@ -40,7 +41,7 @@ namespace smt {

    class enode;
    class model_generator;
-    class seq_state;
+    struct tracked_str_mem;
    class model_value_proc;

    class seq_model {
@ -87,7 +88,7 @@ namespace smt {
        // Validate that model assignments satisfy all regex membership
        // constraints from the state.  Checks positive and negative
        // memberships.  Returns true if all constraints pass.
-        bool validate_regex(seq_state const& state, ::proto_model& mdl);
+        bool validate_regex(tracked_str_mem const& mem, ::proto_model& mdl);

    private:
        // extract variable assignments from the sat path (root-to-leaf edges).
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@ -36,7 +36,6 @@ namespace smt {
        m_context_solver(m),
        m_nielsen(m_sgraph, m_context_solver),
        m_axioms(m_th_rewriter),
-        m_state(),
        m_regex(m_sgraph),
        m_model(m, m_seq, m_rewriter, m_sgraph)
    {
@ -169,10 +168,9 @@ namespace smt {
        euf::snode* s1 = get_snode(e1);
        euf::snode* s2 = get_snode(e2);
        if (s1 && s2) {
-            unsigned idx = m_state.str_eqs().size();
-            m_state.add_str_eq(s1, s2, get_enode(v1), get_enode(v2));
+            seq::dep_tracker dep = nullptr;
            ctx.push_trail(restore_vector(m_prop_queue));
-            m_prop_queue.push_back(eq_item{idx});
+            m_prop_queue.push_back(eq_item(s1, s2, get_enode(v1), get_enode(v2), dep));
        }
    }

@ -203,10 +201,11 @@ namespace smt {
            euf::snode* sn_re  = get_snode(re);
            if (!sn_str || !sn_re)
                return;
-            unsigned idx = m_state.str_mems().size();
            literal lit(v, !is_true);
+            seq::dep_tracker dep = nullptr;
            if (is_true) {
-                m_state.add_str_mem(sn_str, sn_re, lit);
+                ctx.push_trail(restore_vector(m_prop_queue));
+                m_prop_queue.push_back(mem_item(sn_str, sn_re, lit, nullptr, m_next_mem_id++, dep));
            }
            else {
                // ¬(str ∈ R)  ≡  str ∈ complement(R): store as a positive membership
@ -214,10 +213,9 @@ namespace smt {
                // is kept in mem_source for conflict reporting.
                expr_ref re_compl(m_seq.re.mk_complement(re), m);
                euf::snode* sn_re_compl = get_snode(re_compl.get());
-                m_state.add_str_mem(sn_str, sn_re_compl, lit);
+                ctx.push_trail(restore_vector(m_prop_queue));
+                m_prop_queue.push_back(mem_item(sn_str, sn_re_compl, lit, nullptr, m_next_mem_id++, dep));
            }
-            ctx.push_trail(restore_vector(m_prop_queue));
-            m_prop_queue.push_back(mem_item{idx});
        }
        else if (m_seq.str.is_prefix(e)) {
            if (is_true)
@ -258,14 +256,12 @@ namespace smt {

    void theory_nseq::push_scope_eh() {
        theory::push_scope_eh();
-        m_state.push();
        m_sgraph.push();
        
    }

    void theory_nseq::pop_scope_eh(unsigned num_scopes) {
        theory::pop_scope_eh(num_scopes);
-        m_state.pop(num_scopes);
        m_sgraph.pop(num_scopes);
    }

@ -289,24 +285,22 @@ namespace smt {
        while (m_prop_qhead < m_prop_queue.size() && !ctx.inconsistent()) {
            auto const& item = m_prop_queue[m_prop_qhead++];
            if (std::holds_alternative<eq_item>(item))
-                propagate_eq(std::get<eq_item>(item).idx);
+                propagate_eq(std::get<eq_item>(item));
            else if (std::holds_alternative<mem_item>(item))
-                propagate_pos_mem(std::get<mem_item>(item).idx);
+                propagate_pos_mem(std::get<mem_item>(item));
            else if (std::holds_alternative<axiom_item>(item))
                dequeue_axiom(std::get<axiom_item>(item).e);
        }
    }

-    void theory_nseq::propagate_eq(unsigned idx) {
+    void theory_nseq::propagate_eq(tracked_str_eq const& eq) {
        // When s1 = s2 is learned, ensure len(s1) and len(s2) are
        // internalized so congruence closure propagates len(s1) = len(s2).
-        auto const& src = m_state.str_eqs()[idx];
-        ensure_length_var(src.m_l->get_expr());
-        ensure_length_var(src.m_r->get_expr());
+        ensure_length_var(eq.m_l->get_expr());
+        ensure_length_var(eq.m_r->get_expr());
    }

-    void theory_nseq::propagate_pos_mem(unsigned idx) {
-        auto const& mem = m_state.str_mems()[idx];
+    void theory_nseq::propagate_pos_mem(tracked_str_mem const& mem) {

        if (!mem.m_str || !mem.m_regex)
            return;
@ -419,36 +413,46 @@ namespace smt {
    void theory_nseq::populate_nielsen_graph() {
        m_nielsen.reset();

-        // transfer string equalities from state to nielsen graph root
-        for (auto const& eq : m_state.str_eqs()) {
-            m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs, eq.m_l, eq.m_r);
-        }
+        unsigned num_eqs = 0, num_mems = 0;

-        // transfer regex memberships, pre-processing through seq_regex
-        // to consume ground prefixes via Brzozowski derivatives
-        for (auto const &mem : m_state.str_mems()) {
-            int triv = m_regex.check_trivial(mem);
-            if (triv > 0)
-                continue;  // trivially satisfied, skip
-            if (triv < 0) {
-                // trivially unsat: add anyway so solve() detects conflict
-                m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit);
-                continue;
+        // transfer string equalities and regex memberships from prop_queue to nielsen graph root
+        for (auto const& item : m_prop_queue) {
+            if (std::holds_alternative<eq_item>(item)) {
+                auto const& eq = std::get<eq_item>(item);
+                m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs, eq.m_l, eq.m_r);
+                ++num_eqs;
            }
-            // pre-process: consume ground prefix characters
-            vector<seq::str_mem> processed;
-            if (!m_regex.process_str_mem(mem, processed)) {
-                // conflict during ground prefix consumption
-                m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit);
-                continue;
-            }
-            for (auto const& pm : processed) {
-                m_nielsen.add_str_mem(pm.m_str, pm.m_regex, mem.lit);
+            else if (std::holds_alternative<mem_item>(item)) {
+                auto const& mem = std::get<mem_item>(item);
+                int triv = m_regex.check_trivial(mem);
+                if (triv > 0) {
+                    ++num_mems;
+                    continue;  // trivially satisfied, skip
+                }
+                if (triv < 0) {
+                    // trivially unsat: add anyway so solve() detects conflict
+                    m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit);
+                    ++num_mems;
+                    continue;
+                }
+                // pre-process: consume ground prefix characters
+                vector<seq::str_mem> processed;
+                if (!m_regex.process_str_mem(mem, processed)) {
+                    // conflict during ground prefix consumption
+                    m_nielsen.add_str_mem(mem.m_str, mem.m_regex, mem.lit);
+                    ++num_mems;
+                    continue;
+                }
+                for (auto const& pm : processed)
+                    m_nielsen.add_str_mem(pm.m_str, pm.m_regex, mem.lit);
+                ++num_mems;
            }
        }

-        TRACE(seq, tout << "nseq populate: " << m_state.str_eqs().size() << " eqs, "
-                        << m_state.str_mems().size() << " mems -> nielsen root\n");
+        TRACE(seq, tout << "nseq populate: " << num_eqs << " eqs, "
+                        << num_mems << " mems -> nielsen root\n");
+        IF_VERBOSE(1, verbose_stream() << "nseq final_check: populating graph with "
+            << num_eqs << " eqs, " << num_mems << " mems\n";);
    }

    final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) {
@ -459,8 +463,13 @@ namespace smt {
            return FC_CONTINUE;
        }

+        // Check if there are any eq/mem items in the propagation queue.
+        bool has_eq_or_mem = false;
+        for (auto const& item : m_prop_queue)
+            if (!std::holds_alternative<axiom_item>(item)) { has_eq_or_mem = true; break; }
+
        // there is nothing to do for the string solver, as there are no string constraints
-        if (m_state.empty() && m_ho_terms.empty() && !has_unhandled_preds()) {
+        if (!has_eq_or_mem && m_ho_terms.empty() && !has_unhandled_preds()) {
            IF_VERBOSE(1, verbose_stream() << "nseq final_check: empty state+ho, FC_DONE (no solve)\n";);
            m_nielsen.reset();
            m_nielsen.create_root();
@ -474,7 +483,7 @@ namespace smt {
            return FC_CONTINUE;
        }

-        if (m_state.empty() && !has_unhandled_preds()) {
+        if (!has_eq_or_mem && !has_unhandled_preds()) {
            IF_VERBOSE(1, verbose_stream() << "nseq final_check: empty state (after ho), FC_DONE (no solve)\n";);
            m_nielsen.reset();
            m_nielsen.create_root();
@ -482,8 +491,6 @@ namespace smt {
            return FC_DONE;
        }

-        IF_VERBOSE(1, verbose_stream() << "nseq final_check: populating graph with "
-            << m_state.str_eqs().size() << " eqs, " << m_state.str_mems().size() << " mems\n";);
        populate_nielsen_graph();

        // assert length constraints derived from string equalities
@ -508,7 +515,7 @@ namespace smt {
                IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck UNSAT\n";);
                return FC_CONTINUE;
            case l_false:
-                // all regex constraints satisfiable, no word eqs/diseqs → SAT
+                // all regex constraints satisfiable, no word eqs → SAT
                IF_VERBOSE(1, verbose_stream() << "nseq final_check: regex precheck SAT\n";);
                m_nielsen.set_sat_node(m_nielsen.root());
                return FC_DONE;
@ -538,10 +545,7 @@ namespace smt {
            IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve SAT, sat_node="
                << (m_nielsen.sat_node() ? "set" : "null") << "\n";);
            // Nielsen found a consistent assignment for positive constraints.
-            // If there are disequalities we haven't verified, we cannot soundly declare sat.
-            SASSERT(!m_state.empty()); // we should have axiomatized them
-            if (!m_state.diseqs().empty())
-                return FC_GIVEUP;
+            SASSERT(has_eq_or_mem); // we should have axiomatized them
            if (!has_unhandled_preds())
                return FC_DONE;
        }
@ -604,7 +608,9 @@ namespace smt {
    }

    void theory_nseq::validate_model(proto_model& mdl) {
-        m_model.validate_regex(m_state, mdl);
+        for (auto const& item : m_prop_queue)
+            if (std::holds_alternative<mem_item>(item))
+                m_model.validate_regex(std::get<mem_item>(item), mdl);
    }

    // -----------------------------------------------------------------------
@ -620,10 +626,14 @@ namespace smt {
    }

    void theory_nseq::display(std::ostream& out) const {
+        unsigned num_eqs = 0, num_mems = 0;
+        for (auto const& item : m_prop_queue) {
+            if (std::holds_alternative<eq_item>(item)) ++num_eqs;
+            else if (std::holds_alternative<mem_item>(item)) ++num_mems;
+        }
        out << "theory_nseq\n";
-        out << "  str_eqs:    " << m_state.str_eqs().size() << "\n";
-        out << "  str_mems:   " << m_state.str_mems().size() << "\n";
-        out << "  diseqs:     " << m_state.diseqs().size() << "\n";
+        out << "  str_eqs:    " << num_eqs << "\n";
+        out << "  str_mems:   " << num_mems << "\n";
        out << "  prop_queue: " << m_prop_qhead << "/" << m_prop_queue.size() << "\n";
        out << "  ho_terms:   " << m_ho_terms.size() << "\n";
    }
@ -900,7 +910,13 @@ namespace smt {
    // -----------------------------------------------------------------------

    lbool theory_nseq::check_regex_memberships_precheck() {
-        auto const& mems = m_state.str_mems();
+        // Collect mem items from the propagation queue into a local pointer array
+        // so that indices into the array remain stable during this function.
+        ptr_vector<tracked_str_mem const> mems;
+        for (auto const& item : m_prop_queue)
+            if (std::holds_alternative<mem_item>(item))
+                mems.push_back(&std::get<mem_item>(item));
+
        if (mems.empty())
            return l_undef;

@ -910,7 +926,7 @@ namespace smt {
        bool all_primitive = true;

        for (unsigned i = 0; i < mems.size(); ++i) {
-            auto const& mem = mems[i];
+            auto const& mem = *mems[i];
            SASSERT(mem.m_str && mem.m_regex);
            if (mem.is_primitive()) {
                auto& vec = var_to_mems.insert_if_not_there(mem.m_str->id(), unsigned_vector());
@ -923,6 +939,11 @@ namespace smt {
        if (var_to_mems.empty())
            return l_undef;

+        // Check if there are any eq items in the queue (needed for SAT condition below).
+        bool has_eqs = false;
+        for (auto const& item : m_prop_queue)
+            if (std::holds_alternative<eq_item>(item)) { has_eqs = true; break; }
+
        bool any_undef = false;

        // Check intersection emptiness for each variable.
@ -931,7 +952,7 @@ namespace smt {
            unsigned_vector const& mem_indices = kv.m_value;
            ptr_vector<euf::snode> regexes;
            for (unsigned i : mem_indices) {
-                euf::snode* re = mems[i].m_regex;
+                euf::snode* re = mems[i]->m_regex;
                if (re)
                    regexes.push_back(re);
            }
@ -948,10 +969,8 @@ namespace smt {
                enode_pair_vector eqs;
                literal_vector lits;
                for (unsigned i : mem_indices) {
-                    auto const &src = m_state.str_mems()[i];
-                    SASSERT(ctx.get_assignment(src.lit) == l_true); // we already stored the polarity of the literal
-                    lits.push_back(
-                        src.lit);
+                    SASSERT(ctx.get_assignment(mems[i]->lit) == l_true); // we already stored the polarity of the literal
+                    lits.push_back(mems[i]->lit);
                }
                TRACE(seq, tout << "nseq regex precheck: empty intersection for var "
                                << var_id << ", conflict with " << lits.size() << " lits\n";);
@ -967,11 +986,11 @@ namespace smt {
            return l_undef;  // cannot fully determine; let DFS decide

        // All variables' regex intersections are non-empty.
-        // If there are no word equations and no disequalities, variables are
-        // independent and each can be assigned a witness string → SAT.
-        if (all_primitive && m_state.str_eqs().empty() && m_state.diseqs().empty() && !has_unhandled_preds()) {
+        // If there are no word equations, variables are independent and
+        // each can be assigned a witness string → SAT.
+        if (all_primitive && !has_eqs && !has_unhandled_preds()) {
            TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, "
-                            << "no word eqs/diseqs → SAT\n";);
+                            << "no word eqs → SAT\n";);
            return l_false;  // signals SAT (non-empty / satisfiable)
        }

--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@ -28,7 +28,7 @@ Author:
 #include "smt/smt_theory.h"
 #include "smt/smt_arith_value.h"
 #include "smt/seq/seq_nielsen.h"
-#include "smt/seq/seq_state.h"
+#include "smt/seq/seq_state.h"   // tracked_str_eq, tracked_str_mem
 #include "smt/seq/seq_regex.h"
 #include "smt/seq_model.h"
 #include "smt/nseq_context_solver.h"
@ -48,19 +48,19 @@ namespace smt {
        context_solver m_context_solver;
        seq::nielsen_graph m_nielsen;
        seq::axioms m_axioms;
-        seq_state     m_state;
        seq::seq_regex     m_regex;   // regex membership pre-processing
        seq_model     m_model;   // model construction helper

        // propagation queue items (variant over the distinct propagation cases)
-        struct eq_item    { unsigned idx; };   // string equality at index idx in str_eqs
-        struct mem_item   { unsigned idx; };   // regex membership at index idx in str_mems
-        struct axiom_item { expr*    e;   };   // structural axiom for term e
+        using eq_item   = tracked_str_eq;    // string equality
+        using mem_item  = tracked_str_mem;   // regex membership
+        struct axiom_item { expr* e; };      // structural axiom for term e

        using prop_item = std::variant<eq_item, mem_item, axiom_item>;

        vector<prop_item>       m_prop_queue;
        unsigned                m_prop_qhead = 0;
+        unsigned                m_next_mem_id = 0;     // monotone counter for tracked_str_mem ids
        obj_hashtable<expr>     m_axiom_set;   // dedup guard for axiom_item enqueues

        // statistics
@ -117,8 +117,8 @@ namespace smt {
        euf::snode* get_snode(expr* e);

        // propagation dispatch helpers
-        void propagate_eq(unsigned idx);
-        void propagate_pos_mem(unsigned idx);
+        void propagate_eq(tracked_str_eq const& eq);
+        void propagate_pos_mem(tracked_str_mem const& mem);
        void enqueue_axiom(expr* e);
        void dequeue_axiom(expr* e);
        void ensure_length_var(expr* e);