first rounds

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2026-06-15 05:15:41 +00:00 · 2026-02-28 12:19:22 -08:00 · 2026-02-28 12:19:22 -08:00 · 9557c5500b
commit 9557c5500b
parent 636b3b0a6b
3 changed files with 236 additions and 64 deletions
--- a/src/ast/rewriter/nseq_nielsen.cpp
+++ b/src/ast/rewriter/nseq_nielsen.cpp
@ -123,21 +123,52 @@ namespace seq {
                rhs[j++] = rhs.get(i);
        rhs.resize(j);

-        // Check trivial cases
-        if (lhs.empty() && rhs.empty())
-            return nielsen_result::solved;
+        // Loop until fixpoint: strip prefix/suffix, then leading/trailing vars
+        bool progress = true;
+        while (progress) {
+            progress = false;

-        // Strip common prefix and suffix
-        changed |= strip_common_prefix(lhs, rhs);
-        changed |= strip_common_suffix(lhs, rhs);
+            // Check trivial cases
+            if (lhs.empty() && rhs.empty())
+                return nielsen_result::solved;
+
+            // Strip common prefix and suffix (units and string constants)
+            progress |= strip_common_prefix(lhs, rhs);
+            progress |= strip_common_suffix(lhs, rhs);
+
+            if (lhs.empty() && rhs.empty())
+                return nielsen_result::solved;
+
+            // Check for conflict: both sides start with different constants
+            if (is_conflict(lhs, rhs))
+                return nielsen_result::conflict;
+
+            // Both sides start with the same variable: strip it
+            if (!lhs.empty() && !rhs.empty() && lhs.get(0) == rhs.get(0) && is_var(lhs.get(0))) {
+                expr_ref_vector new_lhs(m), new_rhs(m);
+                new_lhs.append(lhs.size() - 1, lhs.data() + 1);
+                new_rhs.append(rhs.size() - 1, rhs.data() + 1);
+                lhs.swap(new_lhs);
+                rhs.swap(new_rhs);
+                progress = true;
+                changed = true;
+            }
+
+            // Both sides end with the same variable: strip it
+            if (!lhs.empty() && !rhs.empty() &&
+                lhs.back() == rhs.back() && is_var(lhs.back())) {
+                lhs.pop_back();
+                rhs.pop_back();
+                progress = true;
+                changed = true;
+            }
+
+            changed |= progress;
+        }

        if (lhs.empty() && rhs.empty())
            return nielsen_result::solved;

-        // Check for conflict: both sides start with different constants
-        if (is_conflict(lhs, rhs))
-            return nielsen_result::conflict;
-
        // Variable = empty: if one side is empty and other has single var
        if (lhs.empty() && rhs.size() == 1 && is_var(rhs.get(0)))
            return nielsen_result::solved;  // x = ε is a solution
@ -150,27 +181,6 @@ namespace seq {
        if (rhs.size() == 1 && is_var(rhs.get(0)) && !has_var(lhs))
            return nielsen_result::solved;

-        // Both sides start with the same variable: strip it
-        if (!lhs.empty() && !rhs.empty() && lhs.get(0) == rhs.get(0) && is_var(lhs.get(0))) {
-            expr_ref_vector new_lhs(m), new_rhs(m);
-            new_lhs.append(lhs.size() - 1, lhs.data() + 1);
-            new_rhs.append(rhs.size() - 1, rhs.data() + 1);
-            lhs.swap(new_lhs);
-            rhs.swap(new_rhs);
-            changed = true;
-        }
-
-        // Both sides end with the same variable: strip it
-        if (!lhs.empty() && !rhs.empty() &&
-            lhs.back() == rhs.back() && is_var(lhs.back())) {
-            lhs.pop_back();
-            rhs.pop_back();
-            changed = true;
-        }
-
-        if (changed && lhs.empty() && rhs.empty())
-            return nielsen_result::solved;
-
        if (changed)
            return nielsen_result::reduced;

@ -206,11 +216,10 @@ namespace seq {
                return true;
        }
        if (is_unit(l) && is_unit(r) && l != r) {
-            // Different unit terms
-            expr* c1 = to_app(l)->get_arg(0);
-            expr* c2 = to_app(r)->get_arg(0);
-            rational v1, v2;
-            if (m_autil.is_numeral(c1, v1) && m_autil.is_numeral(c2, v2) && v1 != v2)
+            // Only conflict if both are concrete character values that differ
+            unsigned v1, v2;
+            if (m_util.is_const_char(to_app(l)->get_arg(0), v1) &&
+                m_util.is_const_char(to_app(r)->get_arg(0), v2) && v1 != v2)
                return true;
        }
        return false;
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@ -57,6 +57,7 @@ namespace smt {

        m_new_propagation = false;
        TRACE(seq, display(tout << "final_check level=" << ctx.get_scope_level() << "\n"););
+        IF_VERBOSE(3, verbose_stream() << "nseq final_check level=" << ctx.get_scope_level() << "\n");

        // Process pending axioms
        if (m_state.has_pending_axioms()) {
@ -321,6 +322,7 @@ namespace smt {
        literal_vector lits;
        m_state.linearize(dep, eqs, lits);
        TRACE(seq, tout << "propagate eq: " << mk_bounded_pp(n1->get_expr(), m) << " = " << mk_bounded_pp(n2->get_expr(), m) << "\n";);
+        IF_VERBOSE(3, verbose_stream() << "  propagate_eq: " << mk_bounded_pp(n1->get_expr(), m) << " = " << mk_bounded_pp(n2->get_expr(), m) << "\n";);
        justification* js = ctx.mk_justification(
            ext_theory_eq_propagation_justification(get_id(), ctx, lits.size(), lits.data(), eqs.size(), eqs.data(), n1, n2));
        ctx.assign_eq(n1, n2, eq_justification(js));
@ -442,19 +444,55 @@ namespace smt {
        expr_ref_vector units(m);
        for (expr* e : src)
            m_util.str.get_concat_units(e, units);
-        // Step 2: follow e-graph roots for each atomic component
-        for (expr* u : units) {
-            if (!ctx.e_internalized(u)) {
-                dst.push_back(u);
-                continue;
+        // Step 2: follow e-graph roots for each atomic component, recursively
+        // Repeat until fixpoint to handle chains like a → concat(b, t1), t1 → concat(a, t2)
+        bool changed = true;
+        while (changed) {
+            changed = false;
+            expr_ref_vector next(m);
+            for (expr* u : units) {
+                if (!ctx.e_internalized(u)) {
+                    next.push_back(u);
+                    continue;
+                }
+                enode* n = ctx.get_enode(u);
+                enode* r = n->get_root();
+                expr* re = r->get_expr();
+                // If root differs from u, decompose the root
+                if (re != u) {
+                    dep = m_state.mk_join(dep, m_state.mk_dep(n, r));
+                    unsigned old_sz = next.size();
+                    m_util.str.get_concat_units(re, next);
+                    if (next.size() != old_sz + 1 || next.back() != u)
+                        changed = true;
+                    continue;
+                }
+                // Root == u: if u is a variable, search equivalence class
+                // for a concat/string/unit that gives a better decomposition
+                if (m_nielsen.is_var(u)) {
+                    expr* best = nullptr;
+                    for (enode* p = r->get_next(); p != r; p = p->get_next()) {
+                        expr* pe = p->get_expr();
+                        if (m_util.str.is_concat(pe) || m_util.str.is_string(pe) ||
+                            m_util.str.is_unit(pe) || m_util.str.is_empty(pe)) {
+                            best = pe;
+                            dep = m_state.mk_join(dep, m_state.mk_dep(n, p));
+                            break;
+                        }
+                    }
+                    if (best) {
+                        unsigned old_sz = next.size();
+                        m_util.str.get_concat_units(best, next);
+                        if (next.size() != old_sz + 1 || next.back() != u)
+                            changed = true;
+                        continue;
+                    }
+                }
+                next.push_back(u);
            }
-            enode* n = ctx.get_enode(u);
-            enode* r = n->get_root();
-            expr* re = r->get_expr();
-            if (re != u)
-                dep = m_state.mk_join(dep, m_state.mk_dep(n, r));
-            m_util.str.get_concat_units(re, dst);
+            units.swap(next);
        }
+        dst.append(units);
        return true;
    }

@ -499,9 +537,22 @@ namespace smt {
              tout << "= ";
              for (auto* e : rhs) tout << mk_bounded_pp(e, m, 2) << " ";
              tout << "\n";);
+        IF_VERBOSE(3, verbose_stream() << "solve_eq [" << eq.id() << "]: ";
+              for (auto* e : lhs) verbose_stream() << mk_bounded_pp(e, m, 2) << " ";
+              verbose_stream() << "= ";
+              for (auto* e : rhs) verbose_stream() << mk_bounded_pp(e, m, 2) << " ";
+              verbose_stream() << "\n";);

        // Apply Nielsen simplification
        seq::nielsen_result result = m_nielsen.simplify(lhs, rhs);
+        IF_VERBOSE(3, verbose_stream() << "  nielsen=" << (int)result;
+              if (result != seq::nielsen_result::solved) {
+                  verbose_stream() << " reduced: ";
+                  for (auto* e : lhs) verbose_stream() << mk_bounded_pp(e, m, 2) << " ";
+                  verbose_stream() << "= ";
+                  for (auto* e : rhs) verbose_stream() << mk_bounded_pp(e, m, 2) << " ";
+              }
+              verbose_stream() << "\n";);

        switch (result) {
        case seq::nielsen_result::solved: {
@ -512,6 +563,7 @@ namespace smt {
                    return true;
                sort* s = lhs.get(0)->get_sort();
                expr_ref rhs_concat = mk_concat(rhs, s);
+                ensure_enode(rhs_concat);
                ensure_length_axiom(rhs_concat);
                changed = propagate_eq(dep, lhs.get(0), rhs_concat);
            }
@ -520,6 +572,7 @@ namespace smt {
                    return true;
                sort* s = rhs.get(0)->get_sort();
                expr_ref lhs_concat = mk_concat(lhs, s);
+                ensure_enode(lhs_concat);
                ensure_length_axiom(lhs_concat);
                changed = propagate_eq(dep, rhs.get(0), lhs_concat);
            }
@ -538,22 +591,16 @@ namespace smt {

        case seq::nielsen_result::conflict:
            TRACE(seq, tout << "conflict\n";);
+            IF_VERBOSE(3, verbose_stream() << "  CONFLICT on eq " << eq.id() << "\n";);
            set_conflict(dep);
            return true;

        case seq::nielsen_result::reduced: {
            if (lhs.empty() && rhs.empty())
-                return false; // already equal
-            bool changed = false;
-            if (!lhs.empty() && !rhs.empty()) {
-                sort* s = lhs[0]->get_sort();
-                expr_ref l = mk_concat(lhs, s);
-                expr_ref r = mk_concat(rhs, s);
-                ensure_length_axiom(l);
-                ensure_length_axiom(r);
-                changed = propagate_eq(dep, l, r);
-            }
-            else {
+                return false;
+            // If one side is empty, propagate: remaining must be empty
+            if (lhs.empty() || rhs.empty()) {
+                bool changed = false;
                expr_ref_vector& nonempty = lhs.empty() ? rhs : lhs;
                for (expr* e : nonempty) {
                    if (m_util.is_seq(e)) {
@ -561,9 +608,27 @@ namespace smt {
                        changed |= propagate_eq(dep, e, emp);
                    }
                }
+                return changed;
            }
-            TRACE(seq, tout << "reduced" << (changed ? " (propagated)" : " (no change)") << "\n";);
-            return changed;
+            // For single var = ground, propagate
+            if (lhs.size() == 1 && m_nielsen.is_var(lhs.get(0))) {
+                sort* s = lhs.get(0)->get_sort();
+                expr_ref rhs_concat = mk_concat(rhs, s);
+                ensure_enode(rhs_concat);
+                ensure_length_axiom(rhs_concat);
+                bool changed = propagate_eq(dep, lhs.get(0), rhs_concat);
+                if (changed) return true;
+            }
+            else if (rhs.size() == 1 && m_nielsen.is_var(rhs.get(0))) {
+                sort* s = rhs.get(0)->get_sort();
+                expr_ref lhs_concat = mk_concat(lhs, s);
+                ensure_enode(lhs_concat);
+                ensure_length_axiom(lhs_concat);
+                bool changed = propagate_eq(dep, rhs.get(0), lhs_concat);
+                if (changed) return true;
+            }
+            // Fall through to branch/split on the reduced equation
+            break;
        }

        case seq::nielsen_result::split:
@ -572,7 +637,11 @@ namespace smt {
        }

        // Try branching: find a variable and decide x = "" or x ≠ ""
-        return branch_eq(lhs, rhs, dep);
+        if (branch_eq(lhs, rhs, dep))
+            return true;
+
+        // Try Nielsen splitting: x · α = c · β where x is non-empty variable, c is unit
+        return split_variable(lhs, rhs, dep);
    }

    bool theory_nseq::branch_eq(expr_ref_vector const& lhs, expr_ref_vector const& rhs,
@ -598,6 +667,7 @@ namespace smt {
                case l_undef:
                    // Force the empty branch first
                    TRACE(seq, tout << "branch " << mk_bounded_pp(e, m) << " = \"\"\n";);
+                    IF_VERBOSE(3, verbose_stream() << "  branch " << mk_bounded_pp(e, m) << " = \"\"\n";);
                    ctx.force_phase(eq_empty);
                    ctx.mark_as_relevant(eq_empty);
                    m_new_propagation = true;
@ -605,11 +675,11 @@ namespace smt {
                    return true;
                case l_true:
                    // Variable assigned to empty but EQ not yet merged
-                    // Propagate the equality
+                    IF_VERBOSE(3, verbose_stream() << "  branch_eq: " << mk_bounded_pp(e, m) << " eq_empty=l_true, propagating\n";);
                    propagate_eq(dep, e, emp);
                    return true;
                case l_false:
-                    // x ≠ "": try to find a prefix from the other side
+                    IF_VERBOSE(3, verbose_stream() << "  branch_eq: " << mk_bounded_pp(e, m) << " eq_empty=l_false (non-empty)\n";);
                    break;
                }
            }
@ -635,8 +705,23 @@ namespace smt {
        // For x starting one side, try x = prefix of other side
        // x = "" was already tried (assigned false)
        // Now enumerate: x = other[0], x = other[0]·other[1], ...
+        //
+        // Only use length when we have PROVEN tight bounds (lo == hi)
+        // to avoid suggesting candidates based on tentative arithmetic values
        rational x_len;
-        bool has_x_len = get_length(x, x_len);
+        bool has_x_len = false;
+        {
+            rational lo, hi;
+            expr_ref len_x = mk_len(x);
+            if (lower_bound(len_x, lo) && upper_bound(len_x, hi) && lo == hi) {
+                x_len = lo;
+                has_x_len = true;
+            }
+        }
+        // Only enumerate candidates when we know the variable's length
+        // Otherwise let split_variable handle it (peel one char at a time)
+        if (!has_x_len || x_len.is_zero())
+            return false;

        // Find the range of valid prefix lengths
        // Stop before we include x itself (cyclic candidate)
@ -691,6 +776,83 @@ namespace smt {
        return false;
    }

+    // -------------------------------------------------------
+    // Nielsen splitting: x · α = c · β → x = c · x_tail
+    // -------------------------------------------------------
+
+    bool theory_nseq::split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs,
+                                      nseq_dependency* dep) {
+        if (lhs.empty() || rhs.empty())
+            return false;
+
+        // Find: variable on one side facing a unit/constant on the other
+        expr* var = nullptr;
+        expr* head = nullptr;
+        bool var_on_left = false;
+
+        if (m_nielsen.is_var(lhs[0]) && !m_nielsen.is_var(rhs[0])) {
+            var = lhs[0];
+            head = rhs[0];
+            var_on_left = true;
+        }
+        else if (m_nielsen.is_var(rhs[0]) && !m_nielsen.is_var(lhs[0])) {
+            var = rhs[0];
+            head = lhs[0];
+            var_on_left = false;
+        }
+        if (!var || !head)
+            return false;
+
+        // Check that var is known non-empty (branch_eq should have handled empty case)
+        expr_ref emp(m_util.str.mk_empty(var->get_sort()), m);
+        if (ctx.e_internalized(emp) && ctx.e_internalized(var)) {
+            enode* n_var = ctx.get_enode(var);
+            enode* n_emp = ctx.get_enode(emp);
+            if (n_var->get_root() == n_emp->get_root())
+                return false;
+        }
+        literal eq_empty = mk_eq(var, emp, false);
+        if (ctx.get_assignment(eq_empty) != l_false)
+            return false;
+
+        // var is non-empty and the equation forces var to start with head.
+        // Nielsen split: var = head · var_tail
+        expr_ref one(m_autil.mk_int(1), m);
+        expr_ref var_tail(m_sk.mk_tail(var, one), m);
+        ensure_enode(var_tail);
+        mk_var(ctx.get_enode(var_tail));
+        ensure_length_axiom(var_tail);
+
+        // Before committing to the split, ensure the tail's emptiness is decided.
+        // The SAT solver might default to tail ≠ "" without exploring tail = "".
+        // Force the empty case first.
+        expr_ref tail_emp(m_util.str.mk_empty(var_tail->get_sort()), m);
+        literal tail_eq_empty = mk_eq(var_tail, tail_emp, false);
+        if (ctx.get_assignment(tail_eq_empty) == l_undef) {
+            ctx.force_phase(tail_eq_empty);
+            ctx.mark_as_relevant(tail_eq_empty);
+            m_new_propagation = true;
+            return true;
+        }
+
+        expr_ref split_rhs(m_util.str.mk_concat(head, var_tail), m);
+        ensure_enode(split_rhs);
+        ensure_length_axiom(split_rhs);
+        // Don't add explicit length axioms here — CC will derive
+        // len(var) = len(head) + len(tail) from the merge, and
+        // this derivation is properly scoped (undone on backtrack).
+
+        TRACE(seq, tout << "nielsen split: " << mk_bounded_pp(var, m) << " = "
+              << mk_bounded_pp(head, m) << " · tail\n";);
+        IF_VERBOSE(3, verbose_stream() << "  split " << mk_bounded_pp(var, m) << " = "
+              << mk_bounded_pp(head, m) << " · tail\n";);
+
+        bool result = propagate_eq(dep, var, split_rhs);
+        if (result)
+            m_state.stats().m_num_splits++;
+        return result;
+    }
+
    // -------------------------------------------------------
    // Regex membership
    // -------------------------------------------------------
--- a/src/smt/theory_nseq.h
+++ b/src/smt/theory_nseq.h
@ -96,6 +96,7 @@ namespace smt {
        bool branch_eq(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
        bool branch_eq_prefix(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
        bool branch_var_prefix(expr* x, expr_ref_vector const& other, nseq_dependency* dep);
+        bool split_variable(expr_ref_vector const& lhs, expr_ref_vector const& rhs, nseq_dependency* dep);
        bool canonize(expr_ref_vector const& src, expr_ref_vector& dst, nseq_dependency*& dep);
        bool all_eqs_solved();
        bool check_length_conflict(expr* x, expr_ref_vector const& es, nseq_dependency* dep);