From 352b14fe2b26efab7dd78ff51563cdb8ffed0d23 Mon Sep 17 00:00:00 2001 From: Nikolaj Bjorner Date: Tue, 21 Apr 2026 09:16:00 +0200 Subject: [PATCH] fix and optimize not-contains and regex equalities Signed-off-by: Nikolaj Bjorner --- src/ast/rewriter/seq_axioms.cpp | 12 ++++++++ src/smt/seq/seq_nielsen.cpp | 44 +++++++++------------------ src/smt/seq/seq_nielsen.h | 18 ++++++++--- src/smt/theory_nseq.cpp | 54 ++++++++++++++++++++++++++++----- 4 files changed, 86 insertions(+), 42 deletions(-) diff --git a/src/ast/rewriter/seq_axioms.cpp b/src/ast/rewriter/seq_axioms.cpp index ce2070cfd..664d5d352 100644 --- a/src/ast/rewriter/seq_axioms.cpp +++ b/src/ast/rewriter/seq_axioms.cpp @@ -1353,11 +1353,23 @@ namespace seq { Create the recursive function not_contains that implements this recursive definition. Add the axiom contains(a, b) or not_contains(a, b) + + if b is a string (or concatentation of characters/unit values) use regex shortcut: + + ~contain(a, b) => a not in .*b.* */ void axioms::not_contains_axiom(expr *e) { expr* _a = nullptr, *_b = nullptr; VERIFY(seq.str.is_contains(e, _a, _b)); + if (seq.str.is_string(_b)) { + auto star = seq.re.mk_full_seq(seq.re.mk_re(_b->get_sort())); + auto b_re = seq.re.mk_to_re(_b); + auto r = seq.re.mk_concat(star, seq.re.mk_concat(b_re, star)); + auto mem = seq.re.mk_in_re(_a, r); + add_clause(expr_ref(e, m), ~expr_ref(mem, m)); + return; + } auto ca = purify(_a); auto cb = purify(_b); sort* srt = ca->get_sort(); diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index ee8eda179..b6be9b93c 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -866,8 +866,7 @@ namespace seq { // pass 2: detect symbol clashes, empty-propagation, and prefix cancellation for (str_eq& eq : m_str_eq) { - if (!eq.m_lhs || !eq.m_rhs) - continue; + SASSERT(eq.well_formed()); // one side empty, the other not empty => conflict check // (the actual substitution is done in apply_det_modifier) @@ -1915,8 +1914,7 @@ namespace seq { for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) - continue; + SASSERT(eq.well_formed()); for (unsigned od = 0; od < 2; ++od) { bool fwd = (od == 0); euf::snode* lhead = dir_token(eq.m_lhs, fwd); @@ -1951,8 +1949,7 @@ namespace seq { for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) - continue; + SASSERT(eq.well_formed()); for (unsigned od = 0; od < 2; ++od) { bool fwd = od == 0; euf::snode* lhead = dir_token(eq.m_lhs, fwd); @@ -2189,8 +2186,7 @@ namespace seq { str_eq const& eq = node->str_eqs()[eq_idx]; if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) - continue; + SASSERT(eq.well_formed()); // EqSplit only applies to regex-free equations. if (!eq.m_lhs->is_regex_free() || !eq.m_rhs->is_regex_free()) continue; @@ -2368,8 +2364,7 @@ namespace seq { for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) - continue; + SASSERT(eq.well_formed()); euf::snode_vector toks; eq.m_lhs->collect_tokens(toks); for (euf::snode* t : toks) { @@ -2399,8 +2394,7 @@ namespace seq { for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) - continue; + SASSERT(eq.well_formed()); for (unsigned od = 0; od < 2; ++od) { bool local_fwd = (od == 0); euf::snode* lhead = dir_token(eq.m_lhs, local_fwd); @@ -2439,7 +2433,7 @@ namespace seq { str_eq const*& eq_out, bool& fwd) const { for (str_eq const& eq : node->str_eqs()) { - SASSERT(eq.m_lhs && eq.m_rhs && !eq.is_trivial()); + SASSERT(eq.well_formed() && !eq.is_trivial()); for (unsigned od = 0; od < 2; ++od) { bool local_fwd = (od == 0); @@ -2506,8 +2500,7 @@ namespace seq { for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) - continue; + SASSERT(eq.well_formed()); if (eq.m_lhs->is_empty() && eq.m_rhs->first() && eq.m_rhs->first()->is_power()) { power = eq.m_rhs->first(); dep = eq.m_dep; @@ -2560,10 +2553,9 @@ namespace seq { // Look for two directional endpoint power tokens with the same base. for (str_eq const& eq : node->str_eqs()) { + SASSERT(eq.well_formed()); if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) - continue; for (unsigned od = 0; od < 2; ++od) { bool fwd = (od == 0); euf::snode* lhead = dir_token(eq.m_lhs, fwd); @@ -2619,12 +2611,9 @@ namespace seq { bool nielsen_graph::apply_split_power_elim(nielsen_node* node) { for (str_eq const& eq : node->str_eqs()) { + SASSERT(eq.well_formed()); if (eq.is_trivial()) continue; - // NB: Shuvendu - this test is always false based on how str_eqs are constructed - // it can be an assertion to force the invariant that str_eqs always have both sides non-null. - if (!eq.m_lhs || !eq.m_rhs) - continue; for (int side = 0; side < 2; ++side) { euf::snode* pow_side = (side == 0) ? eq.m_lhs : eq.m_rhs; @@ -2760,10 +2749,9 @@ namespace seq { bool nielsen_graph::apply_gpower_intr(nielsen_node* node) { for (str_eq const& eq : node->str_eqs()) { + SASSERT(eq.well_formed()); if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) - continue; // Try both directions (ZIPT's ExtendDir(fwd=true/false)). for (unsigned od = 0; od < 2; ++od) { @@ -3896,7 +3884,7 @@ namespace seq { // Step 1: Compute LHS (|x|) for each non-eliminating substitution // using current m_mod_cnt (before bumping). // Also assert |x|_k >= 0 (mirrors ZIPT's NielsenNode constructor line 172). - svector> lhs_exprs; + vector> lhs_exprs; for (unsigned i = 0; i < substs.size(); ++i) { auto const& s = substs[i]; if (!s.m_var->is_var()) @@ -3904,7 +3892,7 @@ namespace seq { if (!m_seq.is_seq(s.m_var->get_expr())) continue; expr_ref lhs = compute_length_expr(s.m_var); - lhs_exprs.push_back({i, lhs.get()}); + lhs_exprs.push_back({i, lhs}); if (s.is_eliminating()) continue; has_non_elim = true; @@ -3918,12 +3906,10 @@ namespace seq { // Step 3: Compute RHS (|u|) with bumped mod counts and add |x| = |u|. - for (auto const& p : lhs_exprs) { - unsigned idx = p.first; - expr* lhs_expr = p.second; + for (auto const &[idx, lhs] : lhs_exprs) { auto const& s = substs[idx]; expr_ref rhs = compute_length_expr(s.m_replacement); - e->add_side_constraint(mk_constraint(m.mk_eq(lhs_expr, rhs), s.m_dep)); + e->add_side_constraint(mk_constraint(m.mk_eq(lhs, rhs), s.m_dep)); } // Step 4: Restore mod counts (temporary bump for computing RHS only). diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 3212449af..0f58c7d45 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -371,10 +371,10 @@ namespace seq { euf::snode* m_rhs; dep_tracker m_dep; - - str_eq() = default; - str_eq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep): - m_lhs(lhs), m_rhs(rhs), m_dep(dep) {} + str_eq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep): + m_lhs(lhs), m_rhs(rhs), m_dep(dep) { + SASSERT(well_formed()); + } bool operator==(str_eq const& other) const { return m_lhs == other.m_lhs && m_rhs == other.m_rhs; @@ -388,6 +388,10 @@ namespace seq { // check if the constraint contains a given variable bool contains_var(euf::snode* var) const; + + bool well_formed() const { + return !!m_lhs && !!m_rhs; + } }; struct eq_pp { @@ -407,7 +411,7 @@ namespace seq { euf::snode* m_regex; dep_tracker m_dep; - str_mem(): m_str(nullptr), m_regex(nullptr), m_dep(nullptr) {} + // str_mem(): m_str(nullptr), m_regex(nullptr), m_dep(nullptr) {} str_mem(euf::snode* str, euf::snode* regex, dep_tracker const& dep): m_str(str), m_regex(regex), m_dep(dep) {} @@ -425,6 +429,10 @@ namespace seq { // check if the constraint contains a given variable bool contains_var(euf::snode* var) const; + + bool well_formed() const { + return !!m_str && !!m_regex; + } }; struct mem_pp { diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 9ea9fe874..3fcb17adf 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -226,14 +226,33 @@ namespace smt { void theory_nseq::new_eq_eh(theory_var v1, theory_var v2) { try { - expr* e1 = get_enode(v1)->get_expr(); - expr* e2 = get_enode(v2)->get_expr(); + auto n1 = get_enode(v1); + auto n2 = get_enode(v2); + auto e1 = n1->get_expr(); + auto e2 = n2->get_expr(); TRACE(seq, tout << mk_pp(e1, m) << " == " << mk_pp(e2, m) << "\n"); if (m_seq.is_re(e1)) { + seq_rewriter rw(m); + expr_ref s(m), r(m); + r = m_seq.re.mk_union(m_seq.re.mk_diff(e1, e2), m_seq.re.mk_diff(e2, e1)); + switch (rw.some_seq_in_re(r, s)) { + case l_false: + // regexes are equivalent: nothing to do + return; + case l_true: { + // regexes are disjoint: conflict + enode_pair_vector eqs; + literal_vector lits; + eqs.push_back({n1, n2}); + set_conflict(eqs, lits); + return; + } + default: break; + } push_unhandled_pred(); return; } - if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2)) + if (!m_seq.is_seq(e1)) return; euf::snode* s1 = get_snode(e1); euf::snode* s2 = get_snode(e2); @@ -252,12 +271,31 @@ namespace smt { } void theory_nseq::new_diseq_eh(theory_var v1, theory_var v2) { - expr* e1 = get_enode(v1)->get_expr(); - expr* e2 = get_enode(v2)->get_expr(); + auto n1 = get_enode(v1); + auto n2 = get_enode(v2); + auto e1 = n1->get_expr(); + auto e2 = n2->get_expr(); TRACE(seq, tout << mk_pp(e1, m) << " != " << mk_pp(e2, m) << "\n"); - if (m_seq.is_re(e1)) - // regex disequality: nseq cannot verify language non-equivalence - push_unhandled_pred(); + if (m_seq.is_re(e1)) { + seq_rewriter rw(m); + expr_ref s(m), r(m); + r = m_seq.re.mk_union(m_seq.re.mk_diff(e1, e2), m_seq.re.mk_diff(e2, e1)); + switch (rw.some_seq_in_re(r, s)) { + case l_false: { + auto lit = mk_eq(e1, e2, false); + literal_vector lits; + lits.push_back(lit); + ctx.mk_th_axiom(get_id(), lits.size(), lits.data()); + break; + } + case l_true: + // the regexes are different + break; + case l_undef: + push_unhandled_pred(); + break; + } + } else if (m_seq.is_seq(e1) && !m_no_diseq_set.contains(e1) && !m_no_diseq_set.contains(e2)) m_axioms.diseq_axiom(e1, e2); else