diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index 1faef3e097..fc6531144b 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -137,6 +137,15 @@ namespace euf { n->m_level = 1; n->m_length = 1; n->m_is_classical = false; + // Defined seq operations (str.replace*, str.replace_all, str.replace_re*) are + // classified as s_var because they have no dedicated snode kind, but they are NOT + // free variables: their value is fixed by the recfun/axiom layer. Mark them rigid + // so the Nielsen modifiers never eliminate or split them (see snode::is_rigid). + { + expr* e = n->m_expr; + n->m_rigid = e && (m_seq.str.is_replace(e) || m_seq.str.is_replace_all(e) || + m_seq.str.is_replace_re(e) || m_seq.str.is_replace_re_all(e)); + } break; case snode_kind::s_unit: diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index 1faba384f1..e1a05d175b 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -70,6 +70,7 @@ namespace euf { bool m_ground = true; // no uninterpreted string variables bool m_regex_free = true; // no regex constructs bool m_is_classical = true; // classical regular expression + bool m_rigid = false; // defined seq op (replace/replace_all/replace_re*) — opaque to Nielsen, never substitute/split unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons) unsigned m_length = 0; // token count, number of leaf tokens in the tree @@ -211,6 +212,19 @@ namespace euf { bool is_var() const { return m_kind == snode_kind::s_var; } + // A rigid snode is a defined sequence operation (str.replace, str.replace_all, + // str.replace_re, str.replace_re_all) whose semantics are supplied externally by + // the recfun/axiom layer. It is classified as s_var but must NOT be treated as a + // free, eliminable Nielsen variable: substituting/Nielsen-splitting it (e.g. + // unifying two distinct replace_all applications) silently discards its definition + // and yields invalid models. theory_nseq gives up (FC_GIVEUP) when a rigid snode + // participates in the constraints (see nielsen_node::references_rigid), deferring + // to the recfun/axiom layer instead of searching. Note: replace_all etc. on + // concrete arguments are folded away by seq_rewriter before reaching here, so this + // only affects genuinely symbolic occurrences. + bool is_rigid() const { + return m_rigid; + } bool is_unit() const { return m_kind == snode_kind::s_unit; } diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 5dfeb9c331..2d35dde672 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -1833,6 +1833,26 @@ namespace seq { return true; } + static bool snode_has_rigid(euf::snode const* s) { + for (euf::snode const* t : s->collect_tokens()) + if (t->is_rigid()) + return true; + return false; + } + + bool nielsen_node::references_rigid() const { + for (str_eq const& eq : m_str_eq) + if (snode_has_rigid(eq.m_lhs) || snode_has_rigid(eq.m_rhs)) + return true; + for (str_deq const& dq : m_str_deq) + if (snode_has_rigid(dq.m_lhs) || snode_has_rigid(dq.m_rhs)) + return true; + for (str_mem const& mem : m_str_mem) + if (snode_has_rigid(mem.m_str) || snode_has_rigid(mem.m_regex)) + return true; + return false; + } + euf::snode const* nielsen_graph::mk_rewrite(expr* e) const { expr_ref er(e, m); th_rewriter rw(m); @@ -1945,6 +1965,7 @@ namespace seq { if (r == search_result::unsat) { ++m_stats.m_num_unsat; const auto deps = collect_conflict_deps(); + m_conflict_sources.reset(); m_dep_mgr.linearize(deps, m_conflict_sources); TRACE(seq, display(tout, m_root)); return r; diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 18a1661135..c6658f18a9 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -654,6 +654,14 @@ namespace seq { // true if all str_eqs are trivial and there are no str_mems bool is_satisfied() const; + // true if ANY equality/disequality/membership references a rigid (defined) op + // snode (str.replace, str.replace_all, str.replace_re*). Used to defer to the + // axiom layer (FC_GIVEUP) before searching: these terms are not free variables + // but are pinned by the recfun/axiom layer, and the Nielsen modifiers would + // substitute/unify them as if free, discarding their definition and producing + // invalid models. + bool references_rigid() const; + // render constraint set as an HTML fragment for DOT node labels. // mirrors ZIPT's NielsenNode.ToHtmlString() std::ostream& to_html(std::ostream& out, obj_map& names, uint64_t& next_id, ast_manager& m) const; diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index e6442170e2..20a43f5c57 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -922,6 +922,22 @@ namespace smt { SASSERT(!m_nielsen.root()->is_currently_conflict()); + // nseq cannot soundly reason about defined sequence operations (str.replace, + // str.replace_all, str.replace_re*) inside the Nielsen graph: they are not free + // variables but are pinned by the recfun/axiom layer. The modifiers (and the + // regex pre-check) would treat them as free (e.g. unifying two distinct + // replace_all applications), silently discarding their definition and yielding + // invalid models. When such a rigid term participates in the constraints, defer + // to the axiom layer and give up. (Concrete replace_all etc. are folded to + // literals by seq_rewriter before reaching the sgraph, so only genuinely + // symbolic occurrences are affected.) This check precedes the regex pre-check + // so a rigid term as a membership subject cannot yield a bogus SAT either. + if (m_nielsen.root()->references_rigid()) { + IF_VERBOSE(1, verbose_stream() << "nseq final_check: rigid defined op present, FC_GIVEUP\n";); + TRACE(seq, tout << "nseq final_check: rigid defined op present, FC_GIVEUP\n"); + return FC_GIVEUP; + } + // Regex membership pre-check: before running DFS, check intersection // emptiness for each variable's regex constraints. This handles // regex-only problems that the DFS cannot efficiently solve.