From 3fdd903373039849a8dfa30d9da9452b946f2531 Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Tue, 14 Apr 2026 15:48:13 +0200 Subject: [PATCH] Bug if uninternalized literal becomes internalized and immediately false --- src/smt/seq/seq_nielsen.cpp | 42 ++++++++++--------------------------- src/smt/seq/seq_nielsen.h | 12 +++++------ src/smt/theory_nseq.cpp | 27 ++++++++++++++---------- 3 files changed, 33 insertions(+), 48 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 91829d36b..421a1018d 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -262,6 +262,7 @@ namespace seq { void nielsen_node::add_constraint(constraint const &c) { auto& m = graph().get_manager(); + // TODO: Is it possible that we miss a conflict if we decompose? if (m.is_and(c.fml)) { // We have to add all - even if some of it conflict // [otw. we would leave the node partially initialized] @@ -272,6 +273,7 @@ namespace seq { } expr* l, *r; if (m.is_eq(c.fml, l, r)) { + // To avoid filling memory with tautologies (that would happen quite often) if (l == r) return; } @@ -1248,6 +1250,8 @@ namespace seq { m_sat_path.reset(); m_conflict_sources.reset(); + TRACE(seq, tout << "Solve call " << m_stats.m_num_solve_calls << "\n"); + // Constraint.Shared: assert root-level length/Parikh constraints to the // solver at the base level, so they are visible during all feasibility checks. assert_root_constraints_to_solver(); @@ -1379,8 +1383,6 @@ namespace seq { } } if (mem) { - static int cnt = 0; - std::cout << "search cnt: " << cnt << std::endl; vector deps; m_dep_mgr.linearize(mem->m_dep, deps); for (auto& dep : deps) { @@ -1452,6 +1454,13 @@ namespace seq { node->set_conflict(backtrack_reason::regex, dep); return search_result::unsat; } + assert_node_new_int_constraints(node); + // We need to have everything asserted before reporting SAT + // (otw. the outer solver might assume false-assigned literals to be true) + if (node->is_currently_conflict()) { + ++m_stats.m_num_simplify_conflict; + return search_result::unsat; + } m_sat_node = node; m_sat_path = cur_path; return search_result::sat; @@ -4172,21 +4181,6 @@ namespace seq { SASSERT(dep); - static unsigned cnt = 0; - std::cout << cnt << std::endl; - - cnt++; - - std::cout << "Dep:\n"; - vector deps; - m_dep_mgr.linearize(dep, deps); - for (auto& dep : deps) { - if (std::holds_alternative(dep)) - std::cout << "eq: " << mk_pp(std::get(dep).first->get_expr(), m) << " = " << mk_pp(std::get(dep).second->get_expr(), m) << std::endl; - else - std::cout << "mem literal: " << std::get(dep) << std::endl; - } - euf::snode* approx = nullptr; for (euf::snode* tok : tokens) { euf::snode* tok_re = nullptr; @@ -4200,20 +4194,6 @@ namespace seq { else if (tok->is_var()) { // Variable → intersection of primitive regex constraints, or Σ* euf::snode* x_range = m_seq_regex->collect_primitive_regex_intersection(tok, node, m_dep_mgr, dep); - std::cout << "Primitives for " << mk_pp(tok->get_expr(), m) << ":\n"; - if (x_range) - std::cout << mk_pp(x_range->get_expr(), m) << std::endl; - else - std::cout << "null" << std::endl; - std::cout << "Dep:\n"; - deps.reset(); - m_dep_mgr.linearize(dep, deps); - for (auto& dep : deps) { - if (std::holds_alternative(dep)) - std::cout << "eq: " << mk_pp(std::get(dep).first->get_expr(), m) << " = " << mk_pp(std::get(dep).second->get_expr(), m) << std::endl; - else - std::cout << "mem literal: " << std::get(dep) << std::endl; - } if (x_range) tok_re = x_range; else { diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 42fa8a075..2fdbe1111 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -978,6 +978,12 @@ namespace seq { dep_manager& dep_mgr() { return m_dep_mgr; } dep_manager const& dep_mgr() const { return m_dep_mgr; } + // Assert the constraints of `node` that are new relative to its + // parent (indices [m_parent_ic_count..end)) into the current solver scope. + // Called by search_dfs after simplify_and_init so that the newly derived + // bounds become visible to subsequent check() and check_lp_le() calls. + void assert_node_new_int_constraints(nielsen_node* node); + private: vector m_conflict_sources; @@ -1143,12 +1149,6 @@ namespace seq { // Mirrors ZIPT's Constraint.Shared forwarding mechanism. void assert_root_constraints_to_solver(); - // Assert the constraints of `node` that are new relative to its - // parent (indices [m_parent_ic_count..end)) into the current solver scope. - // Called by search_dfs after simplify_and_init so that the newly derived - // bounds become visible to subsequent check() and check_lp_le() calls. - void assert_node_new_int_constraints(nielsen_node* node); - // Generate |LHS| = |RHS| length constraints for a non-root node's own // string equalities and add them as constraints on the node. // Called once per node (guarded by m_node_len_constraints_generated). diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 73bd8c30e..6ae275bc2 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -85,9 +85,14 @@ namespace smt { std::function literal_if_false = [&](expr* e) { bool is_not = m.is_not(e, e); - TRACE(seq, tout << "literal_if_false: " << mk_pp(e, m) << " internalized - " << ctx.e_internalized(e) << "\n"); - if (!ctx.e_internalized(e)) - return sat::null_literal; + TRACE(seq, tout << "literal_if_false: " << mk_pp(e, m) << " internalized - " << ctx.b_internalized(e) << "\n"); + if (!ctx.b_internalized(e)) + // it can happen that the element is not internalized, but as soon as we do it, it becomes false. + // In case we just skip one of those uninternalized expressions, + // adding the Nielsen assumption later will fail + // Alternatively, we could just retry Nielsen saturation in case + // adding the Nielsen assumption yields the assumption being false after internalizing + ctx.internalize(to_app(e), false); literal lit = ctx.get_literal(e); if (is_not) lit.neg(); @@ -641,6 +646,8 @@ namespace smt { return FC_CONTINUE; } + m_nielsen.assert_node_new_int_constraints(m_nielsen.root()); + ++m_num_final_checks; m_nielsen.set_max_search_depth(get_fparams().m_nseq_max_depth); @@ -649,10 +656,12 @@ namespace smt { m_nielsen.set_signature_split(get_fparams().m_nseq_signature); m_nielsen.set_regex_factorization(get_fparams().m_nseq_regex_factorization); + SASSERT(!m_nielsen.root()->is_currently_conflict()); + // Regex membership pre-check: before running DFS, check intersection // emptiness for each variable's regex constraints. This handles // regex-only problems that the DFS cannot efficiently solve. - if (get_fparams().m_nseq_regex_precheck) { + if (!m_nielsen.root()->is_currently_conflict() && get_fparams().m_nseq_regex_precheck) { switch (check_regex_memberships_precheck()) { case l_true: // conflict was asserted inside check_regex_memberships_precheck @@ -674,7 +683,7 @@ namespace smt { IF_VERBOSE(1, verbose_stream() << "nseq final_check: calling solve()\n";); // here the actual Nielsen solving happens - auto result = m_nielsen.solve(); std::cout << "Solve returned " << (int)result << std::endl; + auto result = m_nielsen.solve(); #ifdef Z3DEBUG // Examining the Nielsen graph is probably the best way of debugging @@ -750,11 +759,10 @@ namespace smt { case l_true: break; case l_undef: - std::cout << "Undef [" << lit << "]: " << mk_pp(c.fml, m) << std::endl; + // std::cout << "Undef [" << lit << "]: " << mk_pp(c.fml, m) << std::endl; has_undef = true; // Commit the chosen Nielsen assumption to the SAT core so it // cannot remain permanently undefined in a partial model. - ctx.mk_th_axiom(get_id(), 1, &lit); ctx.force_phase(lit); IF_VERBOSE(2, verbose_stream() << "nseq final_check: adding nielsen assumption " << c.fml << "\n";); @@ -762,11 +770,8 @@ namespace smt { break; case l_false: // this should not happen because nielsen checks for this before returning a satisfying path. - IF_VERBOSE(1, verbose_stream() - << "nseq final_check: nielsen assumption " << c.fml << " is false\n";); + TRACE(seq, tout << "nseq final_check: nielsen assumption " << c.fml << " is false; internalized - " << ctx.e_internalized(c.fml) << "\n"); has_undef = true; - ctx.mk_th_axiom(get_id(), 1, &lit); - ctx.force_phase(lit); std::cout << "False [" << lit << "]: " << mk_pp(c.fml, m) << std::endl; break; }