From 3fdd903373039849a8dfa30d9da9452b946f2531 Mon Sep 17 00:00:00 2001
From: CEisenhofer <clemens.eisenhofer@tuwien.ac.at>
Date: Tue, 14 Apr 2026 15:48:13 +0200
Subject: [PATCH] Bug if uninternalized literal becomes internalized and
 immediately false

---
 src/smt/seq/seq_nielsen.cpp | 42 ++++++++++---------------------------
 src/smt/seq/seq_nielsen.h   | 12 +++++------
 src/smt/theory_nseq.cpp     | 27 ++++++++++++++----------
 3 files changed, 33 insertions(+), 48 deletions(-)

diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp
index 91829d36b..421a1018d 100644
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@@ -262,6 +262,7 @@ namespace seq {
 
     void nielsen_node::add_constraint(constraint const &c) {
         auto& m = graph().get_manager();
+        // TODO: Is it possible that we miss a conflict if we decompose?
         if (m.is_and(c.fml)) {
             // We have to add all - even if some of it conflict
             // [otw. we would leave the node partially initialized]
@@ -272,6 +273,7 @@ namespace seq {
         }
         expr* l, *r;
         if (m.is_eq(c.fml, l, r)) {
+            // To avoid filling memory with tautologies (that would happen quite often)
             if (l == r)
                 return;
         }
@@ -1248,6 +1250,8 @@ namespace seq {
             m_sat_path.reset();
             m_conflict_sources.reset();
 
+            TRACE(seq, tout << "Solve call " << m_stats.m_num_solve_calls << "\n");
+
             // Constraint.Shared: assert root-level length/Parikh constraints to the
             // solver at the base level, so they are visible during all feasibility checks.
             assert_root_constraints_to_solver();
@@ -1379,8 +1383,6 @@ namespace seq {
             }
         }
         if (mem) {
-            static int cnt = 0;
-            std::cout << "search cnt: " << cnt << std::endl;
             vector<dep_manager::value, false> deps;
             m_dep_mgr.linearize(mem->m_dep, deps);
             for (auto& dep : deps) {
@@ -1452,6 +1454,13 @@ namespace seq {
                 node->set_conflict(backtrack_reason::regex, dep);
                 return search_result::unsat;
             }
+            assert_node_new_int_constraints(node);
+            // We need to have everything asserted before reporting SAT
+            // (otw. the outer solver might assume false-assigned literals to be true)
+            if (node->is_currently_conflict()) {
+                ++m_stats.m_num_simplify_conflict;
+                return search_result::unsat;
+            }
             m_sat_node = node;
             m_sat_path = cur_path;
             return search_result::sat;
@@ -4172,21 +4181,6 @@ namespace seq {
 
         SASSERT(dep);
 
-        static unsigned cnt = 0;
-        std::cout << cnt << std::endl;
-
-        cnt++;
-
-        std::cout << "Dep:\n";
-        vector<dep_manager::value, false> deps;
-        m_dep_mgr.linearize(dep, deps);
-        for (auto& dep : deps) {
-            if (std::holds_alternative<enode_pair>(dep))
-                std::cout << "eq: " << mk_pp(std::get<enode_pair>(dep).first->get_expr(), m) << " = " << mk_pp(std::get<enode_pair>(dep).second->get_expr(), m) << std::endl;
-            else
-                std::cout << "mem literal: " << std::get<sat::literal>(dep) << std::endl;
-        }
-
         euf::snode* approx = nullptr;
         for (euf::snode* tok : tokens) {
             euf::snode* tok_re = nullptr;
@@ -4200,20 +4194,6 @@ namespace seq {
             else if (tok->is_var()) {
                 // Variable → intersection of primitive regex constraints, or Σ*
                 euf::snode* x_range = m_seq_regex->collect_primitive_regex_intersection(tok, node, m_dep_mgr, dep);
-                std::cout << "Primitives for " << mk_pp(tok->get_expr(), m) << ":\n";
-                if (x_range)
-                    std::cout << mk_pp(x_range->get_expr(), m) << std::endl;
-                else
-                    std::cout << "null" << std::endl;
-                std::cout << "Dep:\n";
-                deps.reset();
-                m_dep_mgr.linearize(dep, deps);
-                for (auto& dep : deps) {
-                    if (std::holds_alternative<enode_pair>(dep))
-                        std::cout << "eq: " << mk_pp(std::get<enode_pair>(dep).first->get_expr(), m) << " = " << mk_pp(std::get<enode_pair>(dep).second->get_expr(), m) << std::endl;
-                    else
-                        std::cout << "mem literal: " << std::get<sat::literal>(dep) << std::endl;
-                }
                 if (x_range)
                     tok_re = x_range;
                 else {
diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h
index 42fa8a075..2fdbe1111 100644
--- a/src/smt/seq/seq_nielsen.h
+++ b/src/smt/seq/seq_nielsen.h
@@ -978,6 +978,12 @@ namespace seq {
         dep_manager& dep_mgr() { return m_dep_mgr; }
         dep_manager const& dep_mgr() const { return m_dep_mgr; }
 
+        // Assert the constraints of `node` that are new relative to its
+        // parent (indices [m_parent_ic_count..end)) into the current solver scope.
+        // Called by search_dfs after simplify_and_init so that the newly derived
+        // bounds become visible to subsequent check() and check_lp_le() calls.
+        void assert_node_new_int_constraints(nielsen_node* node);
+
     private:
 
         vector<dep_source, false> m_conflict_sources;
@@ -1143,12 +1149,6 @@ namespace seq {
         // Mirrors ZIPT's Constraint.Shared forwarding mechanism.
         void assert_root_constraints_to_solver();
 
-        // Assert the constraints of `node` that are new relative to its
-        // parent (indices [m_parent_ic_count..end)) into the current solver scope.
-        // Called by search_dfs after simplify_and_init so that the newly derived
-        // bounds become visible to subsequent check() and check_lp_le() calls.
-        void assert_node_new_int_constraints(nielsen_node* node);
-
         // Generate |LHS| = |RHS| length constraints for a non-root node's own
         // string equalities and add them as constraints on the node.
         // Called once per node (guarded by m_node_len_constraints_generated).
diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp
index 73bd8c30e..6ae275bc2 100644
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@@ -85,9 +85,14 @@ namespace smt {
 
         std::function<sat::literal(expr*)> literal_if_false = [&](expr* e) {
             bool is_not = m.is_not(e, e);
-            TRACE(seq, tout << "literal_if_false: " << mk_pp(e, m) << " internalized - " << ctx.e_internalized(e) << "\n");
-            if (!ctx.e_internalized(e))
-                return sat::null_literal;
+            TRACE(seq, tout << "literal_if_false: " << mk_pp(e, m) << " internalized - " << ctx.b_internalized(e) << "\n");
+            if (!ctx.b_internalized(e))
+                // it can happen that the element is not internalized, but as soon as we do it, it becomes false.
+                // In case we just skip one of those uninternalized expressions,
+                // adding the Nielsen assumption later will fail
+                // Alternatively, we could just retry Nielsen saturation in case
+                // adding the Nielsen assumption yields the assumption being false after internalizing
+                ctx.internalize(to_app(e), false);
             literal lit = ctx.get_literal(e);
             if (is_not)
                 lit.neg();
@@ -641,6 +646,8 @@ namespace smt {
                 return FC_CONTINUE;
             }
 
+            m_nielsen.assert_node_new_int_constraints(m_nielsen.root());
+
             ++m_num_final_checks;
 
             m_nielsen.set_max_search_depth(get_fparams().m_nseq_max_depth);
@@ -649,10 +656,12 @@ namespace smt {
             m_nielsen.set_signature_split(get_fparams().m_nseq_signature);
             m_nielsen.set_regex_factorization(get_fparams().m_nseq_regex_factorization);
 
+            SASSERT(!m_nielsen.root()->is_currently_conflict());
+
             // Regex membership pre-check: before running DFS, check intersection
             // emptiness for each variable's regex constraints.  This handles
             // regex-only problems that the DFS cannot efficiently solve.
-            if (get_fparams().m_nseq_regex_precheck) {
+            if (!m_nielsen.root()->is_currently_conflict() && get_fparams().m_nseq_regex_precheck) {
                 switch (check_regex_memberships_precheck()) {
                 case l_true:
                     // conflict was asserted inside check_regex_memberships_precheck
@@ -674,7 +683,7 @@ namespace smt {
             IF_VERBOSE(1, verbose_stream() << "nseq final_check: calling solve()\n";);
 
             // here the actual Nielsen solving happens
-            auto result = m_nielsen.solve(); std::cout << "Solve returned " << (int)result << std::endl;
+            auto result = m_nielsen.solve();
 
 #ifdef Z3DEBUG
             // Examining the Nielsen graph is probably the best way of debugging
@@ -750,11 +759,10 @@ namespace smt {
             case l_true: 
                 break;
             case l_undef:
-                std::cout << "Undef [" << lit << "]: " << mk_pp(c.fml, m) << std::endl;
+                // std::cout << "Undef [" << lit << "]: " << mk_pp(c.fml, m) << std::endl;
                 has_undef = true; 
                 // Commit the chosen Nielsen assumption to the SAT core so it
                 // cannot remain permanently undefined in a partial model.
-                ctx.mk_th_axiom(get_id(), 1, &lit);
                 ctx.force_phase(lit);
                 IF_VERBOSE(2, verbose_stream() << 
                     "nseq final_check: adding nielsen assumption " << c.fml << "\n";);
@@ -762,11 +770,8 @@ namespace smt {
                 break;
             case l_false: 
                 // this should not happen because nielsen checks for this before returning a satisfying path.
-                IF_VERBOSE(1, verbose_stream()
-                                  << "nseq final_check: nielsen assumption " << c.fml << " is false\n";);
+                TRACE(seq, tout << "nseq final_check: nielsen assumption " << c.fml << " is false; internalized - " << ctx.e_internalized(c.fml) << "\n");
                 has_undef = true;
-                ctx.mk_th_axiom(get_id(), 1, &lit);
-                ctx.force_phase(lit);
                 std::cout << "False [" << lit << "]: " << mk_pp(c.fml, m) << std::endl;
                 break;
             }