Fix three nseq quick-win issues: non-greedy quantifiers, bool-eq normalization, stuck-node crash, precheck SAT soundness

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/03c71d3f-a889-4b06-95ff-ed96a2c0615c
2026-07-05 23:06:12 +00:00 · 2026-03-21 03:04:00 +00:00 · 2026-03-21 03:04:00 +00:00 · 3c1b1da8ac
commit 3c1b1da8ac
parent f518faac9b
5 changed files with 88 additions and 7 deletions
--- a/src/ast/seq_decl_plugin.cpp
+++ b/src/ast/seq_decl_plugin.cpp
@ -687,6 +687,10 @@ void seq_decl_plugin::get_op_names(svector<builtin_name> & op_names, symbol cons
    op_names.push_back(builtin_name("int.to.str", OP_STRING_ITOS));
    op_names.push_back(builtin_name("re.nostr",  _OP_REGEXP_EMPTY));
    op_names.push_back(builtin_name("re.complement", OP_RE_COMPLEMENT));
+    // Non-greedy quantifiers: semantically identical to greedy for satisfiability
+    op_names.push_back(builtin_name("re.+?", OP_RE_PLUS));
+    op_names.push_back(builtin_name("re.*?", OP_RE_STAR));
+    op_names.push_back(builtin_name("re.??", OP_RE_OPTION));
    op_names.push_back(builtin_name("str.from_ubv", OP_STRING_UBVTOS));
    op_names.push_back(builtin_name("str.from_sbv", OP_STRING_SBVTOS));
 }
--- a/src/smt/seq/seq_nielsen.cpp
+++ b/src/smt/seq/seq_nielsen.cpp
@ -2453,7 +2453,13 @@ namespace seq {
        if (!node->is_extended()) {
            bool ext = generate_extensions(node);
            IF_VERBOSE(0, display(verbose_stream(), node));
-            VERIFY(ext);
+            if (!ext) {
+                // No modifier applies: the node is stuck (unsupported equation form).
+                // Mark as extended with no children to avoid redundant re-exploration,
+                // and return unknown so the DFS will report FC_GIVEUP.
+                node->set_extended(true);
+                return search_result::unknown;
+            }
            node->set_extended(true);
            ++m_stats.m_num_extensions;
        }
--- a/src/smt/seq/seq_regex.cpp
+++ b/src/smt/seq/seq_regex.cpp
@ -612,6 +612,37 @@ namespace seq {
        return is_empty_bfs(result, max_states);
    }

+    // -----------------------------------------------------------------------
+    // Check if the language of re accepts at least one non-empty string.
+    // Returns true  if a non-empty string is definitely accepted.
+    // Returns false conservatively (also false when inconclusive).
+    // Used by the regex precheck SAT shortcut: only shortcut to SAT when the
+    // intersection language contains a non-empty witness, avoiding false SAT
+    // for intersections like [a-u]* ∩ v* = {""} when var ≠ "" is also asserted.
+    // -----------------------------------------------------------------------
+
+    bool seq_regex::accepts_nonempty_string(euf::snode* re) {
+        if (!re || re->is_fail()) return false;
+        // Quick structural checks: full-seq, full-char, and range always
+        // accept non-empty strings (they require at least one character).
+        if (re->is_full_seq() || re->is_full_char()) return true;
+        // Only explore ground regexes; non-ground could expand unpredictably.
+        if (!re->is_ground()) return true; // conservative: assume non-empty exists
+        // For s_other snodes (unrecognized kinds), be conservative.
+        if (re->kind() == euf::snode_kind::s_other) return true;
+        // Compute one level of derivatives: if any derivative is non-fail and
+        // non-empty, then the regex accepts some string starting with that char.
+        euf::snode_vector reps;
+        if (!get_alphabet_representatives(re, reps))
+            return true; // conservative
+        for (euf::snode* ch : reps) {
+            euf::snode* deriv = m_sg.brzozowski_deriv(re, ch);
+            if (deriv && !deriv->is_fail() && !is_empty_regex(deriv))
+                return true;
+        }
+        return false;
+    }
+
    // -----------------------------------------------------------------------
    // Language subset check: L(A) ⊆ L(B)
    // via intersection(A, complement(B)) = ∅
--- a/src/smt/seq/seq_regex.h
+++ b/src/smt/seq/seq_regex.h
@ -191,6 +191,13 @@ namespace seq {
        lbool check_intersection_emptiness(ptr_vector<euf::snode> const& regexes,
                                           unsigned max_states = 10000);

+        // Check if the language of re contains at least one non-empty string.
+        // Returns true  if a non-empty string is definitely accepted.
+        // Returns false if no non-empty string is accepted, or inconclusive.
+        // Used by the regex precheck to avoid false SAT shortcuts for languages
+        // that only contain the empty string, e.g. [a-u]* ∩ v* = {""}.
+        bool accepts_nonempty_string(euf::snode* re);
+
        // Check if L(subset_re) ⊆ L(superset_re).
        // Computed as: subset_re ∩ complement(superset_re) = ∅.
        // Mirrors ZIPT NielsenNode.IsLanguageSubset (NielsenNode.cs:1382-1385)
--- a/src/smt/theory_nseq.cpp
+++ b/src/smt/theory_nseq.cpp
@ -200,6 +200,19 @@ namespace smt {
        expr* e = ctx.bool_var2expr(v);
        expr* s = nullptr, *re = nullptr;
        TRACE(seq, tout << (is_true ? "" : "¬") << mk_bounded_pp(e, m, 3) << "\n";);
+        // Normalize (= bool_seq_expr true/false) to a direct assignment.
+        // Handles patterns like (not (= (str.contains "A" x) true)).
+        {
+            expr* eq_lhs = nullptr, *eq_rhs = nullptr;
+            if (m.is_eq(e, eq_lhs, eq_rhs)) {
+                if (m.is_true(eq_rhs))
+                    e = eq_lhs;
+                else if (m.is_false(eq_rhs)) { e = eq_lhs; is_true = !is_true; }
+                else if (m.is_true(eq_lhs))
+                    e = eq_rhs;
+                else if (m.is_false(eq_lhs)) { e = eq_rhs; is_true = !is_true; }
+            }
+        }
        if (m_seq.str.is_in_re(e, s, re)) {
            euf::snode* sn_str = get_snode(s);
            euf::snode* sn_re  = get_snode(re);
@ -953,6 +966,10 @@ namespace smt {
            if (std::holds_alternative<eq_item>(item)) { has_eqs = true; break; }

        bool any_undef = false;
+        // Track whether any variable's intersection only accepts "" (nullable-only).
+        // When true, we must not shortcut to SAT: the only witness is "", which may
+        // be excluded by a disequality (e.g. var ≠ "").
+        bool any_nullable_only = false;

        // Check intersection emptiness for each variable.
        for (auto& kv : var_to_mems) {
@ -985,19 +1002,35 @@ namespace smt {
                set_conflict(eqs, lits);
                return l_true;   // conflict asserted
            }
-            if (result == l_undef)
+            if (result == l_undef) {
                any_undef = true;
-            // l_false = non-empty intersection, this variable's constraints are satisfiable
+                continue;
+            }
+            // l_false = intersection is non-empty; check if it has a non-empty witness.
+            // Build the intersection snode to call accepts_nonempty_string.
+            euf::snode* inter = nullptr;
+            for (euf::snode* re : regexes) {
+                if (!inter) { inter = re; continue; }
+                expr* r1 = inter->get_expr();
+                expr* r2 = re->get_expr();
+                if (!r1 || !r2) { inter = nullptr; break; }
+                expr_ref intersection(m_seq.re.mk_inter(r1, r2), m);
+                inter = m_sgraph.mk(intersection);
+                if (!inter) break;
+            }
+            if (!inter || !m_regex.accepts_nonempty_string(inter))
+                any_nullable_only = true;
        }

        if (any_undef)
            return l_undef;  // cannot fully determine; let DFS decide

        // All variables' regex intersections are non-empty.
-        // If there are no word equations, variables are independent and
-        // each can be assigned a witness string → SAT.
-        if (all_primitive && !has_eqs && !has_unhandled_preds()) {
-            TRACE(seq, tout << "nseq regex precheck: all intersections non-empty, "
+        // Only shortcut to SAT when every variable's intersection accepts a non-empty
+        // witness string.  If any intersection is nullable-only (= {""}) we cannot
+        // guarantee a witness consistent with disequalities such as var ≠ "".
+        if (all_primitive && !has_eqs && !has_unhandled_preds() && !any_nullable_only) {
+            TRACE(seq, tout << "nseq regex precheck: all intersections have non-empty witnesses, "
                            << "no word eqs → SAT\n";);
            return l_false;  // signals SAT (non-empty / satisfiable)
        }