fix bug in non-emptiness witness extraction

Signed-off-by: Nikolaj Bjorner <nbjorner@microsoft.com>
2026-06-02 07:07:52 +00:00 · 2026-03-24 12:34:40 -07:00 · 2026-03-24 12:34:40 -07:00 · 5803c6f202
commit 5803c6f202
parent da9d8c8694
7 changed files with 198 additions and 61 deletions
--- a/src/ast/rewriter/bool_rewriter.cpp
+++ b/src/ast/rewriter/bool_rewriter.cpp
@ -1185,4 +1185,26 @@ void bool_rewriter::mk_ge2(expr* a, expr* b, expr* c, expr_ref& r) {
 }


+bool bool_rewriter::decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el) {
+    expr *cond = nullptr, *r1 = nullptr, *r2 = nullptr;
+    if (m().is_ite(r, cond, r1, r2)) {
+        c = cond;
+        th = r1;
+        el = r2;
+        return true;
+    }
+    for (expr *e : subterms::ground(expr_ref(r, m()))) {
+        if (m().is_ite(e, cond, r1, r2)) {
+            c = cond;
+            th = r1;
+            el = r2;
+            return true;
+        }
+    }
+    return false;
+}
+
+
+
+
 template class rewriter_tpl<bool_rewriter_cfg>;
--- a/src/ast/rewriter/bool_rewriter.h
+++ b/src/ast/rewriter/bool_rewriter.h
@ -242,6 +242,9 @@ public:
    void mk_nand(expr * arg1, expr * arg2, expr_ref & result);
    void mk_nor(expr * arg1, expr * arg2, expr_ref & result);
    void mk_ge2(expr* a, expr* b, expr* c, expr_ref& result);
+
+
+    bool decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el);
 };

 struct bool_rewriter_cfg : public default_rewriter_cfg {
--- a/src/ast/rewriter/seq_rewriter.cpp
+++ b/src/ast/rewriter/seq_rewriter.cpp
@ -25,6 +25,7 @@ Authors:
 #include "ast/ast_pp.h"
 #include "ast/ast_ll_pp.h"
 #include "ast/ast_util.h"
+#include "ast/for_each_expr.h"
 #include "ast/well_sorted.h"
 #include "ast/rewriter/var_subst.h"
 #include "ast/rewriter/expr_safe_replace.h"
@ -6083,11 +6084,9 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
        re_eval_pos current = todo.back();
        todo.pop_back();
        r = current.e;
-        // IF_VERBOSE(1, verbose_stream() << "derive " << mk_pp(r, m()) << "\n");
        str.resize(current.str_len);
        if (current.needs_derivation) {
            SASSERT(current.exclude.empty());
-            // We are looking for the next character => generate derivation
            if (visited.is_marked(r))
                continue;
            if (re().is_empty(r))
@ -6095,11 +6094,11 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
            auto info = re().get_info(r);
            if (info.nullable == l_true)
                return l_true;
+            IF_VERBOSE(2, verbose_stream() << "  derive str=" << str.size() << " " << mk_bounded_pp(r, m(), 2) << "\n");
            visited.mark(r);
            if (re().is_union(r)) {
-                for (expr* arg : *to_app(r)) {
+                for (expr* arg : *to_app(r))
                    todo.push_back({ expr_ref(arg, m()), str.size(), {}, true });
-                }
                continue;
            }

@ -6109,7 +6108,7 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect

        buffer<std::pair<unsigned, unsigned>> exclude = std::move(current.exclude);

-        expr* c, * th, * el;
+
        if (re().is_empty(r))
            continue;
        if (re().is_union(r)) {
@ -6118,7 +6117,8 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
            }
            continue;
        }
-        if (m().is_ite(r, c, th, el)) {
+        expr_ref c(m()), th(m()), el(m());
+        if (bool_rewriter(m()).decompose_ite(r, c, th, el)) {
            unsigned low = 0, high = zstring::unicode_max_char();
            bool has_bounds = get_bounds(c, low, high);
            if (!re().is_empty(el)) {
@ -6129,8 +6129,16 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
                    exclude.pop_back();
            }
            if (has_bounds) {
-                // I want this case to be processed first => push it last
-                // reason: current string is only pruned
+
+                if (any_of(subterms::all(th), [&](expr *t) { return m().is_ite(t); })) {
+                    if (low > 0)
+                        exclude.push_back({0, low - 1});
+                    if (high < zstring::unicode_max_char())
+                        exclude.push_back({high + 1, zstring::unicode_max_char()});
+                    todo.push_back({ expr_ref(th, m()), str.size(), exclude, false });
+                    continue;
+                }
+
                SASSERT(low <= high);
                unsigned ch = low;
                bool found = true;
@ -6153,12 +6161,14 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect
                }
                if (found && ch <= high) {
                    str.push_back(ch);
-                    todo.push_back({ expr_ref(th, m()), str.size(), {}, true });
+                    todo.push_back({expr_ref(th, m()), str.size(), {}, true});
                }
            }
            continue;
        }

+
+
        if (is_ground(r)) {
            // ensure selected character is not in exclude
            unsigned ch = 'a';
--- a/src/smt/seq/seq_regex.cpp
+++ b/src/smt/seq/seq_regex.cpp
@ -503,7 +503,8 @@ namespace seq {
    // -----------------------------------------------------------------------

    lbool seq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) {
-        SASSERT(re && re->get_expr());
+        if (!re)
+            return l_undef;
        if (re->is_fail())
            return l_true;
        if (re->is_nullable())
--- a/src/smt/seq_regex.cpp
+++ b/src/smt/seq_regex.cpp
@ -673,55 +673,7 @@ namespace smt {
        }
    }

-    /*
-        Return a list of all (cond, leaf) pairs in a given derivative
-        expression r.

-        Note: this  implementation is inefficient: it simply collects all expressions under an if and 
-        iterates over all combinations.
-
-        This method is still used by:
-            propagate_is_empty
-            propagate_is_non_empty
-    */
-    void seq_regex::get_cofactors(expr* r, expr_ref_pair_vector& result) {
-        obj_hashtable<expr> ifs;
-        expr* cond = nullptr, * r1 = nullptr, * r2 = nullptr;
-        for (expr* e : subterms::ground(expr_ref(r, m))) 
-            if (m.is_ite(e, cond, r1, r2))
-                ifs.insert(cond);
-        
-        expr_ref_vector rs(m);
-        vector<expr_ref_vector> conds;
-        conds.push_back(expr_ref_vector(m));
-        rs.push_back(r);
-        for (expr* c : ifs) {
-            unsigned sz = conds.size();
-            expr_safe_replace rep1(m);
-            expr_safe_replace rep2(m);
-            rep1.insert(c, m.mk_true());
-            rep2.insert(c, m.mk_false());
-            expr_ref r2(m);
-            for (unsigned i = 0; i < sz; ++i) {
-                expr_ref_vector cs = conds[i];
-                cs.push_back(mk_not(m, c));
-                conds.push_back(cs);
-                conds[i].push_back(c);
-                expr_ref r1(rs.get(i), m);
-                rep1(r1, r2);
-                rs[i] = r2;
-                rep2(r1, r2);
-                rs.push_back(r2);
-            }
-        }
-        for (unsigned i = 0; i < conds.size(); ++i) {
-            expr_ref conj = mk_and(conds[i]);
-            expr_ref r(rs.get(i), m);
-            ctx.get_rewriter()(r);
-            if (!m.is_false(conj) && !re().is_empty(r))
-                result.push_back(conj, r);
-        }
-    }

    /*
      is_empty(r, u) => ~is_nullable(r)
@ -877,4 +829,50 @@ namespace smt {
        return std::string("id") + std::to_string(e->get_id());
    }

+    /**
+    Return a list of all (cond, leaf) pairs in a given
+    expression r.
+
+    Note: this  implementation is inefficient: it simply collects all expressions under an if and
+    iterates over all combinations.
+*/
+    void seq_regex::get_cofactors(expr *r, expr_ref_pair_vector &result) {
+        obj_hashtable<expr> ifs;
+        expr *cond = nullptr, *r1 = nullptr, *r2 = nullptr;
+        for (expr *e : subterms::ground(expr_ref(r, m)))
+            if (m.is_ite(e, cond, r1, r2))
+                ifs.insert(cond);
+
+        expr_ref_vector rs(m);
+        vector<expr_ref_vector> conds;
+        conds.push_back(expr_ref_vector(m));
+        rs.push_back(r);
+        for (expr *c : ifs) {
+            unsigned sz = conds.size();
+            expr_safe_replace rep1(m);
+            expr_safe_replace rep2(m);
+            rep1.insert(c, m.mk_true());
+            rep2.insert(c, m.mk_false());
+            expr_ref r2(m);
+            for (unsigned i = 0; i < sz; ++i) {
+                expr_ref_vector cs = conds[i];
+                cs.push_back(m.mk_not(c));
+                conds.push_back(cs);
+                conds[i].push_back(c);
+                expr_ref r1(rs.get(i), m);
+                rep1(r1, r2);
+                rs[i] = r2;
+                rep2(r1, r2);
+                rs.push_back(r2);
+            }
+        }
+        for (unsigned i = 0; i < conds.size(); ++i) {
+            expr_ref conj = mk_and(conds[i]);
+            expr_ref r(rs.get(i), m);
+            ctx.get_rewriter()(r);
+            if (!m.is_false(conj) && !re().is_empty(r))
+                result.push_back(conj, r);
+        }
+    }
+
 }
--- a/src/smt/seq_regex.h
+++ b/src/smt/seq_regex.h
@ -164,7 +164,6 @@ namespace smt {
        // returned by derivative_wrapper
        expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r);
        void get_derivative_targets(expr* r, expr_ref_vector& targets);
-        void get_cofactors(expr* r, expr_ref_pair_vector& result);

        /* 
           Pretty print the regex of the state id to the out stream, 
@ -184,6 +183,9 @@ namespace smt {

        bool block_if_empty(expr* r, literal lit);

+        void get_cofactors(expr *r, expr_ref_pair_vector &result);
+       
+
    public:

        seq_regex(theory_seq& th);
--- a/src/test/seq_regex.cpp
+++ b/src/test/seq_regex.cpp
@ -712,6 +712,105 @@ static void test_some_seq_in_re_excluded_low_regression() {
    std::cout << "  ok: witness=" << ws << " satisfies [A-Z] \\ {A}\n";
 }

+// Regression: some_seq_in_re returns l_false for re.inter of "odd number+\n" and "phone number+\n"
+// The regex is non-empty, a valid witness is e.g. "1111111111\n".
+// Root cause: derivative of re.inter produces nested ITEs, and the witness
+// search incorrectly pushes inner ITE nodes with needs_derivation=true,
+// causing ITE conditions from the first derivative to leak into the next.
+static void test_some_seq_in_re_inter_loop_regression() {
+    std::cout << "test_some_seq_in_re_inter_loop_regression\n";
+    ast_manager m;
+    reg_decl_plugins(m);
+    seq_util su(m);
+    seq_rewriter rw(m);
+    th_rewriter tr(m);
+
+    // Helpers
+    auto mk_to_re = [&](const char* s) -> expr_ref {
+        return expr_ref(su.re.mk_to_re(su.str.mk_string(s)), m);
+    };
+    auto mk_range = [&](const char* lo, const char* hi) -> expr_ref {
+        expr_ref l(su.mk_char(lo[0]), m);
+        expr_ref h(su.mk_char(hi[0]), m);
+        return expr_ref(su.re.mk_range(su.str.mk_unit(l), su.str.mk_unit(h)), m);
+    };
+    auto cat = [&](expr* a, expr* b) -> expr_ref {
+        return expr_ref(su.re.mk_concat(a, b), m);
+    };
+    auto un = [&](expr* a, expr* b) -> expr_ref {
+        return expr_ref(su.re.mk_union(a, b), m);
+    };
+
+    // Build the regex from the crash output:
+    // a!1 = ([1-9][1-9]* | "")
+    expr_ref range19 = mk_range("1", "9");
+    expr_ref a1(su.re.mk_union(
+        su.re.mk_concat(range19, su.re.mk_star(range19)),
+        su.re.mk_to_re(su.str.mk_string(""))), m);
+
+    // a!2 = "3" | "5" | "7" | "9"
+    expr_ref a2 = un(mk_to_re("3"), un(mk_to_re("5"), un(mk_to_re("7"), mk_to_re("9"))));
+
+    // a!3 = (a!1 ++ ("1" | a!2)) ++ "\n"
+    expr_ref a3 = cat(cat(a1, un(mk_to_re("1"), a2)), mk_to_re("\x0a"));
+
+    // a!4 = "(" ++ loop(3,3,[0-9]) ++ ")"
+    expr_ref range09 = mk_range("0", "9");
+    expr_ref loop3(su.re.mk_loop(range09, 3, 3), m);
+    expr_ref a4 = cat(cat(mk_to_re("("), loop3), mk_to_re(")"));
+
+    // a!5 = a!4 ++ ("" | " ") ++ loop(3,3,[0-9])
+    expr_ref a5 = cat(cat(a4, un(mk_to_re(""), mk_to_re(" "))), loop3);
+
+    // a!6 = a!5 ++ ("" | " " | "-")
+    expr_ref sep3 = un(mk_to_re(""), un(mk_to_re(" "), mk_to_re("-")));
+    expr_ref a6 = cat(a5, sep3);
+
+    // a!7 = loop(3,3,[0-9]) ++ ("" | " " | "-")
+    expr_ref a7 = cat(loop3, sep3);
+
+    // a!8 = a!7 ++ loop(3,3,[0-9]) ++ ("" | " " | "-")
+    expr_ref a8 = cat(cat(a7, loop3), sep3);
+
+    // a!9 = a!8 ++ loop(4,4,[0-9]) ++ "\n"
+    expr_ref loop4(su.re.mk_loop(range09, 4, 4), m);
+    expr_ref a9 = cat(cat(a8, loop4), mk_to_re("\x0a"));
+
+    // a!10 = (a!6 ++ loop(4,4,[0-9])) | a!9
+    expr_ref a10 = un(cat(a6, loop4), a9);
+
+    // Final regex = re.inter(a!3, a!10)
+    expr_ref re_expr(su.re.mk_inter(a3, a10), m);
+
+    std::cout << "  regex: " << mk_pp(re_expr, m) << "\n";
+
+    // The regex is non-empty: "1111111111\n" matches both a!3 and a!10
+    // some_seq_in_re must return l_true with a valid witness
+    expr_ref witness(m);
+    lbool wr = rw.some_seq_in_re(re_expr, witness);
+    std::cout << "  some_seq_in_re returned: " << wr << "\n";
+    if (witness)
+        std::cout << "  witness: " << mk_pp(witness, m) << "\n";
+    else
+        std::cout << "  witness: null\n";
+    ENSURE(wr == l_true);
+    ENSURE(witness.get() != nullptr);
+
+    if (wr != l_true || !witness)
+        return;
+
+    // Verify witness satisfies the regex
+    expr_ref in_re(su.re.mk_in_re(witness, re_expr), m);
+    expr_ref in_re_simpl(m);
+    tr(in_re, in_re_simpl);
+    std::cout << "  in_re simplified: " << mk_pp(in_re_simpl, m) << "\n";
+    SASSERT(m.is_true(in_re_simpl));
+
+    zstring ws;
+    VERIFY(su.str.is_string(witness, ws));
+    std::cout << "  ok: witness=\"" << ws << "\" satisfies the intersection regex\n";
+}
+
 void tst_seq_regex() {
    test_seq_regex_instantiation();
    test_seq_regex_is_empty();
@ -731,8 +830,6 @@ void tst_seq_regex() {
    test_bfs_empty_union_of_empties();
    test_bfs_nonempty_range();
    test_bfs_empty_complement_full();
-    test_bfs_null_safety();
-    test_bfs_bounded();
    // New tests for regex membership completion
    test_char_set_is_subset();
    test_stabilizer_store_basic();
@ -743,5 +840,9 @@ void tst_seq_regex() {
    test_is_language_subset_false();
    test_is_language_subset_trivial();
    test_some_seq_in_re_excluded_low_regression();
+    test_some_seq_in_re_inter_loop_regression();
+    // test_bfs_null_safety has a pre-existing failure, run it last
+    test_bfs_null_safety();
+    test_bfs_bounded();
    std::cout << "seq_regex: all tests passed\n";
 }