diff --git a/src/ast/rewriter/bool_rewriter.cpp b/src/ast/rewriter/bool_rewriter.cpp index 321bd6f47..d603e9b07 100644 --- a/src/ast/rewriter/bool_rewriter.cpp +++ b/src/ast/rewriter/bool_rewriter.cpp @@ -1185,4 +1185,26 @@ void bool_rewriter::mk_ge2(expr* a, expr* b, expr* c, expr_ref& r) { } +bool bool_rewriter::decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el) { + expr *cond = nullptr, *r1 = nullptr, *r2 = nullptr; + if (m().is_ite(r, cond, r1, r2)) { + c = cond; + th = r1; + el = r2; + return true; + } + for (expr *e : subterms::ground(expr_ref(r, m()))) { + if (m().is_ite(e, cond, r1, r2)) { + c = cond; + th = r1; + el = r2; + return true; + } + } + return false; +} + + + + template class rewriter_tpl; diff --git a/src/ast/rewriter/bool_rewriter.h b/src/ast/rewriter/bool_rewriter.h index 2b52404e5..56c0b3e77 100644 --- a/src/ast/rewriter/bool_rewriter.h +++ b/src/ast/rewriter/bool_rewriter.h @@ -242,6 +242,9 @@ public: void mk_nand(expr * arg1, expr * arg2, expr_ref & result); void mk_nor(expr * arg1, expr * arg2, expr_ref & result); void mk_ge2(expr* a, expr* b, expr* c, expr_ref& result); + + + bool decompose_ite(expr *r, expr_ref &c, expr_ref &th, expr_ref &el); }; struct bool_rewriter_cfg : public default_rewriter_cfg { diff --git a/src/ast/rewriter/seq_rewriter.cpp b/src/ast/rewriter/seq_rewriter.cpp index 787c55e6f..555f9523d 100644 --- a/src/ast/rewriter/seq_rewriter.cpp +++ b/src/ast/rewriter/seq_rewriter.cpp @@ -25,6 +25,7 @@ Authors: #include "ast/ast_pp.h" #include "ast/ast_ll_pp.h" #include "ast/ast_util.h" +#include "ast/for_each_expr.h" #include "ast/well_sorted.h" #include "ast/rewriter/var_subst.h" #include "ast/rewriter/expr_safe_replace.h" @@ -6083,11 +6084,9 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect re_eval_pos current = todo.back(); todo.pop_back(); r = current.e; - // IF_VERBOSE(1, verbose_stream() << "derive " << mk_pp(r, m()) << "\n"); str.resize(current.str_len); if (current.needs_derivation) { SASSERT(current.exclude.empty()); - // We are looking for the next character => generate derivation if (visited.is_marked(r)) continue; if (re().is_empty(r)) @@ -6095,11 +6094,11 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect auto info = re().get_info(r); if (info.nullable == l_true) return l_true; + IF_VERBOSE(2, verbose_stream() << " derive str=" << str.size() << " " << mk_bounded_pp(r, m(), 2) << "\n"); visited.mark(r); if (re().is_union(r)) { - for (expr* arg : *to_app(r)) { + for (expr* arg : *to_app(r)) todo.push_back({ expr_ref(arg, m()), str.size(), {}, true }); - } continue; } @@ -6109,7 +6108,7 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect buffer> exclude = std::move(current.exclude); - expr* c, * th, * el; + if (re().is_empty(r)) continue; if (re().is_union(r)) { @@ -6118,7 +6117,8 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect } continue; } - if (m().is_ite(r, c, th, el)) { + expr_ref c(m()), th(m()), el(m()); + if (bool_rewriter(m()).decompose_ite(r, c, th, el)) { unsigned low = 0, high = zstring::unicode_max_char(); bool has_bounds = get_bounds(c, low, high); if (!re().is_empty(el)) { @@ -6129,8 +6129,16 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect exclude.pop_back(); } if (has_bounds) { - // I want this case to be processed first => push it last - // reason: current string is only pruned + + if (any_of(subterms::all(th), [&](expr *t) { return m().is_ite(t); })) { + if (low > 0) + exclude.push_back({0, low - 1}); + if (high < zstring::unicode_max_char()) + exclude.push_back({high + 1, zstring::unicode_max_char()}); + todo.push_back({ expr_ref(th, m()), str.size(), exclude, false }); + continue; + } + SASSERT(low <= high); unsigned ch = low; bool found = true; @@ -6153,12 +6161,14 @@ lbool seq_rewriter::some_string_in_re(expr_mark& visited, expr* r, unsigned_vect } if (found && ch <= high) { str.push_back(ch); - todo.push_back({ expr_ref(th, m()), str.size(), {}, true }); + todo.push_back({expr_ref(th, m()), str.size(), {}, true}); } } continue; } + + if (is_ground(r)) { // ensure selected character is not in exclude unsigned ch = 'a'; diff --git a/src/smt/seq/seq_regex.cpp b/src/smt/seq/seq_regex.cpp index c0597bd51..a52772a2f 100644 --- a/src/smt/seq/seq_regex.cpp +++ b/src/smt/seq/seq_regex.cpp @@ -503,7 +503,8 @@ namespace seq { // ----------------------------------------------------------------------- lbool seq_regex::is_empty_bfs(euf::snode* re, unsigned max_states) { - SASSERT(re && re->get_expr()); + if (!re) + return l_undef; if (re->is_fail()) return l_true; if (re->is_nullable()) diff --git a/src/smt/seq_regex.cpp b/src/smt/seq_regex.cpp index 64487a21e..4c9b19ae1 100644 --- a/src/smt/seq_regex.cpp +++ b/src/smt/seq_regex.cpp @@ -673,55 +673,7 @@ namespace smt { } } - /* - Return a list of all (cond, leaf) pairs in a given derivative - expression r. - Note: this implementation is inefficient: it simply collects all expressions under an if and - iterates over all combinations. - - This method is still used by: - propagate_is_empty - propagate_is_non_empty - */ - void seq_regex::get_cofactors(expr* r, expr_ref_pair_vector& result) { - obj_hashtable ifs; - expr* cond = nullptr, * r1 = nullptr, * r2 = nullptr; - for (expr* e : subterms::ground(expr_ref(r, m))) - if (m.is_ite(e, cond, r1, r2)) - ifs.insert(cond); - - expr_ref_vector rs(m); - vector conds; - conds.push_back(expr_ref_vector(m)); - rs.push_back(r); - for (expr* c : ifs) { - unsigned sz = conds.size(); - expr_safe_replace rep1(m); - expr_safe_replace rep2(m); - rep1.insert(c, m.mk_true()); - rep2.insert(c, m.mk_false()); - expr_ref r2(m); - for (unsigned i = 0; i < sz; ++i) { - expr_ref_vector cs = conds[i]; - cs.push_back(mk_not(m, c)); - conds.push_back(cs); - conds[i].push_back(c); - expr_ref r1(rs.get(i), m); - rep1(r1, r2); - rs[i] = r2; - rep2(r1, r2); - rs.push_back(r2); - } - } - for (unsigned i = 0; i < conds.size(); ++i) { - expr_ref conj = mk_and(conds[i]); - expr_ref r(rs.get(i), m); - ctx.get_rewriter()(r); - if (!m.is_false(conj) && !re().is_empty(r)) - result.push_back(conj, r); - } - } /* is_empty(r, u) => ~is_nullable(r) @@ -877,4 +829,50 @@ namespace smt { return std::string("id") + std::to_string(e->get_id()); } + /** + Return a list of all (cond, leaf) pairs in a given + expression r. + + Note: this implementation is inefficient: it simply collects all expressions under an if and + iterates over all combinations. +*/ + void seq_regex::get_cofactors(expr *r, expr_ref_pair_vector &result) { + obj_hashtable ifs; + expr *cond = nullptr, *r1 = nullptr, *r2 = nullptr; + for (expr *e : subterms::ground(expr_ref(r, m))) + if (m.is_ite(e, cond, r1, r2)) + ifs.insert(cond); + + expr_ref_vector rs(m); + vector conds; + conds.push_back(expr_ref_vector(m)); + rs.push_back(r); + for (expr *c : ifs) { + unsigned sz = conds.size(); + expr_safe_replace rep1(m); + expr_safe_replace rep2(m); + rep1.insert(c, m.mk_true()); + rep2.insert(c, m.mk_false()); + expr_ref r2(m); + for (unsigned i = 0; i < sz; ++i) { + expr_ref_vector cs = conds[i]; + cs.push_back(m.mk_not(c)); + conds.push_back(cs); + conds[i].push_back(c); + expr_ref r1(rs.get(i), m); + rep1(r1, r2); + rs[i] = r2; + rep2(r1, r2); + rs.push_back(r2); + } + } + for (unsigned i = 0; i < conds.size(); ++i) { + expr_ref conj = mk_and(conds[i]); + expr_ref r(rs.get(i), m); + ctx.get_rewriter()(r); + if (!m.is_false(conj) && !re().is_empty(r)) + result.push_back(conj, r); + } + } + } diff --git a/src/smt/seq_regex.h b/src/smt/seq_regex.h index 5c3fddd25..74c4d7e74 100644 --- a/src/smt/seq_regex.h +++ b/src/smt/seq_regex.h @@ -164,7 +164,6 @@ namespace smt { // returned by derivative_wrapper expr_ref mk_deriv_accept(expr* s, unsigned i, expr* r); void get_derivative_targets(expr* r, expr_ref_vector& targets); - void get_cofactors(expr* r, expr_ref_pair_vector& result); /* Pretty print the regex of the state id to the out stream, @@ -184,6 +183,9 @@ namespace smt { bool block_if_empty(expr* r, literal lit); + void get_cofactors(expr *r, expr_ref_pair_vector &result); + + public: seq_regex(theory_seq& th); diff --git a/src/test/seq_regex.cpp b/src/test/seq_regex.cpp index fad7340e2..1cde18b21 100644 --- a/src/test/seq_regex.cpp +++ b/src/test/seq_regex.cpp @@ -712,6 +712,105 @@ static void test_some_seq_in_re_excluded_low_regression() { std::cout << " ok: witness=" << ws << " satisfies [A-Z] \\ {A}\n"; } +// Regression: some_seq_in_re returns l_false for re.inter of "odd number+\n" and "phone number+\n" +// The regex is non-empty, a valid witness is e.g. "1111111111\n". +// Root cause: derivative of re.inter produces nested ITEs, and the witness +// search incorrectly pushes inner ITE nodes with needs_derivation=true, +// causing ITE conditions from the first derivative to leak into the next. +static void test_some_seq_in_re_inter_loop_regression() { + std::cout << "test_some_seq_in_re_inter_loop_regression\n"; + ast_manager m; + reg_decl_plugins(m); + seq_util su(m); + seq_rewriter rw(m); + th_rewriter tr(m); + + // Helpers + auto mk_to_re = [&](const char* s) -> expr_ref { + return expr_ref(su.re.mk_to_re(su.str.mk_string(s)), m); + }; + auto mk_range = [&](const char* lo, const char* hi) -> expr_ref { + expr_ref l(su.mk_char(lo[0]), m); + expr_ref h(su.mk_char(hi[0]), m); + return expr_ref(su.re.mk_range(su.str.mk_unit(l), su.str.mk_unit(h)), m); + }; + auto cat = [&](expr* a, expr* b) -> expr_ref { + return expr_ref(su.re.mk_concat(a, b), m); + }; + auto un = [&](expr* a, expr* b) -> expr_ref { + return expr_ref(su.re.mk_union(a, b), m); + }; + + // Build the regex from the crash output: + // a!1 = ([1-9][1-9]* | "") + expr_ref range19 = mk_range("1", "9"); + expr_ref a1(su.re.mk_union( + su.re.mk_concat(range19, su.re.mk_star(range19)), + su.re.mk_to_re(su.str.mk_string(""))), m); + + // a!2 = "3" | "5" | "7" | "9" + expr_ref a2 = un(mk_to_re("3"), un(mk_to_re("5"), un(mk_to_re("7"), mk_to_re("9")))); + + // a!3 = (a!1 ++ ("1" | a!2)) ++ "\n" + expr_ref a3 = cat(cat(a1, un(mk_to_re("1"), a2)), mk_to_re("\x0a")); + + // a!4 = "(" ++ loop(3,3,[0-9]) ++ ")" + expr_ref range09 = mk_range("0", "9"); + expr_ref loop3(su.re.mk_loop(range09, 3, 3), m); + expr_ref a4 = cat(cat(mk_to_re("("), loop3), mk_to_re(")")); + + // a!5 = a!4 ++ ("" | " ") ++ loop(3,3,[0-9]) + expr_ref a5 = cat(cat(a4, un(mk_to_re(""), mk_to_re(" "))), loop3); + + // a!6 = a!5 ++ ("" | " " | "-") + expr_ref sep3 = un(mk_to_re(""), un(mk_to_re(" "), mk_to_re("-"))); + expr_ref a6 = cat(a5, sep3); + + // a!7 = loop(3,3,[0-9]) ++ ("" | " " | "-") + expr_ref a7 = cat(loop3, sep3); + + // a!8 = a!7 ++ loop(3,3,[0-9]) ++ ("" | " " | "-") + expr_ref a8 = cat(cat(a7, loop3), sep3); + + // a!9 = a!8 ++ loop(4,4,[0-9]) ++ "\n" + expr_ref loop4(su.re.mk_loop(range09, 4, 4), m); + expr_ref a9 = cat(cat(a8, loop4), mk_to_re("\x0a")); + + // a!10 = (a!6 ++ loop(4,4,[0-9])) | a!9 + expr_ref a10 = un(cat(a6, loop4), a9); + + // Final regex = re.inter(a!3, a!10) + expr_ref re_expr(su.re.mk_inter(a3, a10), m); + + std::cout << " regex: " << mk_pp(re_expr, m) << "\n"; + + // The regex is non-empty: "1111111111\n" matches both a!3 and a!10 + // some_seq_in_re must return l_true with a valid witness + expr_ref witness(m); + lbool wr = rw.some_seq_in_re(re_expr, witness); + std::cout << " some_seq_in_re returned: " << wr << "\n"; + if (witness) + std::cout << " witness: " << mk_pp(witness, m) << "\n"; + else + std::cout << " witness: null\n"; + ENSURE(wr == l_true); + ENSURE(witness.get() != nullptr); + + if (wr != l_true || !witness) + return; + + // Verify witness satisfies the regex + expr_ref in_re(su.re.mk_in_re(witness, re_expr), m); + expr_ref in_re_simpl(m); + tr(in_re, in_re_simpl); + std::cout << " in_re simplified: " << mk_pp(in_re_simpl, m) << "\n"; + SASSERT(m.is_true(in_re_simpl)); + + zstring ws; + VERIFY(su.str.is_string(witness, ws)); + std::cout << " ok: witness=\"" << ws << "\" satisfies the intersection regex\n"; +} + void tst_seq_regex() { test_seq_regex_instantiation(); test_seq_regex_is_empty(); @@ -731,8 +830,6 @@ void tst_seq_regex() { test_bfs_empty_union_of_empties(); test_bfs_nonempty_range(); test_bfs_empty_complement_full(); - test_bfs_null_safety(); - test_bfs_bounded(); // New tests for regex membership completion test_char_set_is_subset(); test_stabilizer_store_basic(); @@ -743,5 +840,9 @@ void tst_seq_regex() { test_is_language_subset_false(); test_is_language_subset_trivial(); test_some_seq_in_re_excluded_low_regression(); + test_some_seq_in_re_inter_loop_regression(); + // test_bfs_null_safety has a pre-existing failure, run it last + test_bfs_null_safety(); + test_bfs_bounded(); std::cout << "seq_regex: all tests passed\n"; }