From ab53889c10e5fc4f25f762536b261b185d592e6c Mon Sep 17 00:00:00 2001 From: CEisenhofer Date: Wed, 18 Mar 2026 14:28:53 +0100 Subject: [PATCH] Fixed couple of regex problems [there are still others] --- scripts/compare_seq_solvers.py | 8 +- src/ast/euf/euf_sgraph.cpp | 189 ++++++++++--- src/ast/euf/euf_sgraph.h | 6 +- src/params/smt_params.h | 2 +- src/params/smt_params_helper.pyg | 2 +- src/smt/seq/seq_nielsen.cpp | 453 ++++++++++++++----------------- src/smt/seq/seq_nielsen.h | 11 - src/smt/seq/seq_parikh.cpp | 9 +- src/smt/seq/seq_regex.cpp | 78 +----- src/smt/seq/seq_regex.h | 4 +- src/test/euf_sgraph.cpp | 12 + 11 files changed, 392 insertions(+), 382 deletions(-) diff --git a/scripts/compare_seq_solvers.py b/scripts/compare_seq_solvers.py index e5f56a9e3..7e94f051f 100644 --- a/scripts/compare_seq_solvers.py +++ b/scripts/compare_seq_solvers.py @@ -22,8 +22,8 @@ DEFAULT_TIMEOUT = 5 # seconds COMMON_ARGS = ["model_validate=true"] SOLVERS = { - "nseq": ["smt.string_solver=nseq"], - "nseq_np": ["smt.string_solver=nseq", "smt.nseq.parikh=false"], + "nseq": ["smt.string_solver=nseq", "smt.nseq.parikh=false"], + "nseq_p": ["smt.string_solver=nseq", "smt.nseq.parikh=true"], "seq": ["smt.string_solver=seq"], } @@ -190,8 +190,8 @@ def main(): help=f"Per-solver timeout in seconds (default: {DEFAULT_TIMEOUT})") parser.add_argument("--zipt", metavar="PATH", default=None, help="Path to ZIPT binary (optional; if omitted, ZIPT is not benchmarked)") - parser.add_argument("--no-parikh", action="store_true", - help="Also run nseq with nseq.parikh=false") + parser.add_argument("--parikh", action="store_true", + help="Also run nseq with nseq.parikh=true") parser.add_argument("--csv", metavar="FILE", help="Also write results to a CSV file") args = parser.parse_args() diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index d9fcf9308..044352e66 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -513,34 +513,75 @@ namespace euf { return n; } - snode* sgraph::brzozowski_deriv(snode* re, snode* elem) { + snode* sgraph::brzozowski_deriv(snode* re, snode* elem, snode* allowed_range) { expr* re_expr = re->get_expr(); expr* elem_expr = elem->get_expr(); if (!re_expr || !elem_expr) return nullptr; + // unwrap str.unit to get the character expression expr* ch = nullptr; if (m_seq.str.is_unit(elem_expr, ch)) elem_expr = ch; - // If elem is a regex predicate (e.g., re.allchar from compute_minterms), - // extract a representative character for the derivative. - sort* seq_sort = nullptr, *ele_sort = nullptr; - if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) { - if (ele_sort != elem_expr->get_sort()) { + // If an explicit allowed_range is provided (which is a regex minterm), + // we extract a representative character (like 'lo') from it, + // and evaluate the derivative with respect to that representative character. + // This avoids generating massive 'ite' structures for symbolic variables. + sort* seq_sort = nullptr, *ele_sort = nullptr; if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) { + // Just take one element - they are anyway all assumed to produce the same result + auto extract_rep = [&](expr* e) -> expr* { expr* lo = nullptr, *hi = nullptr; - if (m_seq.re.is_full_char(elem_expr)) { - // re.allchar represents the entire alphabet; computing a derivative - // w.r.t. a single character would be imprecise and could incorrectly - // report fail. Return nullptr to prevent incorrect pruning. - return nullptr; + expr* r1 = nullptr, *r2 = nullptr; + while (e) { + if (m_seq.re.is_full_char(e)) + return m_seq.mk_char(0); + if (m_seq.re.is_range(e, lo, hi) && lo) { + expr* lo_ch = nullptr; + zstring zs; + if (m_seq.str.is_unit(lo, lo_ch)) + return lo_ch; + if (m_seq.str.is_string(lo, zs) && zs.length() > 0) + return m_seq.str.mk_char(zs[0]); + return lo; + } + if (m_seq.re.is_union(e, r1, r2)) + e = r1; + else + return nullptr; } - else if (m_seq.re.is_range(elem_expr, lo, hi) && lo) - elem_expr = lo; + return nullptr; + }; + + if (allowed_range && allowed_range->get_expr()) { + expr* range_expr = allowed_range->get_expr(); + if (m_seq.re.is_full_char(range_expr)) { + // For full char, keep symbolic + } + else { + expr* rep = extract_rep(range_expr); + if (rep) + elem_expr = rep; + } + } + else if (ele_sort != elem_expr->get_sort()) { + expr* rep = extract_rep(elem_expr); + if (rep) + elem_expr = rep; else return nullptr; } } + + SASSERT(elem_expr); + if (elem_expr->get_sort() != ele_sort) { + std::cout << "SORT MISMATCH before mk_derivative\n" + << " elem_expr: " << mk_pp(elem_expr, m) << "\n" + << " elem_sort: " << mk_pp(elem_expr->get_sort(), m) << "\n" + << " ele_sort: " << mk_pp(ele_sort, m) << "\n" + << " re_expr: " << mk_pp(re_expr, m) << std::endl; + } + expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr); if (!result) return nullptr; @@ -557,8 +598,35 @@ namespace euf { preds.push_back(e); return; } - if (m_seq.re.is_to_re(e)) + if (m_seq.re.is_to_re(e)) { + expr* s = nullptr; + if (m_seq.re.is_to_re(e, s)) { + zstring zs; + expr* ch_expr = nullptr; + if (m_seq.str.is_string(s, zs) && zs.length() > 0) { + unsigned c = zs[0]; + ch_expr = m_seq.str.mk_char(c); + } + else if (m_seq.str.is_unit(s, ch_expr)) { + // ch_expr correctly extracted + } + + if (ch_expr) { + expr_ref unit_str(m_seq.str.mk_unit(ch_expr), m); + expr_ref re_char(m_seq.re.mk_to_re(unit_str), m); + bool dup = false; + for (expr* p : preds) { + if (p == re_char) { + dup = true; + break; + } + } + if (!dup) + preds.push_back(re_char); + } + } return; + } if (m_seq.re.is_full_char(e)) return; if (m_seq.re.is_full_seq(e)) @@ -566,41 +634,82 @@ namespace euf { if (m_seq.re.is_empty(e)) return; // recurse into compound regex operators - for (unsigned i = 0; i < re->num_args(); ++i) + for (unsigned i = 0; i < re->num_args(); ++i) { collect_re_predicates(re->arg(i), preds); + } } void sgraph::compute_minterms(snode* re, snode_vector& minterms) { - // extract character predicates from the regex expr_ref_vector preds(m); collect_re_predicates(re, preds); + + unsigned max_c = m_seq.max_char(); + if (preds.empty()) { - // no predicates means the whole alphabet is one minterm - // represented by full_char expr_ref fc(m_seq.re.mk_full_char(m_str_sort), m); minterms.push_back(mk(fc)); return; } - // generate minterms as conjunctions/negations of predicates - // for n predicates, there are up to 2^n minterms - unsigned n = preds.size(); - // cap at reasonable size to prevent exponential blowup - if (n > 20) - n = 20; - for (unsigned mask = 0; mask < (1u << n); ++mask) { - expr_ref_vector conj(m); - for (unsigned i = 0; i < n; ++i) { - if (mask & (1u << i)) - conj.push_back(preds.get(i)); - else - conj.push_back(m_seq.re.mk_complement(preds.get(i))); + + std::vector classes; + classes.push_back(char_set::full(max_c)); + + for (expr* p : preds) { + char_set p_set; + expr* lo = nullptr, *hi = nullptr; + + if (m_seq.re.is_range(p, lo, hi)) { + unsigned vlo = 0, vhi = 0; + if (m_seq.is_const_char(lo, vlo) && m_seq.is_const_char(hi, vhi)) { + if (vlo <= vhi) + p_set = char_set(char_range(vlo, vhi + 1)); + } } - SASSERT(!conj.empty()); - // intersect all terms - expr_ref mt(conj.get(0), m); - for (unsigned i = 1; i < conj.size(); ++i) - mt = m_seq.re.mk_inter(mt, conj.get(i)); - minterms.push_back(mk(mt)); + else if (m_seq.re.is_to_re(p)) { + expr* str_arg = nullptr; + expr* ch_expr = nullptr; + unsigned char_val = 0; + if (m_seq.re.is_to_re(p, str_arg) && + m_seq.str.is_unit(str_arg, ch_expr) && + m_seq.is_const_char(ch_expr, char_val)) { + p_set.add(char_val); + } + } + else if (m_seq.re.is_full_char(p)) + p_set = char_set::full(max_c); + else + continue; + + if (p_set.is_empty() || p_set.is_full(max_c)) + continue; + + std::vector next_classes; + char_set p_comp = p_set.complement(max_c); + for (auto const& c : classes) { + char_set in_c = c.intersect_with(p_set); + char_set out_c = c.intersect_with(p_comp); + if (!in_c.is_empty()) + next_classes.push_back(in_c); + if (!out_c.is_empty()) + next_classes.push_back(out_c); + } + classes = std::move(next_classes); + } + for (auto const& c : classes) { + expr_ref class_expr(m); + for (auto const& r : c.ranges()) { + zstring z_lo(r.m_lo); + zstring z_hi(r.m_hi - 1); + expr_ref c_lo(m_seq.str.mk_string(z_lo), m); + expr_ref c_hi(m_seq.str.mk_string(z_hi), m); + expr_ref range_expr(m_seq.re.mk_range(c_lo, c_hi), m); + if (!class_expr) + class_expr = range_expr; + else + class_expr = m_seq.re.mk_union(class_expr, range_expr); + } + if (class_expr) + minterms.push_back(mk(class_expr)); } } @@ -660,3 +769,9 @@ namespace euf { } + + + + + + diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 3881256d0..8ec1106ab 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -137,8 +137,10 @@ namespace euf { // substitution: replace all occurrences of var in n by replacement snode* subst(snode* n, snode* var, snode* replacement); - // Brzozowski derivative of regex re with respect to element elem - snode* brzozowski_deriv(snode* re, snode* elem); + // Brzozowski derivative of regex re with respect to element elem. + // allowed_range can explicitly provide a concrete character or range to use + // for deriving symbolic variables. + snode* brzozowski_deriv(snode* re, snode* elem, snode* allowed_range = nullptr); // compute minterms (character class partition) from a regex void compute_minterms(snode* re, snode_vector& minterms); diff --git a/src/params/smt_params.h b/src/params/smt_params.h index 950a05fb3..aa593fd90 100644 --- a/src/params/smt_params.h +++ b/src/params/smt_params.h @@ -250,7 +250,7 @@ struct smt_params : public preprocessor_params, symbol m_string_solver; unsigned m_nseq_max_depth = 0; unsigned m_nseq_max_nodes = 0; - bool m_nseq_parikh = true; + bool m_nseq_parikh = false; bool m_nseq_regex_precheck = true; smt_params(params_ref const & p = params_ref()): diff --git a/src/params/smt_params_helper.pyg b/src/params/smt_params_helper.pyg index b5b132834..226c5ccbb 100644 --- a/src/params/smt_params_helper.pyg +++ b/src/params/smt_params_helper.pyg @@ -125,7 +125,7 @@ def_module_params(module_name='smt', ('string_solver', SYMBOL, 'seq', 'solver for string/sequence theories. options are: \'z3str3\' (specialized string solver), \'seq\' (sequence solver), \'auto\' (use static features to choose best solver), \'empty\' (a no-op solver that forces an answer unknown if strings were used), \'none\' (no solver), \'nseq\' (Nielsen-based string solver)'), ('nseq.max_depth', UINT, 0, 'maximum Nielsen search depth for theory_nseq (0 = unlimited)'), ('nseq.max_nodes', UINT, 0, 'maximum number of DFS nodes explored by theory_nseq per solve() call (0 = unlimited)'), - ('nseq.parikh', BOOL, True, 'enable Parikh image checks in nseq solver'), + ('nseq.parikh', BOOL, False, 'enable Parikh image checks in nseq solver'), ('nseq.regex_precheck', BOOL, True, 'enable regex membership pre-check before DFS in theory_nseq: checks intersection emptiness per-variable and short-circuits SAT/UNSAT for regex-only problems'), ('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'), ('seq.split_w_len', BOOL, True, 'enable splitting guided by length constraints'), diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index cb2fac901..3d7ccfc2d 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -255,9 +255,9 @@ namespace seq { m_is_general_conflict = true; m_reason = backtrack_reason::character_range; } - } else { - m_char_ranges.insert(id, range.clone()); } + else + m_char_ranges.insert(id, range.clone()); } void nielsen_node::add_char_diseq(euf::snode* sym_char, euf::snode* other) { @@ -371,10 +371,10 @@ namespace seq { ++const_len; } else if (t->is_var()) { var_tokens.push_back(t); - } else { + } + else // power or unknown token: cannot propagate simply, abort return; - } } if (var_tokens.empty()) { @@ -396,11 +396,12 @@ namespace seq { if (const_len > hi) { m_is_general_conflict = true; m_reason = backtrack_reason::arithmetic; - } else { - add_upper_int_bound(y, hi - const_len, s.m_dep); } + else + add_upper_int_bound(y, hi - const_len, s.m_dep); } - } else { + } + else { // multiple variables: propagate upper bound to each // (each variable contributes >= 0, so each <= hi - const_len) if (hi != UINT_MAX) { @@ -433,7 +434,8 @@ namespace seq { add_lower_int_bound(mem.m_str, min_len, mem.m_dep); if (max_len < UINT_MAX) add_upper_int_bound(mem.m_str, max_len, mem.m_dep); - } else { + } + else { // str is a concatenation or other term: add as general int_constraints ast_manager& m = m_graph.m(); arith_util arith(m); @@ -484,7 +486,8 @@ namespace seq { m_char_diseqs.remove(var_id); m_char_ranges.remove(var_id); } - } else { + } + else { SASSERT(s.m_val->is_char()); // symbolic char → concrete char: check range constraints if (m_char_ranges.contains(var_id)) { @@ -678,9 +681,9 @@ namespace seq { if (i + 1 < r.size() && r[i] == '\\' && r[i+1] == 'n') { result += "
"; i += 2; - } else { - result += r[i++]; } + else + result += r[i++]; } return result; } @@ -715,7 +718,8 @@ namespace seq { } else if (arith.is_numeral(arg, neg_val) && neg_val.is_neg()) { r += " − "; // minus sign r += (-neg_val).to_string(); - } else { + } + else { r += " + "; r += arith_expr_html(arg, m); } @@ -894,11 +898,13 @@ namespace seq { if (seq.is_const_char(ch_expr, code)) { if (code >= 32 && code < 127 && code != '"' && code != '\\') { char_acc += static_cast(code); - } else { + } + else { flush_chars(); char buf[16]; snprintf(buf, sizeof(buf), "'\\x%x'", code); - if (!first) result += " + "; + if (!first) + result += " + "; result += buf; first = false; } @@ -917,14 +923,15 @@ namespace seq { } else if (tok->is_unit()) { // seq.unit with non-literal character: show the character expression expr* ch = to_app(e)->get_arg(0); - if (is_app(ch)) { + if (is_app(ch)) result += dot_html_escape(to_app(ch)->get_decl()->get_name().str()); - } else { + else { std::ostringstream os; os << mk_pp(ch, m); result += dot_html_escape(os.str()); } - } else if (tok->is_power()) { + } + else if (tok->is_power()) { // seq.power(base, n): render as basen std::string base_html = snode_label_html(tok->arg(0), m); if (tok->arg(0)->length() > 1) @@ -934,9 +941,10 @@ namespace seq { expr* exp_expr = to_app(e)->get_arg(1); result += arith_expr_html(exp_expr, m); result += ""; - } else if (e && seq.is_re(e)) { + } + else if (e && seq.is_re(e)) result += regex_expr_html(e, m, seq); - } else { + else { std::ostringstream os; os << mk_pp(e, m); result += dot_html_escape(os.str()); @@ -1300,9 +1308,9 @@ namespace seq { expr_ref norm_exp = normalize_arith(m, exp_acc); expr_ref new_pow(seq.str.mk_power(base_e, norm_exp), m); result.push_back(sg.mk(new_pow)); - } else { - result.push_back(tok); } + else + result.push_back(tok); i = j; continue; } @@ -1526,13 +1534,14 @@ namespace seq { euf::snode* rt = rhs_toks[prefix]; if ((lt->is_char() || lt->is_var()) && m.are_equal(lt->get_expr(), rt->get_expr())) { ++prefix; - } else if (lt->is_char() && rt->is_char() && m.are_distinct(lt->get_expr(), rt->get_expr())) { + } + else if (lt->is_char() && rt->is_char() && m.are_distinct(lt->get_expr(), rt->get_expr())) { m_is_general_conflict = true; m_reason = backtrack_reason::symbol_clash; return simplify_result::conflict; - } else { - break; } + else + break; } // --- suffix (only among the tokens not already consumed by prefix) --- @@ -1547,9 +1556,9 @@ namespace seq { m_is_general_conflict = true; m_reason = backtrack_reason::symbol_clash; return simplify_result::conflict; - } else { - break; } + else + break; } if (prefix > 0 || suffix > 0) { @@ -1822,8 +1831,7 @@ namespace seq { // compute minterms and check for uniform derivative euf::snode_vector minterms; sg.compute_minterms(mem.m_regex, minterms); - if (minterms.empty()) - break; + VERIFY(!minterms.empty()); euf::snode* uniform = nullptr; bool is_uniform = true; for (euf::snode* mt : minterms) { @@ -1943,7 +1951,7 @@ namespace seq { if (eq.is_trivial() || !eq.m_lhs || !eq.m_rhs) continue; for (unsigned od = 0; od < 2; ++od) { - bool fwd = (od == 0); + bool fwd = od == 0; // Orient: var_side starts with a variable, char_side with a char euf::snode* var_side = nullptr; @@ -1960,9 +1968,8 @@ namespace seq { var_side = eq.m_rhs; char_side = eq.m_lhs; } - else { + else continue; - } euf::snode_vector var_toks, char_toks; collect_tokens_dir(var_side, fwd, var_toks); @@ -2017,7 +2024,8 @@ namespace seq { failed = true; break; } - if (inner_indet || failed) break; + if (inner_indet || failed) + break; j1++; } @@ -2051,12 +2059,15 @@ namespace seq { if (next_var) { u_map dep_edges; // var id -> first dependent var id for (str_eq const& other_eq : m_str_eq) { - if (other_eq.is_trivial()) continue; - if (!other_eq.m_lhs || !other_eq.m_rhs) continue; + if (other_eq.is_trivial()) + continue; + if (!other_eq.m_lhs || !other_eq.m_rhs) + continue; euf::snode* lh2 = dir_token(other_eq.m_lhs, fwd); euf::snode* rh2 = dir_token(other_eq.m_rhs, fwd); - if (!lh2 || !rh2) continue; + if (!lh2 || !rh2) + continue; auto record_dep = [&](euf::snode* head_var, euf::snode* other_side) { euf::snode_vector other_toks; @@ -2076,12 +2087,10 @@ namespace seq { if (!dep_edges.contains(rh2->id())) dep_edges.insert(rh2->id(), lh2->id()); } - else if (lh2->is_var() && !rh2->is_var()) { + else if (lh2->is_var() && !rh2->is_var()) record_dep(lh2, other_eq.m_rhs); - } - else if (rh2->is_var() && !lh2->is_var()) { + else if (rh2->is_var() && !lh2->is_var()) record_dep(rh2, other_eq.m_lhs); - } } // DFS from next_var to see if we can reach var_node @@ -2139,18 +2148,38 @@ namespace seq { bool nielsen_node::has_opaque_terms() const { auto is_opaque = [](euf::snode* n) { return n && n->kind() == euf::snode_kind::s_other; }; for (str_eq const& eq : m_str_eq) { - if (eq.is_trivial()) continue; - if (is_opaque(eq.m_lhs) || is_opaque(eq.m_rhs)) return true; + if (eq.is_trivial()) + continue; + if (is_opaque(eq.m_lhs) || is_opaque(eq.m_rhs)) + return true; euf::snode_vector toks; - if (eq.m_lhs) { eq.m_lhs->collect_tokens(toks); for (auto* t : toks) if (is_opaque(t)) return true; toks.reset(); } - if (eq.m_rhs) { eq.m_rhs->collect_tokens(toks); for (auto* t : toks) if (is_opaque(t)) return true; } + if (eq.m_lhs) { + eq.m_lhs->collect_tokens(toks); + for (auto* t : toks) { + if (is_opaque(t)) + return true; + } + toks.reset(); + } + if (eq.m_rhs) { + eq.m_rhs->collect_tokens(toks); + for (auto* t : toks) { + if (is_opaque(t)) + return true; + } + } } for (str_mem const& mem : m_str_mem) { - if (!mem.m_str) continue; - if (is_opaque(mem.m_str)) return true; + if (!mem.m_str) + continue; + if (is_opaque(mem.m_str)) + return true; euf::snode_vector toks; mem.m_str->collect_tokens(toks); - for (auto* t : toks) if (is_opaque(t)) return true; + for (auto* t : toks) { + if (is_opaque(t)) + return true; + } } return false; } @@ -2160,8 +2189,8 @@ namespace seq { // ----------------------------------------------------------------------- void nielsen_graph::apply_parikh_to_node(nielsen_node& node) { - if (!m_parikh_enabled) return; - if (node.m_parikh_applied) return; + if (!m_parikh_enabled || node.m_parikh_applied) + return; node.m_parikh_applied = true; // Generate modular length constraints (len(str) = min_len + stride·k, etc.) @@ -2178,7 +2207,8 @@ namespace seq { } void nielsen_graph::assert_root_constraints_to_solver() { - if (m_root_constraints_asserted) return; + if (m_root_constraints_asserted) + return; m_root_constraints_asserted = true; // Constraint.Shared: assert all root-level length/Parikh constraints // to m_solver at the base level (no push/pop). These include: @@ -2189,8 +2219,9 @@ namespace seq { // pruning at every node, not just the root. vector constraints; generate_length_constraints(constraints); - for (auto const& lc : constraints) + for (auto const& lc : constraints) { m_solver.assert_expr(lc.m_expr); + } } nielsen_graph::search_result nielsen_graph::solve() { @@ -2208,7 +2239,7 @@ namespace seq { // Iterative deepening: increment by 1 on each failure. // m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it. - m_depth_bound = 10; + m_depth_bound = 3; while (true) { if (!m().inc()) { #ifdef Z3DEBUG @@ -2372,8 +2403,9 @@ namespace seq { e->set_len_constraints_computed(true); } - for (auto const &ic : e->side_int()) + for (auto const &ic : e->side_int()) { m_solver.assert_expr(int_constraint_to_expr(ic)); + } // Bump modification counts for the child's context. inc_edge_mod_counts(e); @@ -2489,7 +2521,7 @@ namespace seq { if (!eq.m_lhs || !eq.m_rhs) continue; for (unsigned od = 0; od < 2; ++od) { - bool fwd = (od == 0); + bool fwd = od == 0; euf::snode* lhead = dir_token(eq.m_lhs, fwd); euf::snode* rhead = dir_token(eq.m_rhs, fwd); if (!lhead || !rhead) @@ -2525,6 +2557,15 @@ namespace seq { e->add_subst(s); child->apply_subst(m_sg, s); } + // child 4: y → x·y (no progress) + { + euf::snode* replacement = dir_concat(m_sg, lhead, rhead, fwd); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, false); + nielsen_subst s(rhead, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } return true; } } @@ -2660,11 +2701,13 @@ namespace seq { } seen_variable = true; lhs_has_symbolic = true; - } else { - const_diff += (int)token_const_length(tok); } - } else { - if (ri >= rhs_len) break; + else + const_diff += (int)token_const_length(tok); + } + else { + if (ri >= rhs_len) + break; euf::snode* tok = rhs_toks[ri++]; bool is_var = token_has_variable_length(tok); if (is_var) { @@ -2677,13 +2720,14 @@ namespace seq { } seen_variable = true; rhs_has_symbolic = true; - } else { - const_diff -= (int)token_const_length(tok); } + else + const_diff -= (int)token_const_length(tok); } } - if (!has_best) return false; + if (!has_best) + return false; // A split at the very start or very end is degenerate — skip. if ((best_lhs == 0 && best_rhs == 0) || @@ -2754,7 +2798,8 @@ namespace seq { // Prepend pad to RHS prefix, append pad to LHS suffix. eq1_rhs = m_sg.mk_concat(pad, rhs_prefix); eq2_lhs = m_sg.mk_concat(lhs_suffix, pad); - } else { + } + else { // RHS prefix is longer by |padding| constants. // Append pad to LHS prefix, prepend pad to RHS suffix. eq1_lhs = m_sg.mk_concat(lhs_prefix, pad); @@ -2816,134 +2861,6 @@ namespace seq { return m_sg.mk(unit); } - // ----------------------------------------------------------------------- - // nielsen_graph: apply_regex_char_split - // ----------------------------------------------------------------------- - - void nielsen_graph::collect_first_chars(euf::snode* re, euf::snode_vector& chars) { - if (!re) - return; - - // to_re(s): the first character of the string s - if (re->is_to_re()) { - euf::snode* body = re->arg(0); - if (body && !body->is_empty()) { - euf::snode* first = body->first(); - if (first && first->is_char()) { - bool dup = false; - for (euf::snode* c : chars) - if (c == first) { dup = true; break; } - if (!dup) - chars.push_back(first); - } - // Handle string literals (classified as s_other in sgraph): - // extract the first character from the zstring and create a char snode. - else if (first && first->get_expr()) { - seq_util& seq = m_sg.get_seq_util(); - zstring s; - if (seq.str.is_string(first->get_expr(), s) && s.length() > 0) { - euf::snode* ch = m_sg.mk_char(s[0]); - bool dup = false; - for (euf::snode* c : chars) - if (c == ch) { dup = true; break; } - if (!dup) - chars.push_back(ch); - } - } - } - return; - } - - // full character set (.): use 'a' as representative - if (re->is_full_char()) { - euf::snode* ch = m_sg.mk_char('a'); - bool dup = false; - for (euf::snode* c : chars) - if (c == ch) { dup = true; break; } - if (!dup) - chars.push_back(ch); - return; - } - - // re.range(lo, hi): use lo as representative - if (re->get_expr()) { - seq_util& seq = m_sg.get_seq_util(); - expr* lo = nullptr, *hi = nullptr; - if (seq.re.is_range(re->get_expr(), lo, hi) && lo) { - unsigned ch_val = 'a'; - if (seq.is_const_char(lo, ch_val)) { - euf::snode* ch = m_sg.mk_char(ch_val); - bool dup = false; - for (euf::snode* c : chars) - if (c == ch) { dup = true; break; } - if (!dup) - chars.push_back(ch); - } - return; - } - } - - // fail, full_seq: no concrete first chars to extract - if (re->is_fail() || re->is_full_seq()) - return; - - // recurse into children (handles union, concat, star, loop, etc.) - for (unsigned i = 0; i < re->num_args(); ++i) - collect_first_chars(re->arg(i), chars); - } - - bool nielsen_graph::apply_regex_char_split(nielsen_node* node) { - for (str_mem const& mem : node->str_mems()) { - if (!mem.m_str || !mem.m_regex) - continue; - - // find the first token of the string side - euf::snode* first = mem.m_str->first(); - if (!first || !first->is_var()) - continue; - - // collect concrete characters from the regex - euf::snode_vector chars; - collect_first_chars(mem.m_regex, chars); - - // If no concrete characters found, fall through to apply_regex_var_split - if (chars.empty()) - continue; - - bool created = false; - - // for each character c with non-fail derivative: - // child: x → c · x - for (euf::snode* ch : chars) { - euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, ch); - if (!deriv || deriv->is_fail()) - continue; - - euf::snode* replacement = m_sg.mk_concat(ch, first); - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, false); - nielsen_subst s(first, replacement, mem.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - created = true; - } - - // x → ε branch (variable is empty) - { - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(first, m_sg.mk_empty_seq(first->get_sort()), mem.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - created = true; - } - - if (created) - return true; - } - return false; - } - bool nielsen_graph::generate_extensions(nielsen_node *node) { // The first modifier that produces edges is used and returned immediately. @@ -2985,10 +2902,6 @@ namespace seq { if (apply_const_nielsen(node)) return ++m_stats.m_mod_const_nielsen, true; - // Priority 9: RegexCharSplit - split str_mem by first-position chars - if (apply_regex_char_split(node)) - return ++m_stats.m_mod_regex_char_split, true; - // Priority 10: RegexVarSplit - split str_mem by minterms if (apply_regex_var_split(node)) return ++m_stats.m_mod_regex_var_split, true; @@ -3014,16 +2927,22 @@ namespace seq { euf::snode* nielsen_graph::find_power_token(nielsen_node* node) const { for (str_eq const& eq : node->str_eqs()) { - if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; euf::snode_vector toks; eq.m_lhs->collect_tokens(toks); - for (euf::snode* t : toks) - if (t->is_power()) return t; + for (euf::snode* t : toks) { + if (t->is_power()) + return t; + } toks.reset(); eq.m_rhs->collect_tokens(toks); - for (euf::snode* t : toks) - if (t->is_power()) return t; + for (euf::snode* t : toks) { + if (t->is_power()) + return t; + } } return nullptr; } @@ -3039,8 +2958,10 @@ namespace seq { str_eq const*& eq_out, bool& fwd) const { for (str_eq const& eq : node->str_eqs()) { - if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; for (unsigned od = 0; od < 2; ++od) { bool local_fwd = (od == 0); euf::snode* lhead = dir_token(eq.m_lhs, local_fwd); @@ -3079,8 +3000,10 @@ namespace seq { str_eq const*& eq_out, bool& fwd) const { for (str_eq const& eq : node->str_eqs()) { - if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; for (unsigned od = 0; od < 2; ++od) { bool local_fwd = (od == 0); euf::snode* lhead = dir_token(eq.m_lhs, local_fwd); @@ -3123,8 +3046,10 @@ namespace seq { euf::snode* power = nullptr; dep_tracker dep = m_dep_mgr.mk_empty(); for (str_eq const& eq : node->str_eqs()) { - if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; if (eq.m_lhs->is_empty() && eq.m_rhs->first() && eq.m_rhs->first()->is_power()) { power = eq.m_rhs->first(); dep = eq.m_dep; @@ -3187,7 +3112,8 @@ namespace seq { bool fwd = (od == 0); euf::snode* lhead = dir_token(eq.m_lhs, fwd); euf::snode* rhead = dir_token(eq.m_rhs, fwd); - if (!lhead || !rhead) continue; + if (!lhead || !rhead) + continue; if (!lhead->is_power() || !rhead->is_power()) continue; if (lhead->num_args() < 1 || rhead->num_args() < 1) @@ -3246,24 +3172,30 @@ namespace seq { seq_util seq(m); for (str_eq const& eq : node->str_eqs()) { - if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; for (int side = 0; side < 2; ++side) { euf::snode* pow_side = (side == 0) ? eq.m_lhs : eq.m_rhs; euf::snode* other_side = (side == 0) ? eq.m_rhs : eq.m_lhs; - if (!pow_side || !other_side) continue; + if (!pow_side || !other_side) + continue; for (unsigned od = 0; od < 2; ++od) { - bool fwd = (od == 0); + bool fwd = od == 0; euf::snode* end_tok = dir_token(pow_side, fwd); - if (!end_tok || !end_tok->is_power()) continue; + if (!end_tok || !end_tok->is_power()) + continue; euf::snode* base_sn = end_tok->arg(0); expr* pow_exp = get_power_exp_expr(end_tok, seq); - if (!base_sn || !pow_exp) continue; + if (!base_sn || !pow_exp) + continue; auto [count, consumed] = comm_power(base_sn, other_side, m, fwd); - if (!count.get() || consumed == 0) continue; + if (!count.get() || consumed == 0) + continue; expr_ref norm_count = normalize_arith(m, count); @@ -3394,17 +3326,14 @@ namespace seq { // 3. Fall back to simple star(R) euf::snode* stab_base = nullptr; - if (m_seq_regex && m_seq_regex->is_self_stabilizing(mem.m_regex)) { + if (m_seq_regex && m_seq_regex->is_self_stabilizing(mem.m_regex)) stab_base = mem.m_regex; - } - else if (m_seq_regex && mem.m_history) { + else if (m_seq_regex && mem.m_history) stab_base = m_seq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history); - } - if (!stab_base) { + if (!stab_base) // Fall back: simple star of the cycle regex stab_base = mem.m_regex; - } // Register the stabilizer if (m_seq_regex) @@ -3478,15 +3407,18 @@ namespace seq { bool nielsen_graph::apply_gpower_intr(nielsen_node* node) { for (str_eq const& eq : node->str_eqs()) { - if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; // Try both directions (ZIPT's ExtendDir(fwd=true/false)). for (unsigned od = 0; od < 2; ++od) { bool fwd = (od == 0); euf::snode* lhead = dir_token(eq.m_lhs, fwd); euf::snode* rhead = dir_token(eq.m_rhs, fwd); - if (!lhead || !rhead) continue; + if (!lhead || !rhead) + continue; // Orientation 1: RHS directional head is var, scan LHS in that // direction for ground prefix + matching cycle var. @@ -3496,7 +3428,10 @@ namespace seq { euf::snode_vector ground_prefix; euf::snode* target_var = nullptr; for (unsigned i = 0; i < toks.size(); ++i) { - if (toks[i]->is_var()) { target_var = toks[i]; break; } + if (toks[i]->is_var()) { + target_var = toks[i]; + break; + } ground_prefix.push_back(toks[i]); } if (target_var && !ground_prefix.empty() && target_var->id() == rhead->id()) { @@ -3512,7 +3447,10 @@ namespace seq { euf::snode_vector ground_prefix; euf::snode* target_var = nullptr; for (unsigned i = 0; i < toks.size(); ++i) { - if (toks[i]->is_var()) { target_var = toks[i]; break; } + if (toks[i]->is_var()) { + target_var = toks[i]; + break; + } ground_prefix.push_back(toks[i]); } if (target_var && !ground_prefix.empty() && target_var->id() == lhead->id()) { @@ -3542,11 +3480,15 @@ namespace seq { unsigned n = ground_prefix_orig.size(); unsigned period = n; for (unsigned p = 1; p <= n / 2; ++p) { - if (n % p != 0) continue; + if (n % p != 0) + continue; bool match = true; for (unsigned i = p; i < n && match; ++i) match = ground_prefix_orig[i]->id() == ground_prefix_orig[i % p]->id(); - if (match) { period = p; break; } + if (match) { + period = p; + break; + } } for (unsigned i = 0; i < period; ++i) { ground_prefix.push_back(ground_prefix_orig[i]); @@ -3591,7 +3533,8 @@ namespace seq { expr_ref fresh_n = mk_fresh_int_var(); expr_ref power_expr(seq.str.mk_power(base_str, fresh_n), m); euf::snode* power_snode = m_sg.mk(power_expr); - if (!power_snode) return false; + if (!power_snode) + return false; expr* zero = arith.mk_int(0); @@ -3627,15 +3570,16 @@ namespace seq { euf::snode* partial_sn = m_sg.mk(partial_pow); euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, partial_sn, fwd) : partial_sn; replacement = dir_concat(m_sg, power_snode, suffix_sn, fwd); - } else { + } + else { // Fallback: use full token (shouldn't normally happen) euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, tok, fwd) : tok; replacement = dir_concat(m_sg, power_snode, suffix_sn, fwd); } - } else { + } + else // Token is a char: P(char) = ε, suffix = just the prefix replacement = prefix_sn ? dir_concat(m_sg, power_snode, prefix_sn, fwd) : power_snode; - } nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); @@ -3671,14 +3615,17 @@ namespace seq { bool nielsen_graph::apply_regex_var_split(nielsen_node* node) { for (str_mem const& mem : node->str_mems()) { - if (!mem.m_str || !mem.m_regex) continue; + if (!mem.m_str || !mem.m_regex) + continue; euf::snode* first = mem.m_str->first(); - if (!first || !first->is_var()) continue; + if (!first || !first->is_var()) + continue; // Compute minterms from the regex + // std::cout << "Regex split: " << mk_pp(mem.m_regex->get_expr(), m_sg.get_manager()) << std::endl; euf::snode_vector minterms; m_sg.compute_minterms(mem.m_regex, minterms); - if (minterms.empty()) continue; + VERIFY(!minterms.empty()); bool created = false; @@ -3695,14 +3642,29 @@ namespace seq { // Branch 2+: for each minterm m_i, x → ?c · x // where ?c is a symbolic char constrained by the minterm for (euf::snode* mt : minterms) { - if (mt->is_fail()) continue; + // std::cout << "Processing minterm: " << mk_pp(mt->get_expr(), m_sg.get_manager()) << std::endl; + SASSERT(!mt->is_fail()); // Try Brzozowski derivative with respect to this minterm // If the derivative is fail (empty language), skip this minterm euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, mt); - if (deriv && deriv->is_fail()) continue; + SASSERT(deriv); + if (deriv->is_fail()) + continue; + // std::cout << "Result: " << mk_pp(deriv->get_expr(), m_sg.get_manager()) << std::endl; + + char_set cs; + if (m_parikh && mt->get_expr()) + cs = m_parikh->minterm_to_char_set(mt->get_expr()); + + euf::snode* fresh_char = nullptr; + if (cs.is_unit()) { + expr_ref char_expr(m_sg.get_seq_util().str.mk_string(zstring(cs.first_char())), m_sg.get_manager()); + fresh_char = m_sg.mk(char_expr); + } else { + fresh_char = mk_fresh_char_var(); + } - euf::snode* fresh_char = mk_fresh_char_var(); euf::snode* replacement = m_sg.mk_concat(fresh_char, first); nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, false); @@ -3713,10 +3675,8 @@ namespace seq { // This is the key Parikh pruning step: when x → ?c · x' is // generated from minterm m_i, ?c must belong to the character // class described by m_i so that str ∈ derivative(R, m_i). - if (mt->get_expr()) { - char_set cs = m_parikh->minterm_to_char_set(mt->get_expr()); - if (!cs.is_empty()) - child->add_char_range(fresh_char, cs); + if (!cs.is_unit() && !cs.is_empty()) { + child->add_char_range(fresh_char, cs); } created = true; } @@ -3796,14 +3756,15 @@ namespace seq { euf::snode* partial_sn = m_sg.mk(partial_pow); euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, partial_sn, fwd) : partial_sn; replacement = dir_concat(m_sg, power_m_sn, suffix_sn, fwd); - } else { + } + else { euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, tok, fwd) : tok; replacement = dir_concat(m_sg, power_m_sn, suffix_sn, fwd); } - } else { + } + else // P(char) = ε, suffix is just the prefix replacement = prefix_sn ? dir_concat(m_sg, power_m_sn, prefix_sn, fwd) : power_m_sn; - } nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); @@ -4443,7 +4404,8 @@ namespace seq { euf::snode* rng_sn = m_sg.mk(rng); if (!range_re) { range_re = rng_sn; - } else { + } + else { expr_ref u(seq.re.mk_union(range_re->get_expr(), rng_sn->get_expr()), mgr); range_re = m_sg.mk(u); } @@ -4560,7 +4522,6 @@ namespace seq { st.update("nseq mod star intr", m_stats.m_mod_star_intr); st.update("nseq mod gpower intr", m_stats.m_mod_gpower_intr); st.update("nseq mod const nielsen", m_stats.m_mod_const_nielsen); - st.update("nseq mod regex char", m_stats.m_mod_regex_char_split); st.update("nseq mod regex var", m_stats.m_mod_regex_var_split); st.update("nseq mod power split", m_stats.m_mod_power_split); st.update("nseq mod var nielsen", m_stats.m_mod_var_nielsen); @@ -4568,3 +4529,5 @@ namespace seq { } } + + diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 4cfb0d6e6..fcffb8dd5 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -734,7 +734,6 @@ namespace seq { unsigned m_mod_star_intr = 0; unsigned m_mod_gpower_intr = 0; unsigned m_mod_const_nielsen = 0; - unsigned m_mod_regex_char_split = 0; unsigned m_mod_regex_var_split = 0; unsigned m_mod_power_split = 0; unsigned m_mod_var_nielsen = 0; @@ -996,13 +995,6 @@ namespace seq { unsigned& out_rhs_idx, int& out_padding) const; - // apply regex character split modifier to a node. - // for a str_mem constraint x·s ∈ R where x is a variable: - // (1) x → c·z for each char c accepted by R at first position - // (2) x → ε (x is empty) - // returns true if children were generated. - bool apply_regex_char_split(nielsen_node* node); - // power epsilon modifier: for a power token u^n in an equation, // branch: (1) base u = ε, (2) power is empty (n = 0 semantics). // mirrors ZIPT's PowerEpsilonModifier @@ -1060,9 +1052,6 @@ namespace seq { // mirrors ZIPT's VarNumUnwindingModifier bool apply_var_num_unwinding(nielsen_node* node); - // collect concrete first-position characters from a regex snode - void collect_first_chars(euf::snode* re, euf::snode_vector& chars); - // find the first power token in any str_eq at this node euf::snode* find_power_token(nielsen_node* node) const; diff --git a/src/smt/seq/seq_parikh.cpp b/src/smt/seq/seq_parikh.cpp index 3f6deba40..26587322b 100644 --- a/src/smt/seq/seq_parikh.cpp +++ b/src/smt/seq/seq_parikh.cpp @@ -353,8 +353,15 @@ namespace seq { if (seq.re.is_complement(re_expr, inner)) return minterm_to_char_set(inner).complement(max_c); - // intersection: characters present in both sets + // union: characters present in either set expr* r1 = nullptr, *r2 = nullptr; + if (seq.re.is_union(re_expr, r1, r2)) { + char_set cs = minterm_to_char_set(r1); + cs.add(minterm_to_char_set(r2)); + return cs; + } + + // intersection: characters present in both sets if (seq.re.is_intersection(re_expr, r1, r2)) return minterm_to_char_set(r1).intersect_with(minterm_to_char_set(r2)); diff --git a/src/smt/seq/seq_regex.cpp b/src/smt/seq/seq_regex.cpp index a0fc31095..ab23cad42 100644 --- a/src/smt/seq/seq_regex.cpp +++ b/src/smt/seq/seq_regex.cpp @@ -725,83 +725,7 @@ namespace seq { } // ----------------------------------------------------------------------- - // Collect first characters - // ----------------------------------------------------------------------- - - void seq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) { - if (!re) - return; - - // to_re(s): extract first character of the string body - if (re->is_to_re()) { - euf::snode* body = re->arg(0); - if (body && !body->is_empty()) { - euf::snode* first = body->first(); - if (first && first->is_char()) { - bool dup = false; - for (euf::snode* c : chars) - if (c == first) { dup = true; break; } - if (!dup) - chars.push_back(first); - } - // Handle string literals (classified as s_other in sgraph) - else if (first && first->get_expr()) { - seq_util& seq = m_sg.get_seq_util(); - zstring s; - if (seq.str.is_string(first->get_expr(), s) && s.length() > 0) { - euf::snode* ch = m_sg.mk_char(s[0]); - bool dup = false; - for (euf::snode* c : chars) - if (c == ch) { dup = true; break; } - if (!dup) - chars.push_back(ch); - } - } - } - return; - } - - // leaf cases: produce representative characters for character classes - if (re->is_full_char()) { - // full character set (.): use 'a' as representative - euf::snode* ch = m_sg.mk_char('a'); - bool dup = false; - for (euf::snode* c : chars) - if (c == ch) { dup = true; break; } - if (!dup) - chars.push_back(ch); - return; - } - - // re.range(lo, hi): use lo as representative - if (re->get_expr()) { - seq_util& seq = m_sg.get_seq_util(); - expr* lo = nullptr, *hi = nullptr; - if (seq.re.is_range(re->get_expr(), lo, hi) && lo) { - zstring s; - unsigned ch_val = 'a'; - if (seq.is_const_char(lo, ch_val)) { - euf::snode* ch = m_sg.mk_char(ch_val); - bool dup = false; - for (euf::snode* c : chars) - if (c == ch) { dup = true; break; } - if (!dup) - chars.push_back(ch); - } - return; - } - } - - if (re->is_fail() || re->is_full_seq()) - return; - - // recurse into children (handles union, concat, star, loop, etc.) - for (unsigned i = 0; i < re->num_args(); ++i) - collect_first_chars(re->arg(i), chars); - } - - // ----------------------------------------------------------------------- - // Membership processing +// Membership processing // ----------------------------------------------------------------------- bool seq_regex::process_str_mem(seq::str_mem const& mem, diff --git a/src/smt/seq/seq_regex.h b/src/smt/seq/seq_regex.h index 5969b0518..9ef24e665 100644 --- a/src/smt/seq/seq_regex.h +++ b/src/smt/seq/seq_regex.h @@ -279,9 +279,7 @@ namespace seq { // collect concrete first-position characters from a regex. // extracts characters reachable from to_re leaves and simple ranges. - void collect_first_chars(euf::snode* re, euf::snode_vector& chars); - - // ----------------------------------------------------------------- + // ----------------------------------------------------------------- // Membership processing // ----------------------------------------------------------------- diff --git a/src/test/euf_sgraph.cpp b/src/test/euf_sgraph.cpp index 82d99b4fb..59a9304a9 100644 --- a/src/test/euf_sgraph.cpp +++ b/src/test/euf_sgraph.cpp @@ -748,6 +748,18 @@ static void test_sgraph_minterms() { // no predicates => single minterm (full_char) SASSERT(minterms.size() == 1); std::cout << " re.all minterms: " << minterms.size() << "\n"; + + // test union of strings: "evil" and "/evil" + expr_ref evil(seq.re.mk_to_re(seq.str.mk_string(zstring("evil"))), m); + expr_ref slash_evil(seq.re.mk_to_re(seq.str.mk_string(zstring("/evil"))), m); + expr_ref union_re(seq.re.mk_union(evil, slash_evil), m); + euf::snode* s_union_re = sg.mk(union_re); + + euf::snode_vector union_minterms; + sg.compute_minterms(s_union_re, union_minterms); + std::cout << " union minterms: " << union_minterms.size() << "\n"; + // should collect 'e' and '/', yielding 3 disjoint subset partitions (e), (/), and the rest + SASSERT(union_minterms.size() == 3); } void tst_euf_sgraph() {