From ddd8bf84d7a866df44fef389ed198c3aeff0491e Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Thu, 2 Apr 2026 14:27:06 -0700 Subject: [PATCH] Replace apply_regex_unit_split with apply_regex_if_split (#9210) Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/7df36402-f2c7-4de5-b626-3df14d4eee64 Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/smt/seq/seq_nielsen.cpp | 79 +++++++++++++------------------------ src/smt/seq/seq_nielsen.h | 9 ++--- 2 files changed, 32 insertions(+), 56 deletions(-) diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 04660cdd8..36d711399 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -2228,9 +2228,9 @@ namespace seq { if (apply_const_nielsen(node)) return ++m_stats.m_mod_const_nielsen, true; - // Priority 9: RegexUnitSplit - split str_mem c·s ∈ R by minterms of R - if (apply_regex_unit_split(node)) - return ++m_stats.m_mod_regex_unit_split, true; + // Priority 9: RegexIfSplit - split str_mem s ∈ ite(c,th,el) by branching on c + if (apply_regex_if_split(node)) + return ++m_stats.m_mod_regex_if_split, true; // Priority 9b: SignatureSplit - heuristic string equation splitting if (m_signature_split && apply_signature_split(node)) @@ -3299,40 +3299,22 @@ namespace seq { return false; } - bool nielsen_graph::apply_regex_unit_split(nielsen_node *node) { + bool nielsen_graph::apply_regex_if_split(nielsen_node *node) { for (str_mem const &mem : node->str_mems()) { SASSERT(mem.m_str && mem.m_regex); - if (mem.is_primitive()) + + expr *r_expr = mem.m_regex->get_expr(); + expr_ref c(m), th(m), el(m); + if (!bool_rewriter(m).decompose_ite(r_expr, c, th, el)) continue; - euf::snode *first = mem.m_str->first(); - if (!first || !first->is_char_or_unit()) - continue; - - // Take derivative of R w.r.t. :var 0 (canonical, cached), then - // substitute inner_char for :var 0. Resulting ite conditions are - // on inner_char. - seq_rewriter rw(m); - expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m); - if (!d) - continue; - - // Extract the inner char expression from seq.unit(?inner) - expr *unit_expr = first->get_expr(), *inner_char; - VERIFY(m_seq.str.is_unit(unit_expr, inner_char)); - - var_subst vs(m); - d = vs(d, inner_char); - - - euf::snode *rest = m_sg.drop_first(mem.m_str); bool created = false; - // Worklist: (regex_expr, accumulated_char_set). + // Worklist: (regex_expr, accumulated_conditions). // Call decompose_ite in a loop until no more ite sub-expressions, - // branching on c=true and c=false and accumulating char constraints. + // branching on c=true and c=false and accumulating conditions. vector> worklist; - worklist.push_back({d, expr_ref_vector(m)}); + worklist.push_back({expr_ref(r_expr, m), expr_ref_vector(m)}); while (!worklist.empty()) { auto [r, cs] = std::move(worklist.back()); @@ -3341,50 +3323,45 @@ namespace seq { if (m_seq.re.is_empty(r)) continue; - expr_ref c(m), th(m), el(m); - if (!bool_rewriter(m).decompose_ite(r, c, th, el)) { - // No ite remaining: leaf → create child node - euf::snode *deriv_snode = m_sg.mk(r); + expr_ref c2(m), th2(m), el2(m); + if (!bool_rewriter(m).decompose_ite(r, c2, th2, el2)) { + // No ite remaining: leaf → create child node with regex updated to r + euf::snode *new_regex_snode = m_sg.mk(r); nielsen_node *child = mk_child(node); for (auto f : cs) child->add_constraint(constraint(f, mem.m_dep, m)); mk_edge(node, child, true); for (str_mem &cm : child->str_mems()) if (cm.m_id == mem.m_id) { - cm.m_str = rest; - cm.m_regex = deriv_snode; + cm.m_regex = new_regex_snode; break; } created = true; continue; } - // Substitute inner_char into condition and simplify th_rewriter tr(m); - expr_ref c_simp(c, m); + expr_ref c_simp(c2, m); tr(c_simp); if (m.is_true(c_simp)) { - // Condition is always satisfied: only then-branch - if (!m_seq.re.is_empty(th)) - worklist.push_back({th, std::move(cs)}); + if (!m_seq.re.is_empty(th2)) + worklist.push_back({th2, std::move(cs)}); } else if (m.is_false(c_simp)) { - // Condition is never satisfied: only else-branch - if (!m_seq.re.is_empty(el)) - worklist.push_back({el, std::move(cs)}); + if (!m_seq.re.is_empty(el2)) + worklist.push_back({el2, std::move(cs)}); } else { - // Branch on c=true and c=false, accumulate char constraints - if (!m_seq.re.is_empty(th)) { + if (!m_seq.re.is_empty(th2)) { expr_ref_vector cs_true(cs); - cs_true.push_back(c); - worklist.push_back({th, std::move(cs_true)}); + cs_true.push_back(c2); + worklist.push_back({th2, std::move(cs_true)}); } - if (!m_seq.re.is_empty(el)) { + if (!m_seq.re.is_empty(el2)) { expr_ref_vector cs_false(cs); - cs_false.push_back(mk_not(c)); - worklist.push_back({el, std::move(cs_false)}); + cs_false.push_back(mk_not(c2)); + worklist.push_back({el2, std::move(cs_false)}); } } } @@ -4342,7 +4319,7 @@ namespace seq { st.update("nseq mod const nielsen", m_stats.m_mod_const_nielsen); st.update("nseq mod signature split", m_stats.m_mod_signature_split); st.update("nseq mod regex var", m_stats.m_mod_regex_var_split); - st.update("nseq mod regex unit", m_stats.m_mod_regex_unit_split); + st.update("nseq mod regex if", m_stats.m_mod_regex_if_split); st.update("nseq mod power split", m_stats.m_mod_power_split); st.update("nseq mod var nielsen", m_stats.m_mod_var_nielsen); st.update("nseq mod var num unwind (eq)", m_stats.m_mod_var_num_unwinding_eq); diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index c04345be9..2af940d7a 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -730,7 +730,7 @@ namespace seq { unsigned m_mod_num_cmp = 0; unsigned m_mod_split_power_elim = 0; unsigned m_mod_const_num_unwinding = 0; - unsigned m_mod_regex_unit_split = 0; + unsigned m_mod_regex_if_split = 0; unsigned m_mod_eq_split = 0; unsigned m_mod_star_intr = 0; unsigned m_mod_gpower_intr = 0; @@ -1060,10 +1060,9 @@ namespace seq { // mirrors ZIPT's ConstNumUnwindingModifier bool apply_const_num_unwinding(nielsen_node* node); - // regex unit split: for str_mem c·s ∈ R where c is a symbolic unit, - // branch over regex minterms and constrain c via char_range. - // Unlike apply_regex_var_split, no substitution and no epsilon branch. - bool apply_regex_unit_split(nielsen_node* node); + // regex if split: for str_mem s ∈ R where R decomposes as ite(c, th, el), + // branch into s ∈ th under condition c, and s ∈ el under condition ¬c. + bool apply_regex_if_split(nielsen_node* node); // star introduction: for a str_mem x·s ∈ R where a cycle is detected // (backedge exists), introduce stabilizer: x ∈ base* with x split.