From 037d2da801012cd200f6efc98b0ed1e4ac72dd3b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 2 Mar 2026 16:34:29 +0000 Subject: [PATCH] Distinguish is_re_concat/is_str_concat and mk_re_concat/mk_str_concat in euf_seq_plugin Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/euf/euf_seq_plugin.cpp | 54 ++++++++++++++++++++++++++++------ src/ast/euf/euf_seq_plugin.h | 37 +++++++++++++++++++---- 2 files changed, 77 insertions(+), 14 deletions(-) diff --git a/src/ast/euf/euf_seq_plugin.cpp b/src/ast/euf/euf_seq_plugin.cpp index 9173c9c10..47dc213d4 100644 --- a/src/ast/euf/euf_seq_plugin.cpp +++ b/src/ast/euf/euf_seq_plugin.cpp @@ -26,10 +26,16 @@ Author: namespace euf { + // Check if enode is any kind of concat (str.++ or re.++) + static bool is_any_concat(enode* n, seq_util const& seq) { + return (seq.str.is_concat(n->get_expr()) || seq.re.is_concat(n->get_expr())) && n->num_args() == 2; + } + // Collect leaves of a concat tree in left-to-right order. // For non-concat nodes, the node itself is a leaf. + // Handles both str.++ and re.++. static void collect_enode_leaves(enode* n, seq_util const& seq, enode_vector& leaves) { - if (seq.str.is_concat(n->get_expr()) && n->num_args() == 2) { + if (is_any_concat(n, seq)) { collect_enode_leaves(n->get_arg(0), seq, leaves); collect_enode_leaves(n->get_arg(1), seq, leaves); } @@ -39,7 +45,7 @@ namespace euf { } unsigned enode_concat_hash::operator()(enode* n) const { - if (!seq.str.is_concat(n->get_expr())) + if (!is_any_concat(n, seq)) return n->get_id(); enode_vector leaves; collect_enode_leaves(n, seq, leaves); @@ -51,7 +57,7 @@ namespace euf { bool enode_concat_eq::operator()(enode* a, enode* b) const { if (a == b) return true; - if (!seq.str.is_concat(a->get_expr()) && !seq.str.is_concat(b->get_expr())) + if (!is_any_concat(a, seq) && !is_any_concat(b, seq)) return a->get_id() == b->get_id(); enode_vector la, lb; collect_enode_leaves(a, seq, la); @@ -124,14 +130,27 @@ namespace euf { push_merge(last, n); } - // empty concat: concat(a, empty) = a, concat(empty, b) = b + // str.++ identity: concat(a, ε) = a, concat(ε, b) = b enode* a, *b; - if (is_concat(n, a, b)) { - if (is_empty(a)) + if (is_str_concat(n, a, b)) { + if (is_str_empty(a)) push_merge(n, b); - else if (is_empty(b)) + else if (is_str_empty(b)) push_merge(n, a); } + + // re.++ identity: concat(a, epsilon) = a, concat(epsilon, b) = b + // re.++ absorption: concat(a, ∅) = ∅, concat(∅, b) = ∅ + if (is_re_concat(n, a, b)) { + if (is_re_epsilon(a)) + push_merge(n, b); + else if (is_re_epsilon(b)) + push_merge(n, a); + else if (is_re_empty(a)) + push_merge(n, a); + else if (is_re_empty(b)) + push_merge(n, b); + } } void seq_plugin::propagate_merge(enode* a, enode* b) { @@ -248,17 +267,34 @@ namespace euf { return na->get_root() == nb->get_root(); } - enode* seq_plugin::mk_concat(enode* a, enode* b) { + enode* seq_plugin::mk_str_concat(enode* a, enode* b) { expr* e = m_seq.str.mk_concat(a->get_expr(), b->get_expr()); enode* args[2] = { a, b }; return mk(e, 2, args); } - enode* seq_plugin::mk_empty(sort* s) { + enode* seq_plugin::mk_re_concat(enode* a, enode* b) { + expr* e = m_seq.re.mk_concat(a->get_expr(), b->get_expr()); + enode* args[2] = { a, b }; + return mk(e, 2, args); + } + + enode* seq_plugin::mk_concat(enode* a, enode* b) { + if (m_seq.is_re(a->get_expr())) + return mk_re_concat(a, b); + return mk_str_concat(a, b); + } + + enode* seq_plugin::mk_str_empty(sort* s) { expr* e = m_seq.str.mk_empty(s); return mk(e, 0, nullptr); } + enode* seq_plugin::mk_re_epsilon(sort* seq_sort) { + expr* e = m_seq.re.mk_epsilon(seq_sort); + return mk(e, 0, nullptr); + } + void seq_plugin::undo() { auto k = m_undo.back(); m_undo.pop_back(); diff --git a/src/ast/euf/euf_seq_plugin.h b/src/ast/euf/euf_seq_plugin.h index fbe210dfe..8a7f36920 100644 --- a/src/ast/euf/euf_seq_plugin.h +++ b/src/ast/euf/euf_seq_plugin.h @@ -51,6 +51,7 @@ namespace euf { // Associativity-respecting hash for enode concat trees. // Flattens concat(concat(a,b),c) and concat(a,concat(b,c)) // to the same leaf sequence [a,b,c] before hashing. + // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT). struct enode_concat_hash { seq_util const& seq; enode_concat_hash(seq_util const& s) : seq(s) {} @@ -58,6 +59,7 @@ namespace euf { }; // Associativity-respecting equality for enode concat trees. + // Handles both str.++ (OP_SEQ_CONCAT) and re.++ (OP_RE_CONCAT). struct enode_concat_eq { seq_util const& seq; enode_concat_eq(seq_util const& s) : seq(s) {} @@ -87,19 +89,44 @@ namespace euf { enode_concat_eq m_concat_eq; hashtable m_concat_table; - bool is_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } - bool is_concat(enode* n, enode*& a, enode*& b) { - return is_concat(n) && n->num_args() == 2 && + // string concat predicates + bool is_str_concat(enode* n) const { return m_seq.str.is_concat(n->get_expr()); } + bool is_str_concat(enode* n, enode*& a, enode*& b) { + return is_str_concat(n) && n->num_args() == 2 && (a = n->get_arg(0), b = n->get_arg(1), true); } + + // regex concat predicates + bool is_re_concat(enode* n) const { return m_seq.re.is_concat(n->get_expr()); } + bool is_re_concat(enode* n, enode*& a, enode*& b) { + return is_re_concat(n) && n->num_args() == 2 && + (a = n->get_arg(0), b = n->get_arg(1), true); + } + + // any concat, string or regex + bool is_concat(enode* n) const { return is_str_concat(n) || is_re_concat(n); } + bool is_concat(enode* n, enode*& a, enode*& b) { + return is_str_concat(n, a, b) || is_re_concat(n, a, b); + } + bool is_star(enode* n) const { return m_seq.re.is_star(n->get_expr()); } bool is_loop(enode* n) const { return m_seq.re.is_loop(n->get_expr()); } - bool is_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); } + + // string empty: ε for str.++ + bool is_str_empty(enode* n) const { return m_seq.str.is_empty(n->get_expr()); } + // regex empty set: ∅ for re.++ (absorbing element) + bool is_re_empty(enode* n) const { return m_seq.re.is_empty(n->get_expr()); } + // regex epsilon: to_re("") for re.++ (identity element) + bool is_re_epsilon(enode* n) const { return m_seq.re.is_epsilon(n->get_expr()); } + bool is_to_re(enode* n) const { return m_seq.re.is_to_re(n->get_expr()); } bool is_full_seq(enode* n) const { return m_seq.re.is_full_seq(n->get_expr()); } + enode* mk_str_concat(enode* a, enode* b); + enode* mk_re_concat(enode* a, enode* b); enode* mk_concat(enode* a, enode* b); - enode* mk_empty(sort* s); + enode* mk_str_empty(sort* s); + enode* mk_re_epsilon(sort* seq_sort); void push_undo(undo_kind k);