diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index bd604ab6f..0620b99ea 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -18,6 +18,8 @@ Author: #include "ast/euf/euf_sgraph.h" #include "ast/euf/euf_seq_plugin.h" +#include "ast/arith_decl_plugin.h" +#include "ast/rewriter/th_rewriter.h" #include "ast/ast_pp.h" namespace euf { @@ -109,6 +111,11 @@ namespace euf { if (m_seq.str.is_in_re(e)) return snode_kind::s_in_re; + // projection operator π_{Q,F}(state) modeled as the re.proj skolem. + if (m_seq.is_skolem(e) && + to_app(e)->get_decl()->get_parameter(0).get_symbol() == re_proj_name()) + return snode_kind::s_projection; + return snode_kind::s_var; } @@ -283,6 +290,18 @@ namespace euf { n->m_length = 1; break; + case snode_kind::s_projection: + // re.proj(state, root, nu): args 0/1 are regexes, arg 2 is the + // snapshot index (an integer numeral). Ground/classical follow the + // regex arguments only. + SASSERT(n->num_args() == 3); + n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground(); + n->m_regex_free = false; + n->m_is_classical = false; + n->m_level = 1; + n->m_length = 1; + break; + default: // NSB review: is this the correct defaults for unclassified nodes? // Is this UNREACHABLE()? @@ -292,6 +311,14 @@ namespace euf { n->m_length = 1; break; } + + // A regex (sub)tree carries a projection iff it is a projection node or + // any argument does. Computed uniformly here so all callers can use the + // cheap snode flag instead of re-walking the AST. + n->m_has_projection = (n->m_kind == snode_kind::s_projection); + for (unsigned i = 0; i < n->num_args() && !n->m_has_projection; ++i) + if (n->arg(i)->has_projection()) + n->m_has_projection = true; } static const unsigned HASH_BASE = 31; @@ -544,6 +571,210 @@ namespace euf { return n; } + bool sgraph::is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const { + if (!e || !m_seq.is_skolem(e)) + return false; + app* ap = to_app(e); + if (ap->get_decl()->get_parameter(0).get_symbol() != re_proj_name()) + return false; + if (ap->get_num_args() != 3) + return false; + arith_util au(m); + rational r; + if (!au.is_numeral(ap->get_arg(2), r) || !r.is_unsigned()) + return false; + state = ap->get_arg(0); + root = ap->get_arg(1); + nu = r.get_unsigned(); + return true; + } + + expr_ref sgraph::mk_re_proj(expr* state, expr* root, unsigned nu) { + SASSERT(state && root); + arith_util au(m); + expr* args[3] = { state, root, au.mk_int(nu) }; + sort* re_sort = state->get_sort(); + return expr_ref(m_seq.mk_skolem(re_proj_name(), 3, args, re_sort), m); + } + + expr_ref sgraph::wrap_proj(expr* e, expr* root, unsigned nu) { + // The symbolic derivative of a regex is an ite-term dispatching on + // character predicates; its leaves are ordinary derivative states. + // π_{Q,F} is propagated to all such leaves (paper §4). Partial-DFA + // states are produced from concrete-character derivatives and are thus + // ite-free, so every ite encountered here is dispatch structure. + expr* c = nullptr, *th = nullptr, *el = nullptr; + if (m.is_ite(e, c, th, el)) { + expr_ref t = wrap_proj(th, root, nu); + expr_ref f = wrap_proj(el, root, nu); + return expr_ref(m.mk_ite(c, t, f), m); + } + // π(∅) ≡ ∅: a dead leaf stays dead. + if (m_seq.re.is_empty(e)) + return expr_ref(e, m); + return mk_re_proj(e, root, nu); + } + + // 3-valued Kleene conjunction / disjunction over lbool. + static lbool lb_and(lbool a, lbool b) { + if (a == l_false || b == l_false) return l_false; + if (a == l_undef || b == l_undef) return l_undef; + return l_true; + } + static lbool lb_or(lbool a, lbool b) { + if (a == l_true || b == l_true) return l_true; + if (a == l_undef || b == l_undef) return l_undef; + return l_false; + } + + lbool sgraph::re_nullable(snode* re) { + if (!re) + return l_undef; + // Projection-free regexes: defer to the standard regex info. + if (!re->has_projection()) + return m_seq.re.get_info(re->get_expr()).nullable; + + switch (re->kind()) { + case snode_kind::s_projection: { + // nullable(π_{{root}}(state)) = [state ∈ F] = [state ≡ root]. + // Regex expressions are perfectly shared, so syntactic pointer + // equality coincides with the expr-id equality used for cycle + // detection in the partial DFA. + expr* state = nullptr, *root = nullptr; unsigned nu = 0; + VERIFY(is_re_proj(re->get_expr(), state, root, nu)); + return (state == root) ? l_true : l_false; + } + case snode_kind::s_complement: + return ~re_nullable(re->arg(0)); + case snode_kind::s_intersect: + case snode_kind::s_concat: + return lb_and(re_nullable(re->arg(0)), re_nullable(re->arg(1))); + case snode_kind::s_union: + return lb_or(re_nullable(re->arg(0)), re_nullable(re->arg(1))); + case snode_kind::s_star: + return l_true; + case snode_kind::s_plus: + return re_nullable(re->arg(0)); + case snode_kind::s_ite: + // ε-acceptance of a symbolic-derivative residual depends on the + // (unresolved) character predicate, so it is genuinely unknown + // until the ite is split (apply_regex_if_split). + return l_undef; + default: + // Not expected to carry a projection in our constructions; fall + // back to the standard info (sound for projection-free shapes). + return m_seq.re.get_info(re->get_expr()).nullable; + } + } + + snode* sgraph::deriv_proj(snode* re, expr* ch) { + SASSERT(re && re->get_expr()); + expr* re_expr = re->get_expr(); + sort* re_sort = re_expr->get_sort(); + + // Projection-free subterm: the standard derivative already handles it + // (and keeps the result canonical, matching partial-DFA state ids). + if (!re->has_projection()) + return mk(m_rewriter.mk_derivative(ch, re_expr)); + + // Minimal language-preserving simplifications that never inspect a + // projection's arguments (so the opaque skolem and its inner state id + // are preserved for the oracle's r ∈ Q test). + auto is_empty = [&](expr* r) { return m_seq.re.is_empty(r); }; + auto is_full = [&](expr* r) { return m_seq.re.is_full_seq(r); }; + auto is_eps = [&](expr* r) { return m_seq.re.is_epsilon(r); }; + auto mk_union = [&](expr* x, expr* y) -> expr* { + if (is_empty(x)) return y; + if (is_empty(y)) return x; + if (x == y) return x; + if (is_full(x) || is_full(y)) return m_seq.re.mk_full_seq(re_sort); + return m_seq.re.mk_union(x, y); + }; + auto mk_inter = [&](expr* x, expr* y) -> expr* { + if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort); + if (is_full(x)) return y; + if (is_full(y)) return x; + if (x == y) return x; + return m_seq.re.mk_inter(x, y); + }; + auto mk_concat = [&](expr* x, expr* y) -> expr* { + if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort); + if (is_eps(x)) return y; + if (is_eps(y)) return x; + return m_seq.re.mk_concat(x, y); + }; + auto mk_compl = [&](expr* x) -> expr* { + if (is_empty(x)) return m_seq.re.mk_full_seq(re_sort); + if (is_full(x)) return m_seq.re.mk_empty(re_sort); + expr* inner = nullptr; + if (m_seq.re.is_complement(x, inner)) return inner; + return m_seq.re.mk_complement(x); + }; + + switch (re->kind()) { + case snode_kind::s_projection: { + expr* state = nullptr, *root = nullptr; unsigned nu = 0; + VERIFY(is_re_proj(re_expr, state, root, nu)); + // δ_a(π_{Q,{root}}(state)) = π_{Q,{root}}(δ_a(state)) if state ∈ Q, + // else ⊥. The gate is on the *current* state (paper §3.3). + if (!m_proj_oracle || !m_proj_oracle->projection_state_in_Q(state, nu)) + return mk(m_seq.re.mk_empty(re_sort)); + snode* dstate = deriv_proj(re->arg(0), ch); // arg(0) ≡ state + if (!dstate || dstate->is_fail() || m_seq.re.is_empty(dstate->get_expr())) + return mk(m_seq.re.mk_empty(re_sort)); + // δ(state) may be concrete (one state) or an ite-term (symbolic + // character). Wrap π around the result, descending into ite leaves. + return mk(wrap_proj(dstate->get_expr(), root, nu)); + } + case snode_kind::s_ite: { + // ite-structured residual (from a symbolic-character derivative): + // δ_a(ite(c, th, el)) = ite(c, δ_a(th), δ_a(el)). + snode* dth = deriv_proj(re->arg(1), ch); + snode* del = deriv_proj(re->arg(2), ch); + return mk(expr_ref(m.mk_ite(re->arg(0)->get_expr(), dth->get_expr(), del->get_expr()), m)); + } + case snode_kind::s_complement: { + snode* d = deriv_proj(re->arg(0), ch); + return mk(expr_ref(mk_compl(d->get_expr()), m)); + } + case snode_kind::s_intersect: { + snode* d0 = deriv_proj(re->arg(0), ch); + snode* d1 = deriv_proj(re->arg(1), ch); + return mk(expr_ref(mk_inter(d0->get_expr(), d1->get_expr()), m)); + } + case snode_kind::s_union: { + snode* d0 = deriv_proj(re->arg(0), ch); + snode* d1 = deriv_proj(re->arg(1), ch); + return mk(expr_ref(mk_union(d0->get_expr(), d1->get_expr()), m)); + } + case snode_kind::s_concat: { + // δ_a(R·S) = δ_a(R)·S ⊔ (nullable(R) ? δ_a(S) : ∅) + snode* d0 = deriv_proj(re->arg(0), ch); + expr* head = mk_concat(d0->get_expr(), re->arg(1)->get_expr()); + if (re_nullable(re->arg(0)) == l_true) { + snode* d1 = deriv_proj(re->arg(1), ch); + head = mk_union(head, d1->get_expr()); + } + return mk(expr_ref(head, m)); + } + case snode_kind::s_star: { + // δ_a(R*) = δ_a(R)·R* + snode* d = deriv_proj(re->arg(0), ch); + return mk(expr_ref(mk_concat(d->get_expr(), re_expr), m)); + } + case snode_kind::s_plus: { + // δ_a(R+) = δ_a(R)·R* + snode* d = deriv_proj(re->arg(0), ch); + expr_ref star(m_seq.re.mk_star(re->arg(0)->get_expr()), m); + return mk(expr_ref(mk_concat(d->get_expr(), star), m)); + } + default: + // A projection nested under an operator we do not specially derive + // is not produced by our constructions. Fall back conservatively. + return mk(m_rewriter.mk_derivative(ch, re_expr)); + } + } + snode* sgraph::brzozowski_deriv(snode* re, snode* elem) { expr* re_expr = re->get_expr(); expr* elem_expr = elem->get_expr(); @@ -592,6 +823,22 @@ namespace euf { } } + // If the regex contains a projection operator, the generic rewriter + // cannot derive it. Use the projection-aware structural derivative, + // which delegates projection-free subterms back to mk_derivative. + // Canonicalize the result with th_rewriter (which treats the re.proj + // skolem as an opaque leaf and only applies ACI / identity rules to the + // surrounding Boolean/concat structure). Without this, equivalent + // derivative states get distinct snode ids and BFS emptiness checks + // fail to deduplicate, exploring an exploded state space. + if (re->has_projection()) { + snode* d = deriv_proj(re, elem_expr); + expr_ref e(d->get_expr(), m); + th_rewriter trw(m); + trw(e); + return mk(e); + } + expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr); SASSERT(result); return mk(result); @@ -647,6 +894,14 @@ namespace euf { if (m_seq.re.is_empty(e)) return; + // projection operator: collect predicates from the regex arguments only + // (the third argument is the integer snapshot index, not a regex). + if (re->is_projection()) { + collect_re_predicates(re->arg(0), preds); + collect_re_predicates(re->arg(1), preds); + return; + } + // Expected compound regex operators are handled by recursion below. // If a leaf survives to this point, it is an unhandled regex form. if (re->num_args() == 0) { diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index f66aabe0f..df23eefc6 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -58,6 +58,7 @@ Author: #include "util/region.h" #include "util/statistics.h" +#include "util/lbool.h" #include "ast/ast.h" #include "ast/seq_decl_plugin.h" #include "ast/rewriter/seq_rewriter.h" @@ -68,6 +69,19 @@ namespace euf { class seq_plugin; + // Oracle queried by the projection-aware derivative of sgraph. + // The projection operator π_{Q,F}(state) (a re.proj skolem) has its set of + // explored states Q stored externally (the nielsen_graph partial DFA), keyed + // by a snapshot index nu. The sgraph consults this oracle to decide whether + // the current state lies in Q when deriving a projection. + class projection_oracle { + public: + virtual ~projection_oracle() = default; + // true iff `state` (a regex expression) belongs to the explored + // subautomaton snapshot identified by `nu`. + virtual bool projection_state_in_Q(expr* state, unsigned nu) = 0; + }; + class sgraph { struct stats { @@ -107,6 +121,9 @@ namespace euf { unsigned_vector m_alias_trail; // expression ids unsigned_vector m_alias_trail_lim; // scope boundaries + // Oracle answering "state ∈ Q_nu" for projection derivatives. Not owned. + projection_oracle* m_proj_oracle = nullptr; + snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); snode_kind classify(expr* e) const; void compute_metadata(snode* n); @@ -156,6 +173,27 @@ namespace euf { // for deriving symbolic variables. snode* brzozowski_deriv(snode* re, snode* elem); + // Register the oracle consulted when deriving projection operators. + // Passing nullptr unregisters. Not owned. + void set_projection_oracle(projection_oracle* o) { m_proj_oracle = o; } + + // Projection operator support (π_{Q,F}(state) modeled as re.proj skolem). + // Recognize and destructure a re.proj skolem expression. + bool is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const; + // Build the re.proj skolem expression for π_{{root}}(state) at snapshot nu. + expr_ref mk_re_proj(expr* state, expr* root, unsigned nu); + // Wrap a (possibly ite-structured) symbolic-derivative result in the + // projection operator, propagating π into every ite leaf (paper §4). + expr_ref wrap_proj(expr* e, expr* root, unsigned nu); + // Projection-aware Brzozowski derivative w.r.t. a character expr + // (concrete or symbolic). + snode* deriv_proj(snode* re, expr* ch); + + // Projection-aware nullability: lifts re.get_info().nullable to regexes + // that may contain projection operators. Returns l_true / l_false + // (l_undef only if an underlying projection-free subterm is undef). + lbool re_nullable(snode* re); + // Decode a character expression that may be represented as a const-char, // a unit string containing a const-char, or a one-character string literal. bool decode_re_char(expr* ex, unsigned& out) const; diff --git a/src/ast/euf/euf_snode.h b/src/ast/euf/euf_snode.h index 1493b4a94..2e4f94213 100644 --- a/src/ast/euf/euf_snode.h +++ b/src/ast/euf/euf_snode.h @@ -56,9 +56,16 @@ namespace euf { s_range, // character range [lo,hi] (OP_RE_RANGE) s_ite, // ite (OP_ITE) s_to_re, // string to regex (OP_SEQ_TO_RE) - s_in_re // regex membership (OP_SEQ_IN_RE) + s_in_re, // regex membership (OP_SEQ_IN_RE) + s_projection // projection operator π_{Q,F}(state) (re.proj skolem) }; + // Skolem decl-name marking a regex projection operator π_{Q,F}(state). + // The skolem has signature re.proj(state : RegLan, root : RegLan, nu : Int) + // where F = {root} (singleton accepting state) and nu identifies the + // explored subautomaton snapshot Q (see seq_nielsen partial DFA). + inline symbol re_proj_name() { return symbol("re.proj"); } + class snode { expr *m_expr = nullptr; snode_kind m_kind = snode_kind::s_var; @@ -69,6 +76,7 @@ namespace euf { bool m_ground = true; // no uninterpreted string variables bool m_regex_free = true; // no regex constructs bool m_is_classical = true; // classical regular expression + bool m_has_projection = false; // contains a projection operator (re.proj) unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons) unsigned m_length = 0; // token count, number of leaf tokens in the tree @@ -125,6 +133,10 @@ namespace euf { bool is_classical() const { return m_is_classical; } + // true iff this regex (sub)tree contains a projection operator (re.proj). + bool has_projection() const { + return m_has_projection; + } unsigned level() const { return m_level; } @@ -202,6 +214,9 @@ namespace euf { bool is_in_re() const { return m_kind == snode_kind::s_in_re; } + bool is_projection() const { + return m_kind == snode_kind::s_projection; + } // is this a leaf token (analogous to ZIPT's StrToken as opposed to Str) bool is_token() const { diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index df0830150..8dc9a78c4 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -178,15 +178,14 @@ namespace seq { bool str_mem::is_trivial(nielsen_node const* n) const { if (!(m_str && m_regex && m_str->is_empty())) return false; - const auto& info = n->graph().seq().re.get_info(m_regex->get_expr()); - return info.nullable == l_true; + // Projection-aware nullability (handles re.proj operators in m_regex). + return n->graph().sg().re_nullable(m_regex) == l_true; } bool str_mem::is_contradiction(nielsen_node const* n) const { if (!(m_str && m_regex && m_str->is_empty())) return false; - const auto& info = n->graph().seq().re.get_info(m_regex->get_expr()); - return info.nullable == l_false; + return n->graph().sg().re_nullable(m_regex) == l_false; } bool str_mem::contains_var(euf::snode* var) const { @@ -457,14 +456,49 @@ namespace seq { m_context_solver(ctx_solver), m_partial_dfa_pin(sg.get_manager()), m_parikh(alloc(seq_parikh, sg)), - m_seq_regex(alloc(seq::seq_regex, sg)) {} + m_seq_regex(alloc(seq::seq_regex, sg)) { + // Answer projection-state membership queries during projection-aware + // derivatives (the sgraph cannot reach the partial DFA otherwise). + m_sg.set_projection_oracle(this); + } nielsen_graph::~nielsen_graph() { + m_sg.set_projection_oracle(nullptr); dealloc(m_parikh); dealloc(m_seq_regex); reset(); } + bool nielsen_graph::projection_state_in_Q(expr* state, unsigned nu) { + if (!state || nu == 0) + return false; + const unsigned sid = state->get_id(); + // r ∈ Q_nu iff r is incident to a partial-DFA edge whose extraction + // index lies in [1, nu] (the "edges marked ≤ ν" subautomaton of the + // implementation-aspects section of the paper). + auto incident = [&](std::unordered_map const& adj) { + auto it = adj.find(sid); + if (it == adj.end()) + return false; + for (const unsigned edge_idx : it->second) { + if (edge_idx >= m_partial_dfa_edges.size()) + continue; + const unsigned pidx = m_partial_dfa_edges[edge_idx].m_projection_idx; + if (pidx != 0 && pidx <= nu) + return true; + } + return false; + }; + return incident(m_partial_dfa_out) || incident(m_partial_dfa_in); + } + + euf::snode* nielsen_graph::mk_projection_term(euf::snode* root_re, unsigned nu) { + SASSERT(root_re && root_re->get_expr()); + // π_{Q_nu, {root}}(root): current state == accepting state == root. + expr_ref proj = m_sg.mk_re_proj(root_re->get_expr(), root_re->get_expr(), nu); + return m_sg.mk(proj); + } + nielsen_node* nielsen_graph::mk_node() { const unsigned id = m_nodes.size(); nielsen_node* n = alloc(nielsen_node, *this, id); @@ -1057,137 +1091,6 @@ namespace seq { return covered; } - euf::snode* nielsen_graph::build_projection_regex_from_scc(euf::snode* root_re, uint_set const& scc, unsigned extract_idx) { - SASSERT(root_re && root_re->get_expr()); - - sort* seq_sort = nullptr; - VERIFY(m_seq.is_re(root_re->get_expr(), seq_sort)); - SASSERT(seq_sort); - - sort* re_sort = root_re->get_expr()->get_sort(); - - // Floyd-Warshall is done over expressions, not snodes: every label - // edge that feeds this matrix comes from m_partial_dfa_edges whose - // labels (and src/dst) may correspond to snodes that no longer exist - // at the current sgraph scope. We work entirely with pinned exprs - // here and materialize a single snode for the simplified result at - // the very end. All intermediate exprs are kept alive by an - // expr_ref_vector that lives for the duration of this call. - expr_ref_vector pin(m); - expr* eps_expr = m_seq.re.mk_epsilon(seq_sort); - expr* empty_expr = m_seq.re.mk_empty(re_sort); - pin.push_back(eps_expr); - pin.push_back(empty_expr); - - auto is_empty = [&](expr* re) -> bool { - return !re || m_seq.re.is_empty(re); - }; - - auto is_eps = [&](expr* re) -> bool { - return re && m_seq.re.is_epsilon(re); - }; - - auto mk_union = [&](expr* a, expr* b) -> expr* { - if (is_empty(a)) return b; - if (is_empty(b)) return a; - if (a == b) return a; - expr* r = m_seq.re.mk_union(a, b); - pin.push_back(r); - return r; - }; - - auto mk_concat = [&](expr* a, expr* b) -> expr* { - if (is_empty(a) || is_empty(b)) return empty_expr; - if (is_eps(a)) return b; - if (is_eps(b)) return a; - expr* r = m_seq.re.mk_concat(a, b); - pin.push_back(r); - return r; - }; - - auto mk_star = [&](expr* r) -> expr* { - if (is_empty(r) || is_eps(r)) return eps_expr; - if (r && m_seq.re.is_star(r)) return r; - expr* s = m_seq.re.mk_star(r); - pin.push_back(s); - return s; - }; - - unsigned_vector states; - states.reserve(scc.num_elems()); - std::unordered_map pos; - for (unsigned s : scc) { - pos.emplace(s, states.size()); - states.push_back(s); - } - - // scc / pos are keyed by expr id (see collect_scc_for_projection). - auto it_root = pos.find(root_re->get_expr()->get_id()); - if (it_root == pos.end()) - return nullptr; - - unsigned n = states.size(); - std::vector> R(n, std::vector(n, nullptr)); - - for (unsigned src_id : states) { - auto it = m_partial_dfa_out.find(src_id); - if (it == m_partial_dfa_out.end()) - continue; - for (unsigned edge_idx : it->second) { - if (edge_idx >= m_partial_dfa_edges.size()) - continue; - partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx]; - if (!e.m_src || !e.m_dst || !e.m_label) - continue; - if (!scc.contains(e.m_dst->get_id())) - continue; - if (e.m_projection_idx == 0 || e.m_projection_idx > extract_idx) - continue; - auto it_src = pos.find(e.m_src->get_id()); - auto it_dst = pos.find(e.m_dst->get_id()); - if (it_src == pos.end() || it_dst == pos.end()) - continue; - unsigned i = it_src->second; - unsigned j = it_dst->second; - R[i][j] = mk_union(R[i][j], e.m_label); - } - } - - for (unsigned i = 0; i < n; ++i) { - R[i][i] = mk_union(R[i][i], eps_expr); - } - - for (unsigned k = 0; k < n; ++k) { - std::vector col_k(n, nullptr), row_k(n, nullptr); - for (unsigned i = 0; i < n; ++i) - col_k[i] = R[i][k]; - for (unsigned j = 0; j < n; ++j) - row_k[j] = R[k][j]; - expr* loop_k = R[k][k]; - expr* loop_star = mk_star(loop_k); - - for (unsigned i = 0; i < n; ++i) { - if (is_empty(col_k[i])) - continue; - for (unsigned j = 0; j < n; ++j) { - if (is_empty(row_k[j])) - continue; - expr* via_k = mk_concat(mk_concat(col_k[i], loop_star), row_k[j]); - R[i][j] = mk_union(R[i][j], via_k); - } - } - } - - expr* result = R[it_root->second][it_root->second]; - if (!result) - return m_sg.mk(eps_expr); - - expr_ref simplified(result, m); - th_rewriter trw(m); - trw(simplified); - return m_sg.mk(simplified); - } - bool nielsen_graph::try_extract_partial_projection(euf::snode* root_re, euf::snode*& projection_re) { SASSERT(root_re && root_re->get_expr()); projection_re = nullptr; @@ -1207,7 +1110,10 @@ namespace seq { if (covered_edges <= prev_covered) return false; - projection_re = build_projection_regex_from_scc(root_re, scc, m_projection_extract_idx); + // Keep the stabilizer symbolic as a projection operator over the + // (just-marked) explored subautomaton snapshot. Its language is + // refined lazily through projection-aware derivatives. + projection_re = mk_projection_term(root_re, m_projection_extract_idx); if (!projection_re) return false; @@ -1222,7 +1128,6 @@ namespace seq { expr_ref new_arg(v->get_expr(), m); expr_ref new_l(left, m), new_r(right, m); expr* arg, *l, *r; - std::cout << "Creating slice for " << mk_pp(new_arg, m) << "; " << mk_pp(new_l, m) << "; " << mk_pp(new_r, m) << std::endl; if (m_sk.is_slice(new_arg, arg, l, r)) { new_l = a.mk_add(left, l); @@ -1232,7 +1137,6 @@ namespace seq { new_arg = arg; } expr_ref slice = m_sk.mk_slice(new_arg, new_l, new_r); - std::cout << "Got " << mk_pp(slice, m) << std::endl; return m_sg.mk(slice); } @@ -1713,35 +1617,45 @@ namespace seq { break; euf::snode* src_re = mem.m_regex; - seq_rewriter rw(m); - expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m); - // Extract the inner char expression from seq.unit(?inner) - expr *inner_char = tok->arg(0)->get_expr(); - - // substitute the inner char for the derivative variable in d - var_subst vs(m); - d = vs(d, inner_char); - - th_rewriter thrw(m); - thrw(d); - - // Record concrete minterm edges for src_re so cycle_decomp can - // detect SCCs lazily (mirrors the incremental DFA building from - // concrete char consumption in the loop above). - if (src_re->is_ground()) { - euf::snode_vector mts; - sg.compute_minterms(src_re, mts); - for (euf::snode* mt : mts) { - euf::snode* mt_deriv = sg.brzozowski_deriv(src_re, mt); - if (mt_deriv && !mt_deriv->is_fail()) - m_graph.record_partial_derivative_edge(src_re, mt, mt_deriv); - } + euf::snode* next = nullptr; + if (src_re->has_projection()) { + // The generic symbolic derivative cannot see the projection + // operator; route through the projection-aware derivative, + // which yields the ite-residual with π propagated to leaves. + next = sg.brzozowski_deriv(src_re, tok); } + else { + seq_rewriter rw(m); + expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m); - auto next = sg.mk(d); + // Extract the inner char expression from seq.unit(?inner) + expr *inner_char = tok->arg(0)->get_expr(); + + // substitute the inner char for the derivative variable in d + var_subst vs(m); + d = vs(d, inner_char); + + th_rewriter thrw(m); + thrw(d); + + // Record concrete minterm edges for src_re so cycle_decomp can + // detect SCCs lazily (mirrors the incremental DFA building from + // concrete char consumption in the loop above). + if (src_re->is_ground()) { + euf::snode_vector mts; + sg.compute_minterms(src_re, mts); + for (euf::snode* mt : mts) { + euf::snode* mt_deriv = sg.brzozowski_deriv(src_re, mt); + if (mt_deriv && !mt_deriv->is_fail()) + m_graph.record_partial_derivative_edge(src_re, mt, mt_deriv); + } + } + + next = sg.mk(d); + } if (next->is_fail()) { - TRACE(seq, tout << "empty regex" << spp(mem.m_regex, m) << " d: " << d << "\n"); + TRACE(seq, tout << "empty regex" << spp(mem.m_regex, m) << "\n"); set_general_conflict(); set_conflict(backtrack_reason::regex, mem.m_dep); return simplify_result::conflict; @@ -3384,7 +3298,10 @@ namespace seq { uint_set scc; if (!collect_scc_for_projection(root_re, scc)) return nullptr; - return build_projection_regex_from_scc(root_re, scc, m_projection_extract_idx); + // Symbolic projection over the edges marked by earlier extractions + // (index ≤ current snapshot). No new marking here, mirroring the old + // behaviour of reusing the current extraction index. + return mk_projection_term(root_re, m_projection_extract_idx); } // ----------------------------------------------------------------------- @@ -3395,6 +3312,10 @@ namespace seq { // ----------------------------------------------------------------------- bool nielsen_graph::apply_cycle_subsumption(nielsen_node* node) { + TRACE(seq, tout << "cycle_subsumption ENTER node " << node->id() << " with mems:\n"; + for (auto const& m2 : node->str_mems()) + tout << " [prim=" << m2.is_primitive() << "] " + << mk_pp(m2.m_str->get_expr(), m) << " ∈ " << mk_pp(m2.m_regex->get_expr(), m) << "\n"); for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) { str_mem const& mem = node->str_mems()[mi]; SASSERT(mem.well_formed()); @@ -3418,7 +3339,12 @@ namespace seq { continue; // Check L(x_regex) ⊆ L(stabilizer). - if (m_seq_regex->is_language_subset(x_regex, stabilizer) != l_true) + lbool subset = m_seq_regex->is_language_subset(x_regex, stabilizer); + TRACE(seq, tout << "cycle_subsumption check: first=" << mk_pp(first->get_expr(), m) + << "\n x_regex=" << mk_pp(x_regex->get_expr(), m) + << "\n stabilizer=" << mk_pp(stabilizer->get_expr(), m) + << "\n subset=" << subset << "\n"); + if (subset != l_true) continue; // Subsume: replace x·rest ∈ r with rest ∈ r. @@ -3454,10 +3380,11 @@ namespace seq { // cycle is detected, project SCC onto stabilizer constraint b. // Rewrites x into x'·x'' with x' ∈ b*, x'' ∈ complement((b ∩ complement(eps)) · Sigma*). // - // Here b = Proj{R}{E_SCC}{R} is the language of all paths from R back to R - // in the known SCC edges (including ε), which equals s* for the non-empty - // cycle language s. Equivalently, stabilizer_re from build_projection_regex_from_scc - // is the Kleene closure computed by Floyd-Warshall over the SCC subgraph. + // Here stabilizer_re = π_{Q_SCC,{R}}(R) is the projection operator denoting + // the language of all paths from R back to R that stay within the explored + // subautomaton Q_SCC (including ε), i.e. s* for the non-empty cycle language + // s. It is kept symbolic; its derivative/nullability are evaluated lazily by + // the projection-aware sgraph (paper §3.3) rather than materialized. // // The constraint on x'' prevents divergence: x'' may not begin with any // non-empty word from L(stabilizer_re), so it cannot re-enter the cycle. @@ -3501,13 +3428,22 @@ namespace seq { euf::snode* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), stabilizer_re->get_expr(), seq_sort)); euf::snode* xpp = get_tail(x, compute_length_expr(xp).get()); euf::snode* xp_xpp = m_sg.mk_concat(xp, xpp); + TRACE(seq, tout << "cycle_decomp build: xp=" << mk_pp(xp->get_expr(), m) + << "\n xpp=" << mk_pp(xpp->get_expr(), m) + << "\n xp_xpp=" << mk_pp(xp_xpp->get_expr(), m) + << "\n xp_xpp.first=" << mk_pp(xp_xpp->first()->get_expr(), m) << "\n"); nielsen_node* child = mk_child(node); + SASSERT(child->m_str_mem[mi] == mem); nielsen_edge* e = mk_edge(node, child, false); const nielsen_subst s(x, xp_xpp, nullptr, mem.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); + // remove from mi^th element of the child the leading token, as it is immediately subsumed + SASSERT(child->m_str_mem[mi].m_str->first() == xp); + child->m_str_mem[mi].m_str = dir_drop(m_sg, child->m_str_mem[mi].m_str, 1, true); + // x' ∈ stabilizer_re (= s*, all repetitions of the detected cycle) child->add_str_mem(str_mem(xp, stabilizer_re, mem.m_dep)); diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 4294380d5..02bd6b62c 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -855,7 +855,7 @@ namespace seq { // the overall Nielsen transformation graph // mirrors ZIPT's NielsenGraph - class nielsen_graph { + class nielsen_graph : public euf::projection_oracle { friend class nielsen_node; friend class nielsen_edge; @@ -979,6 +979,11 @@ namespace seq { seq_util& seq() { return m_seq; } seq_util const& seq() const { return m_seq; } + // euf::projection_oracle: true iff regex `state` is part of the + // explored subautomaton snapshot `nu` (a partial-DFA edge incident to + // `state` was marked with an index in [1, nu]). + bool projection_state_in_Q(expr* state, unsigned nu) override; + // node management nielsen_node* mk_node(); nielsen_node* mk_child(nielsen_node* parent); @@ -1169,9 +1174,12 @@ namespace seq { // currently covered edge count for this extraction. unsigned mark_scc_projection_edges(uint_set const& scc); - // Build regex equivalent to proj(root_re, E_scc, {root_re}) using the - // currently marked SCC edges (projection index <= extract_idx). - euf::snode* build_projection_regex_from_scc(euf::snode* root_re, uint_set const& scc, unsigned extract_idx); + // Build the projection operator π_{Q,{root_re}}(root_re) as a re.proj + // skolem snode, where Q is the explored subautomaton identified by the + // snapshot index nu. This is the stabilizer of root_re kept symbolically + // (the projection's derivative/nullability are evaluated lazily by the + // sgraph consulting projection_state_in_Q). + euf::snode* mk_projection_term(euf::snode* root_re, unsigned nu); // Try to extract a stronger projection for root_re. Returns true and // stores it in projection_re iff SCC coverage has grown. diff --git a/src/smt/seq/seq_regex.cpp b/src/smt/seq/seq_regex.cpp index 6cb1aed10..c3b914d1b 100644 --- a/src/smt/seq/seq_regex.cpp +++ b/src/smt/seq/seq_regex.cpp @@ -400,6 +400,14 @@ namespace seq { if (re->is_fail() || re->is_full_char() || re->is_full_seq()) return; + // projection operator: only the regex arguments carry character + // structure; the third argument is the integer snapshot index. + if (re->is_projection()) { + collect_char_boundaries(re->arg(0), bounds); + collect_char_boundaries(re->arg(1), bounds); + return; + } + // If we reached a leaf and none of the expected leaf forms matched, // this is a regex constructor we did not account for in boundary // extraction and should fail loudly in debug builds. diff --git a/src/smt/seq/seq_regex.h b/src/smt/seq/seq_regex.h index 6a41d89bb..bfce29b8c 100644 --- a/src/smt/seq/seq_regex.h +++ b/src/smt/seq/seq_regex.h @@ -210,8 +210,9 @@ namespace seq { } // check if regex accepts the empty string + // (projection-aware: re may contain re.proj operators) bool is_nullable(euf::snode* re) const { - return re && seq.re.get_info(re->get_expr()).nullable == l_true; + return re && m_sg.re_nullable(re) == l_true; } // check if regex is ground (no string variables)