3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-06-19 15:16:29 +00:00

Added real projection operator

This commit is contained in:
CEisenhofer 2026-05-29 15:51:35 +02:00
parent ff99cb442a
commit 70031b674c
7 changed files with 437 additions and 176 deletions

View file

@ -18,6 +18,8 @@ Author:
#include "ast/euf/euf_sgraph.h"
#include "ast/euf/euf_seq_plugin.h"
#include "ast/arith_decl_plugin.h"
#include "ast/rewriter/th_rewriter.h"
#include "ast/ast_pp.h"
namespace euf {
@ -109,6 +111,11 @@ namespace euf {
if (m_seq.str.is_in_re(e))
return snode_kind::s_in_re;
// projection operator π_{Q,F}(state) modeled as the re.proj skolem.
if (m_seq.is_skolem(e) &&
to_app(e)->get_decl()->get_parameter(0).get_symbol() == re_proj_name())
return snode_kind::s_projection;
return snode_kind::s_var;
}
@ -283,6 +290,18 @@ namespace euf {
n->m_length = 1;
break;
case snode_kind::s_projection:
// re.proj(state, root, nu): args 0/1 are regexes, arg 2 is the
// snapshot index (an integer numeral). Ground/classical follow the
// regex arguments only.
SASSERT(n->num_args() == 3);
n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
n->m_regex_free = false;
n->m_is_classical = false;
n->m_level = 1;
n->m_length = 1;
break;
default:
// NSB review: is this the correct defaults for unclassified nodes?
// Is this UNREACHABLE()?
@ -292,6 +311,14 @@ namespace euf {
n->m_length = 1;
break;
}
// A regex (sub)tree carries a projection iff it is a projection node or
// any argument does. Computed uniformly here so all callers can use the
// cheap snode flag instead of re-walking the AST.
n->m_has_projection = (n->m_kind == snode_kind::s_projection);
for (unsigned i = 0; i < n->num_args() && !n->m_has_projection; ++i)
if (n->arg(i)->has_projection())
n->m_has_projection = true;
}
static const unsigned HASH_BASE = 31;
@ -544,6 +571,210 @@ namespace euf {
return n;
}
bool sgraph::is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const {
if (!e || !m_seq.is_skolem(e))
return false;
app* ap = to_app(e);
if (ap->get_decl()->get_parameter(0).get_symbol() != re_proj_name())
return false;
if (ap->get_num_args() != 3)
return false;
arith_util au(m);
rational r;
if (!au.is_numeral(ap->get_arg(2), r) || !r.is_unsigned())
return false;
state = ap->get_arg(0);
root = ap->get_arg(1);
nu = r.get_unsigned();
return true;
}
expr_ref sgraph::mk_re_proj(expr* state, expr* root, unsigned nu) {
SASSERT(state && root);
arith_util au(m);
expr* args[3] = { state, root, au.mk_int(nu) };
sort* re_sort = state->get_sort();
return expr_ref(m_seq.mk_skolem(re_proj_name(), 3, args, re_sort), m);
}
expr_ref sgraph::wrap_proj(expr* e, expr* root, unsigned nu) {
// The symbolic derivative of a regex is an ite-term dispatching on
// character predicates; its leaves are ordinary derivative states.
// π_{Q,F} is propagated to all such leaves (paper §4). Partial-DFA
// states are produced from concrete-character derivatives and are thus
// ite-free, so every ite encountered here is dispatch structure.
expr* c = nullptr, *th = nullptr, *el = nullptr;
if (m.is_ite(e, c, th, el)) {
expr_ref t = wrap_proj(th, root, nu);
expr_ref f = wrap_proj(el, root, nu);
return expr_ref(m.mk_ite(c, t, f), m);
}
// π(∅) ≡ ∅: a dead leaf stays dead.
if (m_seq.re.is_empty(e))
return expr_ref(e, m);
return mk_re_proj(e, root, nu);
}
// 3-valued Kleene conjunction / disjunction over lbool.
static lbool lb_and(lbool a, lbool b) {
if (a == l_false || b == l_false) return l_false;
if (a == l_undef || b == l_undef) return l_undef;
return l_true;
}
static lbool lb_or(lbool a, lbool b) {
if (a == l_true || b == l_true) return l_true;
if (a == l_undef || b == l_undef) return l_undef;
return l_false;
}
lbool sgraph::re_nullable(snode* re) {
if (!re)
return l_undef;
// Projection-free regexes: defer to the standard regex info.
if (!re->has_projection())
return m_seq.re.get_info(re->get_expr()).nullable;
switch (re->kind()) {
case snode_kind::s_projection: {
// nullable(π_{{root}}(state)) = [state ∈ F] = [state ≡ root].
// Regex expressions are perfectly shared, so syntactic pointer
// equality coincides with the expr-id equality used for cycle
// detection in the partial DFA.
expr* state = nullptr, *root = nullptr; unsigned nu = 0;
VERIFY(is_re_proj(re->get_expr(), state, root, nu));
return (state == root) ? l_true : l_false;
}
case snode_kind::s_complement:
return ~re_nullable(re->arg(0));
case snode_kind::s_intersect:
case snode_kind::s_concat:
return lb_and(re_nullable(re->arg(0)), re_nullable(re->arg(1)));
case snode_kind::s_union:
return lb_or(re_nullable(re->arg(0)), re_nullable(re->arg(1)));
case snode_kind::s_star:
return l_true;
case snode_kind::s_plus:
return re_nullable(re->arg(0));
case snode_kind::s_ite:
// ε-acceptance of a symbolic-derivative residual depends on the
// (unresolved) character predicate, so it is genuinely unknown
// until the ite is split (apply_regex_if_split).
return l_undef;
default:
// Not expected to carry a projection in our constructions; fall
// back to the standard info (sound for projection-free shapes).
return m_seq.re.get_info(re->get_expr()).nullable;
}
}
snode* sgraph::deriv_proj(snode* re, expr* ch) {
SASSERT(re && re->get_expr());
expr* re_expr = re->get_expr();
sort* re_sort = re_expr->get_sort();
// Projection-free subterm: the standard derivative already handles it
// (and keeps the result canonical, matching partial-DFA state ids).
if (!re->has_projection())
return mk(m_rewriter.mk_derivative(ch, re_expr));
// Minimal language-preserving simplifications that never inspect a
// projection's arguments (so the opaque skolem and its inner state id
// are preserved for the oracle's r ∈ Q test).
auto is_empty = [&](expr* r) { return m_seq.re.is_empty(r); };
auto is_full = [&](expr* r) { return m_seq.re.is_full_seq(r); };
auto is_eps = [&](expr* r) { return m_seq.re.is_epsilon(r); };
auto mk_union = [&](expr* x, expr* y) -> expr* {
if (is_empty(x)) return y;
if (is_empty(y)) return x;
if (x == y) return x;
if (is_full(x) || is_full(y)) return m_seq.re.mk_full_seq(re_sort);
return m_seq.re.mk_union(x, y);
};
auto mk_inter = [&](expr* x, expr* y) -> expr* {
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
if (is_full(x)) return y;
if (is_full(y)) return x;
if (x == y) return x;
return m_seq.re.mk_inter(x, y);
};
auto mk_concat = [&](expr* x, expr* y) -> expr* {
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
if (is_eps(x)) return y;
if (is_eps(y)) return x;
return m_seq.re.mk_concat(x, y);
};
auto mk_compl = [&](expr* x) -> expr* {
if (is_empty(x)) return m_seq.re.mk_full_seq(re_sort);
if (is_full(x)) return m_seq.re.mk_empty(re_sort);
expr* inner = nullptr;
if (m_seq.re.is_complement(x, inner)) return inner;
return m_seq.re.mk_complement(x);
};
switch (re->kind()) {
case snode_kind::s_projection: {
expr* state = nullptr, *root = nullptr; unsigned nu = 0;
VERIFY(is_re_proj(re_expr, state, root, nu));
// δ_a(π_{Q,{root}}(state)) = π_{Q,{root}}(δ_a(state)) if state ∈ Q,
// else ⊥. The gate is on the *current* state (paper §3.3).
if (!m_proj_oracle || !m_proj_oracle->projection_state_in_Q(state, nu))
return mk(m_seq.re.mk_empty(re_sort));
snode* dstate = deriv_proj(re->arg(0), ch); // arg(0) ≡ state
if (!dstate || dstate->is_fail() || m_seq.re.is_empty(dstate->get_expr()))
return mk(m_seq.re.mk_empty(re_sort));
// δ(state) may be concrete (one state) or an ite-term (symbolic
// character). Wrap π around the result, descending into ite leaves.
return mk(wrap_proj(dstate->get_expr(), root, nu));
}
case snode_kind::s_ite: {
// ite-structured residual (from a symbolic-character derivative):
// δ_a(ite(c, th, el)) = ite(c, δ_a(th), δ_a(el)).
snode* dth = deriv_proj(re->arg(1), ch);
snode* del = deriv_proj(re->arg(2), ch);
return mk(expr_ref(m.mk_ite(re->arg(0)->get_expr(), dth->get_expr(), del->get_expr()), m));
}
case snode_kind::s_complement: {
snode* d = deriv_proj(re->arg(0), ch);
return mk(expr_ref(mk_compl(d->get_expr()), m));
}
case snode_kind::s_intersect: {
snode* d0 = deriv_proj(re->arg(0), ch);
snode* d1 = deriv_proj(re->arg(1), ch);
return mk(expr_ref(mk_inter(d0->get_expr(), d1->get_expr()), m));
}
case snode_kind::s_union: {
snode* d0 = deriv_proj(re->arg(0), ch);
snode* d1 = deriv_proj(re->arg(1), ch);
return mk(expr_ref(mk_union(d0->get_expr(), d1->get_expr()), m));
}
case snode_kind::s_concat: {
// δ_a(R·S) = δ_a(R)·S ⊔ (nullable(R) ? δ_a(S) : ∅)
snode* d0 = deriv_proj(re->arg(0), ch);
expr* head = mk_concat(d0->get_expr(), re->arg(1)->get_expr());
if (re_nullable(re->arg(0)) == l_true) {
snode* d1 = deriv_proj(re->arg(1), ch);
head = mk_union(head, d1->get_expr());
}
return mk(expr_ref(head, m));
}
case snode_kind::s_star: {
// δ_a(R*) = δ_a(R)·R*
snode* d = deriv_proj(re->arg(0), ch);
return mk(expr_ref(mk_concat(d->get_expr(), re_expr), m));
}
case snode_kind::s_plus: {
// δ_a(R+) = δ_a(R)·R*
snode* d = deriv_proj(re->arg(0), ch);
expr_ref star(m_seq.re.mk_star(re->arg(0)->get_expr()), m);
return mk(expr_ref(mk_concat(d->get_expr(), star), m));
}
default:
// A projection nested under an operator we do not specially derive
// is not produced by our constructions. Fall back conservatively.
return mk(m_rewriter.mk_derivative(ch, re_expr));
}
}
snode* sgraph::brzozowski_deriv(snode* re, snode* elem) {
expr* re_expr = re->get_expr();
expr* elem_expr = elem->get_expr();
@ -592,6 +823,22 @@ namespace euf {
}
}
// If the regex contains a projection operator, the generic rewriter
// cannot derive it. Use the projection-aware structural derivative,
// which delegates projection-free subterms back to mk_derivative.
// Canonicalize the result with th_rewriter (which treats the re.proj
// skolem as an opaque leaf and only applies ACI / identity rules to the
// surrounding Boolean/concat structure). Without this, equivalent
// derivative states get distinct snode ids and BFS emptiness checks
// fail to deduplicate, exploring an exploded state space.
if (re->has_projection()) {
snode* d = deriv_proj(re, elem_expr);
expr_ref e(d->get_expr(), m);
th_rewriter trw(m);
trw(e);
return mk(e);
}
expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr);
SASSERT(result);
return mk(result);
@ -647,6 +894,14 @@ namespace euf {
if (m_seq.re.is_empty(e))
return;
// projection operator: collect predicates from the regex arguments only
// (the third argument is the integer snapshot index, not a regex).
if (re->is_projection()) {
collect_re_predicates(re->arg(0), preds);
collect_re_predicates(re->arg(1), preds);
return;
}
// Expected compound regex operators are handled by recursion below.
// If a leaf survives to this point, it is an unhandled regex form.
if (re->num_args() == 0) {

View file

@ -58,6 +58,7 @@ Author:
#include "util/region.h"
#include "util/statistics.h"
#include "util/lbool.h"
#include "ast/ast.h"
#include "ast/seq_decl_plugin.h"
#include "ast/rewriter/seq_rewriter.h"
@ -68,6 +69,19 @@ namespace euf {
class seq_plugin;
// Oracle queried by the projection-aware derivative of sgraph.
// The projection operator π_{Q,F}(state) (a re.proj skolem) has its set of
// explored states Q stored externally (the nielsen_graph partial DFA), keyed
// by a snapshot index nu. The sgraph consults this oracle to decide whether
// the current state lies in Q when deriving a projection.
class projection_oracle {
public:
virtual ~projection_oracle() = default;
// true iff `state` (a regex expression) belongs to the explored
// subautomaton snapshot identified by `nu`.
virtual bool projection_state_in_Q(expr* state, unsigned nu) = 0;
};
class sgraph {
struct stats {
@ -107,6 +121,9 @@ namespace euf {
unsigned_vector m_alias_trail; // expression ids
unsigned_vector m_alias_trail_lim; // scope boundaries
// Oracle answering "state ∈ Q_nu" for projection derivatives. Not owned.
projection_oracle* m_proj_oracle = nullptr;
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
snode_kind classify(expr* e) const;
void compute_metadata(snode* n);
@ -156,6 +173,27 @@ namespace euf {
// for deriving symbolic variables.
snode* brzozowski_deriv(snode* re, snode* elem);
// Register the oracle consulted when deriving projection operators.
// Passing nullptr unregisters. Not owned.
void set_projection_oracle(projection_oracle* o) { m_proj_oracle = o; }
// Projection operator support (π_{Q,F}(state) modeled as re.proj skolem).
// Recognize and destructure a re.proj skolem expression.
bool is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const;
// Build the re.proj skolem expression for π_{{root}}(state) at snapshot nu.
expr_ref mk_re_proj(expr* state, expr* root, unsigned nu);
// Wrap a (possibly ite-structured) symbolic-derivative result in the
// projection operator, propagating π into every ite leaf (paper §4).
expr_ref wrap_proj(expr* e, expr* root, unsigned nu);
// Projection-aware Brzozowski derivative w.r.t. a character expr
// (concrete or symbolic).
snode* deriv_proj(snode* re, expr* ch);
// Projection-aware nullability: lifts re.get_info().nullable to regexes
// that may contain projection operators. Returns l_true / l_false
// (l_undef only if an underlying projection-free subterm is undef).
lbool re_nullable(snode* re);
// Decode a character expression that may be represented as a const-char,
// a unit string containing a const-char, or a one-character string literal.
bool decode_re_char(expr* ex, unsigned& out) const;

View file

@ -56,9 +56,16 @@ namespace euf {
s_range, // character range [lo,hi] (OP_RE_RANGE)
s_ite, // ite (OP_ITE)
s_to_re, // string to regex (OP_SEQ_TO_RE)
s_in_re // regex membership (OP_SEQ_IN_RE)
s_in_re, // regex membership (OP_SEQ_IN_RE)
s_projection // projection operator π_{Q,F}(state) (re.proj skolem)
};
// Skolem decl-name marking a regex projection operator π_{Q,F}(state).
// The skolem has signature re.proj(state : RegLan, root : RegLan, nu : Int)
// where F = {root} (singleton accepting state) and nu identifies the
// explored subautomaton snapshot Q (see seq_nielsen partial DFA).
inline symbol re_proj_name() { return symbol("re.proj"); }
class snode {
expr *m_expr = nullptr;
snode_kind m_kind = snode_kind::s_var;
@ -69,6 +76,7 @@ namespace euf {
bool m_ground = true; // no uninterpreted string variables
bool m_regex_free = true; // no regex constructs
bool m_is_classical = true; // classical regular expression
bool m_has_projection = false; // contains a projection operator (re.proj)
unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons)
unsigned m_length = 0; // token count, number of leaf tokens in the tree
@ -125,6 +133,10 @@ namespace euf {
bool is_classical() const {
return m_is_classical;
}
// true iff this regex (sub)tree contains a projection operator (re.proj).
bool has_projection() const {
return m_has_projection;
}
unsigned level() const {
return m_level;
}
@ -202,6 +214,9 @@ namespace euf {
bool is_in_re() const {
return m_kind == snode_kind::s_in_re;
}
bool is_projection() const {
return m_kind == snode_kind::s_projection;
}
// is this a leaf token (analogous to ZIPT's StrToken as opposed to Str)
bool is_token() const {

View file

@ -178,15 +178,14 @@ namespace seq {
bool str_mem::is_trivial(nielsen_node const* n) const {
if (!(m_str && m_regex && m_str->is_empty()))
return false;
const auto& info = n->graph().seq().re.get_info(m_regex->get_expr());
return info.nullable == l_true;
// Projection-aware nullability (handles re.proj operators in m_regex).
return n->graph().sg().re_nullable(m_regex) == l_true;
}
bool str_mem::is_contradiction(nielsen_node const* n) const {
if (!(m_str && m_regex && m_str->is_empty()))
return false;
const auto& info = n->graph().seq().re.get_info(m_regex->get_expr());
return info.nullable == l_false;
return n->graph().sg().re_nullable(m_regex) == l_false;
}
bool str_mem::contains_var(euf::snode* var) const {
@ -457,14 +456,49 @@ namespace seq {
m_context_solver(ctx_solver),
m_partial_dfa_pin(sg.get_manager()),
m_parikh(alloc(seq_parikh, sg)),
m_seq_regex(alloc(seq::seq_regex, sg)) {}
m_seq_regex(alloc(seq::seq_regex, sg)) {
// Answer projection-state membership queries during projection-aware
// derivatives (the sgraph cannot reach the partial DFA otherwise).
m_sg.set_projection_oracle(this);
}
nielsen_graph::~nielsen_graph() {
m_sg.set_projection_oracle(nullptr);
dealloc(m_parikh);
dealloc(m_seq_regex);
reset();
}
bool nielsen_graph::projection_state_in_Q(expr* state, unsigned nu) {
if (!state || nu == 0)
return false;
const unsigned sid = state->get_id();
// r ∈ Q_nu iff r is incident to a partial-DFA edge whose extraction
// index lies in [1, nu] (the "edges marked ≤ ν" subautomaton of the
// implementation-aspects section of the paper).
auto incident = [&](std::unordered_map<unsigned, unsigned_vector> const& adj) {
auto it = adj.find(sid);
if (it == adj.end())
return false;
for (const unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
const unsigned pidx = m_partial_dfa_edges[edge_idx].m_projection_idx;
if (pidx != 0 && pidx <= nu)
return true;
}
return false;
};
return incident(m_partial_dfa_out) || incident(m_partial_dfa_in);
}
euf::snode* nielsen_graph::mk_projection_term(euf::snode* root_re, unsigned nu) {
SASSERT(root_re && root_re->get_expr());
// π_{Q_nu, {root}}(root): current state == accepting state == root.
expr_ref proj = m_sg.mk_re_proj(root_re->get_expr(), root_re->get_expr(), nu);
return m_sg.mk(proj);
}
nielsen_node* nielsen_graph::mk_node() {
const unsigned id = m_nodes.size();
nielsen_node* n = alloc(nielsen_node, *this, id);
@ -1057,137 +1091,6 @@ namespace seq {
return covered;
}
euf::snode* nielsen_graph::build_projection_regex_from_scc(euf::snode* root_re, uint_set const& scc, unsigned extract_idx) {
SASSERT(root_re && root_re->get_expr());
sort* seq_sort = nullptr;
VERIFY(m_seq.is_re(root_re->get_expr(), seq_sort));
SASSERT(seq_sort);
sort* re_sort = root_re->get_expr()->get_sort();
// Floyd-Warshall is done over expressions, not snodes: every label
// edge that feeds this matrix comes from m_partial_dfa_edges whose
// labels (and src/dst) may correspond to snodes that no longer exist
// at the current sgraph scope. We work entirely with pinned exprs
// here and materialize a single snode for the simplified result at
// the very end. All intermediate exprs are kept alive by an
// expr_ref_vector that lives for the duration of this call.
expr_ref_vector pin(m);
expr* eps_expr = m_seq.re.mk_epsilon(seq_sort);
expr* empty_expr = m_seq.re.mk_empty(re_sort);
pin.push_back(eps_expr);
pin.push_back(empty_expr);
auto is_empty = [&](expr* re) -> bool {
return !re || m_seq.re.is_empty(re);
};
auto is_eps = [&](expr* re) -> bool {
return re && m_seq.re.is_epsilon(re);
};
auto mk_union = [&](expr* a, expr* b) -> expr* {
if (is_empty(a)) return b;
if (is_empty(b)) return a;
if (a == b) return a;
expr* r = m_seq.re.mk_union(a, b);
pin.push_back(r);
return r;
};
auto mk_concat = [&](expr* a, expr* b) -> expr* {
if (is_empty(a) || is_empty(b)) return empty_expr;
if (is_eps(a)) return b;
if (is_eps(b)) return a;
expr* r = m_seq.re.mk_concat(a, b);
pin.push_back(r);
return r;
};
auto mk_star = [&](expr* r) -> expr* {
if (is_empty(r) || is_eps(r)) return eps_expr;
if (r && m_seq.re.is_star(r)) return r;
expr* s = m_seq.re.mk_star(r);
pin.push_back(s);
return s;
};
unsigned_vector states;
states.reserve(scc.num_elems());
std::unordered_map<unsigned, unsigned> pos;
for (unsigned s : scc) {
pos.emplace(s, states.size());
states.push_back(s);
}
// scc / pos are keyed by expr id (see collect_scc_for_projection).
auto it_root = pos.find(root_re->get_expr()->get_id());
if (it_root == pos.end())
return nullptr;
unsigned n = states.size();
std::vector<std::vector<expr*>> R(n, std::vector<expr*>(n, nullptr));
for (unsigned src_id : states) {
auto it = m_partial_dfa_out.find(src_id);
if (it == m_partial_dfa_out.end())
continue;
for (unsigned edge_idx : it->second) {
if (edge_idx >= m_partial_dfa_edges.size())
continue;
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
if (!e.m_src || !e.m_dst || !e.m_label)
continue;
if (!scc.contains(e.m_dst->get_id()))
continue;
if (e.m_projection_idx == 0 || e.m_projection_idx > extract_idx)
continue;
auto it_src = pos.find(e.m_src->get_id());
auto it_dst = pos.find(e.m_dst->get_id());
if (it_src == pos.end() || it_dst == pos.end())
continue;
unsigned i = it_src->second;
unsigned j = it_dst->second;
R[i][j] = mk_union(R[i][j], e.m_label);
}
}
for (unsigned i = 0; i < n; ++i) {
R[i][i] = mk_union(R[i][i], eps_expr);
}
for (unsigned k = 0; k < n; ++k) {
std::vector<expr*> col_k(n, nullptr), row_k(n, nullptr);
for (unsigned i = 0; i < n; ++i)
col_k[i] = R[i][k];
for (unsigned j = 0; j < n; ++j)
row_k[j] = R[k][j];
expr* loop_k = R[k][k];
expr* loop_star = mk_star(loop_k);
for (unsigned i = 0; i < n; ++i) {
if (is_empty(col_k[i]))
continue;
for (unsigned j = 0; j < n; ++j) {
if (is_empty(row_k[j]))
continue;
expr* via_k = mk_concat(mk_concat(col_k[i], loop_star), row_k[j]);
R[i][j] = mk_union(R[i][j], via_k);
}
}
}
expr* result = R[it_root->second][it_root->second];
if (!result)
return m_sg.mk(eps_expr);
expr_ref simplified(result, m);
th_rewriter trw(m);
trw(simplified);
return m_sg.mk(simplified);
}
bool nielsen_graph::try_extract_partial_projection(euf::snode* root_re, euf::snode*& projection_re) {
SASSERT(root_re && root_re->get_expr());
projection_re = nullptr;
@ -1207,7 +1110,10 @@ namespace seq {
if (covered_edges <= prev_covered)
return false;
projection_re = build_projection_regex_from_scc(root_re, scc, m_projection_extract_idx);
// Keep the stabilizer symbolic as a projection operator over the
// (just-marked) explored subautomaton snapshot. Its language is
// refined lazily through projection-aware derivatives.
projection_re = mk_projection_term(root_re, m_projection_extract_idx);
if (!projection_re)
return false;
@ -1222,7 +1128,6 @@ namespace seq {
expr_ref new_arg(v->get_expr(), m);
expr_ref new_l(left, m), new_r(right, m);
expr* arg, *l, *r;
std::cout << "Creating slice for " << mk_pp(new_arg, m) << "; " << mk_pp(new_l, m) << "; " << mk_pp(new_r, m) << std::endl;
if (m_sk.is_slice(new_arg, arg, l, r)) {
new_l = a.mk_add(left, l);
@ -1232,7 +1137,6 @@ namespace seq {
new_arg = arg;
}
expr_ref slice = m_sk.mk_slice(new_arg, new_l, new_r);
std::cout << "Got " << mk_pp(slice, m) << std::endl;
return m_sg.mk(slice);
}
@ -1713,35 +1617,45 @@ namespace seq {
break;
euf::snode* src_re = mem.m_regex;
seq_rewriter rw(m);
expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m);
// Extract the inner char expression from seq.unit(?inner)
expr *inner_char = tok->arg(0)->get_expr();
// substitute the inner char for the derivative variable in d
var_subst vs(m);
d = vs(d, inner_char);
th_rewriter thrw(m);
thrw(d);
// Record concrete minterm edges for src_re so cycle_decomp can
// detect SCCs lazily (mirrors the incremental DFA building from
// concrete char consumption in the loop above).
if (src_re->is_ground()) {
euf::snode_vector mts;
sg.compute_minterms(src_re, mts);
for (euf::snode* mt : mts) {
euf::snode* mt_deriv = sg.brzozowski_deriv(src_re, mt);
if (mt_deriv && !mt_deriv->is_fail())
m_graph.record_partial_derivative_edge(src_re, mt, mt_deriv);
}
euf::snode* next = nullptr;
if (src_re->has_projection()) {
// The generic symbolic derivative cannot see the projection
// operator; route through the projection-aware derivative,
// which yields the ite-residual with π propagated to leaves.
next = sg.brzozowski_deriv(src_re, tok);
}
else {
seq_rewriter rw(m);
expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m);
auto next = sg.mk(d);
// Extract the inner char expression from seq.unit(?inner)
expr *inner_char = tok->arg(0)->get_expr();
// substitute the inner char for the derivative variable in d
var_subst vs(m);
d = vs(d, inner_char);
th_rewriter thrw(m);
thrw(d);
// Record concrete minterm edges for src_re so cycle_decomp can
// detect SCCs lazily (mirrors the incremental DFA building from
// concrete char consumption in the loop above).
if (src_re->is_ground()) {
euf::snode_vector mts;
sg.compute_minterms(src_re, mts);
for (euf::snode* mt : mts) {
euf::snode* mt_deriv = sg.brzozowski_deriv(src_re, mt);
if (mt_deriv && !mt_deriv->is_fail())
m_graph.record_partial_derivative_edge(src_re, mt, mt_deriv);
}
}
next = sg.mk(d);
}
if (next->is_fail()) {
TRACE(seq, tout << "empty regex" << spp(mem.m_regex, m) << " d: " << d << "\n");
TRACE(seq, tout << "empty regex" << spp(mem.m_regex, m) << "\n");
set_general_conflict();
set_conflict(backtrack_reason::regex, mem.m_dep);
return simplify_result::conflict;
@ -3384,7 +3298,10 @@ namespace seq {
uint_set scc;
if (!collect_scc_for_projection(root_re, scc))
return nullptr;
return build_projection_regex_from_scc(root_re, scc, m_projection_extract_idx);
// Symbolic projection over the edges marked by earlier extractions
// (index ≤ current snapshot). No new marking here, mirroring the old
// behaviour of reusing the current extraction index.
return mk_projection_term(root_re, m_projection_extract_idx);
}
// -----------------------------------------------------------------------
@ -3395,6 +3312,10 @@ namespace seq {
// -----------------------------------------------------------------------
bool nielsen_graph::apply_cycle_subsumption(nielsen_node* node) {
TRACE(seq, tout << "cycle_subsumption ENTER node " << node->id() << " with mems:\n";
for (auto const& m2 : node->str_mems())
tout << " [prim=" << m2.is_primitive() << "] "
<< mk_pp(m2.m_str->get_expr(), m) << "" << mk_pp(m2.m_regex->get_expr(), m) << "\n");
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
str_mem const& mem = node->str_mems()[mi];
SASSERT(mem.well_formed());
@ -3418,7 +3339,12 @@ namespace seq {
continue;
// Check L(x_regex) ⊆ L(stabilizer).
if (m_seq_regex->is_language_subset(x_regex, stabilizer) != l_true)
lbool subset = m_seq_regex->is_language_subset(x_regex, stabilizer);
TRACE(seq, tout << "cycle_subsumption check: first=" << mk_pp(first->get_expr(), m)
<< "\n x_regex=" << mk_pp(x_regex->get_expr(), m)
<< "\n stabilizer=" << mk_pp(stabilizer->get_expr(), m)
<< "\n subset=" << subset << "\n");
if (subset != l_true)
continue;
// Subsume: replace x·rest ∈ r with rest ∈ r.
@ -3454,10 +3380,11 @@ namespace seq {
// cycle is detected, project SCC onto stabilizer constraint b.
// Rewrites x into x'·x'' with x' ∈ b*, x'' ∈ complement((b ∩ complement(eps)) · Sigma*).
//
// Here b = Proj{R}{E_SCC}{R} is the language of all paths from R back to R
// in the known SCC edges (including ε), which equals s* for the non-empty
// cycle language s. Equivalently, stabilizer_re from build_projection_regex_from_scc
// is the Kleene closure computed by Floyd-Warshall over the SCC subgraph.
// Here stabilizer_re = π_{Q_SCC,{R}}(R) is the projection operator denoting
// the language of all paths from R back to R that stay within the explored
// subautomaton Q_SCC (including ε), i.e. s* for the non-empty cycle language
// s. It is kept symbolic; its derivative/nullability are evaluated lazily by
// the projection-aware sgraph (paper §3.3) rather than materialized.
//
// The constraint on x'' prevents divergence: x'' may not begin with any
// non-empty word from L(stabilizer_re), so it cannot re-enter the cycle.
@ -3501,13 +3428,22 @@ namespace seq {
euf::snode* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), stabilizer_re->get_expr(), seq_sort));
euf::snode* xpp = get_tail(x, compute_length_expr(xp).get());
euf::snode* xp_xpp = m_sg.mk_concat(xp, xpp);
TRACE(seq, tout << "cycle_decomp build: xp=" << mk_pp(xp->get_expr(), m)
<< "\n xpp=" << mk_pp(xpp->get_expr(), m)
<< "\n xp_xpp=" << mk_pp(xp_xpp->get_expr(), m)
<< "\n xp_xpp.first=" << mk_pp(xp_xpp->first()->get_expr(), m) << "\n");
nielsen_node* child = mk_child(node);
SASSERT(child->m_str_mem[mi] == mem);
nielsen_edge* e = mk_edge(node, child, false);
const nielsen_subst s(x, xp_xpp, nullptr, mem.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
// remove from mi^th element of the child the leading token, as it is immediately subsumed
SASSERT(child->m_str_mem[mi].m_str->first() == xp);
child->m_str_mem[mi].m_str = dir_drop(m_sg, child->m_str_mem[mi].m_str, 1, true);
// x' ∈ stabilizer_re (= s*, all repetitions of the detected cycle)
child->add_str_mem(str_mem(xp, stabilizer_re, mem.m_dep));

View file

@ -855,7 +855,7 @@ namespace seq {
// the overall Nielsen transformation graph
// mirrors ZIPT's NielsenGraph
class nielsen_graph {
class nielsen_graph : public euf::projection_oracle {
friend class nielsen_node;
friend class nielsen_edge;
@ -979,6 +979,11 @@ namespace seq {
seq_util& seq() { return m_seq; }
seq_util const& seq() const { return m_seq; }
// euf::projection_oracle: true iff regex `state` is part of the
// explored subautomaton snapshot `nu` (a partial-DFA edge incident to
// `state` was marked with an index in [1, nu]).
bool projection_state_in_Q(expr* state, unsigned nu) override;
// node management
nielsen_node* mk_node();
nielsen_node* mk_child(nielsen_node* parent);
@ -1169,9 +1174,12 @@ namespace seq {
// currently covered edge count for this extraction.
unsigned mark_scc_projection_edges(uint_set const& scc);
// Build regex equivalent to proj(root_re, E_scc, {root_re}) using the
// currently marked SCC edges (projection index <= extract_idx).
euf::snode* build_projection_regex_from_scc(euf::snode* root_re, uint_set const& scc, unsigned extract_idx);
// Build the projection operator π_{Q,{root_re}}(root_re) as a re.proj
// skolem snode, where Q is the explored subautomaton identified by the
// snapshot index nu. This is the stabilizer of root_re kept symbolically
// (the projection's derivative/nullability are evaluated lazily by the
// sgraph consulting projection_state_in_Q).
euf::snode* mk_projection_term(euf::snode* root_re, unsigned nu);
// Try to extract a stronger projection for root_re. Returns true and
// stores it in projection_re iff SCC coverage has grown.

View file

@ -400,6 +400,14 @@ namespace seq {
if (re->is_fail() || re->is_full_char() || re->is_full_seq())
return;
// projection operator: only the regex arguments carry character
// structure; the third argument is the integer snapshot index.
if (re->is_projection()) {
collect_char_boundaries(re->arg(0), bounds);
collect_char_boundaries(re->arg(1), bounds);
return;
}
// If we reached a leaf and none of the expected leaf forms matched,
// this is a regex constructor we did not account for in boundary
// extraction and should fail loudly in debug builds.

View file

@ -210,8 +210,9 @@ namespace seq {
}
// check if regex accepts the empty string
// (projection-aware: re may contain re.proj operators)
bool is_nullable(euf::snode* re) const {
return re && seq.re.get_info(re->get_expr()).nullable == l_true;
return re && m_sg.re_nullable(re) == l_true;
}
// check if regex is ground (no string variables)