mirror of
https://github.com/Z3Prover/z3
synced 2026-06-19 15:16:29 +00:00
Added real projection operator
This commit is contained in:
parent
ff99cb442a
commit
70031b674c
7 changed files with 437 additions and 176 deletions
|
|
@ -18,6 +18,8 @@ Author:
|
|||
|
||||
#include "ast/euf/euf_sgraph.h"
|
||||
#include "ast/euf/euf_seq_plugin.h"
|
||||
#include "ast/arith_decl_plugin.h"
|
||||
#include "ast/rewriter/th_rewriter.h"
|
||||
#include "ast/ast_pp.h"
|
||||
|
||||
namespace euf {
|
||||
|
|
@ -109,6 +111,11 @@ namespace euf {
|
|||
if (m_seq.str.is_in_re(e))
|
||||
return snode_kind::s_in_re;
|
||||
|
||||
// projection operator π_{Q,F}(state) modeled as the re.proj skolem.
|
||||
if (m_seq.is_skolem(e) &&
|
||||
to_app(e)->get_decl()->get_parameter(0).get_symbol() == re_proj_name())
|
||||
return snode_kind::s_projection;
|
||||
|
||||
return snode_kind::s_var;
|
||||
}
|
||||
|
||||
|
|
@ -283,6 +290,18 @@ namespace euf {
|
|||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
case snode_kind::s_projection:
|
||||
// re.proj(state, root, nu): args 0/1 are regexes, arg 2 is the
|
||||
// snapshot index (an integer numeral). Ground/classical follow the
|
||||
// regex arguments only.
|
||||
SASSERT(n->num_args() == 3);
|
||||
n->m_ground = n->arg(0)->is_ground() && n->arg(1)->is_ground();
|
||||
n->m_regex_free = false;
|
||||
n->m_is_classical = false;
|
||||
n->m_level = 1;
|
||||
n->m_length = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
// NSB review: is this the correct defaults for unclassified nodes?
|
||||
// Is this UNREACHABLE()?
|
||||
|
|
@ -292,6 +311,14 @@ namespace euf {
|
|||
n->m_length = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
// A regex (sub)tree carries a projection iff it is a projection node or
|
||||
// any argument does. Computed uniformly here so all callers can use the
|
||||
// cheap snode flag instead of re-walking the AST.
|
||||
n->m_has_projection = (n->m_kind == snode_kind::s_projection);
|
||||
for (unsigned i = 0; i < n->num_args() && !n->m_has_projection; ++i)
|
||||
if (n->arg(i)->has_projection())
|
||||
n->m_has_projection = true;
|
||||
}
|
||||
|
||||
static const unsigned HASH_BASE = 31;
|
||||
|
|
@ -544,6 +571,210 @@ namespace euf {
|
|||
return n;
|
||||
}
|
||||
|
||||
bool sgraph::is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const {
|
||||
if (!e || !m_seq.is_skolem(e))
|
||||
return false;
|
||||
app* ap = to_app(e);
|
||||
if (ap->get_decl()->get_parameter(0).get_symbol() != re_proj_name())
|
||||
return false;
|
||||
if (ap->get_num_args() != 3)
|
||||
return false;
|
||||
arith_util au(m);
|
||||
rational r;
|
||||
if (!au.is_numeral(ap->get_arg(2), r) || !r.is_unsigned())
|
||||
return false;
|
||||
state = ap->get_arg(0);
|
||||
root = ap->get_arg(1);
|
||||
nu = r.get_unsigned();
|
||||
return true;
|
||||
}
|
||||
|
||||
expr_ref sgraph::mk_re_proj(expr* state, expr* root, unsigned nu) {
|
||||
SASSERT(state && root);
|
||||
arith_util au(m);
|
||||
expr* args[3] = { state, root, au.mk_int(nu) };
|
||||
sort* re_sort = state->get_sort();
|
||||
return expr_ref(m_seq.mk_skolem(re_proj_name(), 3, args, re_sort), m);
|
||||
}
|
||||
|
||||
expr_ref sgraph::wrap_proj(expr* e, expr* root, unsigned nu) {
|
||||
// The symbolic derivative of a regex is an ite-term dispatching on
|
||||
// character predicates; its leaves are ordinary derivative states.
|
||||
// π_{Q,F} is propagated to all such leaves (paper §4). Partial-DFA
|
||||
// states are produced from concrete-character derivatives and are thus
|
||||
// ite-free, so every ite encountered here is dispatch structure.
|
||||
expr* c = nullptr, *th = nullptr, *el = nullptr;
|
||||
if (m.is_ite(e, c, th, el)) {
|
||||
expr_ref t = wrap_proj(th, root, nu);
|
||||
expr_ref f = wrap_proj(el, root, nu);
|
||||
return expr_ref(m.mk_ite(c, t, f), m);
|
||||
}
|
||||
// π(∅) ≡ ∅: a dead leaf stays dead.
|
||||
if (m_seq.re.is_empty(e))
|
||||
return expr_ref(e, m);
|
||||
return mk_re_proj(e, root, nu);
|
||||
}
|
||||
|
||||
// 3-valued Kleene conjunction / disjunction over lbool.
|
||||
static lbool lb_and(lbool a, lbool b) {
|
||||
if (a == l_false || b == l_false) return l_false;
|
||||
if (a == l_undef || b == l_undef) return l_undef;
|
||||
return l_true;
|
||||
}
|
||||
static lbool lb_or(lbool a, lbool b) {
|
||||
if (a == l_true || b == l_true) return l_true;
|
||||
if (a == l_undef || b == l_undef) return l_undef;
|
||||
return l_false;
|
||||
}
|
||||
|
||||
lbool sgraph::re_nullable(snode* re) {
|
||||
if (!re)
|
||||
return l_undef;
|
||||
// Projection-free regexes: defer to the standard regex info.
|
||||
if (!re->has_projection())
|
||||
return m_seq.re.get_info(re->get_expr()).nullable;
|
||||
|
||||
switch (re->kind()) {
|
||||
case snode_kind::s_projection: {
|
||||
// nullable(π_{{root}}(state)) = [state ∈ F] = [state ≡ root].
|
||||
// Regex expressions are perfectly shared, so syntactic pointer
|
||||
// equality coincides with the expr-id equality used for cycle
|
||||
// detection in the partial DFA.
|
||||
expr* state = nullptr, *root = nullptr; unsigned nu = 0;
|
||||
VERIFY(is_re_proj(re->get_expr(), state, root, nu));
|
||||
return (state == root) ? l_true : l_false;
|
||||
}
|
||||
case snode_kind::s_complement:
|
||||
return ~re_nullable(re->arg(0));
|
||||
case snode_kind::s_intersect:
|
||||
case snode_kind::s_concat:
|
||||
return lb_and(re_nullable(re->arg(0)), re_nullable(re->arg(1)));
|
||||
case snode_kind::s_union:
|
||||
return lb_or(re_nullable(re->arg(0)), re_nullable(re->arg(1)));
|
||||
case snode_kind::s_star:
|
||||
return l_true;
|
||||
case snode_kind::s_plus:
|
||||
return re_nullable(re->arg(0));
|
||||
case snode_kind::s_ite:
|
||||
// ε-acceptance of a symbolic-derivative residual depends on the
|
||||
// (unresolved) character predicate, so it is genuinely unknown
|
||||
// until the ite is split (apply_regex_if_split).
|
||||
return l_undef;
|
||||
default:
|
||||
// Not expected to carry a projection in our constructions; fall
|
||||
// back to the standard info (sound for projection-free shapes).
|
||||
return m_seq.re.get_info(re->get_expr()).nullable;
|
||||
}
|
||||
}
|
||||
|
||||
snode* sgraph::deriv_proj(snode* re, expr* ch) {
|
||||
SASSERT(re && re->get_expr());
|
||||
expr* re_expr = re->get_expr();
|
||||
sort* re_sort = re_expr->get_sort();
|
||||
|
||||
// Projection-free subterm: the standard derivative already handles it
|
||||
// (and keeps the result canonical, matching partial-DFA state ids).
|
||||
if (!re->has_projection())
|
||||
return mk(m_rewriter.mk_derivative(ch, re_expr));
|
||||
|
||||
// Minimal language-preserving simplifications that never inspect a
|
||||
// projection's arguments (so the opaque skolem and its inner state id
|
||||
// are preserved for the oracle's r ∈ Q test).
|
||||
auto is_empty = [&](expr* r) { return m_seq.re.is_empty(r); };
|
||||
auto is_full = [&](expr* r) { return m_seq.re.is_full_seq(r); };
|
||||
auto is_eps = [&](expr* r) { return m_seq.re.is_epsilon(r); };
|
||||
auto mk_union = [&](expr* x, expr* y) -> expr* {
|
||||
if (is_empty(x)) return y;
|
||||
if (is_empty(y)) return x;
|
||||
if (x == y) return x;
|
||||
if (is_full(x) || is_full(y)) return m_seq.re.mk_full_seq(re_sort);
|
||||
return m_seq.re.mk_union(x, y);
|
||||
};
|
||||
auto mk_inter = [&](expr* x, expr* y) -> expr* {
|
||||
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
|
||||
if (is_full(x)) return y;
|
||||
if (is_full(y)) return x;
|
||||
if (x == y) return x;
|
||||
return m_seq.re.mk_inter(x, y);
|
||||
};
|
||||
auto mk_concat = [&](expr* x, expr* y) -> expr* {
|
||||
if (is_empty(x) || is_empty(y)) return m_seq.re.mk_empty(re_sort);
|
||||
if (is_eps(x)) return y;
|
||||
if (is_eps(y)) return x;
|
||||
return m_seq.re.mk_concat(x, y);
|
||||
};
|
||||
auto mk_compl = [&](expr* x) -> expr* {
|
||||
if (is_empty(x)) return m_seq.re.mk_full_seq(re_sort);
|
||||
if (is_full(x)) return m_seq.re.mk_empty(re_sort);
|
||||
expr* inner = nullptr;
|
||||
if (m_seq.re.is_complement(x, inner)) return inner;
|
||||
return m_seq.re.mk_complement(x);
|
||||
};
|
||||
|
||||
switch (re->kind()) {
|
||||
case snode_kind::s_projection: {
|
||||
expr* state = nullptr, *root = nullptr; unsigned nu = 0;
|
||||
VERIFY(is_re_proj(re_expr, state, root, nu));
|
||||
// δ_a(π_{Q,{root}}(state)) = π_{Q,{root}}(δ_a(state)) if state ∈ Q,
|
||||
// else ⊥. The gate is on the *current* state (paper §3.3).
|
||||
if (!m_proj_oracle || !m_proj_oracle->projection_state_in_Q(state, nu))
|
||||
return mk(m_seq.re.mk_empty(re_sort));
|
||||
snode* dstate = deriv_proj(re->arg(0), ch); // arg(0) ≡ state
|
||||
if (!dstate || dstate->is_fail() || m_seq.re.is_empty(dstate->get_expr()))
|
||||
return mk(m_seq.re.mk_empty(re_sort));
|
||||
// δ(state) may be concrete (one state) or an ite-term (symbolic
|
||||
// character). Wrap π around the result, descending into ite leaves.
|
||||
return mk(wrap_proj(dstate->get_expr(), root, nu));
|
||||
}
|
||||
case snode_kind::s_ite: {
|
||||
// ite-structured residual (from a symbolic-character derivative):
|
||||
// δ_a(ite(c, th, el)) = ite(c, δ_a(th), δ_a(el)).
|
||||
snode* dth = deriv_proj(re->arg(1), ch);
|
||||
snode* del = deriv_proj(re->arg(2), ch);
|
||||
return mk(expr_ref(m.mk_ite(re->arg(0)->get_expr(), dth->get_expr(), del->get_expr()), m));
|
||||
}
|
||||
case snode_kind::s_complement: {
|
||||
snode* d = deriv_proj(re->arg(0), ch);
|
||||
return mk(expr_ref(mk_compl(d->get_expr()), m));
|
||||
}
|
||||
case snode_kind::s_intersect: {
|
||||
snode* d0 = deriv_proj(re->arg(0), ch);
|
||||
snode* d1 = deriv_proj(re->arg(1), ch);
|
||||
return mk(expr_ref(mk_inter(d0->get_expr(), d1->get_expr()), m));
|
||||
}
|
||||
case snode_kind::s_union: {
|
||||
snode* d0 = deriv_proj(re->arg(0), ch);
|
||||
snode* d1 = deriv_proj(re->arg(1), ch);
|
||||
return mk(expr_ref(mk_union(d0->get_expr(), d1->get_expr()), m));
|
||||
}
|
||||
case snode_kind::s_concat: {
|
||||
// δ_a(R·S) = δ_a(R)·S ⊔ (nullable(R) ? δ_a(S) : ∅)
|
||||
snode* d0 = deriv_proj(re->arg(0), ch);
|
||||
expr* head = mk_concat(d0->get_expr(), re->arg(1)->get_expr());
|
||||
if (re_nullable(re->arg(0)) == l_true) {
|
||||
snode* d1 = deriv_proj(re->arg(1), ch);
|
||||
head = mk_union(head, d1->get_expr());
|
||||
}
|
||||
return mk(expr_ref(head, m));
|
||||
}
|
||||
case snode_kind::s_star: {
|
||||
// δ_a(R*) = δ_a(R)·R*
|
||||
snode* d = deriv_proj(re->arg(0), ch);
|
||||
return mk(expr_ref(mk_concat(d->get_expr(), re_expr), m));
|
||||
}
|
||||
case snode_kind::s_plus: {
|
||||
// δ_a(R+) = δ_a(R)·R*
|
||||
snode* d = deriv_proj(re->arg(0), ch);
|
||||
expr_ref star(m_seq.re.mk_star(re->arg(0)->get_expr()), m);
|
||||
return mk(expr_ref(mk_concat(d->get_expr(), star), m));
|
||||
}
|
||||
default:
|
||||
// A projection nested under an operator we do not specially derive
|
||||
// is not produced by our constructions. Fall back conservatively.
|
||||
return mk(m_rewriter.mk_derivative(ch, re_expr));
|
||||
}
|
||||
}
|
||||
|
||||
snode* sgraph::brzozowski_deriv(snode* re, snode* elem) {
|
||||
expr* re_expr = re->get_expr();
|
||||
expr* elem_expr = elem->get_expr();
|
||||
|
|
@ -592,6 +823,22 @@ namespace euf {
|
|||
}
|
||||
}
|
||||
|
||||
// If the regex contains a projection operator, the generic rewriter
|
||||
// cannot derive it. Use the projection-aware structural derivative,
|
||||
// which delegates projection-free subterms back to mk_derivative.
|
||||
// Canonicalize the result with th_rewriter (which treats the re.proj
|
||||
// skolem as an opaque leaf and only applies ACI / identity rules to the
|
||||
// surrounding Boolean/concat structure). Without this, equivalent
|
||||
// derivative states get distinct snode ids and BFS emptiness checks
|
||||
// fail to deduplicate, exploring an exploded state space.
|
||||
if (re->has_projection()) {
|
||||
snode* d = deriv_proj(re, elem_expr);
|
||||
expr_ref e(d->get_expr(), m);
|
||||
th_rewriter trw(m);
|
||||
trw(e);
|
||||
return mk(e);
|
||||
}
|
||||
|
||||
expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr);
|
||||
SASSERT(result);
|
||||
return mk(result);
|
||||
|
|
@ -647,6 +894,14 @@ namespace euf {
|
|||
if (m_seq.re.is_empty(e))
|
||||
return;
|
||||
|
||||
// projection operator: collect predicates from the regex arguments only
|
||||
// (the third argument is the integer snapshot index, not a regex).
|
||||
if (re->is_projection()) {
|
||||
collect_re_predicates(re->arg(0), preds);
|
||||
collect_re_predicates(re->arg(1), preds);
|
||||
return;
|
||||
}
|
||||
|
||||
// Expected compound regex operators are handled by recursion below.
|
||||
// If a leaf survives to this point, it is an unhandled regex form.
|
||||
if (re->num_args() == 0) {
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ Author:
|
|||
|
||||
#include "util/region.h"
|
||||
#include "util/statistics.h"
|
||||
#include "util/lbool.h"
|
||||
#include "ast/ast.h"
|
||||
#include "ast/seq_decl_plugin.h"
|
||||
#include "ast/rewriter/seq_rewriter.h"
|
||||
|
|
@ -68,6 +69,19 @@ namespace euf {
|
|||
|
||||
class seq_plugin;
|
||||
|
||||
// Oracle queried by the projection-aware derivative of sgraph.
|
||||
// The projection operator π_{Q,F}(state) (a re.proj skolem) has its set of
|
||||
// explored states Q stored externally (the nielsen_graph partial DFA), keyed
|
||||
// by a snapshot index nu. The sgraph consults this oracle to decide whether
|
||||
// the current state lies in Q when deriving a projection.
|
||||
class projection_oracle {
|
||||
public:
|
||||
virtual ~projection_oracle() = default;
|
||||
// true iff `state` (a regex expression) belongs to the explored
|
||||
// subautomaton snapshot identified by `nu`.
|
||||
virtual bool projection_state_in_Q(expr* state, unsigned nu) = 0;
|
||||
};
|
||||
|
||||
class sgraph {
|
||||
|
||||
struct stats {
|
||||
|
|
@ -107,6 +121,9 @@ namespace euf {
|
|||
unsigned_vector m_alias_trail; // expression ids
|
||||
unsigned_vector m_alias_trail_lim; // scope boundaries
|
||||
|
||||
// Oracle answering "state ∈ Q_nu" for projection derivatives. Not owned.
|
||||
projection_oracle* m_proj_oracle = nullptr;
|
||||
|
||||
snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args);
|
||||
snode_kind classify(expr* e) const;
|
||||
void compute_metadata(snode* n);
|
||||
|
|
@ -156,6 +173,27 @@ namespace euf {
|
|||
// for deriving symbolic variables.
|
||||
snode* brzozowski_deriv(snode* re, snode* elem);
|
||||
|
||||
// Register the oracle consulted when deriving projection operators.
|
||||
// Passing nullptr unregisters. Not owned.
|
||||
void set_projection_oracle(projection_oracle* o) { m_proj_oracle = o; }
|
||||
|
||||
// Projection operator support (π_{Q,F}(state) modeled as re.proj skolem).
|
||||
// Recognize and destructure a re.proj skolem expression.
|
||||
bool is_re_proj(expr* e, expr*& state, expr*& root, unsigned& nu) const;
|
||||
// Build the re.proj skolem expression for π_{{root}}(state) at snapshot nu.
|
||||
expr_ref mk_re_proj(expr* state, expr* root, unsigned nu);
|
||||
// Wrap a (possibly ite-structured) symbolic-derivative result in the
|
||||
// projection operator, propagating π into every ite leaf (paper §4).
|
||||
expr_ref wrap_proj(expr* e, expr* root, unsigned nu);
|
||||
// Projection-aware Brzozowski derivative w.r.t. a character expr
|
||||
// (concrete or symbolic).
|
||||
snode* deriv_proj(snode* re, expr* ch);
|
||||
|
||||
// Projection-aware nullability: lifts re.get_info().nullable to regexes
|
||||
// that may contain projection operators. Returns l_true / l_false
|
||||
// (l_undef only if an underlying projection-free subterm is undef).
|
||||
lbool re_nullable(snode* re);
|
||||
|
||||
// Decode a character expression that may be represented as a const-char,
|
||||
// a unit string containing a const-char, or a one-character string literal.
|
||||
bool decode_re_char(expr* ex, unsigned& out) const;
|
||||
|
|
|
|||
|
|
@ -56,9 +56,16 @@ namespace euf {
|
|||
s_range, // character range [lo,hi] (OP_RE_RANGE)
|
||||
s_ite, // ite (OP_ITE)
|
||||
s_to_re, // string to regex (OP_SEQ_TO_RE)
|
||||
s_in_re // regex membership (OP_SEQ_IN_RE)
|
||||
s_in_re, // regex membership (OP_SEQ_IN_RE)
|
||||
s_projection // projection operator π_{Q,F}(state) (re.proj skolem)
|
||||
};
|
||||
|
||||
// Skolem decl-name marking a regex projection operator π_{Q,F}(state).
|
||||
// The skolem has signature re.proj(state : RegLan, root : RegLan, nu : Int)
|
||||
// where F = {root} (singleton accepting state) and nu identifies the
|
||||
// explored subautomaton snapshot Q (see seq_nielsen partial DFA).
|
||||
inline symbol re_proj_name() { return symbol("re.proj"); }
|
||||
|
||||
class snode {
|
||||
expr *m_expr = nullptr;
|
||||
snode_kind m_kind = snode_kind::s_var;
|
||||
|
|
@ -69,6 +76,7 @@ namespace euf {
|
|||
bool m_ground = true; // no uninterpreted string variables
|
||||
bool m_regex_free = true; // no regex constructs
|
||||
bool m_is_classical = true; // classical regular expression
|
||||
bool m_has_projection = false; // contains a projection operator (re.proj)
|
||||
unsigned m_level = 0; // tree depth/level (0 for empty, 1 for singletons)
|
||||
unsigned m_length = 0; // token count, number of leaf tokens in the tree
|
||||
|
||||
|
|
@ -125,6 +133,10 @@ namespace euf {
|
|||
bool is_classical() const {
|
||||
return m_is_classical;
|
||||
}
|
||||
// true iff this regex (sub)tree contains a projection operator (re.proj).
|
||||
bool has_projection() const {
|
||||
return m_has_projection;
|
||||
}
|
||||
unsigned level() const {
|
||||
return m_level;
|
||||
}
|
||||
|
|
@ -202,6 +214,9 @@ namespace euf {
|
|||
bool is_in_re() const {
|
||||
return m_kind == snode_kind::s_in_re;
|
||||
}
|
||||
bool is_projection() const {
|
||||
return m_kind == snode_kind::s_projection;
|
||||
}
|
||||
|
||||
// is this a leaf token (analogous to ZIPT's StrToken as opposed to Str)
|
||||
bool is_token() const {
|
||||
|
|
|
|||
|
|
@ -178,15 +178,14 @@ namespace seq {
|
|||
bool str_mem::is_trivial(nielsen_node const* n) const {
|
||||
if (!(m_str && m_regex && m_str->is_empty()))
|
||||
return false;
|
||||
const auto& info = n->graph().seq().re.get_info(m_regex->get_expr());
|
||||
return info.nullable == l_true;
|
||||
// Projection-aware nullability (handles re.proj operators in m_regex).
|
||||
return n->graph().sg().re_nullable(m_regex) == l_true;
|
||||
}
|
||||
|
||||
bool str_mem::is_contradiction(nielsen_node const* n) const {
|
||||
if (!(m_str && m_regex && m_str->is_empty()))
|
||||
return false;
|
||||
const auto& info = n->graph().seq().re.get_info(m_regex->get_expr());
|
||||
return info.nullable == l_false;
|
||||
return n->graph().sg().re_nullable(m_regex) == l_false;
|
||||
}
|
||||
|
||||
bool str_mem::contains_var(euf::snode* var) const {
|
||||
|
|
@ -457,14 +456,49 @@ namespace seq {
|
|||
m_context_solver(ctx_solver),
|
||||
m_partial_dfa_pin(sg.get_manager()),
|
||||
m_parikh(alloc(seq_parikh, sg)),
|
||||
m_seq_regex(alloc(seq::seq_regex, sg)) {}
|
||||
m_seq_regex(alloc(seq::seq_regex, sg)) {
|
||||
// Answer projection-state membership queries during projection-aware
|
||||
// derivatives (the sgraph cannot reach the partial DFA otherwise).
|
||||
m_sg.set_projection_oracle(this);
|
||||
}
|
||||
|
||||
nielsen_graph::~nielsen_graph() {
|
||||
m_sg.set_projection_oracle(nullptr);
|
||||
dealloc(m_parikh);
|
||||
dealloc(m_seq_regex);
|
||||
reset();
|
||||
}
|
||||
|
||||
bool nielsen_graph::projection_state_in_Q(expr* state, unsigned nu) {
|
||||
if (!state || nu == 0)
|
||||
return false;
|
||||
const unsigned sid = state->get_id();
|
||||
// r ∈ Q_nu iff r is incident to a partial-DFA edge whose extraction
|
||||
// index lies in [1, nu] (the "edges marked ≤ ν" subautomaton of the
|
||||
// implementation-aspects section of the paper).
|
||||
auto incident = [&](std::unordered_map<unsigned, unsigned_vector> const& adj) {
|
||||
auto it = adj.find(sid);
|
||||
if (it == adj.end())
|
||||
return false;
|
||||
for (const unsigned edge_idx : it->second) {
|
||||
if (edge_idx >= m_partial_dfa_edges.size())
|
||||
continue;
|
||||
const unsigned pidx = m_partial_dfa_edges[edge_idx].m_projection_idx;
|
||||
if (pidx != 0 && pidx <= nu)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
return incident(m_partial_dfa_out) || incident(m_partial_dfa_in);
|
||||
}
|
||||
|
||||
euf::snode* nielsen_graph::mk_projection_term(euf::snode* root_re, unsigned nu) {
|
||||
SASSERT(root_re && root_re->get_expr());
|
||||
// π_{Q_nu, {root}}(root): current state == accepting state == root.
|
||||
expr_ref proj = m_sg.mk_re_proj(root_re->get_expr(), root_re->get_expr(), nu);
|
||||
return m_sg.mk(proj);
|
||||
}
|
||||
|
||||
nielsen_node* nielsen_graph::mk_node() {
|
||||
const unsigned id = m_nodes.size();
|
||||
nielsen_node* n = alloc(nielsen_node, *this, id);
|
||||
|
|
@ -1057,137 +1091,6 @@ namespace seq {
|
|||
return covered;
|
||||
}
|
||||
|
||||
euf::snode* nielsen_graph::build_projection_regex_from_scc(euf::snode* root_re, uint_set const& scc, unsigned extract_idx) {
|
||||
SASSERT(root_re && root_re->get_expr());
|
||||
|
||||
sort* seq_sort = nullptr;
|
||||
VERIFY(m_seq.is_re(root_re->get_expr(), seq_sort));
|
||||
SASSERT(seq_sort);
|
||||
|
||||
sort* re_sort = root_re->get_expr()->get_sort();
|
||||
|
||||
// Floyd-Warshall is done over expressions, not snodes: every label
|
||||
// edge that feeds this matrix comes from m_partial_dfa_edges whose
|
||||
// labels (and src/dst) may correspond to snodes that no longer exist
|
||||
// at the current sgraph scope. We work entirely with pinned exprs
|
||||
// here and materialize a single snode for the simplified result at
|
||||
// the very end. All intermediate exprs are kept alive by an
|
||||
// expr_ref_vector that lives for the duration of this call.
|
||||
expr_ref_vector pin(m);
|
||||
expr* eps_expr = m_seq.re.mk_epsilon(seq_sort);
|
||||
expr* empty_expr = m_seq.re.mk_empty(re_sort);
|
||||
pin.push_back(eps_expr);
|
||||
pin.push_back(empty_expr);
|
||||
|
||||
auto is_empty = [&](expr* re) -> bool {
|
||||
return !re || m_seq.re.is_empty(re);
|
||||
};
|
||||
|
||||
auto is_eps = [&](expr* re) -> bool {
|
||||
return re && m_seq.re.is_epsilon(re);
|
||||
};
|
||||
|
||||
auto mk_union = [&](expr* a, expr* b) -> expr* {
|
||||
if (is_empty(a)) return b;
|
||||
if (is_empty(b)) return a;
|
||||
if (a == b) return a;
|
||||
expr* r = m_seq.re.mk_union(a, b);
|
||||
pin.push_back(r);
|
||||
return r;
|
||||
};
|
||||
|
||||
auto mk_concat = [&](expr* a, expr* b) -> expr* {
|
||||
if (is_empty(a) || is_empty(b)) return empty_expr;
|
||||
if (is_eps(a)) return b;
|
||||
if (is_eps(b)) return a;
|
||||
expr* r = m_seq.re.mk_concat(a, b);
|
||||
pin.push_back(r);
|
||||
return r;
|
||||
};
|
||||
|
||||
auto mk_star = [&](expr* r) -> expr* {
|
||||
if (is_empty(r) || is_eps(r)) return eps_expr;
|
||||
if (r && m_seq.re.is_star(r)) return r;
|
||||
expr* s = m_seq.re.mk_star(r);
|
||||
pin.push_back(s);
|
||||
return s;
|
||||
};
|
||||
|
||||
unsigned_vector states;
|
||||
states.reserve(scc.num_elems());
|
||||
std::unordered_map<unsigned, unsigned> pos;
|
||||
for (unsigned s : scc) {
|
||||
pos.emplace(s, states.size());
|
||||
states.push_back(s);
|
||||
}
|
||||
|
||||
// scc / pos are keyed by expr id (see collect_scc_for_projection).
|
||||
auto it_root = pos.find(root_re->get_expr()->get_id());
|
||||
if (it_root == pos.end())
|
||||
return nullptr;
|
||||
|
||||
unsigned n = states.size();
|
||||
std::vector<std::vector<expr*>> R(n, std::vector<expr*>(n, nullptr));
|
||||
|
||||
for (unsigned src_id : states) {
|
||||
auto it = m_partial_dfa_out.find(src_id);
|
||||
if (it == m_partial_dfa_out.end())
|
||||
continue;
|
||||
for (unsigned edge_idx : it->second) {
|
||||
if (edge_idx >= m_partial_dfa_edges.size())
|
||||
continue;
|
||||
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
|
||||
if (!e.m_src || !e.m_dst || !e.m_label)
|
||||
continue;
|
||||
if (!scc.contains(e.m_dst->get_id()))
|
||||
continue;
|
||||
if (e.m_projection_idx == 0 || e.m_projection_idx > extract_idx)
|
||||
continue;
|
||||
auto it_src = pos.find(e.m_src->get_id());
|
||||
auto it_dst = pos.find(e.m_dst->get_id());
|
||||
if (it_src == pos.end() || it_dst == pos.end())
|
||||
continue;
|
||||
unsigned i = it_src->second;
|
||||
unsigned j = it_dst->second;
|
||||
R[i][j] = mk_union(R[i][j], e.m_label);
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < n; ++i) {
|
||||
R[i][i] = mk_union(R[i][i], eps_expr);
|
||||
}
|
||||
|
||||
for (unsigned k = 0; k < n; ++k) {
|
||||
std::vector<expr*> col_k(n, nullptr), row_k(n, nullptr);
|
||||
for (unsigned i = 0; i < n; ++i)
|
||||
col_k[i] = R[i][k];
|
||||
for (unsigned j = 0; j < n; ++j)
|
||||
row_k[j] = R[k][j];
|
||||
expr* loop_k = R[k][k];
|
||||
expr* loop_star = mk_star(loop_k);
|
||||
|
||||
for (unsigned i = 0; i < n; ++i) {
|
||||
if (is_empty(col_k[i]))
|
||||
continue;
|
||||
for (unsigned j = 0; j < n; ++j) {
|
||||
if (is_empty(row_k[j]))
|
||||
continue;
|
||||
expr* via_k = mk_concat(mk_concat(col_k[i], loop_star), row_k[j]);
|
||||
R[i][j] = mk_union(R[i][j], via_k);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
expr* result = R[it_root->second][it_root->second];
|
||||
if (!result)
|
||||
return m_sg.mk(eps_expr);
|
||||
|
||||
expr_ref simplified(result, m);
|
||||
th_rewriter trw(m);
|
||||
trw(simplified);
|
||||
return m_sg.mk(simplified);
|
||||
}
|
||||
|
||||
bool nielsen_graph::try_extract_partial_projection(euf::snode* root_re, euf::snode*& projection_re) {
|
||||
SASSERT(root_re && root_re->get_expr());
|
||||
projection_re = nullptr;
|
||||
|
|
@ -1207,7 +1110,10 @@ namespace seq {
|
|||
if (covered_edges <= prev_covered)
|
||||
return false;
|
||||
|
||||
projection_re = build_projection_regex_from_scc(root_re, scc, m_projection_extract_idx);
|
||||
// Keep the stabilizer symbolic as a projection operator over the
|
||||
// (just-marked) explored subautomaton snapshot. Its language is
|
||||
// refined lazily through projection-aware derivatives.
|
||||
projection_re = mk_projection_term(root_re, m_projection_extract_idx);
|
||||
if (!projection_re)
|
||||
return false;
|
||||
|
||||
|
|
@ -1222,7 +1128,6 @@ namespace seq {
|
|||
expr_ref new_arg(v->get_expr(), m);
|
||||
expr_ref new_l(left, m), new_r(right, m);
|
||||
expr* arg, *l, *r;
|
||||
std::cout << "Creating slice for " << mk_pp(new_arg, m) << "; " << mk_pp(new_l, m) << "; " << mk_pp(new_r, m) << std::endl;
|
||||
|
||||
if (m_sk.is_slice(new_arg, arg, l, r)) {
|
||||
new_l = a.mk_add(left, l);
|
||||
|
|
@ -1232,7 +1137,6 @@ namespace seq {
|
|||
new_arg = arg;
|
||||
}
|
||||
expr_ref slice = m_sk.mk_slice(new_arg, new_l, new_r);
|
||||
std::cout << "Got " << mk_pp(slice, m) << std::endl;
|
||||
return m_sg.mk(slice);
|
||||
}
|
||||
|
||||
|
|
@ -1713,35 +1617,45 @@ namespace seq {
|
|||
break;
|
||||
|
||||
euf::snode* src_re = mem.m_regex;
|
||||
seq_rewriter rw(m);
|
||||
expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m);
|
||||
|
||||
// Extract the inner char expression from seq.unit(?inner)
|
||||
expr *inner_char = tok->arg(0)->get_expr();
|
||||
|
||||
// substitute the inner char for the derivative variable in d
|
||||
var_subst vs(m);
|
||||
d = vs(d, inner_char);
|
||||
|
||||
th_rewriter thrw(m);
|
||||
thrw(d);
|
||||
|
||||
// Record concrete minterm edges for src_re so cycle_decomp can
|
||||
// detect SCCs lazily (mirrors the incremental DFA building from
|
||||
// concrete char consumption in the loop above).
|
||||
if (src_re->is_ground()) {
|
||||
euf::snode_vector mts;
|
||||
sg.compute_minterms(src_re, mts);
|
||||
for (euf::snode* mt : mts) {
|
||||
euf::snode* mt_deriv = sg.brzozowski_deriv(src_re, mt);
|
||||
if (mt_deriv && !mt_deriv->is_fail())
|
||||
m_graph.record_partial_derivative_edge(src_re, mt, mt_deriv);
|
||||
}
|
||||
euf::snode* next = nullptr;
|
||||
if (src_re->has_projection()) {
|
||||
// The generic symbolic derivative cannot see the projection
|
||||
// operator; route through the projection-aware derivative,
|
||||
// which yields the ite-residual with π propagated to leaves.
|
||||
next = sg.brzozowski_deriv(src_re, tok);
|
||||
}
|
||||
else {
|
||||
seq_rewriter rw(m);
|
||||
expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m);
|
||||
|
||||
auto next = sg.mk(d);
|
||||
// Extract the inner char expression from seq.unit(?inner)
|
||||
expr *inner_char = tok->arg(0)->get_expr();
|
||||
|
||||
// substitute the inner char for the derivative variable in d
|
||||
var_subst vs(m);
|
||||
d = vs(d, inner_char);
|
||||
|
||||
th_rewriter thrw(m);
|
||||
thrw(d);
|
||||
|
||||
// Record concrete minterm edges for src_re so cycle_decomp can
|
||||
// detect SCCs lazily (mirrors the incremental DFA building from
|
||||
// concrete char consumption in the loop above).
|
||||
if (src_re->is_ground()) {
|
||||
euf::snode_vector mts;
|
||||
sg.compute_minterms(src_re, mts);
|
||||
for (euf::snode* mt : mts) {
|
||||
euf::snode* mt_deriv = sg.brzozowski_deriv(src_re, mt);
|
||||
if (mt_deriv && !mt_deriv->is_fail())
|
||||
m_graph.record_partial_derivative_edge(src_re, mt, mt_deriv);
|
||||
}
|
||||
}
|
||||
|
||||
next = sg.mk(d);
|
||||
}
|
||||
if (next->is_fail()) {
|
||||
TRACE(seq, tout << "empty regex" << spp(mem.m_regex, m) << " d: " << d << "\n");
|
||||
TRACE(seq, tout << "empty regex" << spp(mem.m_regex, m) << "\n");
|
||||
set_general_conflict();
|
||||
set_conflict(backtrack_reason::regex, mem.m_dep);
|
||||
return simplify_result::conflict;
|
||||
|
|
@ -3384,7 +3298,10 @@ namespace seq {
|
|||
uint_set scc;
|
||||
if (!collect_scc_for_projection(root_re, scc))
|
||||
return nullptr;
|
||||
return build_projection_regex_from_scc(root_re, scc, m_projection_extract_idx);
|
||||
// Symbolic projection over the edges marked by earlier extractions
|
||||
// (index ≤ current snapshot). No new marking here, mirroring the old
|
||||
// behaviour of reusing the current extraction index.
|
||||
return mk_projection_term(root_re, m_projection_extract_idx);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
@ -3395,6 +3312,10 @@ namespace seq {
|
|||
// -----------------------------------------------------------------------
|
||||
|
||||
bool nielsen_graph::apply_cycle_subsumption(nielsen_node* node) {
|
||||
TRACE(seq, tout << "cycle_subsumption ENTER node " << node->id() << " with mems:\n";
|
||||
for (auto const& m2 : node->str_mems())
|
||||
tout << " [prim=" << m2.is_primitive() << "] "
|
||||
<< mk_pp(m2.m_str->get_expr(), m) << " ∈ " << mk_pp(m2.m_regex->get_expr(), m) << "\n");
|
||||
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
|
||||
str_mem const& mem = node->str_mems()[mi];
|
||||
SASSERT(mem.well_formed());
|
||||
|
|
@ -3418,7 +3339,12 @@ namespace seq {
|
|||
continue;
|
||||
|
||||
// Check L(x_regex) ⊆ L(stabilizer).
|
||||
if (m_seq_regex->is_language_subset(x_regex, stabilizer) != l_true)
|
||||
lbool subset = m_seq_regex->is_language_subset(x_regex, stabilizer);
|
||||
TRACE(seq, tout << "cycle_subsumption check: first=" << mk_pp(first->get_expr(), m)
|
||||
<< "\n x_regex=" << mk_pp(x_regex->get_expr(), m)
|
||||
<< "\n stabilizer=" << mk_pp(stabilizer->get_expr(), m)
|
||||
<< "\n subset=" << subset << "\n");
|
||||
if (subset != l_true)
|
||||
continue;
|
||||
|
||||
// Subsume: replace x·rest ∈ r with rest ∈ r.
|
||||
|
|
@ -3454,10 +3380,11 @@ namespace seq {
|
|||
// cycle is detected, project SCC onto stabilizer constraint b.
|
||||
// Rewrites x into x'·x'' with x' ∈ b*, x'' ∈ complement((b ∩ complement(eps)) · Sigma*).
|
||||
//
|
||||
// Here b = Proj{R}{E_SCC}{R} is the language of all paths from R back to R
|
||||
// in the known SCC edges (including ε), which equals s* for the non-empty
|
||||
// cycle language s. Equivalently, stabilizer_re from build_projection_regex_from_scc
|
||||
// is the Kleene closure computed by Floyd-Warshall over the SCC subgraph.
|
||||
// Here stabilizer_re = π_{Q_SCC,{R}}(R) is the projection operator denoting
|
||||
// the language of all paths from R back to R that stay within the explored
|
||||
// subautomaton Q_SCC (including ε), i.e. s* for the non-empty cycle language
|
||||
// s. It is kept symbolic; its derivative/nullability are evaluated lazily by
|
||||
// the projection-aware sgraph (paper §3.3) rather than materialized.
|
||||
//
|
||||
// The constraint on x'' prevents divergence: x'' may not begin with any
|
||||
// non-empty word from L(stabilizer_re), so it cannot re-enter the cycle.
|
||||
|
|
@ -3501,13 +3428,22 @@ namespace seq {
|
|||
euf::snode* xp = m_sg.mk(m_sk.mk("cycle", x->get_expr(), stabilizer_re->get_expr(), seq_sort));
|
||||
euf::snode* xpp = get_tail(x, compute_length_expr(xp).get());
|
||||
euf::snode* xp_xpp = m_sg.mk_concat(xp, xpp);
|
||||
TRACE(seq, tout << "cycle_decomp build: xp=" << mk_pp(xp->get_expr(), m)
|
||||
<< "\n xpp=" << mk_pp(xpp->get_expr(), m)
|
||||
<< "\n xp_xpp=" << mk_pp(xp_xpp->get_expr(), m)
|
||||
<< "\n xp_xpp.first=" << mk_pp(xp_xpp->first()->get_expr(), m) << "\n");
|
||||
|
||||
nielsen_node* child = mk_child(node);
|
||||
SASSERT(child->m_str_mem[mi] == mem);
|
||||
nielsen_edge* e = mk_edge(node, child, false);
|
||||
const nielsen_subst s(x, xp_xpp, nullptr, mem.m_dep);
|
||||
e->add_subst(s);
|
||||
child->apply_subst(m_sg, s);
|
||||
|
||||
// remove from mi^th element of the child the leading token, as it is immediately subsumed
|
||||
SASSERT(child->m_str_mem[mi].m_str->first() == xp);
|
||||
child->m_str_mem[mi].m_str = dir_drop(m_sg, child->m_str_mem[mi].m_str, 1, true);
|
||||
|
||||
// x' ∈ stabilizer_re (= s*, all repetitions of the detected cycle)
|
||||
child->add_str_mem(str_mem(xp, stabilizer_re, mem.m_dep));
|
||||
|
||||
|
|
|
|||
|
|
@ -855,7 +855,7 @@ namespace seq {
|
|||
|
||||
// the overall Nielsen transformation graph
|
||||
// mirrors ZIPT's NielsenGraph
|
||||
class nielsen_graph {
|
||||
class nielsen_graph : public euf::projection_oracle {
|
||||
friend class nielsen_node;
|
||||
friend class nielsen_edge;
|
||||
|
||||
|
|
@ -979,6 +979,11 @@ namespace seq {
|
|||
seq_util& seq() { return m_seq; }
|
||||
seq_util const& seq() const { return m_seq; }
|
||||
|
||||
// euf::projection_oracle: true iff regex `state` is part of the
|
||||
// explored subautomaton snapshot `nu` (a partial-DFA edge incident to
|
||||
// `state` was marked with an index in [1, nu]).
|
||||
bool projection_state_in_Q(expr* state, unsigned nu) override;
|
||||
|
||||
// node management
|
||||
nielsen_node* mk_node();
|
||||
nielsen_node* mk_child(nielsen_node* parent);
|
||||
|
|
@ -1169,9 +1174,12 @@ namespace seq {
|
|||
// currently covered edge count for this extraction.
|
||||
unsigned mark_scc_projection_edges(uint_set const& scc);
|
||||
|
||||
// Build regex equivalent to proj(root_re, E_scc, {root_re}) using the
|
||||
// currently marked SCC edges (projection index <= extract_idx).
|
||||
euf::snode* build_projection_regex_from_scc(euf::snode* root_re, uint_set const& scc, unsigned extract_idx);
|
||||
// Build the projection operator π_{Q,{root_re}}(root_re) as a re.proj
|
||||
// skolem snode, where Q is the explored subautomaton identified by the
|
||||
// snapshot index nu. This is the stabilizer of root_re kept symbolically
|
||||
// (the projection's derivative/nullability are evaluated lazily by the
|
||||
// sgraph consulting projection_state_in_Q).
|
||||
euf::snode* mk_projection_term(euf::snode* root_re, unsigned nu);
|
||||
|
||||
// Try to extract a stronger projection for root_re. Returns true and
|
||||
// stores it in projection_re iff SCC coverage has grown.
|
||||
|
|
|
|||
|
|
@ -400,6 +400,14 @@ namespace seq {
|
|||
if (re->is_fail() || re->is_full_char() || re->is_full_seq())
|
||||
return;
|
||||
|
||||
// projection operator: only the regex arguments carry character
|
||||
// structure; the third argument is the integer snapshot index.
|
||||
if (re->is_projection()) {
|
||||
collect_char_boundaries(re->arg(0), bounds);
|
||||
collect_char_boundaries(re->arg(1), bounds);
|
||||
return;
|
||||
}
|
||||
|
||||
// If we reached a leaf and none of the expected leaf forms matched,
|
||||
// this is a regex constructor we did not account for in boundary
|
||||
// extraction and should fail loudly in debug builds.
|
||||
|
|
|
|||
|
|
@ -210,8 +210,9 @@ namespace seq {
|
|||
}
|
||||
|
||||
// check if regex accepts the empty string
|
||||
// (projection-aware: re may contain re.proj operators)
|
||||
bool is_nullable(euf::snode* re) const {
|
||||
return re && seq.re.get_info(re->get_expr()).nullable == l_true;
|
||||
return re && m_sg.re_nullable(re) == l_true;
|
||||
}
|
||||
|
||||
// check if regex is ground (no string variables)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue