mirror of
https://github.com/Z3Prover/z3
synced 2026-06-19 15:16:29 +00:00
First attempt for monadic decomposition
This commit is contained in:
parent
9de196b3cb
commit
c20bc0e631
10 changed files with 546 additions and 242 deletions
|
|
@ -67,7 +67,7 @@ namespace euf {
|
|||
inline symbol re_proj_name() { return symbol("re.proj"); }
|
||||
|
||||
class snode {
|
||||
expr *m_expr = nullptr;
|
||||
expr *m_expr = nullptr; // assumed to be non-null
|
||||
snode_kind m_kind = snode_kind::s_var;
|
||||
unsigned m_id = UINT_MAX;
|
||||
unsigned m_num_args = 0;
|
||||
|
|
@ -107,7 +107,7 @@ namespace euf {
|
|||
|
||||
public:
|
||||
expr *get_expr() const {
|
||||
return m_expr;
|
||||
return m_expr; // assumed to be non-null
|
||||
}
|
||||
snode_kind kind() const {
|
||||
return m_kind;
|
||||
|
|
@ -115,14 +115,28 @@ namespace euf {
|
|||
unsigned id() const {
|
||||
return m_id;
|
||||
}
|
||||
|
||||
unsigned num_args() const {
|
||||
return m_num_args;
|
||||
}
|
||||
snode *arg(unsigned i) const {
|
||||
|
||||
snode* arg(const unsigned i) const {
|
||||
SASSERT(i < m_num_args);
|
||||
return m_args[i];
|
||||
}
|
||||
|
||||
snode* arg0() const {
|
||||
return arg(0);
|
||||
}
|
||||
|
||||
snode * const* begin() const {
|
||||
return m_args;
|
||||
}
|
||||
|
||||
snode * const* end() const {
|
||||
return m_args + m_num_args;
|
||||
}
|
||||
|
||||
// TODO: Track regex being "classical" (no complement, intersection, fail)
|
||||
bool is_ground() const {
|
||||
return m_ground;
|
||||
|
|
@ -214,6 +228,28 @@ namespace euf {
|
|||
bool is_in_re() const {
|
||||
return m_kind == snode_kind::s_in_re;
|
||||
}
|
||||
bool is_string() const {
|
||||
// constant string (we assume a flat concatenation)
|
||||
return is_concat() && std::ranges::all_of(begin(), end(),
|
||||
[](const snode * const arg) { return arg->is_char(); });
|
||||
}
|
||||
|
||||
bool is_string(zstring& str, seq_util& seq) const {
|
||||
// constant string (we assume a flat concatenation)
|
||||
// TODO: Optimize
|
||||
if (!is_concat())
|
||||
return false;
|
||||
str.reset();
|
||||
const unsigned cnt = num_args();
|
||||
for (unsigned i = 0; i < cnt; i++) {
|
||||
const snode* const c = arg(i);
|
||||
unsigned val;
|
||||
if (!seq.is_const_char(c->get_expr(), val))
|
||||
return false;
|
||||
str += zstring(val);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
bool is_projection() const {
|
||||
return m_kind == snode_kind::s_projection;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,6 +59,7 @@ void smt_params::updt_local_params(params_ref const & _p) {
|
|||
m_nseq_parikh = p.nseq_parikh();
|
||||
m_nseq_regex_precheck = p.nseq_regex_precheck();
|
||||
m_nseq_regex_factorization_threshold = p.nseq_regex_factorization_threshold();
|
||||
m_nseq_regex_factorization_eager = p.nseq_regex_factorization_eager();
|
||||
m_nseq_signature = p.nseq_signature();
|
||||
m_nseq_axiomatize_diseq = p.nseq_axiomatize_diseq();
|
||||
m_up_persist_clauses = p.up_persist_clauses();
|
||||
|
|
@ -175,6 +176,7 @@ void smt_params::display(std::ostream & out) const {
|
|||
DISPLAY_PARAM(m_nseq_parikh);
|
||||
DISPLAY_PARAM(m_nseq_regex_precheck);
|
||||
DISPLAY_PARAM(m_nseq_regex_factorization_threshold);
|
||||
DISPLAY_PARAM(m_nseq_regex_factorization_eager);
|
||||
DISPLAY_PARAM(m_nseq_axiomatize_diseq);
|
||||
|
||||
DISPLAY_PARAM(m_profile_res_sub);
|
||||
|
|
|
|||
|
|
@ -254,6 +254,7 @@ struct smt_params : public preprocessor_params,
|
|||
bool m_nseq_parikh = false;
|
||||
bool m_nseq_regex_precheck = true;
|
||||
unsigned m_nseq_regex_factorization_threshold = 1;
|
||||
bool m_nseq_regex_factorization_eager = false;
|
||||
bool m_nseq_signature = false;
|
||||
bool m_nseq_axiomatize_diseq = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -138,6 +138,7 @@ def_module_params(module_name='smt',
|
|||
('nseq.parikh', BOOL, False, 'enable Parikh image checks in nseq solver'),
|
||||
('nseq.regex_precheck', BOOL, True, 'enable regex membership pre-check before DFS in theory_nseq: checks intersection emptiness per-variable and short-circuits SAT/UNSAT for regex-only problems'),
|
||||
('nseq.regex_factorization_threshold', UINT, 1, 'maximum number of cases to factor a classical regex into in a single step (gives completeness on classical regexes)'),
|
||||
('nseq.regex_factorization_eager', BOOL, False, 'apply regex factorization (sigma splitting) eagerly in the theory interface (propagate_pos_mem) instead of lazily inside the Nielsen graph'),
|
||||
('nseq.signature', BOOL, False, 'enable heuristic signature-based string equation splitting in Nielsen solver'),
|
||||
('nseq.axiomatize_diseq', BOOL, False, 'eagerly axiomatize sequence disequalities'),
|
||||
('core.validate', BOOL, False, '[internal] validate unsat core produced by SMT context. This option is intended for debugging'),
|
||||
|
|
|
|||
|
|
@ -379,8 +379,8 @@ namespace seq {
|
|||
ast_manager& m = graph().get_manager();
|
||||
|
||||
if (s.is_char_subst()) {
|
||||
expr* var_c_expr = s.m_var->arg(0)->get_expr();
|
||||
expr* repl_c_expr = s.m_replacement->arg(0)->get_expr();
|
||||
expr* var_c_expr = s.m_var->arg0()->get_expr();
|
||||
expr* repl_c_expr = s.m_replacement->arg0()->get_expr();
|
||||
add_constraint(
|
||||
constraint(m.mk_eq(var_c_expr, repl_c_expr), s.m_dep, m));
|
||||
|
||||
|
|
@ -835,7 +835,7 @@ namespace seq {
|
|||
}
|
||||
if (ub.is_one()) {
|
||||
// base^1 → base
|
||||
euf::snode* base_sn = tok->arg(0);
|
||||
euf::snode* base_sn = tok->arg0();
|
||||
if (base_sn) {
|
||||
dep = node->graph().dep_mgr().mk_join(dep, ub_dep);
|
||||
result.push_back(base_sn);
|
||||
|
|
@ -900,7 +900,7 @@ namespace seq {
|
|||
// Skip at leading position (i == 0) to avoid undoing power unwinding:
|
||||
// unwind produces u · u^(n-1); merging it back to u^n creates an infinite cycle.
|
||||
if (i > 0 && t->is_power()) {
|
||||
euf::snode* pow_base = t->arg(0);
|
||||
euf::snode* pow_base = t->arg0();
|
||||
if (pow_base) {
|
||||
euf::snode_vector pb_tokens;
|
||||
collect_tokens_dir(pow_base, fwd, pb_tokens);
|
||||
|
|
@ -1455,7 +1455,7 @@ namespace seq {
|
|||
euf::snode* end_tok = dir_token(pow_side, fwd);
|
||||
if (!end_tok || !end_tok->is_power())
|
||||
continue;
|
||||
euf::snode* base_sn = end_tok->arg(0);
|
||||
euf::snode* base_sn = end_tok->arg0();
|
||||
expr* pow_exp = get_power_exp_expr(end_tok, seq);
|
||||
if (!base_sn || !pow_exp)
|
||||
continue;
|
||||
|
|
@ -1659,7 +1659,7 @@ namespace seq {
|
|||
expr_ref d(rw.mk_derivative(mem.m_regex->get_expr()), m);
|
||||
|
||||
// Extract the inner char expression from seq.unit(?inner)
|
||||
expr *inner_char = tok->arg(0)->get_expr();
|
||||
expr *inner_char = tok->arg0()->get_expr();
|
||||
|
||||
// substitute the inner char for the derivative variable in d
|
||||
var_subst vs(m);
|
||||
|
|
@ -2236,7 +2236,7 @@ namespace seq {
|
|||
euf::snode* other_head = (side == 0) ? rh : lh;
|
||||
if (!pow_head || !pow_head->is_power() || !other_head || !other_head->is_char())
|
||||
continue;
|
||||
euf::snode* base_sn = pow_head->arg(0);
|
||||
euf::snode* base_sn = pow_head->arg0();
|
||||
if (!base_sn) continue;
|
||||
euf::snode* base_head = dir_token(base_sn, fwd);
|
||||
if (!base_head || !base_head->is_char()) continue;
|
||||
|
|
@ -2853,7 +2853,7 @@ namespace seq {
|
|||
if (apply_gpower_intr(node))
|
||||
return ++m_stats.m_mod_gpower_intr, true;
|
||||
|
||||
// Priority 8: Regex Factorization (Boolean Closure)
|
||||
// Priority 8: Regex Factorization
|
||||
if (apply_regex_factorization(node))
|
||||
return ++m_stats.m_mod_regex_factorization, true;
|
||||
|
||||
|
|
@ -3058,7 +3058,7 @@ namespace seq {
|
|||
return false;
|
||||
|
||||
SASSERT(power->is_power() && power->num_args() >= 1);
|
||||
euf::snode *base = power->arg(0);
|
||||
euf::snode *base = power->arg0();
|
||||
|
||||
nielsen_node* child;
|
||||
nielsen_edge* e;
|
||||
|
|
@ -3109,7 +3109,7 @@ namespace seq {
|
|||
if (lhead->num_args() < 1 || rhead->num_args() < 1)
|
||||
continue;
|
||||
// same base: compare the two powers
|
||||
if (lhead->arg(0) != rhead->arg(0))
|
||||
if (lhead->arg0() != rhead->arg0())
|
||||
continue;
|
||||
|
||||
// Skip if the exponents differ by a constant — simplify_and_init's
|
||||
|
|
@ -3169,7 +3169,7 @@ namespace seq {
|
|||
euf::snode* end_tok = dir_token(pow_side, fwd);
|
||||
if (!end_tok || !end_tok->is_power())
|
||||
continue;
|
||||
euf::snode* base_sn = end_tok->arg(0);
|
||||
euf::snode* base_sn = end_tok->arg0();
|
||||
expr* pow_exp = get_power_exp_expr(end_tok, m_seq);
|
||||
// NB: Shuvendu - this test is also redundant
|
||||
if (!base_sn || !pow_exp)
|
||||
|
|
@ -3223,7 +3223,7 @@ namespace seq {
|
|||
return false;
|
||||
|
||||
SASSERT(power->is_power() && power->num_args() >= 1);
|
||||
euf::snode *base = power->arg(0);
|
||||
euf::snode *base = power->arg0();
|
||||
expr *exp_n = get_power_exponent(power);
|
||||
expr *zero = a.mk_int(0);
|
||||
expr *one = a.mk_int(1);
|
||||
|
|
@ -3582,62 +3582,157 @@ namespace seq {
|
|||
// Modifier: apply_regex_factorization (Boolean Closure)
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
struct tau_pair {
|
||||
expr_ref m_p;
|
||||
expr_ref m_q;
|
||||
tau_pair(expr* p, expr* q, ast_manager& m) : m_p(p, m), m_q(q, m) {
|
||||
SASSERT(p);
|
||||
SASSERT(q);
|
||||
// Cross-product intersection of two split-sets (split algebra):
|
||||
// S1 ⊓ S2 = { ⟨Δ1⊓Δ2, ∇1⊓∇2⟩ | ⟨Δ1,∇1⟩∈S1, ⟨Δ2,∇2⟩∈S2 }
|
||||
// Pairs where either component is the empty regex are dropped (∅⊓r ≡ ∅).
|
||||
static bool intersect_sigma_pairs(ast_manager& m, seq_util& seq,
|
||||
sigma_pairs const& s1, sigma_pairs const& s2, sigma_pairs& result, unsigned threshold) {
|
||||
for (auto const& p1 : s1) {
|
||||
for (auto const& p2 : s2) {
|
||||
if (seq.re.is_empty(p1.m_p) || seq.re.is_empty(p2.m_p) ||
|
||||
seq.re.is_empty(p1.m_q) || seq.re.is_empty(p2.m_q))
|
||||
continue;
|
||||
result.push_back(sigma_pair(seq.re.mk_inter(p1.m_p, p2.m_p),
|
||||
seq.re.mk_inter(p1.m_q, p2.m_q), m));
|
||||
if (result.size() > threshold)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
typedef vector<tau_pair> tau_pairs;
|
||||
return true;
|
||||
}
|
||||
|
||||
static void compute_tau(ast_manager& m, seq_util& seq, euf::sgraph& sg, expr* r, tau_pairs& result) {
|
||||
// Complement of a split-set via De Morgan: ~S = ⊓_{s∈S} ~s,
|
||||
// ~⟨Δ,∇⟩ = { ⟨~Δ, .*⟩, ⟨.*, ~∇⟩ } and ~{} = { ⟨.*, .*⟩ }.
|
||||
// str_sort is the sequence-element sort; mk_full_seq needs the regex sort.
|
||||
// May produce up to 2^|sp| pairs (bounded downstream by the factorization threshold).
|
||||
static bool complement_sigma_pairs(ast_manager& m, seq_util& seq, sort* str_sort,
|
||||
sigma_pairs const& sp, sigma_pairs& result, unsigned threshold) {
|
||||
|
||||
sort* re_sort = seq.re.mk_re(str_sort);
|
||||
const expr_ref full(seq.re.mk_full_seq(re_sort), m); // .*
|
||||
if (sp.empty()) { // ~{} = ⟨.*, .*⟩
|
||||
result.push_back(sigma_pair(full, full, m));
|
||||
return true;
|
||||
}
|
||||
sigma_pairs acc;
|
||||
acc.push_back(sigma_pair(seq.re.mk_complement(sp[0].m_p), full, m));
|
||||
acc.push_back(sigma_pair(full, seq.re.mk_complement(sp[0].m_q), m));
|
||||
for (unsigned i = 1; i < sp.size(); ++i) {
|
||||
sigma_pairs next;
|
||||
next.push_back(sigma_pair(seq.re.mk_complement(sp[i].m_p), full, m));
|
||||
next.push_back(sigma_pair(full, seq.re.mk_complement(sp[i].m_q), m));
|
||||
sigma_pairs tmp;
|
||||
if (intersect_sigma_pairs(m, seq, acc, next, tmp, threshold) || tmp.empty())
|
||||
break;
|
||||
acc = std::move(tmp);
|
||||
if (acc.size() > threshold)
|
||||
return false;
|
||||
}
|
||||
result.append(acc);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool compute_sigma(ast_manager& m, seq_util& seq, seq_rewriter& rw, const euf::snode* r, sigma_pairs& result, unsigned threshold) {
|
||||
SASSERT(r);
|
||||
sort* str_sort = nullptr;
|
||||
if (!seq.is_re(r, str_sort)) return;
|
||||
expr *body = nullptr;
|
||||
if (!seq.is_re(r->get_expr(), str_sort))
|
||||
return false;
|
||||
std::cout << "Computing sigma of " << snode_label_html(r, m, false) << std::endl;
|
||||
|
||||
if (seq.re.is_epsilon(r)) {
|
||||
if (r->is_empty()) {
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
result.push_back(tau_pair(eps, eps, m));
|
||||
result.push_back(sigma_pair(eps, eps, m));
|
||||
return true;
|
||||
}
|
||||
else if (seq.str.is_unit(r) || seq.str.is_string(r) || seq.re.is_range(r) || seq.re.is_full_char(r) ||
|
||||
(seq.re.is_to_re(r) && seq.str.is_string(to_app(r)->get_arg(0)))) {
|
||||
if (seq.re.is_to_re(r)) {
|
||||
const expr * arg = to_app(r)->get_arg(0);
|
||||
if (r->is_to_re()) {
|
||||
const euf::snode* const c = r->arg0();
|
||||
if (c->is_range()) {
|
||||
const expr_ref ex(c->get_expr(), m);
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
|
||||
result.push_back(sigma_pair(eps, ex, m));
|
||||
result.push_back(sigma_pair(ex, eps, m));
|
||||
return true;
|
||||
}
|
||||
if (c->is_empty()) {
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
result.push_back(sigma_pair(eps, eps, m));
|
||||
return true;
|
||||
}
|
||||
if (c->is_char()) {
|
||||
unsigned val;
|
||||
VERIFY(seq.is_const_char(c->arg0()->get_expr(), val));
|
||||
const expr_ref ex(seq.re.mk_to_re(seq.str.mk_string(zstring(val))), m);
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
|
||||
result.push_back(sigma_pair(eps, ex, m));
|
||||
result.push_back(sigma_pair(ex, eps, m));
|
||||
return true;
|
||||
}
|
||||
if (c->is_string()) {
|
||||
const euf::snode * arg = r->arg0();
|
||||
zstring s;
|
||||
if (seq.str.is_string(arg, s) && s.length() > 1) {
|
||||
if (arg->is_string(s, seq) && s.length() > 1) {
|
||||
for (unsigned i = 0; i <= s.length(); ++i) {
|
||||
expr_ref p(seq.re.mk_to_re(seq.str.mk_string(s.extract(0, i))), m);
|
||||
expr_ref q(seq.re.mk_to_re(seq.str.mk_string(s.extract(i, s.length() - i))), m);
|
||||
result.push_back(tau_pair(p, q, m));
|
||||
result.push_back(sigma_pair(p, q, m));
|
||||
}
|
||||
return;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return false;
|
||||
}
|
||||
if (r->is_full_char()) {
|
||||
const expr_ref ex(r->get_expr(), m);
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
result.push_back(tau_pair(eps, r, m));
|
||||
result.push_back(tau_pair(r, eps, m));
|
||||
|
||||
result.push_back(sigma_pair(eps, ex, m));
|
||||
result.push_back(sigma_pair(ex, eps, m));
|
||||
return true;
|
||||
}
|
||||
else if (seq.re.is_empty(r)) {
|
||||
// empty set has no splits
|
||||
}
|
||||
else if (seq.re.is_union(r)) {
|
||||
for (expr* arg : *to_app(r)) {
|
||||
compute_tau(m, seq, sg, arg, result);
|
||||
if (r->is_union()) {
|
||||
// σ(r₁ ⊔ r₂) = σ(r₁) ∪ σ(r₂)
|
||||
SASSERT(r->num_args() >= 2);
|
||||
for (const euf::snode* const arg : *r) {
|
||||
if (!compute_sigma(m, seq, rw, arg, result, threshold))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else if (seq.re.is_concat(r)) {
|
||||
const unsigned num_args = to_app(r)->get_num_args();
|
||||
if (num_args == 0) {
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
result.push_back(tau_pair(eps, eps, m));
|
||||
return;
|
||||
if (r->is_intersect()) {
|
||||
// σ(r₁ ⊓ r₂ ⊓ …) = σ(r₁) ⊓ σ(r₂) ⊓ …; empty intersection (0 args) = {⟨.*,.*⟩}
|
||||
const unsigned n = r->num_args();
|
||||
SASSERT(n >= 2);
|
||||
sigma_pairs current;
|
||||
if (!compute_sigma(m, seq, rw, r->arg0(), current, threshold))
|
||||
return false;
|
||||
for (unsigned i = 1; i < n && !current.empty(); ++i) {
|
||||
sigma_pairs arg_i;
|
||||
compute_sigma(m, seq, rw, r->arg(i), arg_i, threshold);
|
||||
sigma_pairs tmp;
|
||||
if (!intersect_sigma_pairs(m, seq, current, arg_i, tmp, threshold))
|
||||
return false;
|
||||
current = std::move(tmp);
|
||||
}
|
||||
for (unsigned i = 0; i < num_args; ++i) {
|
||||
tau_pairs tau_arg;
|
||||
compute_tau(m, seq, sg, to_app(r)->get_arg(i), tau_arg);
|
||||
result.append(current);
|
||||
return true;
|
||||
}
|
||||
if (r->is_complement()) {
|
||||
// σ(~r) = ~σ(r)
|
||||
sigma_pairs body_pairs;
|
||||
if (!compute_sigma(m, seq, rw, r->arg0(), body_pairs, threshold))
|
||||
return false;
|
||||
return complement_sigma_pairs(m, seq, str_sort, body_pairs, result, threshold);
|
||||
}
|
||||
if (r->is_concat()) {
|
||||
const unsigned n = r->num_args();
|
||||
SASSERT(n >= 2);
|
||||
for (unsigned i = 0; i < n; ++i) {
|
||||
sigma_pairs sigma_arg;
|
||||
if (!compute_sigma(m, seq, rw, r->arg(i), sigma_arg, threshold))
|
||||
return false;
|
||||
|
||||
expr_ref left(m);
|
||||
expr_ref right(m);
|
||||
|
|
@ -3646,58 +3741,116 @@ namespace seq {
|
|||
left = seq.re.mk_epsilon(str_sort);
|
||||
else {
|
||||
for (unsigned j = 0; j < i; ++j) {
|
||||
const auto arg = to_app(r)->get_arg(j);
|
||||
left = left ? seq.re.mk_concat(left, arg) : arg;
|
||||
const euf::snode* arg = r->arg(j);
|
||||
left = left ? seq.re.mk_concat(left, arg->get_expr()) : arg->get_expr();
|
||||
}
|
||||
}
|
||||
|
||||
if (i == num_args - 1)
|
||||
if (i == n - 1)
|
||||
right = seq.re.mk_epsilon(str_sort);
|
||||
else {
|
||||
for (unsigned j = i + 1; j < num_args; ++j) {
|
||||
const auto arg = to_app(r)->get_arg(j);
|
||||
right = right ? seq.re.mk_concat(right, arg) : arg;
|
||||
for (unsigned j = i + 1; j < n; ++j) {
|
||||
const euf::snode* arg = r->arg(j);
|
||||
right = right ? seq.re.mk_concat(right, arg->get_expr()) : arg->get_expr();
|
||||
}
|
||||
}
|
||||
|
||||
for (auto const &[tp, tq] : tau_arg) {
|
||||
seq_rewriter rw(m);
|
||||
auto p = rw.mk_re_append(left, tp);
|
||||
auto q = rw.mk_re_append(tq, right);
|
||||
result.push_back(tau_pair(p, q, m));
|
||||
for (auto const &[tp, tq] : sigma_arg) {
|
||||
expr_ref p = rw.mk_re_append(left, tp);
|
||||
expr_ref q = rw.mk_re_append(tq, right);
|
||||
result.push_back(sigma_pair(p, q, m));
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else if (seq.re.is_star(r, body) || seq.re.is_plus(r, body)) {
|
||||
if (seq.re.is_plus(r)) {
|
||||
const expr_ref star(seq.re.mk_star(body), m);
|
||||
const expr_ref concat(seq.re.mk_concat(body, star), m);
|
||||
compute_tau(m, seq, sg, concat, result);
|
||||
return;
|
||||
}
|
||||
if (r->is_star()) {
|
||||
const euf::snode* body = r->arg0();
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
result.push_back(sigma_pair(eps, eps, m));
|
||||
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
result.push_back(tau_pair(eps, eps, m));
|
||||
|
||||
tau_pairs tau_body;
|
||||
compute_tau(m, seq, sg, body, tau_body);
|
||||
for (auto const &[tp, tq] : tau_body) {
|
||||
seq_rewriter rw(m);
|
||||
auto p = rw.mk_re_append(r, tp);
|
||||
auto q = rw.mk_re_append(tq, r);
|
||||
result.push_back(tau_pair(p, q, m));
|
||||
sigma_pairs sigma_body;
|
||||
if (!compute_sigma(m, seq, rw, body, sigma_body, threshold))
|
||||
return false;
|
||||
for (auto const &[tp, tq] : sigma_body) {
|
||||
auto p = rw.mk_re_append(r->get_expr(), tp);
|
||||
auto q = rw.mk_re_append(tq, r->get_expr());
|
||||
result.push_back(sigma_pair(p, q, m));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else if (seq.re.is_opt(r, body)) {
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
result.push_back(tau_pair(eps, eps, m));
|
||||
compute_tau(m, seq, sg, body, result);
|
||||
if (r->is_plus()) {
|
||||
// r⁺ = r·r* ; by Kleene factorization σ(r⁺) = r*·σ(r)·r*.
|
||||
// Same shape as the star rule but with the surrounding factor being
|
||||
// body* without the {⟨ε,ε⟩} pair
|
||||
const euf::snode* body = r->arg0();
|
||||
const expr_ref star(seq.re.mk_star(body->get_expr()), m); // body*
|
||||
|
||||
sigma_pairs sigma_body;
|
||||
if (!compute_sigma(m, seq, rw, body, sigma_body, threshold))
|
||||
return false;
|
||||
for (auto const &[tp, tq] : sigma_body) {
|
||||
auto p = rw.mk_re_append(star, tp); // body* · tp
|
||||
auto q = rw.mk_re_append(tq, star); // tq · body*
|
||||
result.push_back(sigma_pair(p, q, m));
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
const expr_ref eps(seq.re.mk_epsilon(str_sort), m);
|
||||
result.push_back(tau_pair(eps, r, m));
|
||||
result.push_back(tau_pair(r, eps, m));
|
||||
// the simplifier should have eliminated everything else already
|
||||
UNREACHABLE();
|
||||
return false;
|
||||
}
|
||||
|
||||
void simplify_sigma_pairs(sigma_pairs& pairs, seq_regex& sr, euf::sgraph& sg) {
|
||||
return; // For now
|
||||
if (pairs.size() <= 1)
|
||||
return;
|
||||
// Guard against pathological cost: subsumption is O(n^2) language-subset
|
||||
// BFS checks. Large split-sets are left to the factorization threshold.
|
||||
if (pairs.size() > 64)
|
||||
return;
|
||||
|
||||
struct row { euf::snode* p; euf::snode* q; unsigned idx; };
|
||||
|
||||
// Materialise snodes once; drop pairs with a structurally-empty component.
|
||||
vector<row> rows;
|
||||
for (unsigned i = 0; i < pairs.size(); ++i) {
|
||||
euf::snode* p = sg.mk(pairs[i].m_p);
|
||||
euf::snode* q = sg.mk(pairs[i].m_q);
|
||||
if (sr.is_empty_regex(p) || sr.is_empty_regex(q))
|
||||
continue;
|
||||
rows.push_back({ p, q, i });
|
||||
}
|
||||
|
||||
// a subsumes b iff L(b.p) ⊆ L(a.p) and L(b.q) ⊆ L(a.q).
|
||||
// is_language_subset may return l_undef (inconclusive); only treat a
|
||||
// definite l_true as subsumption, so we never drop a needed split.
|
||||
auto subsumes = [&](row const& a, row const& b) {
|
||||
return sr.is_language_subset(b.p, a.p) == l_true &&
|
||||
sr.is_language_subset(b.q, a.q) == l_true;
|
||||
};
|
||||
|
||||
vector<row> kept;
|
||||
for (row const& r : rows) {
|
||||
bool redundant = false;
|
||||
for (row const& k : kept)
|
||||
if (subsumes(k, r)) { redundant = true; break; }
|
||||
if (redundant)
|
||||
continue;
|
||||
// drop already-kept rows strictly subsumed by r
|
||||
unsigned w = 0;
|
||||
for (unsigned t = 0; t < kept.size(); ++t) {
|
||||
if (subsumes(r, kept[t]))
|
||||
continue;
|
||||
kept[w++] = kept[t];
|
||||
}
|
||||
kept.shrink(w);
|
||||
kept.push_back(r);
|
||||
}
|
||||
|
||||
sigma_pairs result;
|
||||
for (row const& k : kept)
|
||||
result.push_back(pairs[k.idx]);
|
||||
pairs.swap(result);
|
||||
}
|
||||
|
||||
bool nielsen_graph::apply_regex_factorization(nielsen_node* node) {
|
||||
|
|
@ -3713,7 +3866,9 @@ namespace seq {
|
|||
for (str_mem const& mem : node->str_mems()) {
|
||||
SASSERT(mem.well_formed());
|
||||
|
||||
if (mem.m_str->is_empty() || mem.is_primitive() || !mem.m_regex->is_classical())
|
||||
// compute_sigma handles all regex forms (incl. complement / intersection),
|
||||
// so the classical restriction is no longer needed.
|
||||
if (mem.m_str->is_empty() || mem.is_primitive())
|
||||
continue;
|
||||
|
||||
euf::snode* first = mem.m_str->first();
|
||||
|
|
@ -3721,10 +3876,14 @@ namespace seq {
|
|||
SASSERT(!first->is_char());
|
||||
euf::snode* tail = m_sg.drop_first(mem.m_str);
|
||||
SASSERT(tail);
|
||||
std::cout << "Processing: " << mem_pp(mem, m) << std::endl;
|
||||
|
||||
tau_pairs pairs;
|
||||
compute_tau(m, m_seq, m_sg, mem.m_regex->get_expr(), pairs);
|
||||
sigma_pairs pairs;
|
||||
seq_rewriter rw(m);
|
||||
if (!compute_sigma(m, m_seq, rw, mem.m_regex, pairs, m_regex_factorization_threshold))
|
||||
continue;
|
||||
|
||||
if (m_seq_regex)
|
||||
simplify_sigma_pairs(pairs, *m_seq_regex, m_sg);
|
||||
|
||||
vector<rf_split> feasible;
|
||||
dep_tracker eliminated_dep = mem.m_dep;
|
||||
|
|
@ -4222,7 +4381,7 @@ namespace seq {
|
|||
return false;
|
||||
|
||||
SASSERT(power->is_power() && power->num_args() >= 1);
|
||||
euf::snode* base = power->arg(0);
|
||||
euf::snode* base = power->arg0();
|
||||
const expr_ref zero(a.mk_int(0), m);
|
||||
|
||||
// Branch 1: enumerate all decompositions of the base.
|
||||
|
|
@ -4331,7 +4490,7 @@ namespace seq {
|
|||
return false;
|
||||
|
||||
SASSERT(power->is_power() && power->num_args() >= 1);
|
||||
euf::snode* base = power->arg(0);
|
||||
euf::snode* base = power->arg0();
|
||||
expr* exp_n = get_power_exponent(power);
|
||||
SASSERT(exp_n);
|
||||
const expr_ref zero(a.mk_int(0), m);
|
||||
|
|
@ -4375,7 +4534,7 @@ namespace seq {
|
|||
return false;
|
||||
|
||||
SASSERT(power->is_power() && power->num_args() >= 1);
|
||||
euf::snode* base = power->arg(0);
|
||||
euf::snode* base = power->arg0();
|
||||
expr* exp_n = get_power_exponent(power);
|
||||
SASSERT(exp_n);
|
||||
const expr_ref zero(a.mk_int(0), m);
|
||||
|
|
@ -4535,7 +4694,7 @@ namespace seq {
|
|||
return expr_ref(a.mk_int(1), m);
|
||||
|
||||
if (n->is_concat()) {
|
||||
const expr_ref left = compute_length_expr(n->arg(0));
|
||||
const expr_ref left = compute_length_expr(n->arg0());
|
||||
const expr_ref right = compute_length_expr(n->arg(1));
|
||||
return expr_ref(a.mk_add(left, right), m);
|
||||
}
|
||||
|
|
@ -4562,8 +4721,8 @@ namespace seq {
|
|||
expr_ref len_lhs = compute_length_expr(eq.m_lhs);
|
||||
expr_ref len_rhs = compute_length_expr(eq.m_rhs);
|
||||
TRACE(seq,
|
||||
tout << "Length constraint from LHS " << snode_label_html(eq.m_lhs, m) << " to " << len_lhs << ":\n";
|
||||
tout << "Length constraint from RHS " << snode_label_html(eq.m_rhs, m) << " to " << len_rhs << "\n");
|
||||
tout << "Length constraint from LHS " << snode_label_html(eq.m_lhs, m, true) << " to " << len_lhs << ":\n";
|
||||
tout << "Length constraint from RHS " << snode_label_html(eq.m_rhs, m, true) << " to " << len_rhs << "\n");
|
||||
expr_ref len_eq(m.mk_eq(len_lhs, len_rhs), m);
|
||||
constraints.push_back(length_constraint(len_eq, eq.m_dep, length_kind::eq, true, m));
|
||||
|
||||
|
|
|
|||
|
|
@ -257,9 +257,34 @@ namespace seq {
|
|||
class seq_parikh;
|
||||
class seq_regex; // forward declaration (defined in smt/seq/seq_regex.h)
|
||||
|
||||
std::string snode_label_html(euf::snode const* n, obj_map<expr, std::string>& names, uint64_t& next_id, ast_manager& m);
|
||||
std::string snode_label_html(euf::snode const* n,
|
||||
obj_map<expr, std::string>& names, uint64_t& next_id, ast_manager& m, bool html_escape);
|
||||
|
||||
std::string snode_label_html(euf::snode const* n, ast_manager& m);
|
||||
std::string snode_label_html(euf::snode const* n, ast_manager& m, bool html_escape);
|
||||
|
||||
// Split-pair produced by compute_sigma: uv |= r iff exists i: u |= m_p[i] and v |= m_q[i]
|
||||
struct sigma_pair {
|
||||
expr_ref m_p;
|
||||
expr_ref m_q;
|
||||
sigma_pair(expr* p, expr* q, ast_manager& m) : m_p(p, m), m_q(q, m) {
|
||||
SASSERT(p && q);
|
||||
}
|
||||
};
|
||||
typedef vector<sigma_pair> sigma_pairs;
|
||||
|
||||
// Compute the split-set sigma(r) per the splitting rules of the paper
|
||||
// "Extended Regular Expression Membership". Generalises the classical compute_tau
|
||||
// with intersection and complement cases via the split-set algebra.
|
||||
// `result` is appended to (not cleared).
|
||||
bool compute_sigma(ast_manager& m, seq_util& seq, seq_rewriter& rw, const euf::snode* r, sigma_pairs& result, unsigned threshold);
|
||||
|
||||
// Simplify a split-set in place using the split algebra's language-level rules
|
||||
// (paper section "split-set simplification heuristics"): drop pairs with an
|
||||
// empty-language component, and drop any split subsumed by another
|
||||
// (<D_i,N_i> is subsumed by <D_j,N_j> iff L(D_i) subseteq L(D_j) and L(N_i) subseteq L(N_j)).
|
||||
// Subsumption tames the 2^k blow-up of sigma(~r) (e.g. sigma(~a*): 8 -> 2).
|
||||
// Requires the regex emptiness/subset checker and an sgraph to build snodes.
|
||||
void simplify_sigma_pairs(sigma_pairs& pairs, seq_regex& sr, euf::sgraph& sg);
|
||||
|
||||
// simplification result for constraint processing
|
||||
// mirrors ZIPT's SimplifyResult enum
|
||||
|
|
@ -377,8 +402,8 @@ namespace seq {
|
|||
// string equality constraint: lhs = rhs
|
||||
// mirrors ZIPT's StrEq (both sides are regex-free snode trees)
|
||||
struct str_eq {
|
||||
euf::snode* m_lhs;
|
||||
euf::snode* m_rhs;
|
||||
euf::snode* m_lhs; // assumed to be non-null
|
||||
euf::snode* m_rhs; // assumed to be non-null
|
||||
dep_tracker m_dep;
|
||||
|
||||
str_eq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep):
|
||||
|
|
@ -400,7 +425,8 @@ namespace seq {
|
|||
bool contains_var(euf::snode* var) const;
|
||||
|
||||
bool well_formed() const {
|
||||
return !!m_lhs && !!m_rhs;
|
||||
// assumed to be always true
|
||||
return m_lhs && m_rhs;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -411,13 +437,15 @@ namespace seq {
|
|||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &out, eq_pp const &p) {
|
||||
return out << mk_pp(p.eq.m_lhs->get_expr(), p.m) << " == " << mk_pp(p.eq.m_rhs->get_expr(), p.m) << "\n";
|
||||
return out << snode_label_html(p.eq.m_lhs, p.m, false)
|
||||
<< " == "
|
||||
<< snode_label_html(p.eq.m_rhs, p.m, false);
|
||||
}
|
||||
|
||||
// string disequality constraint: lhs != rhs
|
||||
struct str_deq {
|
||||
euf::snode* m_lhs;
|
||||
euf::snode* m_rhs;
|
||||
euf::snode* m_lhs; // assumed to be non-null
|
||||
euf::snode* m_rhs; // assumed to be non-null
|
||||
dep_tracker m_dep;
|
||||
|
||||
str_deq(euf::snode* lhs, euf::snode* rhs, dep_tracker const& dep):
|
||||
|
|
@ -440,7 +468,8 @@ namespace seq {
|
|||
}
|
||||
|
||||
bool well_formed() const {
|
||||
return !!m_lhs && !!m_rhs;
|
||||
// assumed to be always true
|
||||
return m_lhs && m_rhs;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -451,17 +480,18 @@ namespace seq {
|
|||
};
|
||||
|
||||
inline std::ostream &operator<<(std::ostream &out, deq_pp const &p) {
|
||||
return out << mk_pp(p.deq.m_lhs->get_expr(), p.m) << " != " << mk_pp(p.deq.m_rhs->get_expr(), p.m) << "\n";
|
||||
return out << snode_label_html(p.deq.m_lhs, p.m, false)
|
||||
<< " != "
|
||||
<< snode_label_html(p.deq.m_rhs, p.m, false);
|
||||
}
|
||||
|
||||
// regex membership constraint: str in regex
|
||||
// mirrors ZIPT's StrMem
|
||||
struct str_mem {
|
||||
euf::snode* m_str;
|
||||
euf::snode* m_regex;
|
||||
euf::snode* m_str; // assumed to be non-null
|
||||
euf::snode* m_regex; // assumed to be non-null
|
||||
dep_tracker m_dep;
|
||||
|
||||
// str_mem(): m_str(nullptr), m_regex(nullptr), m_dep(nullptr) {}
|
||||
str_mem(euf::snode* str, euf::snode* regex, dep_tracker const& dep):
|
||||
m_str(str), m_regex(regex), m_dep(dep) {}
|
||||
|
||||
|
|
@ -481,17 +511,21 @@ namespace seq {
|
|||
bool contains_var(euf::snode* var) const;
|
||||
|
||||
bool well_formed() const {
|
||||
return !!m_str && !!m_regex;
|
||||
// assumed to be always true
|
||||
return m_str && m_regex;
|
||||
}
|
||||
};
|
||||
|
||||
struct mem_pp {
|
||||
str_mem const& mem;
|
||||
ast_manager &m;
|
||||
mem_pp(str_mem const& mem, ast_manager& m) : m(m), mem(mem) {}
|
||||
mem_pp(str_mem const& mem, ast_manager& m) : mem(mem), m(m) {}
|
||||
};
|
||||
inline std::ostream &operator<<(std::ostream &out, mem_pp const &p) {
|
||||
return out << mk_pp(p.mem.m_str->get_expr(), p.m) << " in " << mk_pp(p.mem.m_regex->get_expr(), p.m) << "\n";
|
||||
return out
|
||||
<< snode_label_html(p.mem.m_str, p.m, false)
|
||||
<< " in "
|
||||
<< snode_label_html(p.mem.m_regex, p.m, false);
|
||||
}
|
||||
|
||||
// string variable substitution: var -> replacement
|
||||
|
|
@ -907,6 +941,7 @@ namespace seq {
|
|||
bool m_parikh_enabled = true;
|
||||
bool m_signature_split = false;
|
||||
unsigned m_regex_factorization_threshold = 1;
|
||||
bool m_regex_factorization_eager = false;
|
||||
unsigned m_fresh_cnt = 0;
|
||||
nielsen_stats m_stats;
|
||||
|
||||
|
|
@ -1050,6 +1085,7 @@ namespace seq {
|
|||
void set_signature_split(bool e) { m_signature_split = e; }
|
||||
|
||||
void set_regex_factorization_threshold(unsigned max) { m_regex_factorization_threshold = max; }
|
||||
void set_regex_factorization_eager(bool e) { m_regex_factorization_eager = e; }
|
||||
|
||||
// display for debugging
|
||||
std::ostream& display(std::ostream& out) const;
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ namespace seq {
|
|||
out << "nielsen_graph with " << m_nodes.size() << " nodes, "
|
||||
<< m_edges.size() << " edges\n";
|
||||
|
||||
for (nielsen_node* n : m_nodes)
|
||||
for (const nielsen_node * n : m_nodes)
|
||||
display(out, n);
|
||||
|
||||
return out;
|
||||
|
|
@ -105,10 +105,12 @@ namespace seq {
|
|||
// -----------------------------------------------------------------------
|
||||
|
||||
// Helper: HTML-escape a string and replace literal \n with <br/>.
|
||||
static std::string dot_html_escape(std::string const& s) {
|
||||
static std::string dot_html_escape(std::string const& s, const bool html_escape) {
|
||||
if (!html_escape)
|
||||
return s;
|
||||
std::string r;
|
||||
r.reserve(s.size());
|
||||
for (char c : s) {
|
||||
for (const char c : s) {
|
||||
switch (c) {
|
||||
case '&': r += "&"; break;
|
||||
case '<': r += "<"; break;
|
||||
|
|
@ -130,7 +132,7 @@ namespace seq {
|
|||
return result;
|
||||
}
|
||||
|
||||
std::string decode_recursive_name(expr* e, ast_manager& m) {
|
||||
std::string decode_recursive_name(expr* e, ast_manager& m, bool html_escape) {
|
||||
SASSERT(e && is_app(e));
|
||||
th_rewriter rw(m);
|
||||
const skolem sk(m, rw);
|
||||
|
|
@ -141,7 +143,7 @@ namespace seq {
|
|||
}
|
||||
if (to_app(arg)->get_num_args() != 0)
|
||||
return "";
|
||||
std::string s = dot_html_escape(to_app(arg)->get_decl()->get_name().str());
|
||||
std::string s = dot_html_escape(to_app(arg)->get_decl()->get_name().str(), html_escape);
|
||||
if (cnt == 0)
|
||||
return s;
|
||||
return s + "[" + std::to_string(cnt) + "]";
|
||||
|
|
@ -159,7 +161,7 @@ namespace seq {
|
|||
// Helper: render an arithmetic/integer expression in infix HTML notation.
|
||||
// Recognises +, -, *, unary minus, numerals, str.len, and named constants;
|
||||
// falls back to HTML-escaped mk_pp for anything else.
|
||||
static std::string arith_expr_html(expr* e, obj_map<expr, std::string>& names, uint64_t& next_id, ast_manager& m) {
|
||||
static std::string arith_expr_html(expr* e, obj_map<expr, std::string>& names, uint64_t& next_id, ast_manager& m, bool html_escape) {
|
||||
if (!e) return "null";
|
||||
arith_util arith(m);
|
||||
seq_util seq(m);
|
||||
|
|
@ -169,39 +171,39 @@ namespace seq {
|
|||
if (!is_app(e)) {
|
||||
std::ostringstream os;
|
||||
os << mk_bounded_pp(e, m);
|
||||
return dot_html_escape(os.str());
|
||||
return dot_html_escape(os.str(), html_escape);
|
||||
}
|
||||
app* a = to_app(e);
|
||||
expr* x, * y;
|
||||
if (m.is_or(e)) {
|
||||
app* ap = to_app(e);
|
||||
std::string res;
|
||||
res = arith_expr_html(ap->get_arg(0), names, next_id, m);
|
||||
res = arith_expr_html(ap->get_arg(0), names, next_id, m, html_escape);
|
||||
for (unsigned i = 1; i < ap->get_num_args(); ++i) {
|
||||
res += " || ";
|
||||
res += arith_expr_html(ap->get_arg(i), names, next_id, m);
|
||||
res += arith_expr_html(ap->get_arg(i), names, next_id, m, html_escape);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (m.is_not(e, x))
|
||||
return "!(" + arith_expr_html(x, names, next_id, m) + ")";
|
||||
return "!(" + arith_expr_html(x, names, next_id, m, html_escape) + ")";
|
||||
if (arith.is_lt(e, x, y)) {
|
||||
return arith_expr_html(x, names, next_id, m) + " < " + arith_expr_html(y, names, next_id, m);
|
||||
return arith_expr_html(x, names, next_id, m, html_escape) + " < " + arith_expr_html(y, names, next_id, m, html_escape);
|
||||
}
|
||||
if (arith.is_gt(e, x, y)) {
|
||||
return arith_expr_html(x, names, next_id, m) + " > " + arith_expr_html(y, names, next_id, m);
|
||||
return arith_expr_html(x, names, next_id, m, html_escape) + " > " + arith_expr_html(y, names, next_id, m, html_escape);
|
||||
}
|
||||
if (arith.is_le(e, x, y)) {
|
||||
return arith_expr_html(x, names, next_id, m) + " ≤ " + arith_expr_html(y, names, next_id, m);
|
||||
return arith_expr_html(x, names, next_id, m, html_escape) + " ≤ " + arith_expr_html(y, names, next_id, m, html_escape);
|
||||
}
|
||||
if (arith.is_ge(e, x, y)) {
|
||||
return arith_expr_html(x, names, next_id, m) + " ≥ " + arith_expr_html(y, names, next_id, m);
|
||||
return arith_expr_html(x, names, next_id, m, html_escape) + " ≥ " + arith_expr_html(y, names, next_id, m, html_escape);
|
||||
}
|
||||
if (m.is_eq(e, x, y)) {
|
||||
return arith_expr_html(x, names, next_id, m) + " = " + arith_expr_html(y, names, next_id, m);
|
||||
return arith_expr_html(x, names, next_id, m, html_escape) + " = " + arith_expr_html(y, names, next_id, m, html_escape);
|
||||
}
|
||||
if (arith.is_add(e)) {
|
||||
std::string r = arith_expr_html(a->get_arg(0), names, next_id, m);
|
||||
std::string r = arith_expr_html(a->get_arg(0), names, next_id, m, html_escape);
|
||||
for (unsigned i = 1; i < a->get_num_args(); ++i) {
|
||||
expr* arg = a->get_arg(i);
|
||||
// render (+ x (- y)) as "x - y" and (+ x (- n)) as "x - n"
|
||||
|
|
@ -209,32 +211,32 @@ namespace seq {
|
|||
rational neg_val;
|
||||
if (arith.is_uminus(arg, neg_inner)) {
|
||||
r += " − "; // minus sign
|
||||
r += arith_expr_html(neg_inner, names, next_id, m);
|
||||
r += arith_expr_html(neg_inner, names, next_id, m, html_escape);
|
||||
} else if (arith.is_numeral(arg, neg_val) && neg_val.is_neg()) {
|
||||
r += " − "; // minus sign
|
||||
r += (-neg_val).to_string();
|
||||
}
|
||||
else {
|
||||
r += " + ";
|
||||
r += arith_expr_html(arg, names, next_id, m);
|
||||
r += arith_expr_html(arg, names, next_id, m, html_escape);
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
if (arith.is_sub(e, x, y))
|
||||
return arith_expr_html(x, names, next_id, m) + " − " + arith_expr_html(y, names, next_id, m);
|
||||
return arith_expr_html(x, names, next_id, m, html_escape) + " − " + arith_expr_html(y, names, next_id, m, html_escape);
|
||||
if (arith.is_uminus(e, x))
|
||||
return "−" + arith_expr_html(x, names, next_id, m);
|
||||
return "−" + arith_expr_html(x, names, next_id, m, html_escape);
|
||||
if (arith.is_mul(e)) {
|
||||
std::string r;
|
||||
for (unsigned i = 0; i < a->get_num_args(); ++i) {
|
||||
if (i > 0) r += " · "; // middle dot
|
||||
r += arith_expr_html(a->get_arg(i), names, next_id, m);
|
||||
r += arith_expr_html(a->get_arg(i), names, next_id, m, html_escape);
|
||||
}
|
||||
return r;
|
||||
}
|
||||
if (seq.str.is_length(e, x)) {
|
||||
std::string name = decode_recursive_name(x, m);
|
||||
std::string name = decode_recursive_name(x, m, html_escape);
|
||||
if (!name.empty()) {
|
||||
return "|" + name + "|";
|
||||
}
|
||||
|
|
@ -253,28 +255,23 @@ namespace seq {
|
|||
|
||||
// named constant, fresh variable like n!0
|
||||
if (a->get_num_args() == 0)
|
||||
return dot_html_escape(a->get_decl()->get_name().str());
|
||||
return dot_html_escape(a->get_decl()->get_name().str(), html_escape);
|
||||
if (names.contains(e))
|
||||
return names[e];
|
||||
std::stringstream ss;
|
||||
ss << mk_bounded_pp(e, m);
|
||||
std::string s = dot_html_escape(ss.str());
|
||||
std::string s = dot_html_escape(ss.str(), html_escape);
|
||||
names.insert(e, s);
|
||||
return s;
|
||||
|
||||
// fallback
|
||||
std::ostringstream os;
|
||||
os << mk_pp(e, m);
|
||||
return dot_html_escape(os.str());
|
||||
}
|
||||
|
||||
// Helper: render a constraint as an HTML string for DOT edge labels.
|
||||
static std::string constraint_html(constraint const& ic, obj_map<expr, std::string>& names, uint64_t& next_id, ast_manager& m) {
|
||||
if (ic.fml) return arith_expr_html(ic.fml, names, next_id, m);
|
||||
if (ic.fml) return arith_expr_html(ic.fml, names, next_id, m, true);
|
||||
return "null";
|
||||
}
|
||||
|
||||
static std::string regex_expr_html(expr* e, obj_map<expr, std::string>& names, uint64_t& next_id, ast_manager& m, seq_util& seq) {
|
||||
static std::string regex_expr_html(expr* e, obj_map<expr, std::string>& names, uint64_t& next_id, ast_manager& m, seq_util& seq, bool html_escape) {
|
||||
if (!e)
|
||||
return "null";
|
||||
expr* a = nullptr, * b = nullptr;
|
||||
|
|
@ -296,14 +293,14 @@ namespace seq {
|
|||
}
|
||||
if (seq.str.is_string(arg, s)) {
|
||||
if (!first) os << " ";
|
||||
os << "\"" + dot_html_escape(s.encode()) + "\"";
|
||||
os << "\"" + dot_html_escape(s.encode(), html_escape) + "\"";
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
unsigned ch_val = 0;
|
||||
if (seq.str.is_unit(arg) && seq.is_const_char(to_app(arg)->get_arg(0), ch_val)) {
|
||||
if (!first) os << " ";
|
||||
os << "\"" + dot_html_escape(zstring(ch_val).encode()) + "\"";
|
||||
os << "\"" + dot_html_escape(zstring(ch_val).encode(), html_escape) + "\"";
|
||||
first = false;
|
||||
continue;
|
||||
}
|
||||
|
|
@ -311,7 +308,7 @@ namespace seq {
|
|||
os << mk_pp(arg, m);
|
||||
first = false;
|
||||
}
|
||||
return dot_html_escape(os.str());
|
||||
return dot_html_escape(os.str(), html_escape);
|
||||
}
|
||||
unsigned c;
|
||||
if (seq.is_const_char(e, c))
|
||||
|
|
@ -324,16 +321,16 @@ namespace seq {
|
|||
if (i > 0) res += " ";
|
||||
bool needs_parens = seq.re.is_union(ap->get_arg(i));
|
||||
if (needs_parens) res += "(";
|
||||
res += regex_expr_html(ap->get_arg(i), names, next_id, m, seq);
|
||||
res += regex_expr_html(ap->get_arg(i), names, next_id, m, seq, html_escape);
|
||||
if (needs_parens) res += ")";
|
||||
}
|
||||
return res;
|
||||
}
|
||||
if (m.is_ite(e)) {
|
||||
app* ap = to_app(e);
|
||||
std::string cond = arith_expr_html(ap->get_arg(0), names, next_id, m);
|
||||
std::string t = regex_expr_html(ap->get_arg(1), names, next_id, m, seq);
|
||||
std::string f = regex_expr_html(ap->get_arg(2), names, next_id, m, seq);
|
||||
std::string cond = arith_expr_html(ap->get_arg(0), names, next_id, m, html_escape);
|
||||
std::string t = regex_expr_html(ap->get_arg(1), names, next_id, m, seq, html_escape);
|
||||
std::string f = regex_expr_html(ap->get_arg(2), names, next_id, m, seq, html_escape);
|
||||
return cond + " ? (" + t + ") : (" + f + ")";
|
||||
}
|
||||
if (seq.re.is_union(e)) {
|
||||
|
|
@ -341,10 +338,10 @@ namespace seq {
|
|||
std::string res;
|
||||
if (ap->get_num_args() == 0)
|
||||
return "∅";
|
||||
res = regex_expr_html(ap->get_arg(0), names, next_id, m, seq);
|
||||
res = regex_expr_html(ap->get_arg(0), names, next_id, m, seq, html_escape);
|
||||
for (unsigned i = 1; i < ap->get_num_args(); ++i) {
|
||||
res += " | ";
|
||||
res += regex_expr_html(ap->get_arg(i), names, next_id, m, seq);
|
||||
res += regex_expr_html(ap->get_arg(i), names, next_id, m, seq, html_escape);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
|
@ -355,7 +352,7 @@ namespace seq {
|
|||
if (i > 0) res += " & ";
|
||||
bool needs_parens = seq.re.is_union(ap->get_arg(i)) || seq.re.is_concat(ap->get_arg(i));
|
||||
if (needs_parens) res += "(";
|
||||
res += regex_expr_html(ap->get_arg(i), names, next_id, m, seq);
|
||||
res += regex_expr_html(ap->get_arg(i), names, next_id, m, seq, html_escape);
|
||||
if (needs_parens) res += ")";
|
||||
}
|
||||
return res;
|
||||
|
|
@ -363,21 +360,21 @@ namespace seq {
|
|||
if (seq.re.is_star(e, a)) {
|
||||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||||
std::string res = needs_parens ? "(" : "";
|
||||
res += regex_expr_html(a, names, next_id, m, seq);
|
||||
res += regex_expr_html(a, names, next_id, m, seq, html_escape);
|
||||
res += needs_parens ? ")<SUP>*</SUP>" : "<SUP>*</SUP>";
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_plus(e, a)) {
|
||||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||||
std::string res = needs_parens ? "(" : "";
|
||||
res += regex_expr_html(a, names, next_id, m, seq);
|
||||
res += regex_expr_html(a, names, next_id, m, seq, html_escape);
|
||||
res += needs_parens ? ")<SUP>+</SUP>" : "<SUP>+</SUP>";
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_opt(e, a)) {
|
||||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||||
std::string res = needs_parens ? "(" : "";
|
||||
res += regex_expr_html(a, names, next_id, m, seq);
|
||||
res += regex_expr_html(a, names, next_id, m, seq, html_escape);
|
||||
res += needs_parens ? ")?" : "?";
|
||||
return res;
|
||||
}
|
||||
|
|
@ -385,14 +382,14 @@ namespace seq {
|
|||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||||
std::string res = "~";
|
||||
res += needs_parens ? "(" : "";
|
||||
res += regex_expr_html(a, names, next_id, m, seq);
|
||||
res += regex_expr_html(a, names, next_id, m, seq, html_escape);
|
||||
res += needs_parens ? ")" : "";
|
||||
return res;
|
||||
}
|
||||
if (seq.re.is_range(e, a, b)) {
|
||||
zstring s1, s2;
|
||||
std::string c1 = seq.str.is_string(a, s1) ? dot_html_escape(s1.encode()) : arith_expr_html(a, names, next_id, m);
|
||||
std::string c2 = seq.str.is_string(b, s2) ? dot_html_escape(s2.encode()) : arith_expr_html(b, names, next_id, m);
|
||||
std::string c1 = seq.str.is_string(a, s1) ? dot_html_escape(s1.encode(), html_escape) : arith_expr_html(a, names, next_id, m, html_escape);
|
||||
std::string c2 = seq.str.is_string(b, s2) ? dot_html_escape(s2.encode(), html_escape) : arith_expr_html(b, names, next_id, m, html_escape);
|
||||
return "[" + c1 + "-" + c2 + "]";
|
||||
}
|
||||
if (seq.re.is_full_char(e)) {
|
||||
|
|
@ -407,14 +404,14 @@ namespace seq {
|
|||
|
||||
std::ostringstream os;
|
||||
os << mk_pp(e, m);
|
||||
return dot_html_escape(os.str());
|
||||
return dot_html_escape(os.str(), html_escape);
|
||||
}
|
||||
|
||||
// Helper: render a snode as an HTML label for DOT output.
|
||||
// Groups consecutive s_char tokens into quoted strings, renders s_var by name,
|
||||
// shows s_power with superscripts, s_unit by its inner expression,
|
||||
// and falls back to mk_pp, HTML-escaped, for other token kinds.
|
||||
std::string snode_label_html(euf::snode const* n, obj_map<expr, std::string>& names, uint64_t& next_id, ast_manager& m) {
|
||||
std::string snode_label_html(euf::snode const* n, obj_map<expr, std::string>& names, uint64_t& next_id, ast_manager& m, bool html_escape) {
|
||||
if (!n) return "null";
|
||||
seq_util seq(m);
|
||||
|
||||
|
|
@ -433,7 +430,7 @@ namespace seq {
|
|||
auto flush_chars = [&]() {
|
||||
if (char_acc.empty()) return;
|
||||
if (!first) result += " + ";
|
||||
result += "\"" + dot_html_escape(char_acc) + "\"";
|
||||
result += "\"" + dot_html_escape(char_acc, html_escape) + "\"";
|
||||
first = false;
|
||||
char_acc.clear();
|
||||
};
|
||||
|
|
@ -459,7 +456,7 @@ namespace seq {
|
|||
if (!e) {
|
||||
result += "#" + std::to_string(tok->id());
|
||||
} else if (tok->is_var()) {
|
||||
std::string name = decode_recursive_name(e, m);
|
||||
std::string name = decode_recursive_name(e, m, html_escape);
|
||||
if (!name.empty()) {
|
||||
result += name;
|
||||
}
|
||||
|
|
@ -468,7 +465,7 @@ namespace seq {
|
|||
else {
|
||||
std::stringstream ss;
|
||||
ss << mk_bounded_pp(e, m);
|
||||
std::string s = dot_html_escape(ss.str());
|
||||
std::string s = dot_html_escape(ss.str(), html_escape);
|
||||
names.insert(e, s);
|
||||
result += s;
|
||||
}
|
||||
|
|
@ -477,40 +474,40 @@ namespace seq {
|
|||
std::cout << mk_pp(e, m) << std::endl;
|
||||
expr* ch = to_app(e)->get_arg(0);
|
||||
if (is_app(ch) && to_app(ch)->get_num_args() == 0)
|
||||
result += "[" + dot_html_escape(to_app(ch)->get_decl()->get_name().str()) + "]";
|
||||
result += "[" + dot_html_escape(to_app(ch)->get_decl()->get_name().str(), html_escape) + "]";
|
||||
else {
|
||||
std::ostringstream os;
|
||||
os << mk_pp(ch, m);
|
||||
result += "[" + dot_html_escape(os.str()) + "]";
|
||||
result += "[" + dot_html_escape(os.str(), html_escape) + "]";
|
||||
}
|
||||
}
|
||||
else if (tok->is_power()) {
|
||||
// seq.power(base, n): render as base<SUP>n</SUP>
|
||||
std::string base_html = snode_label_html(tok->arg(0), m);
|
||||
std::string base_html = snode_label_html(tok->arg(0), m, html_escape);
|
||||
if (tok->arg(0)->length() > 1)
|
||||
base_html = "(" + base_html + ")";
|
||||
result += base_html;
|
||||
result += "<SUP>";
|
||||
expr* exp_expr = to_app(e)->get_arg(1);
|
||||
result += arith_expr_html(exp_expr, names, next_id, m);
|
||||
result += arith_expr_html(exp_expr, names, next_id, m, html_escape);
|
||||
result += "</SUP>";
|
||||
}
|
||||
else if (e && seq.is_re(e))
|
||||
result += regex_expr_html(e, names, next_id, m, seq);
|
||||
else if (seq.is_re(e))
|
||||
result += regex_expr_html(e, names, next_id, m, seq, html_escape);
|
||||
else {
|
||||
std::ostringstream os;
|
||||
os << mk_pp(e, m);
|
||||
result += dot_html_escape(os.str());
|
||||
result += dot_html_escape(os.str(), html_escape);
|
||||
}
|
||||
}
|
||||
flush_chars();
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string snode_label_html(euf::snode const* n, ast_manager& m) {
|
||||
std::string snode_label_html(euf::snode const* n, ast_manager& m, bool html_escape) {
|
||||
obj_map<expr, std::string> names;
|
||||
uint64_t next_id = 0;
|
||||
return snode_label_html(n, names, next_id, m);
|
||||
return snode_label_html(n, names, next_id, m, html_escape);
|
||||
}
|
||||
|
||||
std::ostream& nielsen_node::to_html(std::ostream& out, ast_manager& m) const {
|
||||
|
|
@ -530,27 +527,27 @@ namespace seq {
|
|||
for (auto const& eq : m_str_eq) {
|
||||
if (!any) { out << "Cnstr:<br/>"; any = true; }
|
||||
if (!hasEq) { out << "Eq:<br/>"; hasEq = true; }
|
||||
out << snode_label_html(eq.m_lhs, names, next_id, m)
|
||||
out << snode_label_html(eq.m_lhs, names, next_id, m, true)
|
||||
<< " = "
|
||||
<< snode_label_html(eq.m_rhs, names, next_id, m)
|
||||
<< snode_label_html(eq.m_rhs, names, next_id, m, true)
|
||||
<< "<br/>";
|
||||
}
|
||||
// string disequalities
|
||||
for (auto const& eq : m_str_deq) {
|
||||
if (!any) { out << "Cnstr:<br/>"; any = true; }
|
||||
if (!hasDisEq) { out << "DisEq:<br/>"; hasDisEq = true; }
|
||||
out << snode_label_html(eq.m_lhs, names, next_id, m)
|
||||
out << snode_label_html(eq.m_lhs, names, next_id, m, true)
|
||||
<< " ≠ "
|
||||
<< snode_label_html(eq.m_rhs, names, next_id, m)
|
||||
<< snode_label_html(eq.m_rhs, names, next_id, m, true)
|
||||
<< "<br/>";
|
||||
}
|
||||
// regex memberships
|
||||
for (auto const& mem : m_str_mem) {
|
||||
if (!any) { out << "Cnstr:<br/>"; any = true; }
|
||||
if (!hasMem) { out << "Mem:<br/>"; hasMem = true; }
|
||||
out << snode_label_html(mem.m_str, names, next_id, m)
|
||||
out << snode_label_html(mem.m_str, names, next_id, m, true)
|
||||
<< " ∈ "
|
||||
<< regex_expr_html(mem.m_regex->get_expr(), names, next_id, m, graph().seq())
|
||||
<< regex_expr_html(mem.m_regex->get_expr(), names, next_id, m, graph().seq(), true)
|
||||
<< "<br/>";
|
||||
}
|
||||
// character ranges
|
||||
|
|
@ -613,19 +610,6 @@ namespace seq {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns true when the reason is a direct, non-propagated, conflict.
|
||||
// Mirrors ZIPT's NielsenNode.IsActualConflict(): all conflicts except ChildrenFailed.
|
||||
static bool is_actual_conflict(backtrack_reason r) {
|
||||
return r == backtrack_reason::symbol_clash
|
||||
|| r == backtrack_reason::parikh_image
|
||||
|| r == backtrack_reason::subsumption
|
||||
|| r == backtrack_reason::arithmetic
|
||||
|| r == backtrack_reason::regex
|
||||
|| r == backtrack_reason::regex_widening
|
||||
|| r == backtrack_reason::character_range
|
||||
|| r == backtrack_reason::smt;
|
||||
}
|
||||
|
||||
// Render the current substitution of the original (root) variables at `node`.
|
||||
// Walks the parent-edge chain from the root down to `node`, composing the
|
||||
// edge substitutions exactly the way seq_model::extract_assignments does,
|
||||
|
|
@ -683,9 +667,9 @@ namespace seq {
|
|||
if (val == var)
|
||||
continue; // unchanged: variable is still free at this node
|
||||
if (!any) { out << "<br/>Subst:<br/>"; any = true; }
|
||||
out << snode_label_html(var, names, next_id, m)
|
||||
out << snode_label_html(var, names, next_id, m, true)
|
||||
<< " → "
|
||||
<< snode_label_html(val, names, next_id, m)
|
||||
<< snode_label_html(val, names, next_id, m, true)
|
||||
<< "<br/>";
|
||||
}
|
||||
}
|
||||
|
|
@ -747,9 +731,9 @@ namespace seq {
|
|||
out << "[" << e->rule_name() << "]";
|
||||
for (auto const& s : e->subst()) {
|
||||
out << "<br/>";
|
||||
out << snode_label_html(s.m_var, m)
|
||||
out << snode_label_html(s.m_var, m, true)
|
||||
<< " → " // mapping arrow
|
||||
<< snode_label_html(s.m_replacement, m);
|
||||
<< snode_label_html(s.m_replacement, m, true);
|
||||
}
|
||||
// side constraints: integer equalities/inequalities
|
||||
for (auto const& ic : e->side_constraints()) {
|
||||
|
|
@ -787,7 +771,7 @@ namespace seq {
|
|||
if (!start_state || !start_state->get_expr())
|
||||
return out << "}\n";
|
||||
|
||||
unsigned start_state_id = start_state->get_expr()->get_id();
|
||||
const unsigned start_state_id = start_state->get_expr()->get_id();
|
||||
|
||||
unsigned_vector todo;
|
||||
uint_set visited;
|
||||
|
|
@ -796,11 +780,9 @@ namespace seq {
|
|||
todo.push_back(start_state_id);
|
||||
visited.insert(start_state_id);
|
||||
|
||||
ast_manager& m = m_sg.get_manager();
|
||||
|
||||
auto sanitize = [](std::string const& s) {
|
||||
std::string res;
|
||||
for (char c : s) {
|
||||
for (const char c : s) {
|
||||
if (c == '"') res += "\\\"";
|
||||
else if (c == '\n') res += "\\n";
|
||||
else res += c;
|
||||
|
|
@ -816,8 +798,9 @@ namespace seq {
|
|||
if (it == m_partial_dfa_out.end())
|
||||
continue;
|
||||
|
||||
for (unsigned edge_idx : it->second) {
|
||||
if (edge_idx >= m_partial_dfa_edges.size()) continue;
|
||||
for (const unsigned edge_idx : it->second) {
|
||||
if (edge_idx >= m_partial_dfa_edges.size())
|
||||
continue;
|
||||
partial_dfa_edge const& e = m_partial_dfa_edges[edge_idx];
|
||||
edges.push_back(&e);
|
||||
|
||||
|
|
@ -828,11 +811,17 @@ namespace seq {
|
|||
}
|
||||
}
|
||||
|
||||
for (unsigned node_id : visited) {
|
||||
for (const unsigned node_id : visited) {
|
||||
expr* node_expr = nullptr;
|
||||
for (auto* e : edges) {
|
||||
if (e->m_src->get_id() == node_id) { node_expr = e->m_src; break; }
|
||||
if (e->m_dst->get_id() == node_id) { node_expr = e->m_dst; break; }
|
||||
if (e->m_src->get_id() == node_id) {
|
||||
node_expr = e->m_src;
|
||||
break;
|
||||
}
|
||||
if (e->m_dst->get_id() == node_id) {
|
||||
node_expr = e->m_dst;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!node_expr) {
|
||||
for (expr* pinned : m_partial_dfa_pin) {
|
||||
|
|
|
|||
|
|
@ -324,8 +324,7 @@ namespace seq {
|
|||
// -----------------------------------------------------------------------
|
||||
|
||||
bool seq_regex::is_empty_regex(euf::snode* re) const {
|
||||
if (!re)
|
||||
return false;
|
||||
SASSERT(re);
|
||||
// direct empty language constant
|
||||
if (re->is_fail())
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -208,9 +208,8 @@ namespace smt {
|
|||
case l_true: {
|
||||
// regexes are disjoint: conflict
|
||||
enode_pair_vector eqs;
|
||||
literal_vector lits;
|
||||
eqs.push_back({n1, n2});
|
||||
set_conflict(eqs, lits);
|
||||
set_conflict(eqs);
|
||||
return;
|
||||
}
|
||||
default: break;
|
||||
|
|
@ -454,37 +453,53 @@ namespace smt {
|
|||
|
||||
SASSERT(mem.well_formed());
|
||||
|
||||
expr* const re = mem.m_regex->get_expr();
|
||||
expr* const s = mem.m_str->get_expr();
|
||||
std::cout << "Propagating: " << seq::mem_pp(mem, m) << std::endl;
|
||||
|
||||
if (mem.m_regex->is_full_seq()) {
|
||||
// u \in .* can be ignored
|
||||
m_ignored_mem.insert(mem.lit);
|
||||
ctx.push_trail(insert_map(m_ignored_mem, mem.lit));
|
||||
return;
|
||||
}
|
||||
|
||||
// try to rewrite into an easier form
|
||||
expr_ref simpl(m);
|
||||
m_th_rewriter(re, simpl);
|
||||
if (simpl != re) {
|
||||
// we could simplify; let's propagate it
|
||||
const expr_ref e(m_seq.re.mk_in_re(s, simpl), m);
|
||||
ctx.mk_th_axiom(get_id(), ~mem.lit, mk_literal(e));
|
||||
m_ignored_mem.insert(mem.lit);
|
||||
ctx.push_trail(insert_map(m_ignored_mem, mem.lit));
|
||||
// std::cout << "Simplified to " << seq::snode_label_html(m_sgraph.mk(simpl), m, false) << std::endl;
|
||||
return;
|
||||
}
|
||||
|
||||
// regex is ∅ → conflict
|
||||
if (m_regex.is_empty_regex(mem.m_regex)) {
|
||||
enode_pair_vector eqs;
|
||||
literal_vector lits;
|
||||
lits.push_back(mem.lit);
|
||||
set_conflict(eqs, lits);
|
||||
set_conflict(lits);
|
||||
return;
|
||||
}
|
||||
|
||||
// empty string in non-nullable regex → conflict
|
||||
if (mem.m_str->is_empty() && m_seq.re.get_info(mem.m_regex->get_expr()).nullable == l_false) {
|
||||
enode_pair_vector eqs;
|
||||
if (mem.m_str->is_empty() && m_sgraph.re_nullable(mem.m_regex) == l_false) {
|
||||
literal_vector lits;
|
||||
lits.push_back(mem.lit);
|
||||
set_conflict(eqs, lits);
|
||||
set_conflict(lits);
|
||||
return;
|
||||
}
|
||||
|
||||
// ensure length term exists for the string argument
|
||||
expr* s_expr = mem.m_str->get_expr();
|
||||
if (s_expr)
|
||||
ensure_length_var(s_expr);
|
||||
|
||||
if (!get_fparams().m_nseq_regex_factorization_threshold)
|
||||
return;
|
||||
|
||||
// Boolean Closure Propagations
|
||||
expr* re_expr = mem.m_regex->get_expr();
|
||||
if (m_seq.re.is_intersection(re_expr)) {
|
||||
for (expr* arg : *to_app(re_expr)) {
|
||||
expr_ref in_r(m_seq.re.mk_in_re(s_expr, arg), m);
|
||||
if (mem.m_regex->is_intersect()) {
|
||||
// u \in r1 & r_2 → u \in r1 && u \in r2
|
||||
for (const euf::snode* const arg : *mem.m_regex) {
|
||||
expr_ref in_r(m_seq.re.mk_in_re(s, arg->get_expr()), m);
|
||||
literal_vector lits;
|
||||
lits.push_back(~mem.lit);
|
||||
lits.push_back(mk_literal(in_r));
|
||||
|
|
@ -494,11 +509,12 @@ namespace smt {
|
|||
ctx.push_trail(insert_map(m_ignored_mem, mem.lit));
|
||||
return;
|
||||
}
|
||||
if (m_seq.re.is_union(re_expr)) {
|
||||
if (mem.m_regex->is_union()) {
|
||||
// u \in r1 | r_2 → u \in r1 || u \in r2
|
||||
literal_vector lits;
|
||||
lits.push_back(~mem.lit);
|
||||
for (expr* arg : *to_app(re_expr)) {
|
||||
expr_ref in_r(m_seq.re.mk_in_re(s_expr, arg), m);
|
||||
for (const euf::snode* const arg : *mem.m_regex) {
|
||||
expr_ref in_r(m_seq.re.mk_in_re(s, arg->get_expr()), m);
|
||||
lits.push_back(mk_literal(in_r));
|
||||
}
|
||||
ctx.mk_th_axiom(get_id(), lits.size(), lits.data());
|
||||
|
|
@ -506,24 +522,81 @@ namespace smt {
|
|||
ctx.push_trail(insert_map(m_ignored_mem, mem.lit));
|
||||
return;
|
||||
}
|
||||
if (m_seq.re.is_to_re(re_expr)) {
|
||||
return;
|
||||
zstring s;
|
||||
expr_ref arg(to_app(re_expr)->get_arg(0), m);
|
||||
if (m_seq.str.is_string(arg, s)) {
|
||||
|
||||
if (mem.m_regex->is_to_re()) {
|
||||
// u \in v (with v is constant) → u = v
|
||||
zstring str;
|
||||
const expr_ref arg(to_app(re)->get_arg(0), m);
|
||||
if (m_seq.str.is_string(arg, str)) {
|
||||
literal_vector lits;
|
||||
lits.push_back(~mem.lit);
|
||||
lits.push_back(mk_literal(m.mk_eq(s_expr, arg)));
|
||||
lits.push_back(mk_literal(m.mk_eq(m_seq.str.mk_string(str), arg)));
|
||||
ctx.mk_th_axiom(get_id(), lits.size(), lits.data());
|
||||
m_ignored_mem.insert(mem.lit);
|
||||
ctx.push_trail(insert_map(m_ignored_mem, mem.lit));
|
||||
}
|
||||
}
|
||||
|
||||
// Eager sigma factorization (token-level): when enabled, split a non-primitive
|
||||
// membership s ∈ r at the boundary between the first concat argument (head) and
|
||||
// the rest (tail), using compute_sigma. This mirrors the lazy Nielsen
|
||||
// apply_regex_factorization and the paper's Reduce rule for x·u'.
|
||||
// (s ∈ r) → ⋁_{⟨Δ,∇⟩∈σ(r)} ( head ∈ Δ ∧ tail ∈ ∇ )
|
||||
// Only fires for a concatenation s (single-variable s is already primitive).
|
||||
if (get_fparams().m_nseq_regex_factorization_eager &&
|
||||
get_fparams().m_nseq_regex_factorization_threshold > 0 &&
|
||||
mem.m_str->is_concat()) {
|
||||
const app* const a = to_app(s);
|
||||
const unsigned na = a->get_num_args();
|
||||
SASSERT(na >= 2);
|
||||
|
||||
const expr_ref head(a->get_arg(0), m);
|
||||
const expr_ref tail(m_seq.str.mk_concat(na - 1, a->get_args() + 1, s->get_sort()), m);
|
||||
|
||||
const unsigned threshold = get_fparams().m_nseq_regex_factorization_threshold;
|
||||
seq::sigma_pairs pairs;
|
||||
if (!seq::compute_sigma(m, m_seq, m_rewriter, mem.m_regex, pairs, threshold))
|
||||
// we give up
|
||||
return;
|
||||
|
||||
seq::simplify_sigma_pairs(pairs, m_regex, m_sgraph);
|
||||
|
||||
if (pairs.empty()) {
|
||||
// no viable splits
|
||||
literal_vector lits;
|
||||
lits.push_back(~mem.lit);
|
||||
set_conflict(lits);
|
||||
return;
|
||||
}
|
||||
if (pairs.size() <= threshold) {
|
||||
TRACE(seq, tout << "eager regex fact: " << mk_pp(s, m) << " in "
|
||||
<< mk_pp(re, m) << " -> " << pairs.size() << " splits\n";);
|
||||
|
||||
if (!ctx.e_internalized(head))
|
||||
ctx.internalize(head, false);
|
||||
if (!ctx.e_internalized(tail))
|
||||
ctx.internalize(tail, false);
|
||||
|
||||
// forward direction; mk_literal Tseitin-encodes each conjunction
|
||||
literal_vector lits;
|
||||
lits.push_back(~mem.lit);
|
||||
for (auto const& sp : pairs) {
|
||||
expr_ref mem_head(m_seq.re.mk_in_re(head, sp.m_p), m);
|
||||
expr_ref mem_tail(m_seq.re.mk_in_re(tail, sp.m_q), m);
|
||||
expr_ref conj(m.mk_and(mem_head, mem_tail), m);
|
||||
lits.push_back(mk_literal(conj));
|
||||
}
|
||||
ctx.mk_th_axiom(get_id(), lits.size(), lits.data());
|
||||
m_ignored_mem.insert(mem.lit);
|
||||
ctx.push_trail(insert_map(m_ignored_mem, mem.lit));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void theory_nseq::ensure_length_var(expr* e) {
|
||||
void theory_nseq::ensure_length_var(expr* e) const {
|
||||
SASSERT(e && m_seq.is_seq(e));
|
||||
expr_ref len(m_seq.str.mk_length(e), m);
|
||||
const expr_ref len(m_seq.str.mk_length(e), m);
|
||||
if (!ctx.e_internalized(len))
|
||||
ctx.internalize(len, false);
|
||||
}
|
||||
|
|
@ -773,6 +846,7 @@ namespace smt {
|
|||
m_nielsen.set_parikh_enabled(get_fparams().m_nseq_parikh);
|
||||
m_nielsen.set_signature_split(get_fparams().m_nseq_signature);
|
||||
m_nielsen.set_regex_factorization_threshold(get_fparams().m_nseq_regex_factorization_threshold);
|
||||
m_nielsen.set_regex_factorization_eager(get_fparams().m_nseq_regex_factorization_eager);
|
||||
}
|
||||
|
||||
SASSERT(!m_nielsen.root()->is_currently_conflict());
|
||||
|
|
@ -1044,11 +1118,11 @@ namespace smt {
|
|||
std::cout << "The root node contained " << m_nielsen.root()->str_mems().size() << " memberships and " << m_nielsen.root()->str_eqs().size() << " equalities" << std::endl;
|
||||
unsigned idx = 0;
|
||||
for (auto& eq : m_nielsen.root()->str_eqs()) {
|
||||
std::cout << "[" << (idx++) << "]: " << seq::eq_pp(eq, m);
|
||||
std::cout << "[" << (idx++) << "]: " << seq::eq_pp(eq, m) << "\n";
|
||||
}
|
||||
idx = 0;
|
||||
for (auto& mem : m_nielsen.root()->str_mems()) {
|
||||
std::cout << "[" << (idx++) << "]: " << seq::mem_pp(mem, m);
|
||||
std::cout << "[" << (idx++) << "]: " << seq::mem_pp(mem, m) << "\n";
|
||||
}
|
||||
std::flush(std::cout);
|
||||
#endif
|
||||
|
|
@ -1505,7 +1579,6 @@ namespace smt {
|
|||
if (result == l_true) {
|
||||
// Intersection is empty → the memberships on this variable are
|
||||
// jointly unsatisfiable. Assert a conflict from all their literals.
|
||||
enode_pair_vector eqs;
|
||||
literal_vector lits;
|
||||
for (unsigned i : mem_indices) {
|
||||
SASSERT(ctx.get_assignment(mems[i]->lit) == l_true); // we already stored the polarity of the literal
|
||||
|
|
@ -1513,7 +1586,7 @@ namespace smt {
|
|||
}
|
||||
TRACE(seq, tout << "nseq regex precheck: empty intersection for var "
|
||||
<< var_id << ", conflict with " << lits.size() << " lits\n";);
|
||||
set_conflict(eqs, lits);
|
||||
set_conflict(lits);
|
||||
return l_true; // conflict asserted
|
||||
}
|
||||
if (result == l_undef)
|
||||
|
|
|
|||
|
|
@ -137,6 +137,14 @@ namespace smt {
|
|||
void populate_nielsen_graph();
|
||||
void explain_nielsen_conflict();
|
||||
void set_conflict(enode_pair_vector const& eqs, literal_vector const& lits);
|
||||
void set_conflict(literal_vector const& lits) {
|
||||
const enode_pair_vector eqs;
|
||||
set_conflict(eqs, lits);
|
||||
}
|
||||
void set_conflict(enode_pair_vector const& eqs) {
|
||||
const literal_vector lits;
|
||||
set_conflict(eqs, lits);
|
||||
}
|
||||
void set_propagate(enode_pair_vector const &eqs, literal_vector const &lits, literal p);
|
||||
bool add_nielsen_assumptions();
|
||||
euf::snode* get_snode(expr* e);
|
||||
|
|
@ -147,7 +155,7 @@ namespace smt {
|
|||
void propagate_pos_mem(tracked_str_mem const& mem);
|
||||
void enqueue_axiom(expr* e);
|
||||
void dequeue_axiom(expr* e);
|
||||
void ensure_length_var(expr* e);
|
||||
void ensure_length_var(expr* e) const;
|
||||
|
||||
// higher-order term unfolding
|
||||
bool unfold_ho_terms();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue