mirror of
https://github.com/Z3Prover/z3
synced 2026-03-17 18:43:45 +00:00
* Initial plan * replace dep_tracker uint_set with scoped_dependency_manager<dep_source> Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> * fix test build: update dep_tracker usages in test files and seq_state.h Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
4593 lines
194 KiB
C++
4593 lines
194 KiB
C++
/*++
|
||
Copyright (c) 2026 Microsoft Corporation
|
||
|
||
Module Name:
|
||
|
||
seq_nielsen.cpp
|
||
|
||
Abstract:
|
||
|
||
Nielsen graph implementation for string constraint solving.
|
||
|
||
Ports the constraint types and Nielsen graph structures from
|
||
ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
|
||
|
||
Author:
|
||
|
||
Clemens Eisenhofer 2026-03-02
|
||
Nikolaj Bjorner (nbjorner) 2026-03-02
|
||
|
||
--*/
|
||
|
||
#include "smt/seq/seq_nielsen.h"
|
||
#include "smt/seq/seq_parikh.h"
|
||
#include "smt/seq/seq_regex.h"
|
||
#include "ast/arith_decl_plugin.h"
|
||
#include "ast/ast_pp.h"
|
||
#include "ast/rewriter/seq_rewriter.h"
|
||
#include "ast/rewriter/th_rewriter.h"
|
||
#include "util/hashtable.h"
|
||
#include "util/statistics.h"
|
||
#include <algorithm>
|
||
#include <cstdlib>
|
||
#include <sstream>
|
||
|
||
namespace seq {
|
||
|
||
// Normalize an arithmetic expression using th_rewriter.
|
||
// Simplifies e.g. (n - 1 + 1) to n, preventing unbounded growth
|
||
// of power exponents during unwind/merge cycles.
|
||
static expr_ref normalize_arith(ast_manager& m, expr* e) {
|
||
expr_ref result(e, m);
|
||
th_rewriter rw(m);
|
||
rw(result);
|
||
return result;
|
||
}
|
||
|
||
// Directional helpers mirroring ZIPT's fwd flag:
|
||
// fwd=true -> left-to-right (prefix/head)
|
||
// fwd=false -> right-to-left (suffix/tail)
|
||
static euf::snode* dir_token(euf::snode* s, bool fwd) {
|
||
if (!s) return nullptr;
|
||
return fwd ? s->first() : s->last();
|
||
}
|
||
|
||
static euf::snode* dir_drop(euf::sgraph& sg, euf::snode* s, unsigned count, bool fwd) {
|
||
if (!s || count == 0) return s;
|
||
return fwd ? sg.drop_left(s, count) : sg.drop_right(s, count);
|
||
}
|
||
|
||
static euf::snode* dir_concat(euf::sgraph& sg, euf::snode* a, euf::snode* b, bool fwd) {
|
||
if (!a) return b;
|
||
if (!b) return a;
|
||
return fwd ? sg.mk_concat(a, b) : sg.mk_concat(b, a);
|
||
}
|
||
|
||
static void collect_tokens_dir(euf::snode* s, bool fwd, euf::snode_vector& toks) {
|
||
toks.reset();
|
||
if (!s) return;
|
||
s->collect_tokens(toks);
|
||
if (fwd || toks.size() < 2) return;
|
||
unsigned n = toks.size();
|
||
for (unsigned i = 0; i < n / 2; ++i)
|
||
std::swap(toks[i], toks[n - 1 - i]);
|
||
}
|
||
|
||
// Right-derivative helper used by backward str_mem simplification:
|
||
// dR(re, c) = reverse( derivative(c, reverse(re)) ).
|
||
static euf::snode* reverse_brzozowski_deriv(euf::sgraph& sg, euf::snode* re, euf::snode* elem) {
|
||
if (!re || !elem || !re->get_expr() || !elem->get_expr())
|
||
return nullptr;
|
||
ast_manager& m = sg.get_manager();
|
||
seq_util& seq = sg.get_seq_util();
|
||
seq_rewriter rw(m);
|
||
|
||
expr* elem_expr = elem->get_expr();
|
||
expr* ch = nullptr;
|
||
if (seq.str.is_unit(elem_expr, ch))
|
||
elem_expr = ch;
|
||
|
||
expr_ref re_rev(seq.re.mk_reverse(re->get_expr()), m);
|
||
expr_ref d = rw.mk_derivative(elem_expr, re_rev);
|
||
if (!d.get())
|
||
return nullptr;
|
||
expr_ref result(seq.re.mk_reverse(d), m);
|
||
th_rewriter tr(m);
|
||
tr(result);
|
||
return sg.mk(result);
|
||
}
|
||
|
||
// -----------------------------------------------
|
||
// str_eq
|
||
// -----------------------------------------------
|
||
|
||
void str_eq::sort() {
|
||
if (m_lhs && m_rhs && m_lhs->id() > m_rhs->id())
|
||
std::swap(m_lhs, m_rhs);
|
||
}
|
||
|
||
bool str_eq::is_trivial() const {
|
||
return m_lhs == m_rhs ||
|
||
(m_lhs && m_rhs && m_lhs->is_empty() && m_rhs->is_empty());
|
||
}
|
||
|
||
bool str_eq::contains_var(euf::snode* var) const {
|
||
if (!var) return false;
|
||
// check if var appears in the token list of lhs or rhs
|
||
if (m_lhs) {
|
||
euf::snode_vector tokens;
|
||
m_lhs->collect_tokens(tokens);
|
||
for (euf::snode* t : tokens)
|
||
if (t == var) return true;
|
||
}
|
||
if (m_rhs) {
|
||
euf::snode_vector tokens;
|
||
m_rhs->collect_tokens(tokens);
|
||
for (euf::snode* t : tokens)
|
||
if (t == var) return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------
|
||
// str_mem
|
||
// -----------------------------------------------
|
||
|
||
bool str_mem::is_primitive() const {
|
||
return m_str && m_str->length() == 1 && m_str->is_var();
|
||
}
|
||
|
||
bool str_mem::contains_var(euf::snode* var) const {
|
||
if (!var) return false;
|
||
if (m_str) {
|
||
euf::snode_vector tokens;
|
||
m_str->collect_tokens(tokens);
|
||
for (euf::snode* t : tokens)
|
||
if (t == var) return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------
|
||
// nielsen_subst
|
||
// -----------------------------------------------
|
||
|
||
bool nielsen_subst::is_eliminating() const {
|
||
if (!m_var || !m_replacement) return true;
|
||
// check if var appears in replacement
|
||
euf::snode_vector tokens;
|
||
m_replacement->collect_tokens(tokens);
|
||
for (euf::snode* t : tokens)
|
||
if (t == m_var) return false;
|
||
return true;
|
||
}
|
||
|
||
// -----------------------------------------------
|
||
// nielsen_edge
|
||
// -----------------------------------------------
|
||
|
||
nielsen_edge::nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress):
|
||
m_src(src), m_tgt(tgt), m_is_progress(is_progress) {
|
||
}
|
||
|
||
// -----------------------------------------------
|
||
// nielsen_node
|
||
// -----------------------------------------------
|
||
|
||
nielsen_node::nielsen_node(nielsen_graph& graph, unsigned id):
|
||
m_id(id), m_graph(graph), m_is_progress(true) {
|
||
}
|
||
|
||
void nielsen_node::clone_from(nielsen_node const& parent) {
|
||
m_str_eq.reset();
|
||
m_str_mem.reset();
|
||
m_int_constraints.reset();
|
||
m_char_diseqs.reset();
|
||
m_char_ranges.reset();
|
||
m_var_lb.reset();
|
||
m_var_ub.reset();
|
||
for (auto const& eq : parent.m_str_eq)
|
||
m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep));
|
||
for (auto const& mem : parent.m_str_mem)
|
||
m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_history, mem.m_id, mem.m_dep));
|
||
for (auto const& ic : parent.m_int_constraints)
|
||
m_int_constraints.push_back(ic);
|
||
// clone character disequalities
|
||
for (auto const& kv : parent.m_char_diseqs) {
|
||
ptr_vector<euf::snode> diseqs;
|
||
for (euf::snode* s : kv.m_value)
|
||
diseqs.push_back(s);
|
||
m_char_diseqs.insert(kv.m_key, diseqs);
|
||
}
|
||
// clone character ranges
|
||
for (auto const& kv : parent.m_char_ranges) {
|
||
m_char_ranges.insert(kv.m_key, kv.m_value.clone());
|
||
}
|
||
// clone per-variable integer bounds
|
||
for (auto const& kv : parent.m_var_lb)
|
||
m_var_lb.insert(kv.m_key, kv.m_value);
|
||
for (auto const& kv : parent.m_var_ub)
|
||
m_var_ub.insert(kv.m_key, kv.m_value);
|
||
// clone regex occurrence tracking
|
||
m_regex_occurrence = parent.m_regex_occurrence;
|
||
}
|
||
|
||
bool nielsen_node::track_regex_occurrence(unsigned regex_id, unsigned mem_id) {
|
||
auto key = std::make_pair(regex_id, mem_id);
|
||
auto it = m_regex_occurrence.find(key);
|
||
if (it != m_regex_occurrence.end()) {
|
||
// Already seen — cycle detected
|
||
return true;
|
||
}
|
||
m_regex_occurrence[key] = m_id;
|
||
return false;
|
||
}
|
||
|
||
void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) {
|
||
if (!s.m_var) return;
|
||
for (unsigned i = 0; i < m_str_eq.size(); ++i) {
|
||
str_eq& eq = m_str_eq[i];
|
||
eq.m_lhs = sg.subst(eq.m_lhs, s.m_var, s.m_replacement);
|
||
eq.m_rhs = sg.subst(eq.m_rhs, s.m_var, s.m_replacement);
|
||
eq.m_dep = m_graph.dep_mgr().mk_join(eq.m_dep, s.m_dep);
|
||
eq.sort();
|
||
}
|
||
for (unsigned i = 0; i < m_str_mem.size(); ++i) {
|
||
str_mem& mem = m_str_mem[i];
|
||
mem.m_str = sg.subst(mem.m_str, s.m_var, s.m_replacement);
|
||
// regex is typically ground, but apply subst for generality
|
||
mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement);
|
||
mem.m_dep = m_graph.dep_mgr().mk_join(mem.m_dep, s.m_dep);
|
||
}
|
||
// VarBoundWatcher: propagate bounds on s.m_var to variables in s.m_replacement
|
||
watch_var_bounds(s);
|
||
}
|
||
|
||
void nielsen_node::add_char_range(euf::snode* sym_char, char_set const& range) {
|
||
SASSERT(sym_char && sym_char->is_unit());
|
||
unsigned id = sym_char->id();
|
||
if (m_char_ranges.contains(id)) {
|
||
char_set& existing = m_char_ranges.find(id);
|
||
char_set inter = existing.intersect_with(range);
|
||
existing = inter;
|
||
if (inter.is_empty()) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::character_range;
|
||
}
|
||
} else {
|
||
m_char_ranges.insert(id, range.clone());
|
||
}
|
||
}
|
||
|
||
void nielsen_node::add_char_diseq(euf::snode* sym_char, euf::snode* other) {
|
||
SASSERT(sym_char && sym_char->is_unit());
|
||
SASSERT(other && other->is_unit());
|
||
unsigned id = sym_char->id();
|
||
if (!m_char_diseqs.contains(id))
|
||
m_char_diseqs.insert(id, ptr_vector<euf::snode>());
|
||
ptr_vector<euf::snode>& existing = m_char_diseqs.find(id);
|
||
// check for duplicates
|
||
for (euf::snode* s : existing)
|
||
if (s == other) return;
|
||
existing.push_back(other);
|
||
}
|
||
|
||
// -----------------------------------------------
|
||
// nielsen_node: IntBounds methods
|
||
// mirrors ZIPT's AddLowerIntBound / AddHigherIntBound
|
||
// -----------------------------------------------
|
||
|
||
unsigned nielsen_node::var_lb(euf::snode* var) const {
|
||
if (!var) return 0;
|
||
unsigned v = 0;
|
||
m_var_lb.find(var->id(), v);
|
||
return v;
|
||
}
|
||
|
||
unsigned nielsen_node::var_ub(euf::snode* var) const {
|
||
if (!var) return UINT_MAX;
|
||
unsigned v = UINT_MAX;
|
||
m_var_ub.find(var->id(), v);
|
||
return v;
|
||
}
|
||
|
||
bool nielsen_node::add_lower_int_bound(euf::snode* var, unsigned lb, dep_tracker const& dep) {
|
||
if (!var || !var->is_var()) return false;
|
||
unsigned id = var->id();
|
||
// check against existing lower bound
|
||
unsigned cur_lb = 0;
|
||
m_var_lb.find(id, cur_lb);
|
||
if (lb <= cur_lb) return false; // no tightening
|
||
m_var_lb.insert(id, lb);
|
||
// conflict if lb > current upper bound
|
||
unsigned cur_ub = UINT_MAX;
|
||
m_var_ub.find(id, cur_ub);
|
||
if (lb > cur_ub) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::arithmetic;
|
||
return true;
|
||
}
|
||
// add int_constraint: len(var) >= lb
|
||
ast_manager& m = m_graph.m();
|
||
seq_util& seq = m_graph.seq();
|
||
arith_util arith(m);
|
||
expr_ref len_var(seq.str.mk_length(var->get_expr()), m);
|
||
expr_ref bound(arith.mk_int(lb), m);
|
||
m_int_constraints.push_back(int_constraint(len_var, bound, int_constraint_kind::ge, dep, m));
|
||
return true;
|
||
}
|
||
|
||
bool nielsen_node::add_upper_int_bound(euf::snode* var, unsigned ub, dep_tracker const& dep) {
|
||
if (!var || !var->is_var()) return false;
|
||
unsigned id = var->id();
|
||
// check against existing upper bound
|
||
unsigned cur_ub = UINT_MAX;
|
||
m_var_ub.find(id, cur_ub);
|
||
if (ub >= cur_ub) return false; // no tightening
|
||
m_var_ub.insert(id, ub);
|
||
// conflict if current lower bound > ub
|
||
unsigned cur_lb = 0;
|
||
m_var_lb.find(id, cur_lb);
|
||
if (cur_lb > ub) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::arithmetic;
|
||
return true;
|
||
}
|
||
// add int_constraint: len(var) <= ub
|
||
ast_manager& m = m_graph.m();
|
||
seq_util& seq = m_graph.seq();
|
||
arith_util arith(m);
|
||
expr_ref len_var(seq.str.mk_length(var->get_expr()), m);
|
||
expr_ref bound(arith.mk_int(ub), m);
|
||
m_int_constraints.push_back(int_constraint(len_var, bound, int_constraint_kind::le, dep, m));
|
||
return true;
|
||
}
|
||
|
||
// VarBoundWatcher: after applying substitution s, propagate bounds on s.m_var
|
||
// to variables in s.m_replacement.
|
||
// If s.m_var has bounds [lo, hi], and the replacement decomposes into
|
||
// const_len concrete chars plus a list of variable tokens, then:
|
||
// - for a single variable y: lo-const_len <= len(y) <= hi-const_len
|
||
// - for multiple variables: each gets an upper bound hi-const_len
|
||
// Mirrors ZIPT's VarBoundWatcher mechanism.
|
||
void nielsen_node::watch_var_bounds(nielsen_subst const& s) {
|
||
if (!s.m_var) return;
|
||
unsigned id = s.m_var->id();
|
||
unsigned lo = 0, hi = UINT_MAX;
|
||
m_var_lb.find(id, lo);
|
||
m_var_ub.find(id, hi);
|
||
if (lo == 0 && hi == UINT_MAX) return; // no bounds to propagate
|
||
|
||
// decompose replacement into constant length + variable tokens
|
||
if (!s.m_replacement) return;
|
||
euf::snode_vector tokens;
|
||
s.m_replacement->collect_tokens(tokens);
|
||
|
||
unsigned const_len = 0;
|
||
euf::snode_vector var_tokens;
|
||
for (euf::snode* t : tokens) {
|
||
if (t->is_char() || t->is_unit()) {
|
||
++const_len;
|
||
} else if (t->is_var()) {
|
||
var_tokens.push_back(t);
|
||
} else {
|
||
// power or unknown token: cannot propagate simply, abort
|
||
return;
|
||
}
|
||
}
|
||
|
||
if (var_tokens.empty()) {
|
||
// all concrete: check if const_len is within [lo, hi]
|
||
if (const_len < lo || (hi != UINT_MAX && const_len > hi)) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::arithmetic;
|
||
}
|
||
return;
|
||
}
|
||
|
||
if (var_tokens.size() == 1) {
|
||
euf::snode* y = var_tokens[0];
|
||
// lo <= const_len + len(y) => len(y) >= lo - const_len (if lo > const_len)
|
||
if (lo > const_len)
|
||
add_lower_int_bound(y, lo - const_len, s.m_dep);
|
||
// const_len + len(y) <= hi => len(y) <= hi - const_len
|
||
if (hi != UINT_MAX) {
|
||
if (const_len > hi) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::arithmetic;
|
||
} else {
|
||
add_upper_int_bound(y, hi - const_len, s.m_dep);
|
||
}
|
||
}
|
||
} else {
|
||
// multiple variables: propagate upper bound to each
|
||
// (each variable contributes >= 0, so each <= hi - const_len)
|
||
if (hi != UINT_MAX) {
|
||
if (const_len > hi) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::arithmetic;
|
||
return;
|
||
}
|
||
unsigned each_ub = hi - const_len;
|
||
for (euf::snode* y : var_tokens)
|
||
add_upper_int_bound(y, each_ub, s.m_dep);
|
||
}
|
||
}
|
||
}
|
||
|
||
// Initialize per-variable Parikh bounds from this node's regex memberships.
|
||
// For each str_mem (str ∈ regex) with bounded regex length [min_len, max_len],
|
||
// calls add_lower/upper_int_bound for the primary string variable (if str is
|
||
// a single variable) or stores a bound on the length expression otherwise.
|
||
void nielsen_node::init_var_bounds_from_mems() {
|
||
for (str_mem const& mem : m_str_mem) {
|
||
if (!mem.m_str || !mem.m_regex) continue;
|
||
unsigned min_len = 0, max_len = UINT_MAX;
|
||
m_graph.compute_regex_length_interval(mem.m_regex, min_len, max_len);
|
||
if (min_len == 0 && max_len == UINT_MAX) continue;
|
||
|
||
// if str is a single variable, apply bounds directly
|
||
if (mem.m_str->is_var()) {
|
||
if (min_len > 0)
|
||
add_lower_int_bound(mem.m_str, min_len, mem.m_dep);
|
||
if (max_len < UINT_MAX)
|
||
add_upper_int_bound(mem.m_str, max_len, mem.m_dep);
|
||
} else {
|
||
// str is a concatenation or other term: add as general int_constraints
|
||
ast_manager& m = m_graph.m();
|
||
arith_util arith(m);
|
||
expr_ref len_str = m_graph.compute_length_expr(mem.m_str);
|
||
if (min_len > 0) {
|
||
expr_ref bound(arith.mk_int(min_len), m);
|
||
m_int_constraints.push_back(
|
||
int_constraint(len_str, bound, int_constraint_kind::ge, mem.m_dep, m));
|
||
}
|
||
if (max_len < UINT_MAX) {
|
||
expr_ref bound(arith.mk_int(max_len), m);
|
||
m_int_constraints.push_back(
|
||
int_constraint(len_str, bound, int_constraint_kind::le, mem.m_dep, m));
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
void nielsen_node::apply_char_subst(euf::sgraph& sg, char_subst const& s) {
|
||
if (!s.m_var) return;
|
||
|
||
// replace occurrences of s.m_var with s.m_val in all string constraints
|
||
for (unsigned i = 0; i < m_str_eq.size(); ++i) {
|
||
str_eq& eq = m_str_eq[i];
|
||
eq.m_lhs = sg.subst(eq.m_lhs, s.m_var, s.m_val);
|
||
eq.m_rhs = sg.subst(eq.m_rhs, s.m_var, s.m_val);
|
||
eq.sort();
|
||
}
|
||
for (unsigned i = 0; i < m_str_mem.size(); ++i) {
|
||
str_mem& mem = m_str_mem[i];
|
||
mem.m_str = sg.subst(mem.m_str, s.m_var, s.m_val);
|
||
mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_val);
|
||
}
|
||
|
||
unsigned var_id = s.m_var->id();
|
||
|
||
if (s.m_val->is_unit()) {
|
||
// symbolic char → symbolic char: check disequalities
|
||
if (m_char_diseqs.contains(var_id)) {
|
||
ptr_vector<euf::snode>& diseqs = m_char_diseqs.find(var_id);
|
||
for (euf::snode* d : diseqs) {
|
||
if (d == s.m_val) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::character_range;
|
||
return;
|
||
}
|
||
}
|
||
m_char_diseqs.remove(var_id);
|
||
m_char_ranges.remove(var_id);
|
||
}
|
||
} else {
|
||
SASSERT(s.m_val->is_char());
|
||
// symbolic char → concrete char: check range constraints
|
||
if (m_char_ranges.contains(var_id)) {
|
||
char_set& range = m_char_ranges.find(var_id);
|
||
unsigned ch_val = 0;
|
||
seq_util& seq = sg.get_seq_util();
|
||
expr* unit_expr = s.m_val->get_expr();
|
||
expr* ch_expr = nullptr;
|
||
bool have_char = unit_expr && seq.str.is_unit(unit_expr, ch_expr) && seq.is_const_char(ch_expr, ch_val);
|
||
if (have_char && !range.contains(ch_val)) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::character_range;
|
||
return;
|
||
}
|
||
m_char_diseqs.remove(var_id);
|
||
m_char_ranges.remove(var_id);
|
||
}
|
||
}
|
||
}
|
||
|
||
// -----------------------------------------------
|
||
// nielsen_graph
|
||
// -----------------------------------------------
|
||
|
||
nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver):
|
||
m_m(sg.get_manager()),
|
||
m_seq(sg.get_seq_util()),
|
||
m_sg(sg),
|
||
m_solver(solver),
|
||
m_parikh(alloc(seq_parikh, sg)),
|
||
m_seq_regex(alloc(seq::seq_regex, sg)),
|
||
m_len_vars(sg.get_manager()) {
|
||
}
|
||
|
||
nielsen_graph::~nielsen_graph() {
|
||
dealloc(m_parikh);
|
||
dealloc(m_seq_regex);
|
||
reset();
|
||
}
|
||
|
||
nielsen_node* nielsen_graph::mk_node() {
|
||
unsigned id = m_nodes.size();
|
||
nielsen_node* n = alloc(nielsen_node, *this, id);
|
||
m_nodes.push_back(n);
|
||
return n;
|
||
}
|
||
|
||
nielsen_node* nielsen_graph::mk_child(nielsen_node* parent) {
|
||
nielsen_node* child = mk_node();
|
||
child->clone_from(*parent);
|
||
child->m_parent_ic_count = parent->int_constraints().size();
|
||
return child;
|
||
}
|
||
|
||
nielsen_edge* nielsen_graph::mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress) {
|
||
nielsen_edge* e = alloc(nielsen_edge, src, tgt, is_progress);
|
||
m_edges.push_back(e);
|
||
src->add_outgoing(e);
|
||
return e;
|
||
}
|
||
|
||
void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) {
|
||
if (!m_root)
|
||
m_root = mk_node();
|
||
dep_tracker dep = m_dep_mgr.mk_leaf({dep_source::kind::eq, m_num_input_eqs});
|
||
str_eq eq(lhs, rhs, dep);
|
||
eq.sort();
|
||
m_root->add_str_eq(eq);
|
||
++m_num_input_eqs;
|
||
}
|
||
|
||
void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) {
|
||
if (!m_root)
|
||
m_root = mk_node();
|
||
dep_tracker dep = m_dep_mgr.mk_leaf({dep_source::kind::mem, m_num_input_mems});
|
||
euf::snode* history = m_sg.mk_empty_seq(str->get_sort());
|
||
unsigned id = next_mem_id();
|
||
m_root->add_str_mem(str_mem(str, regex, history, id, dep));
|
||
++m_num_input_mems;
|
||
}
|
||
|
||
void nielsen_graph::inc_run_idx() {
|
||
if (m_run_idx == UINT_MAX) {
|
||
for (nielsen_node* n : m_nodes)
|
||
n->reset_counter();
|
||
m_run_idx = 1;
|
||
}
|
||
else
|
||
++m_run_idx;
|
||
}
|
||
|
||
void nielsen_graph::reset() {
|
||
for (nielsen_node* n : m_nodes)
|
||
dealloc(n);
|
||
for (nielsen_edge* e : m_edges)
|
||
dealloc(e);
|
||
m_nodes.reset();
|
||
m_edges.reset();
|
||
m_root = nullptr;
|
||
m_sat_node = nullptr;
|
||
m_sat_path.reset();
|
||
m_run_idx = 0;
|
||
m_depth_bound = 0;
|
||
m_next_mem_id = 0;
|
||
m_fresh_cnt = 0;
|
||
m_num_input_eqs = 0;
|
||
m_num_input_mems = 0;
|
||
m_root_constraints_asserted = false;
|
||
m_mod_cnt.reset();
|
||
m_len_var_cache.clear();
|
||
m_len_vars.reset();
|
||
m_dep_mgr.reset();
|
||
}
|
||
|
||
std::ostream& nielsen_graph::display(std::ostream& out) const {
|
||
out << "nielsen_graph with " << m_nodes.size() << " nodes, "
|
||
<< m_edges.size() << " edges\n";
|
||
|
||
for (nielsen_node const* n : m_nodes) {
|
||
out << " node[" << n->id() << "]";
|
||
if (n == m_root)
|
||
out << " (root)";
|
||
if (n->is_general_conflict())
|
||
out << " CONFLICT";
|
||
if (n->is_extended())
|
||
out << " EXTENDED";
|
||
out << "\n";
|
||
|
||
// display string equalities
|
||
for (auto const& eq : n->str_eqs()) {
|
||
out << " str_eq: ";
|
||
if (eq.m_lhs) out << "lhs[id=" << eq.m_lhs->id() << ",len=" << eq.m_lhs->length() << "]";
|
||
else out << "null";
|
||
out << " = ";
|
||
if (eq.m_rhs) out << "rhs[id=" << eq.m_rhs->id() << ",len=" << eq.m_rhs->length() << "]";
|
||
else out << "null";
|
||
out << "\n";
|
||
}
|
||
|
||
// display regex memberships
|
||
for (auto const& mem : n->str_mems()) {
|
||
out << " str_mem[" << mem.m_id << "]: ";
|
||
if (mem.m_str) out << "str[id=" << mem.m_str->id() << ",len=" << mem.m_str->length() << "]";
|
||
else out << "null";
|
||
out << " in ";
|
||
if (mem.m_regex) out << "re[id=" << mem.m_regex->id() << "]";
|
||
else out << "null";
|
||
out << "\n";
|
||
}
|
||
|
||
// display outgoing edges
|
||
for (nielsen_edge const* e : n->outgoing()) {
|
||
out << " -> node[" << e->tgt()->id() << "]";
|
||
if (e->is_progress()) out << " (progress)";
|
||
for (auto const& s : e->subst()) {
|
||
out << " {";
|
||
if (s.m_var) out << "var[" << s.m_var->id() << "]";
|
||
out << " -> ";
|
||
if (s.m_replacement) out << "repl[" << s.m_replacement->id() << ",len=" << s.m_replacement->length() << "]";
|
||
else out << "eps";
|
||
out << "}";
|
||
}
|
||
out << "\n";
|
||
}
|
||
|
||
if (n->backedge())
|
||
out << " backedge -> node[" << n->backedge()->id() << "]\n";
|
||
}
|
||
|
||
return out;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// nielsen_node: display_html
|
||
// Render constraint set as an HTML fragment for DOT labels.
|
||
// Mirrors ZIPT's NielsenNode.ToHtmlString().
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Helper: HTML-escape a string and replace literal \n with <br/>.
|
||
static std::string dot_html_escape(std::string const& s) {
|
||
std::string r;
|
||
r.reserve(s.size());
|
||
for (char c : s) {
|
||
switch (c) {
|
||
case '&': r += "&"; break;
|
||
case '<': r += "<"; break;
|
||
case '>': r += ">"; break;
|
||
default: r += c; break;
|
||
}
|
||
}
|
||
// replace literal "\n" two-char sequence with <br/>
|
||
std::string result;
|
||
result.reserve(r.size());
|
||
for (std::size_t i = 0; i < r.size(); ) {
|
||
if (i + 1 < r.size() && r[i] == '\\' && r[i+1] == 'n') {
|
||
result += "<br/>";
|
||
i += 2;
|
||
} else {
|
||
result += r[i++];
|
||
}
|
||
}
|
||
return result;
|
||
}
|
||
|
||
// Helper: render an arithmetic/integer expression in infix HTML notation.
|
||
// Recognises +, -, *, unary minus, numerals, str.len, and named constants;
|
||
// falls back to HTML-escaped mk_pp for anything else.
|
||
static std::string arith_expr_html(expr* e, ast_manager& m) {
|
||
if (!e) return "null";
|
||
arith_util arith(m);
|
||
seq_util seq(m);
|
||
rational val;
|
||
if (arith.is_numeral(e, val))
|
||
return val.to_string();
|
||
if (!is_app(e)) {
|
||
std::ostringstream os;
|
||
os << mk_pp(e, m);
|
||
return dot_html_escape(os.str());
|
||
}
|
||
app* a = to_app(e);
|
||
expr* x, * y;
|
||
if (arith.is_add(e)) {
|
||
std::string r = arith_expr_html(a->get_arg(0), m);
|
||
for (unsigned i = 1; i < a->get_num_args(); ++i) {
|
||
expr* arg = a->get_arg(i);
|
||
// render (+ x (- y)) as "x - y" and (+ x (- n)) as "x - n"
|
||
expr* neg_inner;
|
||
rational neg_val;
|
||
if (arith.is_uminus(arg, neg_inner)) {
|
||
r += " − "; // minus sign
|
||
r += arith_expr_html(neg_inner, m);
|
||
} else if (arith.is_numeral(arg, neg_val) && neg_val.is_neg()) {
|
||
r += " − "; // minus sign
|
||
r += (-neg_val).to_string();
|
||
} else {
|
||
r += " + ";
|
||
r += arith_expr_html(arg, m);
|
||
}
|
||
}
|
||
return r;
|
||
}
|
||
if (arith.is_sub(e, x, y))
|
||
return arith_expr_html(x, m) + " − " + arith_expr_html(y, m);
|
||
if (arith.is_uminus(e, x))
|
||
return "−" + arith_expr_html(x, m);
|
||
if (arith.is_mul(e)) {
|
||
std::string r;
|
||
for (unsigned i = 0; i < a->get_num_args(); ++i) {
|
||
if (i > 0) r += " · "; // middle dot
|
||
r += arith_expr_html(a->get_arg(i), m);
|
||
}
|
||
return r;
|
||
}
|
||
if (seq.str.is_length(e, x)) {
|
||
return "|" + dot_html_escape(to_app(x)->get_decl()->get_name().str()) + "|";
|
||
}
|
||
// named constant (fresh variable like n!0)
|
||
if (a->get_num_args() == 0)
|
||
return dot_html_escape(a->get_decl()->get_name().str());
|
||
// fallback
|
||
std::ostringstream os;
|
||
os << mk_pp(e, m);
|
||
return dot_html_escape(os.str());
|
||
}
|
||
|
||
// Helper: render an int_constraint as an HTML string for DOT edge labels.
|
||
static std::string int_constraint_html(int_constraint const& ic, ast_manager& m) {
|
||
std::string r = arith_expr_html(ic.m_lhs, m);
|
||
switch (ic.m_kind) {
|
||
case int_constraint_kind::eq: r += " = "; break;
|
||
case int_constraint_kind::le: r += " ≤ "; break; // ≤
|
||
case int_constraint_kind::ge: r += " ≥ "; break; // ≥
|
||
}
|
||
r += arith_expr_html(ic.m_rhs, m);
|
||
return r;
|
||
}
|
||
|
||
static std::string regex_expr_html(expr* e, ast_manager& m, seq_util& seq) {
|
||
if (!e) return "null";
|
||
expr* a = nullptr, * b = nullptr;
|
||
|
||
if (seq.re.is_to_re(e, a)) {
|
||
zstring s;
|
||
if (seq.str.is_string(a, s)) {
|
||
return "\"" + dot_html_escape(s.encode()) + "\"";
|
||
}
|
||
std::ostringstream os;
|
||
os << mk_pp(a, m);
|
||
return dot_html_escape(os.str());
|
||
}
|
||
if (seq.re.is_concat(e)) {
|
||
app* ap = to_app(e);
|
||
std::string res;
|
||
if (ap->get_num_args() == 0) return "()";
|
||
for (unsigned i = 0; i < ap->get_num_args(); ++i) {
|
||
if (i > 0) res += " ";
|
||
bool needs_parens = seq.re.is_union(ap->get_arg(i));
|
||
if (needs_parens) res += "(";
|
||
res += regex_expr_html(ap->get_arg(i), m, seq);
|
||
if (needs_parens) res += ")";
|
||
}
|
||
return res;
|
||
}
|
||
if (seq.re.is_union(e)) {
|
||
app* ap = to_app(e);
|
||
std::string res;
|
||
if (ap->get_num_args() == 0) return "∅";
|
||
for (unsigned i = 0; i < ap->get_num_args(); ++i) {
|
||
if (i > 0) res += " | ";
|
||
res += regex_expr_html(ap->get_arg(i), m, seq);
|
||
}
|
||
return res;
|
||
}
|
||
if (seq.re.is_intersection(e)) {
|
||
app* ap = to_app(e);
|
||
std::string res;
|
||
for (unsigned i = 0; i < ap->get_num_args(); ++i) {
|
||
if (i > 0) res += " & ";
|
||
bool needs_parens = seq.re.is_union(ap->get_arg(i)) || seq.re.is_concat(ap->get_arg(i));
|
||
if (needs_parens) res += "(";
|
||
res += regex_expr_html(ap->get_arg(i), m, seq);
|
||
if (needs_parens) res += ")";
|
||
}
|
||
return res;
|
||
}
|
||
if (seq.re.is_star(e, a)) {
|
||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||
std::string res = needs_parens ? "(" : "";
|
||
res += regex_expr_html(a, m, seq);
|
||
res += needs_parens ? ")<SUP>*</SUP>" : "<SUP>*</SUP>";
|
||
return res;
|
||
}
|
||
if (seq.re.is_plus(e, a)) {
|
||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||
std::string res = needs_parens ? "(" : "";
|
||
res += regex_expr_html(a, m, seq);
|
||
res += needs_parens ? ")<SUP>+</SUP>" : "<SUP>+</SUP>";
|
||
return res;
|
||
}
|
||
if (seq.re.is_opt(e, a)) {
|
||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||
std::string res = needs_parens ? "(" : "";
|
||
res += regex_expr_html(a, m, seq);
|
||
res += needs_parens ? ")?" : "?";
|
||
return res;
|
||
}
|
||
if (seq.re.is_complement(e, a)) {
|
||
bool needs_parens = seq.re.is_union(a) || seq.re.is_concat(a) || seq.re.is_intersection(a);
|
||
std::string res = "~";
|
||
res += needs_parens ? "(" : "";
|
||
res += regex_expr_html(a, m, seq);
|
||
res += needs_parens ? ")" : "";
|
||
return res;
|
||
}
|
||
if (seq.re.is_range(e, a, b)) {
|
||
zstring s1, s2;
|
||
std::string c1 = seq.str.is_string(a, s1) ? dot_html_escape(s1.encode()) : arith_expr_html(a, m);
|
||
std::string c2 = seq.str.is_string(b, s2) ? dot_html_escape(s2.encode()) : arith_expr_html(b, m);
|
||
return "[" + c1 + "-" + c2 + "]";
|
||
}
|
||
if (seq.re.is_full_char(e)) {
|
||
return "Σ"; // Sigma
|
||
}
|
||
if (seq.re.is_full_seq(e)) {
|
||
return "Σ<SUP>*</SUP>"; // Sigma*
|
||
}
|
||
if (seq.re.is_empty(e)) {
|
||
return "∅"; // empty set
|
||
}
|
||
|
||
std::ostringstream os;
|
||
os << mk_pp(e, m);
|
||
return dot_html_escape(os.str());
|
||
}
|
||
|
||
// Helper: render a snode as an HTML label for DOT output.
|
||
// Groups consecutive s_char tokens into quoted strings, renders s_var by name,
|
||
// shows s_power with superscripts, s_unit by its inner expression,
|
||
// and falls back to mk_pp (HTML-escaped) for other token kinds.
|
||
std::string snode_label_html(euf::snode const* n, ast_manager& m) {
|
||
if (!n) return "null";
|
||
seq_util seq(m);
|
||
|
||
// collect all leaf tokens left-to-right
|
||
euf::snode_vector tokens;
|
||
n->collect_tokens(tokens);
|
||
|
||
if (tokens.empty())
|
||
return "\"\""; // empty string
|
||
|
||
std::string result;
|
||
std::string char_acc; // accumulator for consecutive printable chars
|
||
bool first = true;
|
||
|
||
// flush accumulated chars as a quoted string
|
||
auto flush_chars = [&]() {
|
||
if (char_acc.empty()) return;
|
||
if (!first) result += " + ";
|
||
result += "\"" + dot_html_escape(char_acc) + "\"";
|
||
first = false;
|
||
char_acc.clear();
|
||
};
|
||
|
||
for (euf::snode const* tok : tokens) {
|
||
expr* e = tok->get_expr();
|
||
|
||
// s_char: seq.unit(const_char) – extract char code and accumulate
|
||
if (tok->is_char() && e) {
|
||
expr* ch_expr = to_app(e)->get_arg(0);
|
||
unsigned code = 0;
|
||
if (seq.is_const_char(ch_expr, code)) {
|
||
if (code >= 32 && code < 127 && code != '"' && code != '\\') {
|
||
char_acc += static_cast<char>(code);
|
||
} else {
|
||
flush_chars();
|
||
char buf[16];
|
||
snprintf(buf, sizeof(buf), "'\\x%x'", code);
|
||
if (!first) result += " + ";
|
||
result += buf;
|
||
first = false;
|
||
}
|
||
continue;
|
||
}
|
||
}
|
||
|
||
flush_chars();
|
||
if (!first) result += " + ";
|
||
first = false;
|
||
|
||
if (!e) {
|
||
result += "#" + std::to_string(tok->id());
|
||
} else if (tok->is_var()) {
|
||
result += dot_html_escape(to_app(e)->get_decl()->get_name().str());
|
||
} else if (tok->is_unit()) {
|
||
// seq.unit with non-literal character: show the character expression
|
||
expr* ch = to_app(e)->get_arg(0);
|
||
if (is_app(ch)) {
|
||
result += dot_html_escape(to_app(ch)->get_decl()->get_name().str());
|
||
} else {
|
||
std::ostringstream os;
|
||
os << mk_pp(ch, m);
|
||
result += dot_html_escape(os.str());
|
||
}
|
||
} else if (tok->is_power()) {
|
||
// seq.power(base, n): render as base<SUP>n</SUP>
|
||
std::string base_html = snode_label_html(tok->arg(0), m);
|
||
if (tok->arg(0)->length() > 1)
|
||
base_html = "(" + base_html + ")";
|
||
result += base_html;
|
||
result += "<SUP>";
|
||
expr* exp_expr = to_app(e)->get_arg(1);
|
||
result += arith_expr_html(exp_expr, m);
|
||
result += "</SUP>";
|
||
} else if (e && seq.is_re(e)) {
|
||
result += regex_expr_html(e, m, seq);
|
||
} else {
|
||
std::ostringstream os;
|
||
os << mk_pp(e, m);
|
||
result += dot_html_escape(os.str());
|
||
}
|
||
}
|
||
flush_chars();
|
||
return result;
|
||
}
|
||
|
||
std::ostream& nielsen_node::display_html(std::ostream& out, ast_manager& m) const {
|
||
bool any = false;
|
||
|
||
// string equalities
|
||
for (auto const& eq : m_str_eq) {
|
||
if (!any) { out << "Cnstr:<br/>"; any = true; }
|
||
out << snode_label_html(eq.m_lhs, m)
|
||
<< " = "
|
||
<< snode_label_html(eq.m_rhs, m)
|
||
<< "<br/>";
|
||
}
|
||
// regex memberships
|
||
for (auto const& mem : m_str_mem) {
|
||
if (!any) { out << "Cnstr:<br/>"; any = true; }
|
||
out << snode_label_html(mem.m_str, m)
|
||
<< " ∈ "
|
||
<< snode_label_html(mem.m_regex, m)
|
||
<< "<br/>";
|
||
}
|
||
// character ranges
|
||
for (auto const& kv : m_char_ranges) {
|
||
if (!any) { out << "Cnstr:<br/>"; any = true; }
|
||
out << "?" << kv.m_key << " ∈ ";
|
||
kv.m_value.display(out);
|
||
out << "<br/>";
|
||
}
|
||
// character disequalities
|
||
for (auto const& kv : m_char_diseqs) {
|
||
if (!any) { out << "Cnstr:<br/>"; any = true; }
|
||
for (euf::snode* d : kv.m_value) {
|
||
out << "?" << kv.m_key << " ≠ ?" << d->id() << "<br/>";
|
||
}
|
||
}
|
||
// integer constraints
|
||
for (auto const& ic : m_int_constraints) {
|
||
if (!any) { out << "Cnstr:<br/>"; any = true; }
|
||
out << int_constraint_html(ic, m) << "<br/>";
|
||
}
|
||
|
||
if (!any)
|
||
out << "⊤"; // ⊤ (trivially satisfied)
|
||
return out;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// nielsen_graph: to_dot
|
||
// Output the graph in graphviz DOT format, optionally colour-highlighting
|
||
// the satisfying path. Mirrors ZIPT's NielsenGraph.ToDot().
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Convert a backtrack_reason to a short display string.
|
||
static char const* reason_to_str(backtrack_reason r) {
|
||
switch (r) {
|
||
case backtrack_reason::unevaluated: return "Unevaluated";
|
||
case backtrack_reason::extended: return "Extended";
|
||
case backtrack_reason::symbol_clash: return "Symbol Clash";
|
||
case backtrack_reason::parikh_image: return "Parikh Image";
|
||
case backtrack_reason::subsumption: return "Subsumption";
|
||
case backtrack_reason::arithmetic: return "Arithmetic";
|
||
case backtrack_reason::regex: return "Regex";
|
||
case backtrack_reason::regex_widening: return "RegexWidening";
|
||
case backtrack_reason::character_range: return "Character Range";
|
||
case backtrack_reason::smt: return "SMT";
|
||
case backtrack_reason::children_failed: return "Children Failed";
|
||
default: return "?";
|
||
}
|
||
}
|
||
|
||
// Returns true when the reason is a direct (non-propagated) conflict.
|
||
// Mirrors ZIPT's NielsenNode.IsActualConflict(): all conflicts except ChildrenFailed.
|
||
static bool is_actual_conflict(backtrack_reason r) {
|
||
return r == backtrack_reason::symbol_clash
|
||
|| r == backtrack_reason::parikh_image
|
||
|| r == backtrack_reason::subsumption
|
||
|| r == backtrack_reason::arithmetic
|
||
|| r == backtrack_reason::regex
|
||
|| r == backtrack_reason::regex_widening
|
||
|| r == backtrack_reason::character_range
|
||
|| r == backtrack_reason::smt;
|
||
}
|
||
|
||
// gives a graphviz graph representation of the Nielsen graph (for debugging)
|
||
std::ostream& nielsen_graph::to_dot(std::ostream& out) const {
|
||
ast_manager& m = m_sg.get_manager();
|
||
|
||
// collect sat-path nodes and edges for green highlighting
|
||
ptr_addr_hashtable<nielsen_node> sat_nodes;
|
||
ptr_addr_hashtable<nielsen_edge> sat_edges;
|
||
for (nielsen_edge* e : m_sat_path) {
|
||
if (e->src()) sat_nodes.insert(e->src());
|
||
if (e->tgt()) sat_nodes.insert(e->tgt());
|
||
sat_edges.insert(e);
|
||
}
|
||
|
||
out << "digraph G {\n";
|
||
|
||
// --- nodes ---
|
||
for (nielsen_node const* n : m_nodes) {
|
||
out << " " << n->id() << " [label=<"
|
||
<< n->id() << ": ";
|
||
n->display_html(out, m);
|
||
// append conflict reason if this is a direct conflict
|
||
if (is_actual_conflict(n->reason()))
|
||
out << "<br/>" << reason_to_str(n->reason());
|
||
out << ">";
|
||
|
||
// colour
|
||
if (sat_nodes.contains(const_cast<nielsen_node*>(n)))
|
||
out << ", color=green";
|
||
else if (n->is_general_conflict())
|
||
out << ", color=darkred";
|
||
else if (n->eval_idx() != m_run_idx) // inactive (not visited this run)
|
||
out << ", color=blue";
|
||
else if (n->is_currently_conflict())
|
||
out << ", color=red";
|
||
|
||
out << "];\n";
|
||
}
|
||
|
||
// --- edges ---
|
||
for (nielsen_node const* n : m_nodes) {
|
||
for (nielsen_edge const* e : n->outgoing()) {
|
||
out << " " << n->id() << " -> " << e->tgt()->id() << " [label=<";
|
||
|
||
// edge label: substitutions joined by <br/>
|
||
bool first = true;
|
||
for (auto const& s : e->subst()) {
|
||
if (!first) out << "<br/>";
|
||
first = false;
|
||
out << snode_label_html(s.m_var, m)
|
||
<< " → " // mapping arrow
|
||
<< snode_label_html(s.m_replacement, m);
|
||
}
|
||
for (auto const& cs : e->char_substs()) {
|
||
if (!first) out << "<br/>";
|
||
first = false;
|
||
out << "?" << cs.m_var->id()
|
||
<< " → ?" << cs.m_val->id();
|
||
}
|
||
// side constraints: string equalities
|
||
for (auto const* eq : e->side_str_eq()) {
|
||
if (!first) out << "<br/>";
|
||
first = false;
|
||
out << "<font color=\"gray\">"
|
||
<< snode_label_html(eq->m_lhs, m)
|
||
<< " = "
|
||
<< snode_label_html(eq->m_rhs, m)
|
||
<< "</font>";
|
||
}
|
||
// side constraints: regex memberships
|
||
for (auto const* mem : e->side_str_mem()) {
|
||
if (!first) out << "<br/>";
|
||
first = false;
|
||
out << "<font color=\"gray\">"
|
||
<< snode_label_html(mem->m_str, m)
|
||
<< " ∈ "
|
||
<< snode_label_html(mem->m_regex, m)
|
||
<< "</font>";
|
||
}
|
||
// side constraints: integer equalities/inequalities
|
||
for (auto const& ic : e->side_int()) {
|
||
if (!first) out << "<br/>";
|
||
first = false;
|
||
out << "<font color=\"gray\">"
|
||
<< int_constraint_html(ic, m)
|
||
<< "</font>";
|
||
}
|
||
out << ">";
|
||
|
||
// colour
|
||
nielsen_edge* ep = const_cast<nielsen_edge*>(e);
|
||
if (sat_edges.contains(ep))
|
||
out << ", color=green";
|
||
else if (e->tgt()->eval_idx() != m_run_idx)
|
||
out << ", color=blue";
|
||
else if (e->tgt()->is_currently_conflict())
|
||
out << ", color=red";
|
||
|
||
out << "];\n";
|
||
}
|
||
|
||
// backedge as dotted arrow
|
||
if (n->backedge())
|
||
out << " " << n->id() << " -> " << n->backedge()->id()
|
||
<< " [style=dotted];\n";
|
||
}
|
||
|
||
out << "}\n";
|
||
return out;
|
||
}
|
||
|
||
std::string nielsen_graph::to_dot() const {
|
||
std::stringstream ss;
|
||
to_dot(ss);
|
||
return ss.str();
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// nielsen_node: simplify_and_init
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_node::handle_empty_side(euf::sgraph& sg, euf::snode* non_empty_side,
|
||
dep_tracker const& dep, bool& changed) {
|
||
euf::snode_vector tokens;
|
||
non_empty_side->collect_tokens(tokens);
|
||
bool has_char = std::any_of(tokens.begin(), tokens.end(), [](euf::snode* t){ return t->is_char(); });
|
||
bool all_eliminable = std::all_of(tokens.begin(), tokens.end(), [](euf::snode* t){
|
||
return t->is_var() || t->is_power() || t->kind() == euf::snode_kind::s_other;
|
||
});
|
||
if (has_char || !all_eliminable) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::symbol_clash;
|
||
return true;
|
||
}
|
||
ast_manager& m = sg.get_manager();
|
||
arith_util arith(m);
|
||
seq_util& seq = sg.get_seq_util();
|
||
for (euf::snode* t : tokens) {
|
||
if (t->is_var()) {
|
||
nielsen_subst s(t, sg.mk_empty_seq(t->get_sort()), dep);
|
||
apply_subst(sg, s);
|
||
changed = true;
|
||
}
|
||
else if (t->is_power()) {
|
||
// Power equated to empty → exponent must be 0.
|
||
expr* e = t->get_expr();
|
||
expr* pow_base = nullptr, *pow_exp = nullptr;
|
||
if (e) seq.str.is_power(e, pow_base, pow_exp);
|
||
if (pow_exp) {
|
||
expr* zero = arith.mk_numeral(rational(0), true);
|
||
m_int_constraints.push_back(
|
||
int_constraint(pow_exp, zero, int_constraint_kind::eq, dep, m));
|
||
}
|
||
nielsen_subst s(t, sg.mk_empty_seq(t->get_sort()), dep);
|
||
apply_subst(sg, s);
|
||
changed = true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Power simplification helpers (mirrors ZIPT's MergePowers,
|
||
// SimplifyPowerElim/CommPower, SimplifyPowerSingle)
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Check if exponent b equals exponent a + diff for some rational constant diff.
|
||
// Uses syntactic matching on Z3 expression structure: pointer equality
|
||
// detects shared sub-expressions created during ConstNumUnwinding.
|
||
//
|
||
static bool get_const_power_diff(expr* b, expr* a, arith_util& arith, rational& diff) {
|
||
if (a == b) { diff = rational(0); return true; }
|
||
// b = (+ a k) ?
|
||
if (arith.is_add(b) && to_app(b)->get_num_args() == 2) {
|
||
rational val;
|
||
if (to_app(b)->get_arg(0) == a && arith.is_numeral(to_app(b)->get_arg(1), val))
|
||
{ diff = val; return true; }
|
||
if (to_app(b)->get_arg(1) == a && arith.is_numeral(to_app(b)->get_arg(0), val))
|
||
{ diff = val; return true; }
|
||
}
|
||
// a = (+ b k) → diff = -k
|
||
if (arith.is_add(a) && to_app(a)->get_num_args() == 2) {
|
||
rational val;
|
||
if (to_app(a)->get_arg(0) == b && arith.is_numeral(to_app(a)->get_arg(1), val))
|
||
{ diff = -val; return true; }
|
||
if (to_app(a)->get_arg(1) == b && arith.is_numeral(to_app(a)->get_arg(0), val))
|
||
{ diff = -val; return true; }
|
||
}
|
||
// b = (- a k) → diff = -k
|
||
if (arith.is_sub(b) && to_app(b)->get_num_args() == 2) {
|
||
rational val;
|
||
if (to_app(b)->get_arg(0) == a && arith.is_numeral(to_app(b)->get_arg(1), val))
|
||
{ diff = -val; return true; }
|
||
}
|
||
// a = (- b k) → diff = k
|
||
if (arith.is_sub(a) && to_app(a)->get_num_args() == 2) {
|
||
rational val;
|
||
if (to_app(a)->get_arg(0) == b && arith.is_numeral(to_app(a)->get_arg(1), val))
|
||
{ diff = val; return true; }
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// Get the base expression of a power snode.
|
||
static expr* get_power_base_expr(euf::snode* power, seq_util& seq) {
|
||
if (!power || !power->is_power()) return nullptr;
|
||
expr* e = power->get_expr();
|
||
expr* base = nullptr, *exp = nullptr;
|
||
return (e && seq.str.is_power(e, base, exp)) ? base : nullptr;
|
||
}
|
||
|
||
// Get the exponent expression of a power snode.
|
||
static expr* get_power_exp_expr(euf::snode* power, seq_util& seq) {
|
||
if (!power || !power->is_power()) return nullptr;
|
||
expr* e = power->get_expr();
|
||
expr* base = nullptr, *exp = nullptr;
|
||
return (e && seq.str.is_power(e, base, exp)) ? exp : nullptr;
|
||
}
|
||
|
||
// Merge adjacent tokens with the same power base on one side of an equation.
|
||
// Handles: char(c) · power(c^e) → power(c^(e+1)),
|
||
// power(c^e) · char(c) → power(c^(e+1)),
|
||
// power(c^e1) · power(c^e2) → power(c^(e1+e2)).
|
||
// Returns new snode if merging happened, nullptr otherwise.
|
||
static euf::snode* merge_adjacent_powers(euf::sgraph& sg, euf::snode* side) {
|
||
if (!side || side->is_empty() || side->is_token())
|
||
return nullptr;
|
||
|
||
euf::snode_vector tokens;
|
||
side->collect_tokens(tokens);
|
||
if (tokens.size() < 2) return nullptr;
|
||
|
||
ast_manager& m = sg.get_manager();
|
||
arith_util arith(m);
|
||
seq_util seq(m);
|
||
|
||
bool merged = false;
|
||
euf::snode_vector result;
|
||
|
||
unsigned i = 0;
|
||
while (i < tokens.size()) {
|
||
euf::snode* tok = tokens[i];
|
||
|
||
// Case 1: current is a power token — absorb following same-base tokens.
|
||
// Skip at leading position (i == 0) to keep exponents small: CommPower
|
||
// cross-side cancellation works better with unmerged leading powers
|
||
// (e.g., w^k trivially ≤ 1+k, but w^(2k) vs 1+k requires k ≥ 1).
|
||
if (tok->is_power() && i > 0) {
|
||
expr* base_e = get_power_base_expr(tok, seq);
|
||
expr* exp_acc = get_power_exp_expr(tok, seq);
|
||
if (!base_e || !exp_acc) { result.push_back(tok); ++i; continue; }
|
||
|
||
bool local_merged = false;
|
||
unsigned j = i + 1;
|
||
while (j < tokens.size()) {
|
||
euf::snode* next = tokens[j];
|
||
if (next->is_power()) {
|
||
expr* nb = get_power_base_expr(next, seq);
|
||
if (nb == base_e) {
|
||
exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(next, seq));
|
||
local_merged = true; ++j; continue;
|
||
}
|
||
}
|
||
if (next->is_char() && next->get_expr() == base_e) {
|
||
exp_acc = arith.mk_add(exp_acc, arith.mk_int(1));
|
||
local_merged = true; ++j; continue;
|
||
}
|
||
break;
|
||
}
|
||
if (local_merged) {
|
||
merged = true;
|
||
expr_ref norm_exp = normalize_arith(m, exp_acc);
|
||
expr_ref new_pow(seq.str.mk_power(base_e, norm_exp), m);
|
||
result.push_back(sg.mk(new_pow));
|
||
} else {
|
||
result.push_back(tok);
|
||
}
|
||
i = j;
|
||
continue;
|
||
}
|
||
|
||
// Case 2: current is a char — check if next is a same-base power.
|
||
// Skip at leading position (i == 0) to avoid undoing power unwinding:
|
||
// unwind produces u · u^(n-1); merging it back to u^n creates an infinite cycle.
|
||
if (i > 0 && tok->is_char() && tok->get_expr() && i + 1 < tokens.size()) {
|
||
euf::snode* next = tokens[i + 1];
|
||
if (next->is_power() && get_power_base_expr(next, seq) == tok->get_expr()) {
|
||
expr* base_e = tok->get_expr();
|
||
// Use same arg order as Case 1: add(exp, 1), not add(1, exp),
|
||
// so that merging "c · c^e" and "c^e · c" both produce add(e, 1)
|
||
// and the resulting power expression is hash-consed identically.
|
||
expr* exp_acc = arith.mk_add(get_power_exp_expr(next, seq), arith.mk_int(1));
|
||
unsigned j = i + 2;
|
||
while (j < tokens.size()) {
|
||
euf::snode* further = tokens[j];
|
||
if (further->is_power() && get_power_base_expr(further, seq) == base_e) {
|
||
exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(further, seq));
|
||
++j; continue;
|
||
}
|
||
if (further->is_char() && further->get_expr() == base_e) {
|
||
exp_acc = arith.mk_add(exp_acc, arith.mk_int(1));
|
||
++j; continue;
|
||
}
|
||
break;
|
||
}
|
||
merged = true;
|
||
expr_ref norm_exp = normalize_arith(m, exp_acc);
|
||
expr_ref new_pow(seq.str.mk_power(base_e, norm_exp), m);
|
||
result.push_back(sg.mk(new_pow));
|
||
i = j;
|
||
continue;
|
||
}
|
||
}
|
||
|
||
result.push_back(tok);
|
||
++i;
|
||
}
|
||
|
||
if (!merged) return nullptr;
|
||
|
||
euf::snode* rebuilt = nullptr;
|
||
for (unsigned k = 0; k < result.size(); ++k)
|
||
rebuilt = (k == 0) ? result[k] : sg.mk_concat(rebuilt, result[k]);
|
||
if (!rebuilt) rebuilt = sg.mk_empty_seq(side->get_sort());
|
||
return rebuilt;
|
||
}
|
||
|
||
// Simplify constant-exponent powers: base^0 → ε, base^1 → base.
|
||
// Returns new snode if any simplification happened, nullptr otherwise.
|
||
static euf::snode* simplify_const_powers(euf::sgraph& sg, euf::snode* side) {
|
||
if (!side || side->is_empty()) return nullptr;
|
||
|
||
euf::snode_vector tokens;
|
||
side->collect_tokens(tokens);
|
||
|
||
ast_manager& m = sg.get_manager();
|
||
arith_util arith(m);
|
||
seq_util seq(m);
|
||
|
||
bool simplified = false;
|
||
euf::snode_vector result;
|
||
|
||
for (euf::snode* tok : tokens) {
|
||
if (tok->is_power()) {
|
||
expr* exp_e = get_power_exp_expr(tok, seq);
|
||
rational val;
|
||
if (exp_e && arith.is_numeral(exp_e, val)) {
|
||
if (val.is_zero()) {
|
||
// base^0 → ε (skip this token entirely)
|
||
simplified = true;
|
||
continue;
|
||
}
|
||
if (val.is_one()) {
|
||
// base^1 → base
|
||
euf::snode* base_sn = tok->arg(0);
|
||
if (base_sn) {
|
||
result.push_back(base_sn);
|
||
simplified = true;
|
||
continue;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
result.push_back(tok);
|
||
}
|
||
|
||
if (!simplified) return nullptr;
|
||
|
||
euf::snode* rebuilt = nullptr;
|
||
for (euf::snode* tok : result)
|
||
rebuilt = rebuilt ? sg.mk_concat(rebuilt, tok) : tok;
|
||
if (!rebuilt) rebuilt = sg.mk_empty_seq(side->get_sort());
|
||
return rebuilt;
|
||
}
|
||
|
||
// CommPower: count how many times a power's base pattern appears in
|
||
// the directional prefix of the other side (fwd=true: left prefix,
|
||
// fwd=false: right suffix). Mirrors ZIPT's CommPower helper.
|
||
// Returns (count_expr, num_tokens_consumed). count_expr is nullptr
|
||
// when no complete base-pattern match is found.
|
||
static std::pair<expr_ref, unsigned> comm_power(
|
||
euf::snode* base_sn, euf::snode* side, ast_manager& m, bool fwd) {
|
||
arith_util arith(m);
|
||
seq_util seq(m);
|
||
euf::snode_vector base_tokens, side_tokens;
|
||
collect_tokens_dir(base_sn, fwd, base_tokens);
|
||
collect_tokens_dir(side, fwd, side_tokens);
|
||
if (base_tokens.empty() || side_tokens.empty())
|
||
return {expr_ref(nullptr, m), 0};
|
||
|
||
expr* sum = nullptr;
|
||
unsigned pos = 0;
|
||
expr* last_stable_sum = nullptr;
|
||
unsigned last_stable_idx = 0;
|
||
|
||
unsigned i = 0;
|
||
for (; i < side_tokens.size(); i++) {
|
||
euf::snode* t = side_tokens[i];
|
||
if (pos == 0) {
|
||
last_stable_idx = i;
|
||
last_stable_sum = sum;
|
||
}
|
||
// Case 1: direct token match with base pattern
|
||
if (pos < base_tokens.size() && t == base_tokens[pos]) {
|
||
pos++;
|
||
if (pos >= base_tokens.size()) {
|
||
pos = 0;
|
||
sum = sum ? arith.mk_add(sum, arith.mk_int(1))
|
||
: arith.mk_int(1);
|
||
}
|
||
continue;
|
||
}
|
||
// Case 2: power token whose base matches our base pattern (at pos == 0)
|
||
if (pos == 0 && t->is_power()) {
|
||
euf::snode* pow_base = t->arg(0);
|
||
if (pow_base) {
|
||
euf::snode_vector pb_tokens;
|
||
collect_tokens_dir(pow_base, fwd, pb_tokens);
|
||
if (pb_tokens.size() == base_tokens.size()) {
|
||
bool match = true;
|
||
for (unsigned j = 0; j < pb_tokens.size() && match; j++)
|
||
match = (pb_tokens[j] == base_tokens[j]);
|
||
if (match) {
|
||
expr* pow_exp = get_power_exp_expr(t, seq);
|
||
if (pow_exp) {
|
||
sum = sum ? arith.mk_add(sum, pow_exp) : pow_exp;
|
||
continue;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
break;
|
||
}
|
||
// After loop: i = break index or side_tokens.size()
|
||
if (pos == 0) {
|
||
last_stable_idx = i;
|
||
last_stable_sum = sum;
|
||
}
|
||
return {expr_ref(last_stable_sum, m), last_stable_idx};
|
||
}
|
||
|
||
simplify_result nielsen_node::simplify_and_init(svector<nielsen_edge*> const& cur_path) {
|
||
if (m_is_extended)
|
||
return simplify_result::proceed;
|
||
|
||
euf::sgraph& sg = m_graph.sg();
|
||
ast_manager& m = sg.get_manager();
|
||
arith_util arith(m);
|
||
seq_util seq(m);
|
||
bool changed = true;
|
||
|
||
while (changed) {
|
||
changed = false;
|
||
|
||
// pass 1: remove trivially satisfied equalities
|
||
unsigned wi = 0;
|
||
for (unsigned i = 0; i < m_str_eq.size(); ++i) {
|
||
str_eq& eq = m_str_eq[i];
|
||
if (eq.is_trivial())
|
||
continue;
|
||
m_str_eq[wi++] = eq;
|
||
}
|
||
if (wi < m_str_eq.size()) {
|
||
m_str_eq.shrink(wi);
|
||
changed = true;
|
||
}
|
||
|
||
// pass 2: detect symbol clashes, empty-propagation, and prefix cancellation
|
||
for (str_eq& eq : m_str_eq) {
|
||
if (!eq.m_lhs || !eq.m_rhs)
|
||
continue;
|
||
|
||
// one side empty, the other not empty => conflict or substitution
|
||
if (eq.m_lhs->is_empty() && !eq.m_rhs->is_empty()) {
|
||
if (handle_empty_side(sg, eq.m_rhs, eq.m_dep, changed))
|
||
return simplify_result::conflict;
|
||
continue;
|
||
}
|
||
if (eq.m_rhs->is_empty() && !eq.m_lhs->is_empty()) {
|
||
if (handle_empty_side(sg, eq.m_lhs, eq.m_dep, changed))
|
||
return simplify_result::conflict;
|
||
continue;
|
||
}
|
||
|
||
// prefix/suffix cancellation: strip common leading and trailing tokens.
|
||
// Same char or same variable on both sides can always be cancelled.
|
||
// Two different concrete characters is a symbol clash.
|
||
{
|
||
euf::snode_vector lhs_toks, rhs_toks;
|
||
eq.m_lhs->collect_tokens(lhs_toks);
|
||
eq.m_rhs->collect_tokens(rhs_toks);
|
||
|
||
// --- prefix ---
|
||
unsigned prefix = 0;
|
||
while (prefix < lhs_toks.size() && prefix < rhs_toks.size()) {
|
||
euf::snode* lt = lhs_toks[prefix];
|
||
euf::snode* rt = rhs_toks[prefix];
|
||
if ((lt->is_char() || lt->is_var()) && m.are_equal(lt->get_expr(), rt->get_expr())) {
|
||
++prefix;
|
||
} else if (lt->is_char() && rt->is_char() && m.are_distinct(lt->get_expr(), rt->get_expr())) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::symbol_clash;
|
||
return simplify_result::conflict;
|
||
} else {
|
||
break;
|
||
}
|
||
}
|
||
|
||
// --- suffix (only among the tokens not already consumed by prefix) ---
|
||
unsigned lsz = lhs_toks.size(), rsz = rhs_toks.size();
|
||
unsigned suffix = 0;
|
||
while (suffix < lsz - prefix && suffix < rsz - prefix) {
|
||
euf::snode* lt = lhs_toks[lsz - 1 - suffix];
|
||
euf::snode* rt = rhs_toks[rsz - 1 - suffix];
|
||
if ((lt->is_char() || lt->is_var()) && m.are_equal(lt->get_expr(), rt->get_expr())) {
|
||
++suffix;
|
||
} else if (lt->is_char() && rt->is_char() && m.are_distinct(lt->get_expr(), rt->get_expr())) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::symbol_clash;
|
||
return simplify_result::conflict;
|
||
} else {
|
||
break;
|
||
}
|
||
}
|
||
|
||
if (prefix > 0 || suffix > 0) {
|
||
eq.m_lhs = sg.drop_left(sg.drop_right(eq.m_lhs, suffix), prefix);
|
||
eq.m_rhs = sg.drop_left(sg.drop_right(eq.m_rhs, suffix), prefix);
|
||
changed = true;
|
||
}
|
||
}
|
||
|
||
// pass 2b: power-character directional inconsistency
|
||
// (mirrors ZIPT's SimplifyDir power unit case + IsPrefixConsistent)
|
||
// For each direction (left-to-right and right-to-left):
|
||
// when one side starts (in that direction) with a power u^n whose
|
||
// base starts (in that direction) with char a, and the other side
|
||
// starts with a different char b, then exponent n must be 0.
|
||
// Creates a single deterministic child with the substitution and
|
||
// constraint as edge labels so they appear in the graph.
|
||
// Guard: skip if we already created a child (re-entry via iterative deepening).
|
||
|
||
// Spec: DirectionalInconsistency.
|
||
// For direction d ∈ {left, right}, let first_d(u) denote the first token of u
|
||
// in direction d.
|
||
// If pow_side[d] = u^p and other_side[d] = c (a concrete character),
|
||
// and first_d(base(u)) = c' with c' ≠ c,
|
||
// then p = 0 (the power is vacuous in direction d).
|
||
// Action: create a deterministic child with substitution u^p → ε and
|
||
// int constraint p = 0.
|
||
if (!eq.is_trivial() && eq.m_lhs && eq.m_rhs) {
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool fwd = (od == 0);
|
||
euf::snode* lh = dir_token(eq.m_lhs, fwd);
|
||
euf::snode* rh = dir_token(eq.m_rhs, fwd);
|
||
for (int side = 0; side < 2; ++side) {
|
||
euf::snode* pow_head = (side == 0) ? lh : rh;
|
||
euf::snode* other_head = (side == 0) ? rh : lh;
|
||
if (!pow_head || !pow_head->is_power() || !other_head || !other_head->is_char())
|
||
continue;
|
||
euf::snode* base_sn = pow_head->arg(0);
|
||
if (!base_sn) continue;
|
||
euf::snode* base_head = dir_token(base_sn, fwd);
|
||
if (!base_head || !base_head->is_char()) continue;
|
||
if (m.are_equal(base_head->get_expr(), other_head->get_expr())) continue;
|
||
// Directional base/head mismatch -> force exponent 0 and power -> ε.
|
||
nielsen_node* child = m_graph.mk_child(this);
|
||
nielsen_edge* e = m_graph.mk_edge(this, child, true);
|
||
nielsen_subst s(pow_head, sg.mk_empty_seq(pow_head->get_sort()), eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(sg, s);
|
||
expr* pow_exp = get_power_exp_expr(pow_head, seq);
|
||
if (pow_exp) {
|
||
expr* zero = arith.mk_numeral(rational(0), true);
|
||
e->add_side_int(m_graph.mk_int_constraint(
|
||
pow_exp, zero, int_constraint_kind::eq, eq.m_dep));
|
||
}
|
||
set_extended(true);
|
||
return simplify_result::proceed;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// pass 3: power simplification (mirrors ZIPT's LcpCompression +
|
||
// SimplifyPowerElim + SimplifyPowerSingle)
|
||
for (str_eq& eq : m_str_eq) {
|
||
if (eq.is_trivial() || !eq.m_lhs || !eq.m_rhs)
|
||
continue;
|
||
|
||
// 3a: simplify constant-exponent powers (base^0 → ε, base^1 → base)
|
||
if (euf::snode* s = simplify_const_powers(sg, eq.m_lhs))
|
||
{ eq.m_lhs = s; changed = true; }
|
||
if (euf::snode* s = simplify_const_powers(sg, eq.m_rhs))
|
||
{ eq.m_rhs = s; changed = true; }
|
||
|
||
// 3b: merge adjacent same-base tokens into combined powers
|
||
if (euf::snode* s = merge_adjacent_powers(sg, eq.m_lhs))
|
||
{ eq.m_lhs = s; changed = true; }
|
||
if (euf::snode* s = merge_adjacent_powers(sg, eq.m_rhs))
|
||
{ eq.m_rhs = s; changed = true; }
|
||
|
||
// 3c: CommPower-based power elimination — when one side starts
|
||
// with a power w^p, count base-pattern occurrences c on the
|
||
// other side's prefix. If we can determine the ordering between
|
||
// p and c, cancel the matched portion. Mirrors ZIPT's
|
||
// SimplifyPowerElim calling CommPower.
|
||
// Spec: CommPower cancellation.
|
||
// Given: pow_side = w^p · rest_pow and other_side = w^c · rest_other
|
||
// where c is the number of times the base pattern w occurs in the
|
||
// directional prefix of other_side.
|
||
// - If p ≤ c: pow_side := rest_pow, other_side := w^(c-p) · rest_other
|
||
// - If c ≤ p: pow_side := w^(p-c) · rest_pow, other_side := rest_other
|
||
// - If p = c: both reduce completely (handled by both conditions above).
|
||
if (!eq.m_lhs || !eq.m_rhs) continue;
|
||
bool comm_changed = false;
|
||
for (int side = 0; side < 2 && !comm_changed; ++side) {
|
||
euf::snode*& pow_side = (side == 0) ? eq.m_lhs : eq.m_rhs;
|
||
euf::snode*& other_side = (side == 0) ? eq.m_rhs : eq.m_lhs;
|
||
if (!pow_side || !other_side) continue;
|
||
for (unsigned od = 0; od < 2 && !comm_changed; ++od) {
|
||
bool fwd = (od == 0);
|
||
euf::snode* end_tok = dir_token(pow_side, fwd);
|
||
if (!end_tok || !end_tok->is_power()) continue;
|
||
euf::snode* base_sn = end_tok->arg(0);
|
||
expr* pow_exp = get_power_exp_expr(end_tok, seq);
|
||
if (!base_sn || !pow_exp) continue;
|
||
|
||
auto [count, consumed] = comm_power(base_sn, other_side, m, fwd);
|
||
if (!count.get() || consumed == 0) continue;
|
||
|
||
expr_ref norm_count = normalize_arith(m, count);
|
||
bool pow_le_count = false, count_le_pow = false;
|
||
rational diff;
|
||
if (get_const_power_diff(norm_count, pow_exp, arith, diff)) {
|
||
count_le_pow = diff.is_nonpos();
|
||
pow_le_count = diff.is_nonneg();
|
||
}
|
||
else if (!cur_path.empty()) {
|
||
pow_le_count = m_graph.check_lp_le(pow_exp, norm_count, this, cur_path);
|
||
count_le_pow = m_graph.check_lp_le(norm_count, pow_exp, this, cur_path);
|
||
}
|
||
if (!pow_le_count && !count_le_pow) continue;
|
||
|
||
pow_side = dir_drop(sg, pow_side, 1, fwd);
|
||
other_side = dir_drop(sg, other_side, consumed, fwd);
|
||
expr* base_e = get_power_base_expr(end_tok, seq);
|
||
if (pow_le_count && count_le_pow) {
|
||
// equal: both cancel completely
|
||
}
|
||
else if (pow_le_count) {
|
||
// pow <= count: remainder goes to other_side
|
||
expr_ref rem = normalize_arith(m, arith.mk_sub(norm_count, pow_exp));
|
||
expr_ref pw(seq.str.mk_power(base_e, rem), m);
|
||
other_side = dir_concat(sg, sg.mk(pw), other_side, fwd);
|
||
}
|
||
else {
|
||
// count <= pow: remainder goes to pow_side
|
||
expr_ref rem = normalize_arith(m, arith.mk_sub(pow_exp, norm_count));
|
||
expr_ref pw(seq.str.mk_power(base_e, rem), m);
|
||
pow_side = dir_concat(sg, sg.mk(pw), pow_side, fwd);
|
||
}
|
||
comm_changed = true;
|
||
}
|
||
}
|
||
if (comm_changed)
|
||
changed = true;
|
||
|
||
// After any change in passes 3a–3c, restart the while loop
|
||
// before running 3d/3e. This lets 3a simplify new constant-
|
||
// exponent powers (e.g. base^1 → base created by 3c) before
|
||
// 3e attempts LP-based elimination which would introduce a
|
||
// needless fresh variable.
|
||
if (changed)
|
||
continue;
|
||
|
||
// 3d: power prefix elimination — when both sides start with a
|
||
// power of the same base, cancel the common power prefix.
|
||
// (Subsumed by 3c for many cases, but handles same-base-power
|
||
// pairs that CommPower may miss when both leading tokens are powers.)
|
||
if (!eq.m_lhs || !eq.m_rhs) continue;
|
||
for (unsigned od = 0; od < 2 && !changed; ++od) {
|
||
bool fwd = (od == 0);
|
||
euf::snode* lh = dir_token(eq.m_lhs, fwd);
|
||
euf::snode* rh = dir_token(eq.m_rhs, fwd);
|
||
if (!(lh && rh && lh->is_power() && rh->is_power()))
|
||
continue;
|
||
expr* lb = get_power_base_expr(lh, seq);
|
||
expr* rb = get_power_base_expr(rh, seq);
|
||
if (!(lb && rb && lb == rb))
|
||
continue;
|
||
expr* lp = get_power_exp_expr(lh, seq);
|
||
expr* rp = get_power_exp_expr(rh, seq);
|
||
rational diff;
|
||
if (lp && rp && get_const_power_diff(rp, lp, arith, diff)) {
|
||
// rp = lp + diff (constant difference)
|
||
eq.m_lhs = dir_drop(sg, eq.m_lhs, 1, fwd);
|
||
eq.m_rhs = dir_drop(sg, eq.m_rhs, 1, fwd);
|
||
if (diff.is_pos()) {
|
||
// rp > lp: put base^diff on RHS (direction-aware prepend/append)
|
||
expr_ref de(arith.mk_int(diff), m);
|
||
expr_ref pw(seq.str.mk_power(lb, de), m);
|
||
eq.m_rhs = dir_concat(sg, sg.mk(pw), eq.m_rhs, fwd);
|
||
}
|
||
else if (diff.is_neg()) {
|
||
// lp > rp: put base^(-diff) on LHS
|
||
expr_ref de(arith.mk_int(-diff), m);
|
||
expr_ref pw(seq.str.mk_power(lb, de), m);
|
||
eq.m_lhs = dir_concat(sg, sg.mk(pw), eq.m_lhs, fwd);
|
||
}
|
||
// diff == 0: both powers cancel completely
|
||
changed = true;
|
||
}
|
||
// 3e: LP-aware power directional elimination
|
||
else if (lp && rp && !cur_path.empty()) {
|
||
bool lp_le_rp = m_graph.check_lp_le(lp, rp, this, cur_path);
|
||
bool rp_le_lp = m_graph.check_lp_le(rp, lp, this, cur_path);
|
||
if (lp_le_rp || rp_le_lp) {
|
||
expr* smaller_exp = lp_le_rp ? lp : rp;
|
||
expr* larger_exp = lp_le_rp ? rp : lp;
|
||
eq.m_lhs = dir_drop(sg, eq.m_lhs, 1, fwd);
|
||
eq.m_rhs = dir_drop(sg, eq.m_rhs, 1, fwd);
|
||
if (lp_le_rp && rp_le_lp) {
|
||
// both ≤ -> equal -> both cancel completely
|
||
add_int_constraint(m_graph.mk_int_constraint(lp, rp, int_constraint_kind::eq, eq.m_dep));
|
||
}
|
||
else {
|
||
// strictly less: create diff power d = larger - smaller >= 1
|
||
expr_ref d(m_graph.mk_fresh_int_var());
|
||
expr_ref one_e(arith.mk_int(1), m);
|
||
expr_ref d_plus_smaller(arith.mk_add(d, smaller_exp), m);
|
||
add_int_constraint(m_graph.mk_int_constraint(d, one_e, int_constraint_kind::ge, eq.m_dep));
|
||
add_int_constraint(m_graph.mk_int_constraint(d_plus_smaller, larger_exp, int_constraint_kind::eq, eq.m_dep));
|
||
expr_ref pw(seq.str.mk_power(lb, d), m);
|
||
euf::snode*& larger_side = lp_le_rp ? eq.m_rhs : eq.m_lhs;
|
||
larger_side = dir_concat(sg, sg.mk(pw), larger_side, fwd);
|
||
}
|
||
changed = true;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// consume concrete characters from str_mem via Brzozowski derivatives
|
||
// in both directions (left-to-right, then right-to-left), mirroring ZIPT.
|
||
for (str_mem& mem : m_str_mem) {
|
||
if (!mem.m_str || !mem.m_regex)
|
||
continue;
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool fwd = (od == 0);
|
||
while (mem.m_str && !mem.m_str->is_empty()) {
|
||
euf::snode* tok = dir_token(mem.m_str, fwd);
|
||
if (!tok || !tok->is_char())
|
||
break;
|
||
euf::snode* deriv = fwd
|
||
? sg.brzozowski_deriv(mem.m_regex, tok)
|
||
: reverse_brzozowski_deriv(sg, mem.m_regex, tok);
|
||
if (!deriv)
|
||
break;
|
||
if (deriv->is_fail()) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::regex;
|
||
return simplify_result::conflict;
|
||
}
|
||
mem.m_str = dir_drop(sg, mem.m_str, 1, fwd);
|
||
mem.m_regex = deriv;
|
||
// ZIPT tracks history only in forward direction.
|
||
if (fwd)
|
||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
||
}
|
||
}
|
||
}
|
||
|
||
// consume symbolic characters (s_unit tokens) via uniform derivative
|
||
// check. When all minterms of the regex produce the same Brzozowski
|
||
// derivative, the derivative is independent of the character value
|
||
// and we can deterministically consume the token. Forward direction
|
||
// only (matches ZIPT history tracking convention).
|
||
//
|
||
// Extended: when uniform derivative fails but the token has a char_range
|
||
// constraint (from apply_regex_var_split), check if the char_range is a
|
||
// subset of a single minterm's character class. If so, the derivative
|
||
// is deterministic for that token.
|
||
// Mirrors ZIPT StrMem.SimplifyCharRegex lines 96-117.
|
||
for (str_mem& mem : m_str_mem) {
|
||
if (!mem.m_str || !mem.m_regex)
|
||
continue;
|
||
while (mem.m_str && !mem.m_str->is_empty()) {
|
||
euf::snode* tok = mem.m_str->first();
|
||
if (!tok || !tok->is_unit())
|
||
break;
|
||
// compute minterms and check for uniform derivative
|
||
euf::snode_vector minterms;
|
||
sg.compute_minterms(mem.m_regex, minterms);
|
||
if (minterms.empty())
|
||
break;
|
||
euf::snode* uniform = nullptr;
|
||
bool is_uniform = true;
|
||
for (euf::snode* mt : minterms) {
|
||
if (!mt || mt->is_fail())
|
||
continue;
|
||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
||
if (!deriv) {
|
||
is_uniform = false;
|
||
break;
|
||
}
|
||
if (!uniform)
|
||
uniform = deriv;
|
||
else if (uniform->id() != deriv->id()) {
|
||
is_uniform = false;
|
||
break;
|
||
}
|
||
}
|
||
if (is_uniform && uniform) {
|
||
if (uniform->is_fail()) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::regex;
|
||
return simplify_result::conflict;
|
||
}
|
||
mem.m_str = sg.drop_left(mem.m_str, 1);
|
||
mem.m_regex = uniform;
|
||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
||
continue;
|
||
}
|
||
// Uniform derivative failed — try char_range subset approach.
|
||
// If the symbolic char has a char_range constraint and that
|
||
// range is a subset of exactly one minterm's character class,
|
||
// we can deterministically take that minterm's derivative.
|
||
if (m_char_ranges.contains(tok->id()) && m_graph.m_parikh) {
|
||
char_set const& cs = m_char_ranges[tok->id()];
|
||
if (!cs.is_empty()) {
|
||
euf::snode* matching_deriv = nullptr;
|
||
bool found = false;
|
||
for (euf::snode* mt : minterms) {
|
||
if (!mt || mt->is_fail()) continue;
|
||
if (!mt->get_expr()) continue;
|
||
char_set mt_cs = m_graph.m_parikh->minterm_to_char_set(mt->get_expr());
|
||
if (cs.is_subset(mt_cs)) {
|
||
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, mt);
|
||
if (!deriv) { found = false; break; }
|
||
if (deriv->is_fail()) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::regex;
|
||
return simplify_result::conflict;
|
||
}
|
||
matching_deriv = deriv;
|
||
found = true;
|
||
break;
|
||
}
|
||
}
|
||
if (found && matching_deriv) {
|
||
mem.m_str = sg.drop_left(mem.m_str, 1);
|
||
mem.m_regex = matching_deriv;
|
||
mem.m_history = sg.mk_concat(mem.m_history, tok);
|
||
continue;
|
||
}
|
||
}
|
||
}
|
||
break;
|
||
}
|
||
}
|
||
|
||
// check for regex memberships that are immediately infeasible
|
||
for (str_mem& mem : m_str_mem) {
|
||
if (!mem.m_str || !mem.m_regex)
|
||
continue;
|
||
if (mem.m_str->is_empty() && !mem.m_regex->is_nullable()) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::regex;
|
||
return simplify_result::conflict;
|
||
}
|
||
}
|
||
|
||
// remove satisfied str_mem constraints (ε ∈ nullable regex)
|
||
unsigned wi = 0;
|
||
for (unsigned i = 0; i < m_str_mem.size(); ++i) {
|
||
str_mem& mem = m_str_mem[i];
|
||
if (mem.m_str && mem.m_str->is_empty() && mem.m_regex && mem.m_regex->is_nullable())
|
||
continue; // satisfied, drop
|
||
m_str_mem[wi++] = mem;
|
||
}
|
||
if (wi < m_str_mem.size())
|
||
m_str_mem.shrink(wi);
|
||
|
||
// Regex widening: for each remaining str_mem, overapproximate
|
||
// the string by replacing variables with their regex intersection
|
||
// and check if the result intersected with the target regex is empty.
|
||
// Detects infeasible constraints that would otherwise require
|
||
// expensive exploration. Mirrors ZIPT NielsenNode.CheckRegexWidening.
|
||
if (m_graph.m_seq_regex) {
|
||
for (str_mem const& mem : m_str_mem) {
|
||
if (!mem.m_str || !mem.m_regex)
|
||
continue;
|
||
if (m_graph.check_regex_widening(*this, mem.m_str, mem.m_regex)) {
|
||
m_is_general_conflict = true;
|
||
m_reason = backtrack_reason::regex;
|
||
return simplify_result::conflict;
|
||
}
|
||
}
|
||
}
|
||
|
||
// IntBounds initialization: derive per-variable Parikh length bounds from
|
||
// remaining regex memberships and add to m_int_constraints.
|
||
init_var_bounds_from_mems();
|
||
|
||
// pass 5: variable-character look-ahead substitution
|
||
// (mirrors ZIPT's StrEq.SimplifyFinal)
|
||
// When one side starts (in a direction) with variable x and the other
|
||
// with chars, look ahead to find how many chars can be deterministically
|
||
// assigned to x without splitting, by checking directional consistency.
|
||
// Guard: skip if we already created a child (re-entry via iterative deepening).
|
||
for (str_eq& eq : m_str_eq) {
|
||
if (eq.is_trivial() || !eq.m_lhs || !eq.m_rhs)
|
||
continue;
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool fwd = (od == 0);
|
||
|
||
// Orient: var_side starts with a variable, char_side with a char
|
||
euf::snode* var_side = nullptr;
|
||
euf::snode* char_side = nullptr;
|
||
euf::snode* lhead = dir_token(eq.m_lhs, fwd);
|
||
euf::snode* rhead = dir_token(eq.m_rhs, fwd);
|
||
if (!lhead || !rhead) continue;
|
||
|
||
if (lhead->is_var() && rhead->is_char()) {
|
||
var_side = eq.m_lhs;
|
||
char_side = eq.m_rhs;
|
||
}
|
||
else if (rhead->is_var() && lhead->is_char()) {
|
||
var_side = eq.m_rhs;
|
||
char_side = eq.m_lhs;
|
||
}
|
||
else {
|
||
continue;
|
||
}
|
||
|
||
euf::snode_vector var_toks, char_toks;
|
||
collect_tokens_dir(var_side, fwd, var_toks);
|
||
collect_tokens_dir(char_side, fwd, char_toks);
|
||
if (var_toks.size() <= 1 || char_toks.empty())
|
||
continue;
|
||
|
||
euf::snode* var_node = var_toks[0];
|
||
SASSERT(var_node->is_var());
|
||
|
||
// For increasing directional prefix lengths i (chars from char_side),
|
||
// check if x -> chars[0..i-1] · x (or x · chars[...] in backward mode)
|
||
// is consistent.
|
||
unsigned i = 0;
|
||
for (; i < char_toks.size() && char_toks[i]->is_char(); ++i) {
|
||
unsigned j1 = 1; // index into var_toks (after the variable)
|
||
unsigned j2 = i; // index into char_toks (after copied prefix)
|
||
bool failed = false;
|
||
|
||
while (j1 < var_toks.size() && j2 < char_toks.size()) {
|
||
euf::snode* st1 = var_toks[j1];
|
||
euf::snode* st2 = char_toks[j2];
|
||
|
||
if (!st2->is_char())
|
||
break; // cannot compare against non-char -> indeterminate
|
||
|
||
if (st1->is_char()) {
|
||
if (st1->id() == st2->id()) {
|
||
j1++;
|
||
j2++;
|
||
continue;
|
||
}
|
||
failed = true;
|
||
break;
|
||
}
|
||
|
||
if (st1->id() != var_node->id())
|
||
break; // different variable/power -> indeterminate
|
||
|
||
// st1 is the same variable x again — compare against chars copied so far
|
||
bool inner_indet = false;
|
||
for (unsigned l = 0; j2 < char_toks.size() && l < i; ++l) {
|
||
st2 = char_toks[j2];
|
||
if (!st2->is_char()) {
|
||
inner_indet = true;
|
||
break;
|
||
}
|
||
if (st2->id() == char_toks[l]->id()) {
|
||
j2++;
|
||
continue;
|
||
}
|
||
failed = true;
|
||
break;
|
||
}
|
||
if (inner_indet || failed) break;
|
||
j1++;
|
||
}
|
||
|
||
if (failed)
|
||
continue; // prefix i inconsistent -> try longer
|
||
break; // prefix i consistent/indeterminate -> stop
|
||
}
|
||
|
||
if (i == 0)
|
||
continue;
|
||
|
||
// Divergence guard (mirrors ZIPT's HasDepCycle + power skip).
|
||
bool skip_dir = false;
|
||
euf::snode* next_var = nullptr;
|
||
for (unsigned k = i; k < char_toks.size(); ++k) {
|
||
euf::snode* t = char_toks[k];
|
||
if (t->is_power()) {
|
||
skip_dir = true; // could diverge via repeated power unwinding
|
||
break;
|
||
}
|
||
if (t->is_var()) {
|
||
next_var = t;
|
||
break;
|
||
}
|
||
}
|
||
if (skip_dir)
|
||
continue;
|
||
|
||
// If there is a variable after the copied chars, reject substitutions
|
||
// that create a directional Nielsen dependency cycle.
|
||
if (next_var) {
|
||
u_map<unsigned> dep_edges; // var id -> first dependent var id
|
||
for (str_eq const& other_eq : m_str_eq) {
|
||
if (other_eq.is_trivial()) continue;
|
||
if (!other_eq.m_lhs || !other_eq.m_rhs) continue;
|
||
|
||
euf::snode* lh2 = dir_token(other_eq.m_lhs, fwd);
|
||
euf::snode* rh2 = dir_token(other_eq.m_rhs, fwd);
|
||
if (!lh2 || !rh2) continue;
|
||
|
||
auto record_dep = [&](euf::snode* head_var, euf::snode* other_side) {
|
||
euf::snode_vector other_toks;
|
||
collect_tokens_dir(other_side, fwd, other_toks);
|
||
for (unsigned idx = 0; idx < other_toks.size(); ++idx) {
|
||
if (other_toks[idx]->is_var()) {
|
||
if (!dep_edges.contains(head_var->id()))
|
||
dep_edges.insert(head_var->id(), other_toks[idx]->id());
|
||
return;
|
||
}
|
||
}
|
||
};
|
||
|
||
if (lh2->is_var() && rh2->is_var()) {
|
||
if (!dep_edges.contains(lh2->id()))
|
||
dep_edges.insert(lh2->id(), rh2->id());
|
||
if (!dep_edges.contains(rh2->id()))
|
||
dep_edges.insert(rh2->id(), lh2->id());
|
||
}
|
||
else if (lh2->is_var() && !rh2->is_var()) {
|
||
record_dep(lh2, other_eq.m_rhs);
|
||
}
|
||
else if (rh2->is_var() && !lh2->is_var()) {
|
||
record_dep(rh2, other_eq.m_lhs);
|
||
}
|
||
}
|
||
|
||
// DFS from next_var to see if we can reach var_node
|
||
uint_set visited;
|
||
svector<unsigned> worklist;
|
||
worklist.push_back(next_var->id());
|
||
bool cycle_found = false;
|
||
while (!worklist.empty() && !cycle_found) {
|
||
unsigned cur = worklist.back();
|
||
worklist.pop_back();
|
||
if (cur == var_node->id()) {
|
||
cycle_found = true;
|
||
break;
|
||
}
|
||
if (visited.contains(cur))
|
||
continue;
|
||
visited.insert(cur);
|
||
unsigned dep_id;
|
||
if (dep_edges.find(cur, dep_id))
|
||
worklist.push_back(dep_id);
|
||
}
|
||
if (cycle_found)
|
||
continue;
|
||
}
|
||
|
||
// Create a single deterministic child with directional substitution.
|
||
euf::snode* prefix_sn = char_toks[0];
|
||
for (unsigned j = 1; j < i; ++j)
|
||
prefix_sn = dir_concat(sg, prefix_sn, char_toks[j], fwd);
|
||
euf::snode* replacement = dir_concat(sg, prefix_sn, var_node, fwd);
|
||
nielsen_subst s(var_node, replacement, eq.m_dep);
|
||
nielsen_node* child = m_graph.mk_child(this);
|
||
nielsen_edge* e = m_graph.mk_edge(this, child, true);
|
||
e->add_subst(s);
|
||
child->apply_subst(sg, s);
|
||
set_extended(true);
|
||
return simplify_result::proceed;
|
||
}
|
||
}
|
||
|
||
if (is_satisfied())
|
||
return simplify_result::satisfied;
|
||
|
||
return simplify_result::proceed;
|
||
}
|
||
|
||
bool nielsen_node::is_satisfied() const {
|
||
for (str_eq const& eq : m_str_eq) {
|
||
if (!eq.is_trivial())
|
||
return false;
|
||
}
|
||
return m_str_mem.empty();
|
||
}
|
||
|
||
bool nielsen_node::has_opaque_terms() const {
|
||
auto is_opaque = [](euf::snode* n) { return n && n->kind() == euf::snode_kind::s_other; };
|
||
for (str_eq const& eq : m_str_eq) {
|
||
if (eq.is_trivial()) continue;
|
||
if (is_opaque(eq.m_lhs) || is_opaque(eq.m_rhs)) return true;
|
||
euf::snode_vector toks;
|
||
if (eq.m_lhs) { eq.m_lhs->collect_tokens(toks); for (auto* t : toks) if (is_opaque(t)) return true; toks.reset(); }
|
||
if (eq.m_rhs) { eq.m_rhs->collect_tokens(toks); for (auto* t : toks) if (is_opaque(t)) return true; }
|
||
}
|
||
for (str_mem const& mem : m_str_mem) {
|
||
if (!mem.m_str) continue;
|
||
if (is_opaque(mem.m_str)) return true;
|
||
euf::snode_vector toks;
|
||
mem.m_str->collect_tokens(toks);
|
||
for (auto* t : toks) if (is_opaque(t)) return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// nielsen_graph: search
|
||
// -----------------------------------------------------------------------
|
||
|
||
void nielsen_graph::apply_parikh_to_node(nielsen_node& node) {
|
||
if (!m_parikh_enabled) return;
|
||
if (node.m_parikh_applied) return;
|
||
node.m_parikh_applied = true;
|
||
|
||
// Generate modular length constraints (len(str) = min_len + stride·k, etc.)
|
||
// and append them to the node's integer constraint list.
|
||
m_parikh->apply_to_node(node);
|
||
|
||
// Lightweight feasibility pre-check: does the Parikh modular constraint
|
||
// contradict the variable's current integer bounds? If so, mark this
|
||
// node as a Parikh-image conflict immediately (avoids a solver call).
|
||
if (!node.is_currently_conflict() && m_parikh->check_parikh_conflict(node)) {
|
||
node.set_general_conflict(true);
|
||
node.set_reason(backtrack_reason::parikh_image);
|
||
}
|
||
}
|
||
|
||
void nielsen_graph::assert_root_constraints_to_solver() {
|
||
if (m_root_constraints_asserted) return;
|
||
m_root_constraints_asserted = true;
|
||
// Constraint.Shared: assert all root-level length/Parikh constraints
|
||
// to m_solver at the base level (no push/pop). These include:
|
||
// - len(lhs) = len(rhs) for each non-trivial string equality
|
||
// - len(str) >= min_len and len(str) <= max_len for each regex membership
|
||
// - len(x) >= 0 for each variable appearing in the root constraints
|
||
// Making these visible to the solver before the DFS allows arithmetic
|
||
// pruning at every node, not just the root.
|
||
vector<length_constraint> constraints;
|
||
generate_length_constraints(constraints);
|
||
for (auto const& lc : constraints)
|
||
m_solver.assert_expr(lc.m_expr);
|
||
}
|
||
|
||
nielsen_graph::search_result nielsen_graph::solve() {
|
||
if (!m_root)
|
||
return search_result::sat;
|
||
|
||
try {
|
||
++m_stats.m_num_solve_calls;
|
||
m_sat_node = nullptr;
|
||
m_sat_path.reset();
|
||
|
||
// Constraint.Shared: assert root-level length/Parikh constraints to the
|
||
// solver at the base level, so they are visible during all feasibility checks.
|
||
assert_root_constraints_to_solver();
|
||
|
||
// Iterative deepening: increment by 1 on each failure.
|
||
// m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it.
|
||
m_depth_bound = 10;
|
||
while (true) {
|
||
if (!m().inc()) {
|
||
#ifdef Z3DEBUG
|
||
// Examining the Nielsen graph is probably the best way of debugging
|
||
std::string dot = to_dot();
|
||
IF_VERBOSE(1, verbose_stream() << dot << "\n";);
|
||
#endif
|
||
break;
|
||
}
|
||
if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth)
|
||
break;
|
||
inc_run_idx();
|
||
svector<nielsen_edge *> cur_path;
|
||
search_result r = search_dfs(m_root, 0, cur_path);
|
||
IF_VERBOSE(1, verbose_stream()
|
||
<< " depth_bound=" << m_depth_bound << " dfs_nodes=" << m_stats.m_num_dfs_nodes
|
||
<< " max_depth=" << m_stats.m_max_depth << " extensions=" << m_stats.m_num_extensions
|
||
<< " arith_prune=" << m_stats.m_num_arith_infeasible << " result="
|
||
<< (r == search_result::sat ? "SAT"
|
||
: r == search_result::unsat ? "UNSAT"
|
||
: "UNKNOWN")
|
||
<< "\n";);
|
||
if (r == search_result::sat) {
|
||
++m_stats.m_num_sat;
|
||
return r;
|
||
}
|
||
if (r == search_result::unsat) {
|
||
++m_stats.m_num_unsat;
|
||
return r;
|
||
}
|
||
// depth limit hit – double the bound and retry
|
||
m_depth_bound *= 2;
|
||
}
|
||
++m_stats.m_num_unknown;
|
||
return search_result::unknown;
|
||
}
|
||
catch(const std::exception&) {
|
||
#ifdef Z3DEBUG
|
||
std::string dot = to_dot();
|
||
#endif
|
||
throw;
|
||
}
|
||
}
|
||
|
||
nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path) {
|
||
++m_stats.m_num_dfs_nodes;
|
||
m_stats.m_max_depth = std::max(m_stats.m_max_depth, depth);
|
||
|
||
// check for external cancellation (timeout, user interrupt)
|
||
if (!m().inc())
|
||
return search_result::unknown;
|
||
|
||
// check DFS node budget (0 = unlimited)
|
||
if (m_max_nodes > 0 && m_stats.m_num_dfs_nodes > m_max_nodes)
|
||
return search_result::unknown;
|
||
|
||
// revisit detection: if already visited this run, return cached status.
|
||
// mirrors ZIPT's NielsenNode.GraphExpansion() evalIdx check.
|
||
if (node->eval_idx() == m_run_idx) {
|
||
if (node->is_satisfied()) {
|
||
m_sat_node = node;
|
||
m_sat_path = cur_path;
|
||
return search_result::sat;
|
||
}
|
||
if (node->is_currently_conflict())
|
||
return search_result::unsat;
|
||
return search_result::unknown;
|
||
}
|
||
node->set_eval_idx(m_run_idx);
|
||
|
||
// simplify constraints (idempotent after first call)
|
||
simplify_result sr = node->simplify_and_init(cur_path);
|
||
|
||
if (sr == simplify_result::conflict) {
|
||
++m_stats.m_num_simplify_conflict;
|
||
node->set_general_conflict(true);
|
||
return search_result::unsat;
|
||
}
|
||
|
||
// Apply Parikh image filter: generate modular length constraints and
|
||
// perform a lightweight feasibility pre-check. The filter is guarded
|
||
// internally (m_parikh_applied) so it only runs once per node.
|
||
// Note: Parikh filtering is skipped for satisfied nodes (returned above);
|
||
// a fully satisfied node has no remaining memberships to filter.
|
||
apply_parikh_to_node(*node);
|
||
if (node->is_currently_conflict()) {
|
||
++m_stats.m_num_simplify_conflict;
|
||
return search_result::unsat;
|
||
}
|
||
|
||
// Assert any new int_constraints added during simplify_and_init for this
|
||
// node into the current solver scope. Constraints inherited from the parent
|
||
// (indices 0..m_parent_ic_count-1) are already present at the enclosing
|
||
// scope level; only the newly-added tail needs to be asserted here.
|
||
// Also generate per-node |LHS| = |RHS| length constraints for descendant
|
||
// equations (root constraints are already at the base level).
|
||
generate_node_length_constraints(node);
|
||
assert_node_new_int_constraints(node);
|
||
|
||
// integer feasibility check: the solver now holds all path constraints
|
||
// incrementally; just query the solver directly
|
||
if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) {
|
||
node->set_reason(backtrack_reason::arithmetic);
|
||
++m_stats.m_num_arith_infeasible;
|
||
return search_result::unsat;
|
||
}
|
||
|
||
if (sr == simplify_result::satisfied || node->is_satisfied()) {
|
||
// Before declaring SAT, check leaf-node regex feasibility:
|
||
// for each variable with multiple regex constraints, verify
|
||
// that the intersection of all its regexes is non-empty.
|
||
// Mirrors ZIPT NielsenNode.CheckRegex.
|
||
if (!check_leaf_regex(*node)) {
|
||
node->set_general_conflict(true);
|
||
node->set_reason(backtrack_reason::regex);
|
||
return search_result::unsat;
|
||
}
|
||
m_sat_node = node;
|
||
m_sat_path = cur_path;
|
||
return search_result::sat;
|
||
}
|
||
|
||
// depth bound check
|
||
if (depth >= m_depth_bound)
|
||
return search_result::unknown;
|
||
|
||
// generate extensions only once per node; children persist across runs
|
||
if (!node->is_extended()) {
|
||
bool ext = generate_extensions(node);
|
||
if (!ext) {
|
||
node->set_extended(true);
|
||
// No extensions could be generated. If the node still has
|
||
// unsatisfied constraints with opaque (s_other) terms that
|
||
// we cannot decompose, report unknown rather than unsat
|
||
// to avoid unsoundness.
|
||
if (node->has_opaque_terms())
|
||
return search_result::unknown;
|
||
node->set_reason(backtrack_reason::children_failed);
|
||
return search_result::unsat;
|
||
}
|
||
node->set_extended(true);
|
||
++m_stats.m_num_extensions;
|
||
}
|
||
|
||
// explore children
|
||
bool any_unknown = false;
|
||
for (nielsen_edge *e : node->outgoing()) {
|
||
cur_path.push_back(e);
|
||
// Push a solver scope for this edge and assert its side integer
|
||
// constraints. The child's own new int_constraints will be asserted
|
||
// inside the recursive call (above). On return, pop the scope so
|
||
// that backtracking removes those assertions.
|
||
m_solver.push();
|
||
|
||
// Lazily compute substitution length constraints (|x| = |u|) on first
|
||
// traversal. This must happen before asserting side_int and before
|
||
// bumping mod counts, so that LHS uses the parent's counts and RHS
|
||
// uses the temporarily-bumped counts.
|
||
if (!e->len_constraints_computed()) {
|
||
add_subst_length_constraints(e);
|
||
e->set_len_constraints_computed(true);
|
||
}
|
||
|
||
for (auto const &ic : e->side_int())
|
||
m_solver.assert_expr(int_constraint_to_expr(ic));
|
||
|
||
// Bump modification counts for the child's context.
|
||
inc_edge_mod_counts(e);
|
||
|
||
search_result r = search_dfs(e->tgt(), e->is_progress() ? depth : depth + 1, cur_path);
|
||
|
||
// Restore modification counts on backtrack.
|
||
dec_edge_mod_counts(e);
|
||
|
||
m_solver.pop(1);
|
||
if (r == search_result::sat)
|
||
return search_result::sat;
|
||
cur_path.pop_back();
|
||
if (r == search_result::unknown)
|
||
any_unknown = true;
|
||
}
|
||
|
||
// If no children exist and the node has opaque terms, report unknown
|
||
if (node->outgoing().empty() && node->has_opaque_terms())
|
||
return search_result::unknown;
|
||
|
||
if (!any_unknown) {
|
||
node->set_reason(backtrack_reason::children_failed);
|
||
return search_result::unsat;
|
||
}
|
||
return search_result::unknown;
|
||
}
|
||
|
||
// Returns true if variable snode `var` appears anywhere in the token list of `n`.
|
||
static bool snode_contains_var(euf::snode const* n, euf::snode const* var) {
|
||
if (!n || !var)
|
||
return false;
|
||
euf::snode_vector tokens;
|
||
n->collect_tokens(tokens);
|
||
for (const euf::snode* t : tokens) {
|
||
if (t == var)
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
bool nielsen_graph::apply_det_modifier(nielsen_node* node) {
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
SASSERT(!eq.is_trivial()); // We should have simplified it away before
|
||
if (!eq.m_lhs || !eq.m_rhs)
|
||
continue;
|
||
|
||
// variable definition: x = t where x is a single var and x ∉ vars(t)
|
||
// → deterministically substitute x → t throughout the node
|
||
euf::snode* var = nullptr;
|
||
euf::snode* def;
|
||
if (eq.m_lhs->is_var() && !snode_contains_var(eq.m_rhs, eq.m_lhs)) {
|
||
var = eq.m_lhs;
|
||
def = eq.m_rhs;
|
||
}
|
||
else if (eq.m_rhs->is_var() && !snode_contains_var(eq.m_lhs, eq.m_rhs)) {
|
||
var = eq.m_rhs;
|
||
def = eq.m_lhs;
|
||
}
|
||
|
||
if (var) {
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(var, def, eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
bool nielsen_graph::apply_const_nielsen(nielsen_node* node) {
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial())
|
||
continue;
|
||
if (!eq.m_lhs || !eq.m_rhs)
|
||
continue;
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool fwd = (od == 0);
|
||
euf::snode* lhead = dir_token(eq.m_lhs, fwd);
|
||
euf::snode* rhead = dir_token(eq.m_rhs, fwd);
|
||
if (!lhead || !rhead)
|
||
continue;
|
||
|
||
// char vs var: branch 1: var -> ε, branch 2: var -> char·var
|
||
if (lhead->is_char() && rhead->is_var()) {
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(rhead, m_sg.mk_empty_seq(rhead->get_sort()), eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
}
|
||
{
|
||
euf::snode* replacement = dir_concat(m_sg, lhead, rhead, fwd);
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, false);
|
||
nielsen_subst s(rhead, replacement, eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
}
|
||
return true;
|
||
}
|
||
|
||
// var vs char: branch 1: var -> ε, branch 2: var -> char·var
|
||
if (rhead->is_char() && lhead->is_var()) {
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(lhead, m_sg.mk_empty_seq(lhead->get_sort()), eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
}
|
||
{
|
||
euf::snode* replacement = dir_concat(m_sg, rhead, lhead, fwd);
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, false);
|
||
nielsen_subst s(lhead, replacement, eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
}
|
||
return true;
|
||
}
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
bool nielsen_graph::apply_var_nielsen(nielsen_node* node) {
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial())
|
||
continue;
|
||
if (!eq.m_lhs || !eq.m_rhs)
|
||
continue;
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool fwd = (od == 0);
|
||
euf::snode* lhead = dir_token(eq.m_lhs, fwd);
|
||
euf::snode* rhead = dir_token(eq.m_rhs, fwd);
|
||
if (!lhead || !rhead)
|
||
continue;
|
||
if (!lhead->is_var() || !rhead->is_var())
|
||
continue;
|
||
if (lhead->id() == rhead->id())
|
||
continue;
|
||
|
||
// x·A = y·B where x,y are distinct variables (classic Nielsen)
|
||
// child 1: x → ε (progress)
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(lhead, m_sg.mk_empty_seq(lhead->get_sort()), eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
}
|
||
// child 2: y → ε (progress)
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(rhead, m_sg.mk_empty_seq(rhead->get_sort()), eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
}
|
||
// child 3: x → y·x (no progress)
|
||
{
|
||
euf::snode* replacement = dir_concat(m_sg, rhead, lhead, fwd);
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, false);
|
||
nielsen_subst s(lhead, replacement, eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
}
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// EqSplit helpers: token length classification
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::token_has_variable_length(euf::snode* tok) const {
|
||
// Chars and units have known constant length 1.
|
||
if (tok->is_char() || tok->is_unit())
|
||
return false;
|
||
// Variables and powers have symbolic/unknown length.
|
||
if (tok->is_var() || tok->is_power())
|
||
return true;
|
||
// For s_other: check if it's a string literal (known constant length).
|
||
if (tok->get_expr()) {
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
zstring s;
|
||
if (seq.str.is_string(tok->get_expr(), s))
|
||
return false;
|
||
}
|
||
// Everything else is treated as variable length.
|
||
return true;
|
||
}
|
||
|
||
unsigned nielsen_graph::token_const_length(euf::snode* tok) const {
|
||
if (tok->is_char() || tok->is_unit())
|
||
return 1;
|
||
if (tok->get_expr()) {
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
zstring s;
|
||
if (seq.str.is_string(tok->get_expr(), s))
|
||
return s.length();
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// EqSplit: find_eq_split_point
|
||
// Port of ZIPT's StrEq.SplitEq algorithm.
|
||
//
|
||
// Walks tokens from each side tracking two accumulators:
|
||
// - lhs_has_symbolic / rhs_has_symbolic : whether a variable-length token
|
||
// has been consumed on that side since the last reset
|
||
// - const_diff : net constant-length difference (LHS constants − RHS constants)
|
||
//
|
||
// A potential split point arises when both accumulators are "zero"
|
||
// (no outstanding symbolic contributions on either side), meaning
|
||
// the prefix lengths are determined up to a constant offset (const_diff).
|
||
//
|
||
// Among all such split points, we pick the one minimising |const_diff|
|
||
// (the padding amount). We also require having seen at least one variable-
|
||
// length token before accepting a split, so that the split is non-trivial.
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::find_eq_split_point(
|
||
euf::snode_vector const& lhs_toks,
|
||
euf::snode_vector const& rhs_toks,
|
||
unsigned& out_lhs_idx,
|
||
unsigned& out_rhs_idx,
|
||
int& out_padding) const {
|
||
|
||
unsigned lhs_len = lhs_toks.size();
|
||
unsigned rhs_len = rhs_toks.size();
|
||
if (lhs_len <= 1 || rhs_len <= 1)
|
||
return false;
|
||
|
||
// Initialize accumulators with the first token on each side (index 0).
|
||
bool lhs_has_symbolic = token_has_variable_length(lhs_toks[0]);
|
||
bool rhs_has_symbolic = token_has_variable_length(rhs_toks[0]);
|
||
int const_diff = 0;
|
||
if (!lhs_has_symbolic)
|
||
const_diff += (int)token_const_length(lhs_toks[0]);
|
||
if (!rhs_has_symbolic)
|
||
const_diff -= (int)token_const_length(rhs_toks[0]);
|
||
|
||
bool seen_variable = lhs_has_symbolic || rhs_has_symbolic;
|
||
|
||
// Best confirmed split point.
|
||
bool has_best = false;
|
||
unsigned best_lhs = 0, best_rhs = 0;
|
||
int best_padding = 0;
|
||
|
||
// Pending split (needs confirmation by a subsequent variable token).
|
||
bool has_pending = false;
|
||
unsigned pending_lhs = 0, pending_rhs = 0;
|
||
int pending_padding = 0;
|
||
|
||
unsigned li = 1, ri = 1;
|
||
|
||
while (li < lhs_len || ri < rhs_len) {
|
||
bool lhs_zero = !lhs_has_symbolic;
|
||
bool rhs_zero = !rhs_has_symbolic;
|
||
|
||
// Potential split point: both symbolic accumulators are zero.
|
||
if (seen_variable && lhs_zero && rhs_zero) {
|
||
if (!has_pending || std::abs(const_diff) < std::abs(pending_padding)) {
|
||
has_pending = true;
|
||
pending_padding = const_diff;
|
||
pending_lhs = li;
|
||
pending_rhs = ri;
|
||
}
|
||
}
|
||
|
||
// Decide which side to consume from.
|
||
// Prefer consuming from the side whose symbolic accumulator is zero
|
||
// (i.e., that side has no outstanding variable contributions).
|
||
bool consume_lhs;
|
||
if (lhs_zero && !rhs_zero)
|
||
consume_lhs = true;
|
||
else if (!lhs_zero && rhs_zero)
|
||
consume_lhs = false;
|
||
else if (lhs_zero && rhs_zero)
|
||
consume_lhs = (const_diff <= 0); // consume from side with less accumulated constant
|
||
else
|
||
consume_lhs = (li <= ri); // both non-zero: round-robin
|
||
|
||
if (consume_lhs) {
|
||
if (li >= lhs_len) break;
|
||
euf::snode* tok = lhs_toks[li++];
|
||
bool is_var = token_has_variable_length(tok);
|
||
if (is_var) {
|
||
// Confirm any pending split point.
|
||
if (has_pending && (!has_best || std::abs(pending_padding) < std::abs(best_padding))) {
|
||
has_best = true;
|
||
best_padding = pending_padding;
|
||
best_lhs = pending_lhs;
|
||
best_rhs = pending_rhs;
|
||
has_pending = false;
|
||
}
|
||
seen_variable = true;
|
||
lhs_has_symbolic = true;
|
||
} else {
|
||
const_diff += (int)token_const_length(tok);
|
||
}
|
||
} else {
|
||
if (ri >= rhs_len) break;
|
||
euf::snode* tok = rhs_toks[ri++];
|
||
bool is_var = token_has_variable_length(tok);
|
||
if (is_var) {
|
||
if (has_pending && (!has_best || std::abs(pending_padding) < std::abs(best_padding))) {
|
||
has_best = true;
|
||
best_padding = pending_padding;
|
||
best_lhs = pending_lhs;
|
||
best_rhs = pending_rhs;
|
||
has_pending = false;
|
||
}
|
||
seen_variable = true;
|
||
rhs_has_symbolic = true;
|
||
} else {
|
||
const_diff -= (int)token_const_length(tok);
|
||
}
|
||
}
|
||
}
|
||
|
||
if (!has_best) return false;
|
||
|
||
// A split at the very start or very end is degenerate — skip.
|
||
if ((best_lhs == 0 && best_rhs == 0) ||
|
||
(best_lhs == lhs_len && best_rhs == rhs_len))
|
||
return false;
|
||
|
||
out_lhs_idx = best_lhs;
|
||
out_rhs_idx = best_rhs;
|
||
out_padding = best_padding;
|
||
return true;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// apply_eq_split — Port of ZIPT's EqSplitModifier.Apply
|
||
//
|
||
// For a regex-free equation LHS = RHS, finds a split point and decomposes
|
||
// into two shorter equations with optional padding variable:
|
||
//
|
||
// eq1: LHS[0..lhsIdx] · [pad if padding<0] = [pad if padding>0] · RHS[0..rhsIdx]
|
||
// eq2: LHS[lhsIdx..] · [pad if padding>0] = [pad if padding<0] · RHS[rhsIdx..]
|
||
//
|
||
// Creates a single progress child.
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_eq_split(nielsen_node* node) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
arith_util arith(m);
|
||
|
||
for (unsigned eq_idx = 0; eq_idx < node->str_eqs().size(); ++eq_idx) {
|
||
str_eq const& eq = node->str_eqs()[eq_idx];
|
||
if (eq.is_trivial())
|
||
continue;
|
||
if (!eq.m_lhs || !eq.m_rhs)
|
||
continue;
|
||
// EqSplit only applies to regex-free equations.
|
||
if (!eq.m_lhs->is_regex_free() || !eq.m_rhs->is_regex_free())
|
||
continue;
|
||
|
||
euf::snode_vector lhs_toks, rhs_toks;
|
||
eq.m_lhs->collect_tokens(lhs_toks);
|
||
eq.m_rhs->collect_tokens(rhs_toks);
|
||
if (lhs_toks.empty() || rhs_toks.empty())
|
||
continue;
|
||
|
||
unsigned split_lhs = 0, split_rhs = 0;
|
||
int padding = 0;
|
||
if (!find_eq_split_point(lhs_toks, rhs_toks, split_lhs, split_rhs, padding))
|
||
continue;
|
||
|
||
// Split the equation sides into prefix / suffix.
|
||
euf::snode* lhs_prefix = m_sg.drop_right(eq.m_lhs, lhs_toks.size() - split_lhs);
|
||
euf::snode* lhs_suffix = m_sg.drop_left(eq.m_lhs, split_lhs);
|
||
euf::snode* rhs_prefix = m_sg.drop_right(eq.m_rhs, rhs_toks.size() - split_rhs);
|
||
euf::snode* rhs_suffix = m_sg.drop_left(eq.m_rhs, split_rhs);
|
||
|
||
// Build the two new equations, incorporating a padding variable if needed.
|
||
euf::snode* eq1_lhs = lhs_prefix;
|
||
euf::snode* eq1_rhs = rhs_prefix;
|
||
euf::snode* eq2_lhs = lhs_suffix;
|
||
euf::snode* eq2_rhs = rhs_suffix;
|
||
|
||
euf::snode* pad = nullptr;
|
||
if (padding != 0) {
|
||
pad = mk_fresh_var();
|
||
if (padding > 0) {
|
||
// LHS prefix is longer by |padding| constants.
|
||
// Prepend pad to RHS prefix, append pad to LHS suffix.
|
||
eq1_rhs = m_sg.mk_concat(pad, rhs_prefix);
|
||
eq2_lhs = m_sg.mk_concat(lhs_suffix, pad);
|
||
} else {
|
||
// RHS prefix is longer by |padding| constants.
|
||
// Append pad to LHS prefix, prepend pad to RHS suffix.
|
||
eq1_lhs = m_sg.mk_concat(lhs_prefix, pad);
|
||
eq2_rhs = m_sg.mk_concat(pad, rhs_suffix);
|
||
}
|
||
}
|
||
|
||
// Create single progress child.
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
|
||
// Remove the original equation and add the two new ones.
|
||
auto& eqs = child->str_eqs();
|
||
eqs[eq_idx] = eqs.back();
|
||
eqs.pop_back();
|
||
eqs.push_back(str_eq(eq1_lhs, eq1_rhs, eq.m_dep));
|
||
eqs.push_back(str_eq(eq2_lhs, eq2_rhs, eq.m_dep));
|
||
|
||
// Int constraints on the edge.
|
||
// 1) len(pad) = |padding| (if padding variable was created)
|
||
if (pad && pad->get_expr()) {
|
||
expr_ref len_pad(seq.str.mk_length(pad->get_expr()), m);
|
||
expr_ref abs_pad(arith.mk_int(std::abs(padding)), m);
|
||
e->add_side_int(mk_int_constraint(len_pad, abs_pad, int_constraint_kind::eq, eq.m_dep));
|
||
}
|
||
// 2) len(eq1_lhs) = len(eq1_rhs)
|
||
expr_ref l1 = compute_length_expr(eq1_lhs);
|
||
expr_ref r1 = compute_length_expr(eq1_rhs);
|
||
e->add_side_int(mk_int_constraint(l1, r1, int_constraint_kind::eq, eq.m_dep));
|
||
|
||
// 3) len(eq2_lhs) = len(eq2_rhs)
|
||
expr_ref l2 = compute_length_expr(eq2_lhs);
|
||
expr_ref r2 = compute_length_expr(eq2_rhs);
|
||
e->add_side_int(mk_int_constraint(l2, r2, int_constraint_kind::eq, eq.m_dep));
|
||
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// nielsen_graph: mk_fresh_var
|
||
// -----------------------------------------------------------------------
|
||
|
||
euf::snode* nielsen_graph::mk_fresh_var() {
|
||
++m_stats.m_num_fresh_vars;
|
||
std::string name = "v!" + std::to_string(m_fresh_cnt++);
|
||
return m_sg.mk_var(symbol(name.c_str()));
|
||
}
|
||
|
||
euf::snode* nielsen_graph::mk_fresh_char_var() {
|
||
++m_stats.m_num_fresh_vars;
|
||
std::string name = "?c!" + std::to_string(m_fresh_cnt++);
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
ast_manager& m = m_sg.get_manager();
|
||
sort* char_sort = seq.mk_char_sort();
|
||
expr_ref fresh_const(m.mk_fresh_const(name.c_str(), char_sort), m);
|
||
expr_ref unit(seq.str.mk_unit(fresh_const), m);
|
||
return m_sg.mk(unit);
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// nielsen_graph: apply_regex_char_split
|
||
// -----------------------------------------------------------------------
|
||
|
||
void nielsen_graph::collect_first_chars(euf::snode* re, euf::snode_vector& chars) {
|
||
if (!re)
|
||
return;
|
||
|
||
// to_re(s): the first character of the string s
|
||
if (re->is_to_re()) {
|
||
euf::snode* body = re->arg(0);
|
||
if (body && !body->is_empty()) {
|
||
euf::snode* first = body->first();
|
||
if (first && first->is_char()) {
|
||
bool dup = false;
|
||
for (euf::snode* c : chars)
|
||
if (c == first) { dup = true; break; }
|
||
if (!dup)
|
||
chars.push_back(first);
|
||
}
|
||
// Handle string literals (classified as s_other in sgraph):
|
||
// extract the first character from the zstring and create a char snode.
|
||
else if (first && first->get_expr()) {
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
zstring s;
|
||
if (seq.str.is_string(first->get_expr(), s) && s.length() > 0) {
|
||
euf::snode* ch = m_sg.mk_char(s[0]);
|
||
bool dup = false;
|
||
for (euf::snode* c : chars)
|
||
if (c == ch) { dup = true; break; }
|
||
if (!dup)
|
||
chars.push_back(ch);
|
||
}
|
||
}
|
||
}
|
||
return;
|
||
}
|
||
|
||
// full character set (.): use 'a' as representative
|
||
if (re->is_full_char()) {
|
||
euf::snode* ch = m_sg.mk_char('a');
|
||
bool dup = false;
|
||
for (euf::snode* c : chars)
|
||
if (c == ch) { dup = true; break; }
|
||
if (!dup)
|
||
chars.push_back(ch);
|
||
return;
|
||
}
|
||
|
||
// re.range(lo, hi): use lo as representative
|
||
if (re->get_expr()) {
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
expr* lo = nullptr, *hi = nullptr;
|
||
if (seq.re.is_range(re->get_expr(), lo, hi) && lo) {
|
||
unsigned ch_val = 'a';
|
||
if (seq.is_const_char(lo, ch_val)) {
|
||
euf::snode* ch = m_sg.mk_char(ch_val);
|
||
bool dup = false;
|
||
for (euf::snode* c : chars)
|
||
if (c == ch) { dup = true; break; }
|
||
if (!dup)
|
||
chars.push_back(ch);
|
||
}
|
||
return;
|
||
}
|
||
}
|
||
|
||
// fail, full_seq: no concrete first chars to extract
|
||
if (re->is_fail() || re->is_full_seq())
|
||
return;
|
||
|
||
// recurse into children (handles union, concat, star, loop, etc.)
|
||
for (unsigned i = 0; i < re->num_args(); ++i)
|
||
collect_first_chars(re->arg(i), chars);
|
||
}
|
||
|
||
bool nielsen_graph::apply_regex_char_split(nielsen_node* node) {
|
||
for (str_mem const& mem : node->str_mems()) {
|
||
if (!mem.m_str || !mem.m_regex)
|
||
continue;
|
||
|
||
// find the first token of the string side
|
||
euf::snode* first = mem.m_str->first();
|
||
if (!first || !first->is_var())
|
||
continue;
|
||
|
||
// collect concrete characters from the regex
|
||
euf::snode_vector chars;
|
||
collect_first_chars(mem.m_regex, chars);
|
||
|
||
// If no concrete characters found, fall through to apply_regex_var_split
|
||
if (chars.empty())
|
||
continue;
|
||
|
||
bool created = false;
|
||
|
||
// for each character c with non-fail derivative:
|
||
// child: x → c · x
|
||
for (euf::snode* ch : chars) {
|
||
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, ch);
|
||
if (!deriv || deriv->is_fail())
|
||
continue;
|
||
|
||
euf::snode* replacement = m_sg.mk_concat(ch, first);
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, false);
|
||
nielsen_subst s(first, replacement, mem.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
created = true;
|
||
}
|
||
|
||
// x → ε branch (variable is empty)
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(first, m_sg.mk_empty_seq(first->get_sort()), mem.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
created = true;
|
||
}
|
||
|
||
if (created)
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
bool nielsen_graph::generate_extensions(nielsen_node *node) {
|
||
// The first modifier that produces edges is used and returned immediately.
|
||
|
||
// Priority 1: deterministic modifiers (single child, always progress)
|
||
if (apply_det_modifier(node))
|
||
return ++m_stats.m_mod_det, true;
|
||
|
||
// Priority 2: PowerEpsilon - power → ε via base=ε or n=0
|
||
if (apply_power_epsilon(node))
|
||
return ++m_stats.m_mod_power_epsilon, true;
|
||
|
||
// Priority 3: NumCmp - length comparison branching for power tokens
|
||
if (apply_num_cmp(node))
|
||
return ++m_stats.m_mod_num_cmp, true;
|
||
|
||
// Priority 3b: SplitPowerElim - CommPower-based branching when
|
||
// one side has a power and the other side has same-base occurrences
|
||
// but ordering is unknown.
|
||
if (apply_split_power_elim(node))
|
||
return ++m_stats.m_mod_split_power_elim, true;
|
||
|
||
// Priority 4: ConstNumUnwinding - power vs constant: n=0 or peel
|
||
if (apply_const_num_unwinding(node))
|
||
return ++m_stats.m_mod_const_num_unwinding, true;
|
||
|
||
// Priority 5: EqSplit - split equations into two (single progress child)
|
||
if (apply_eq_split(node))
|
||
return ++m_stats.m_mod_eq_split, true;
|
||
|
||
// Priority 6: StarIntr - stabilizer introduction for regex cycles
|
||
if (apply_star_intr(node))
|
||
return ++m_stats.m_mod_star_intr, true;
|
||
|
||
// Priority 7: GPowerIntr - ground power introduction
|
||
if (apply_gpower_intr(node))
|
||
return ++m_stats.m_mod_gpower_intr, true;
|
||
|
||
// Priority 8: ConstNielsen - char vs var (2 children)
|
||
if (apply_const_nielsen(node))
|
||
return ++m_stats.m_mod_const_nielsen, true;
|
||
|
||
// Priority 9: RegexCharSplit - split str_mem by first-position chars
|
||
if (apply_regex_char_split(node))
|
||
return ++m_stats.m_mod_regex_char_split, true;
|
||
|
||
// Priority 10: RegexVarSplit - split str_mem by minterms
|
||
if (apply_regex_var_split(node))
|
||
return ++m_stats.m_mod_regex_var_split, true;
|
||
|
||
// Priority 11: PowerSplit - power unwinding with bounded prefix
|
||
if (apply_power_split(node))
|
||
return ++m_stats.m_mod_power_split, true;
|
||
|
||
// Priority 12: VarNielsen - var vs var, all progress (classic Nielsen)
|
||
if (apply_var_nielsen(node))
|
||
return ++m_stats.m_mod_var_nielsen, true;
|
||
|
||
// Priority 13: VarNumUnwinding - variable power unwinding
|
||
if (apply_var_num_unwinding(node))
|
||
return ++m_stats.m_mod_var_num_unwinding, true;
|
||
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Helper: find a power token in any str_eq
|
||
// -----------------------------------------------------------------------
|
||
|
||
euf::snode* nielsen_graph::find_power_token(nielsen_node* node) const {
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial()) continue;
|
||
if (!eq.m_lhs || !eq.m_rhs) continue;
|
||
euf::snode_vector toks;
|
||
eq.m_lhs->collect_tokens(toks);
|
||
for (euf::snode* t : toks)
|
||
if (t->is_power()) return t;
|
||
toks.reset();
|
||
eq.m_rhs->collect_tokens(toks);
|
||
for (euf::snode* t : toks)
|
||
if (t->is_power()) return t;
|
||
}
|
||
return nullptr;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Helper: find a power token facing a constant (char) head
|
||
// Returns true if found, sets power, other_head, eq_out.
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::find_power_vs_non_var(nielsen_node* node,
|
||
euf::snode*& power,
|
||
euf::snode*& other_head,
|
||
str_eq const*& eq_out,
|
||
bool& fwd) const {
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial()) continue;
|
||
if (!eq.m_lhs || !eq.m_rhs) continue;
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool local_fwd = (od == 0);
|
||
euf::snode* lhead = dir_token(eq.m_lhs, local_fwd);
|
||
euf::snode* rhead = dir_token(eq.m_rhs, local_fwd);
|
||
// Match power vs any non-variable, non-empty token (char, unit,
|
||
// power with different base, etc.).
|
||
// Same-base power vs power is handled by NumCmp (priority 3).
|
||
// Power vs variable is handled by PowerSplit (priority 11).
|
||
// Power vs empty is handled by PowerEpsilon (priority 2).
|
||
if (lhead && lhead->is_power() && rhead && !rhead->is_var() && !rhead->is_empty()) {
|
||
power = lhead;
|
||
other_head = rhead;
|
||
eq_out = &eq;
|
||
fwd = local_fwd;
|
||
return true;
|
||
}
|
||
if (rhead && rhead->is_power() && lhead && !lhead->is_var() && !lhead->is_empty()) {
|
||
power = rhead;
|
||
other_head = lhead;
|
||
eq_out = &eq;
|
||
fwd = local_fwd;
|
||
return true;
|
||
}
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Helper: find a power token facing a variable head
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::find_power_vs_var(nielsen_node* node,
|
||
euf::snode*& power,
|
||
euf::snode*& var_head,
|
||
str_eq const*& eq_out,
|
||
bool& fwd) const {
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial()) continue;
|
||
if (!eq.m_lhs || !eq.m_rhs) continue;
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool local_fwd = (od == 0);
|
||
euf::snode* lhead = dir_token(eq.m_lhs, local_fwd);
|
||
euf::snode* rhead = dir_token(eq.m_rhs, local_fwd);
|
||
if (lhead && lhead->is_power() && rhead && rhead->is_var()) {
|
||
power = lhead;
|
||
var_head = rhead;
|
||
eq_out = &eq;
|
||
fwd = local_fwd;
|
||
return true;
|
||
}
|
||
if (rhead && rhead->is_power() && lhead && lhead->is_var()) {
|
||
power = rhead;
|
||
var_head = lhead;
|
||
eq_out = &eq;
|
||
fwd = local_fwd;
|
||
return true;
|
||
}
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modifier: apply_power_epsilon
|
||
// Fires only when one side of an equation is empty and the other side
|
||
// starts with a power token u^n. In that case, branch:
|
||
// (1) base u → ε (base is empty, so u^n = ε)
|
||
// (2) u^n → ε (the power is zero, replace power with empty)
|
||
// mirrors ZIPT's PowerEpsilonModifier (which requires LHS.IsEmpty())
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_power_epsilon(nielsen_node* node) {
|
||
// Match only when one equation side is empty and the other starts
|
||
// with a power. This mirrors ZIPT where PowerEpsilonModifier is
|
||
// constructed only inside the "if (LHS.IsEmpty())" branch of
|
||
// ExtendDir. When both sides are non-empty and a power faces a
|
||
// constant, ConstNumUnwinding (priority 4) handles it with both
|
||
// n=0 and n≥1 branches.
|
||
euf::snode* power = nullptr;
|
||
dep_tracker dep = m_dep_mgr.mk_empty();
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial()) continue;
|
||
if (!eq.m_lhs || !eq.m_rhs) continue;
|
||
if (eq.m_lhs->is_empty() && eq.m_rhs->first() && eq.m_rhs->first()->is_power()) {
|
||
power = eq.m_rhs->first();
|
||
dep = eq.m_dep;
|
||
break;
|
||
}
|
||
if (eq.m_rhs->is_empty() && eq.m_lhs->first() && eq.m_lhs->first()->is_power()) {
|
||
power = eq.m_lhs->first();
|
||
dep = eq.m_dep;
|
||
break;
|
||
}
|
||
}
|
||
if (!power)
|
||
return false;
|
||
|
||
SASSERT(power->is_power() && power->num_args() >= 1);
|
||
euf::snode *base = power->arg(0);
|
||
|
||
nielsen_node* child;
|
||
nielsen_edge* e;
|
||
|
||
// Branch 1: base → ε (if base is a variable, substitute it to empty)
|
||
// This makes u^n = ε^n = ε for any n.
|
||
if (base->is_var()) {
|
||
child = mk_child(node);
|
||
e = mk_edge(node, child, true);
|
||
nielsen_subst s1(base, m_sg.mk_empty_seq(base->get_sort()), dep);
|
||
e->add_subst(s1);
|
||
child->apply_subst(m_sg, s1);
|
||
}
|
||
|
||
// Branch 2: replace the power token itself with ε (n = 0 semantics)
|
||
child = mk_child(node);
|
||
e = mk_edge(node, child, true);
|
||
nielsen_subst s2(power, m_sg.mk_empty_seq(power->get_sort()), dep);
|
||
e->add_subst(s2);
|
||
child->apply_subst(m_sg, s2);
|
||
|
||
return true;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modifier: apply_num_cmp
|
||
// For equations involving two power tokens u^m and u^n with the same base,
|
||
// branch on the numeric relationship: m <= n vs n < m.
|
||
// Generates proper integer side constraints for each branch.
|
||
// mirrors ZIPT's NumCmpModifier
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_num_cmp(nielsen_node* node) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
|
||
// Look for two directional endpoint power tokens with the same base.
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial())
|
||
continue;
|
||
if (!eq.m_lhs || !eq.m_rhs)
|
||
continue;
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool fwd = (od == 0);
|
||
euf::snode* lhead = dir_token(eq.m_lhs, fwd);
|
||
euf::snode* rhead = dir_token(eq.m_rhs, fwd);
|
||
if (!lhead || !rhead) continue;
|
||
if (!lhead->is_power() || !rhead->is_power())
|
||
continue;
|
||
if (lhead->num_args() < 1 || rhead->num_args() < 1)
|
||
continue;
|
||
// same base: compare the two powers
|
||
if (lhead->arg(0)->id() != rhead->arg(0)->id())
|
||
continue;
|
||
|
||
// Skip if the exponents differ by a constant — simplify_and_init's
|
||
// directional power elimination already handles that case.
|
||
expr* exp_m = get_power_exponent(lhead);
|
||
expr* exp_n = get_power_exponent(rhead);
|
||
if (!exp_m || !exp_n)
|
||
continue;
|
||
rational diff;
|
||
SASSERT(!get_const_power_diff(exp_n, exp_m, arith, diff)); // handled by simplification
|
||
|
||
// Branch 1 (explored first): n < m (i.e., m >= n + 1)
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
expr_ref n_plus_1(arith.mk_add(exp_n, arith.mk_int(1)), m);
|
||
e->add_side_int(mk_int_constraint(
|
||
exp_m, n_plus_1,
|
||
int_constraint_kind::ge, eq.m_dep));
|
||
}
|
||
// Branch 2 (explored second): m <= n (i.e., n >= m)
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
e->add_side_int(mk_int_constraint(
|
||
exp_n, exp_m,
|
||
int_constraint_kind::ge, eq.m_dep));
|
||
}
|
||
return true;
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modifier: apply_split_power_elim
|
||
// When one side starts with a power w^p, call CommPower on the other
|
||
// side to count base-pattern occurrences c. If c > 0 and the ordering
|
||
// between p and c cannot be determined, create two branches:
|
||
// Branch 1: p < c (add constraint c ≥ p + 1)
|
||
// Branch 2: c ≤ p (add constraint p ≥ c)
|
||
// After branching, simplify_and_init's CommPower pass (3c) can resolve
|
||
// the ordering deterministically and cancel the matched portion.
|
||
// mirrors ZIPT's SplitPowerElim + NumCmpModifier
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_split_power_elim(nielsen_node* node) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
seq_util seq(m);
|
||
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial()) continue;
|
||
if (!eq.m_lhs || !eq.m_rhs) continue;
|
||
|
||
for (int side = 0; side < 2; ++side) {
|
||
euf::snode* pow_side = (side == 0) ? eq.m_lhs : eq.m_rhs;
|
||
euf::snode* other_side = (side == 0) ? eq.m_rhs : eq.m_lhs;
|
||
if (!pow_side || !other_side) continue;
|
||
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool fwd = (od == 0);
|
||
euf::snode* end_tok = dir_token(pow_side, fwd);
|
||
if (!end_tok || !end_tok->is_power()) continue;
|
||
euf::snode* base_sn = end_tok->arg(0);
|
||
expr* pow_exp = get_power_exp_expr(end_tok, seq);
|
||
if (!base_sn || !pow_exp) continue;
|
||
|
||
auto [count, consumed] = comm_power(base_sn, other_side, m, fwd);
|
||
if (!count.get() || consumed == 0) continue;
|
||
|
||
expr_ref norm_count = normalize_arith(m, count);
|
||
|
||
// Skip if ordering is already deterministic — simplify_and_init
|
||
// pass 3c should have handled it.
|
||
rational diff;
|
||
if (get_const_power_diff(norm_count, pow_exp, arith, diff))
|
||
continue;
|
||
|
||
// Branch 1: pow_exp < count (i.e., count >= pow_exp + 1)
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
expr_ref pow_plus1(arith.mk_add(pow_exp, arith.mk_int(1)), m);
|
||
e->add_side_int(mk_int_constraint(
|
||
norm_count, pow_plus1,
|
||
int_constraint_kind::ge, eq.m_dep));
|
||
}
|
||
// Branch 2: count <= pow_exp (i.e., pow_exp >= count)
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
e->add_side_int(mk_int_constraint(
|
||
pow_exp, norm_count,
|
||
int_constraint_kind::ge, eq.m_dep));
|
||
}
|
||
return true;
|
||
}
|
||
}
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modifier: apply_const_num_unwinding
|
||
// For a power token u^n facing a constant (char) head,
|
||
// branch: (1) n = 0 → u^n = ε, (2) n >= 1 → peel one u from power.
|
||
// Generates integer side constraints for each branch.
|
||
// mirrors ZIPT's ConstNumUnwindingModifier
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_const_num_unwinding(nielsen_node* node) {
|
||
ast_manager &m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
|
||
euf::snode *power = nullptr;
|
||
euf::snode *other_head = nullptr;
|
||
str_eq const *eq = nullptr;
|
||
bool fwd = true;
|
||
if (!find_power_vs_non_var(node, power, other_head, eq, fwd))
|
||
return false;
|
||
|
||
SASSERT(power->is_power() && power->num_args() >= 1);
|
||
euf::snode *base = power->arg(0);
|
||
expr *exp_n = get_power_exponent(power);
|
||
expr *zero = arith.mk_int(0);
|
||
expr *one = arith.mk_int(1);
|
||
|
||
// Branch 1 (explored first): n = 0 → replace power with ε (progress)
|
||
// Side constraint: n = 0
|
||
nielsen_node *child = mk_child(node);
|
||
nielsen_edge *e = mk_edge(node, child, true);
|
||
nielsen_subst s1(power, m_sg.mk_empty_seq(power->get_sort()), eq->m_dep);
|
||
e->add_subst(s1);
|
||
child->apply_subst(m_sg, s1);
|
||
if (exp_n)
|
||
e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep));
|
||
|
||
// Branch 2 (explored second): n >= 1 → peel one u: replace u^n with u · u^(n-1)
|
||
// Side constraint: n >= 1
|
||
// Create a proper nested power base^(n-1) instead of a fresh string variable.
|
||
// This preserves power structure so that simplify_and_init can merge and
|
||
// cancel adjacent same-base powers (mirroring ZIPT's SimplifyPowerUnwind).
|
||
// Explored first because the n≥1 branch is typically more productive
|
||
// for SAT instances (preserves power structure).
|
||
seq_util &seq = m_sg.get_seq_util();
|
||
expr *power_e = power->get_expr();
|
||
SASSERT(power_e);
|
||
expr *base_expr = to_app(power_e)->get_arg(0);
|
||
expr_ref n_minus_1 = normalize_arith(m, arith.mk_sub(exp_n, one));
|
||
expr_ref nested_pow(seq.str.mk_power(base_expr, n_minus_1), m);
|
||
euf::snode *nested_power_snode = m_sg.mk(nested_pow);
|
||
|
||
euf::snode *replacement = dir_concat(m_sg, base, nested_power_snode, fwd);
|
||
child = mk_child(node);
|
||
e = mk_edge(node, child, false);
|
||
nielsen_subst s2(power, replacement, eq->m_dep);
|
||
e->add_subst(s2);
|
||
child->apply_subst(m_sg, s2);
|
||
if (exp_n)
|
||
e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep));
|
||
|
||
return true;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modifier: apply_star_intr
|
||
// Star introduction: for a str_mem x·s ∈ R where a cycle is detected
|
||
// (backedge exists), introduce a stabilizer constraint x ∈ base*.
|
||
// Creates a child that splits x into pr·po, adds pr ∈ base* and
|
||
// po·s ∈ R, with a blocking constraint on po.
|
||
// Enhanced to use strengthened_stabilizer and register via add_stabilizer.
|
||
// mirrors ZIPT's StarIntrModifier (StarIntrModifier.cs:22-85)
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_star_intr(nielsen_node* node) {
|
||
// Only fire if a backedge was set (cycle detected)
|
||
if (!node->backedge())
|
||
return false;
|
||
|
||
// Look for a str_mem with a variable-headed string
|
||
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
|
||
str_mem const& mem = node->str_mems()[mi];
|
||
if (!mem.m_str || !mem.m_regex) continue;
|
||
euf::snode* first = mem.m_str->first();
|
||
if (!first || !first->is_var()) continue;
|
||
|
||
euf::snode* x = first;
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
ast_manager& mgr = m_sg.get_manager();
|
||
expr* re_expr = mem.m_regex->get_expr();
|
||
if (!re_expr)
|
||
continue;
|
||
|
||
// Determine the stabilizer base:
|
||
// 1. If self-stabilizing, use regex itself as stabilizer
|
||
// 2. Otherwise, try strengthened_stabilizer from cycle history
|
||
// 3. Fall back to simple star(R)
|
||
euf::snode* stab_base = nullptr;
|
||
|
||
if (m_seq_regex && m_seq_regex->is_self_stabilizing(mem.m_regex)) {
|
||
stab_base = mem.m_regex;
|
||
}
|
||
else if (m_seq_regex && mem.m_history) {
|
||
stab_base = m_seq_regex->strengthened_stabilizer(mem.m_regex, mem.m_history);
|
||
}
|
||
|
||
if (!stab_base) {
|
||
// Fall back: simple star of the cycle regex
|
||
stab_base = mem.m_regex;
|
||
}
|
||
|
||
// Register the stabilizer
|
||
if (m_seq_regex)
|
||
m_seq_regex->add_stabilizer(mem.m_regex, stab_base);
|
||
|
||
// Check if stabilizer equals regex → self-stabilizing
|
||
if (m_seq_regex && stab_base == mem.m_regex)
|
||
m_seq_regex->set_self_stabilizing(mem.m_regex);
|
||
|
||
// Build stab_star = star(stab_base)
|
||
expr* stab_expr = stab_base->get_expr();
|
||
if (!stab_expr)
|
||
continue;
|
||
expr_ref star_re(seq.re.mk_star(stab_expr), mgr);
|
||
euf::snode* star_sn = m_sg.mk(star_re);
|
||
if (!star_sn)
|
||
continue;
|
||
|
||
// Try subsumption: if L(x_range) ⊆ L(stab_star), drop the constraint
|
||
if (m_seq_regex && m_seq_regex->try_subsume(mem, *node))
|
||
continue;
|
||
|
||
// Create child: x → pr · po
|
||
euf::snode* pr = mk_fresh_var();
|
||
euf::snode* po = mk_fresh_var();
|
||
euf::snode* str_tail = m_sg.drop_first(mem.m_str);
|
||
|
||
nielsen_node* child = mk_child(node);
|
||
|
||
// Add membership: pr ∈ stab_base* (stabilizer constraint)
|
||
child->add_str_mem(str_mem(pr, star_sn, mem.m_history, next_mem_id(), mem.m_dep));
|
||
|
||
// Add remaining membership: po · tail ∈ R (same regex, trimmed history)
|
||
euf::snode* post_tail = str_tail->is_empty() ? po : m_sg.mk_concat(po, str_tail);
|
||
child->add_str_mem(str_mem(post_tail, mem.m_regex, nullptr, next_mem_id(), mem.m_dep));
|
||
|
||
// Blocking constraint: po must NOT start with stab_base
|
||
// po ∈ complement(non_nullable(stab_base) · Σ*)
|
||
// This prevents redundant unrolling.
|
||
if (!stab_base->is_nullable()) {
|
||
sort* str_sort = seq.str.mk_string_sort();
|
||
expr_ref full_seq(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr);
|
||
expr_ref base_then_all(seq.re.mk_concat(stab_expr, full_seq), mgr);
|
||
expr_ref block_re(seq.re.mk_complement(base_then_all), mgr);
|
||
euf::snode* block_sn = m_sg.mk(block_re);
|
||
if (block_sn)
|
||
child->add_str_mem(str_mem(po, block_sn, nullptr, next_mem_id(), mem.m_dep));
|
||
}
|
||
|
||
// Substitute x → pr · po
|
||
nielsen_edge* e = mk_edge(node, child, false);
|
||
euf::snode* replacement = m_sg.mk_concat(pr, po);
|
||
nielsen_subst s(x, replacement, mem.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modifier: apply_gpower_intr
|
||
// Generalized power introduction: for an equation where one side's head
|
||
// is a variable v and the other side has a ground prefix followed by a
|
||
// variable x that forms a dependency cycle back to v, introduce
|
||
// v = base^n · suffix where base is the ground prefix.
|
||
// Generates side constraints n >= 0 and 0 <= len(suffix) < len(base).
|
||
// mirrors ZIPT's GPowerIntrModifier (SplitGroundPower + TryGetPowerSplitBase)
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_gpower_intr(nielsen_node* node) {
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial()) continue;
|
||
if (!eq.m_lhs || !eq.m_rhs) continue;
|
||
|
||
// Try both directions (ZIPT's ExtendDir(fwd=true/false)).
|
||
for (unsigned od = 0; od < 2; ++od) {
|
||
bool fwd = (od == 0);
|
||
euf::snode* lhead = dir_token(eq.m_lhs, fwd);
|
||
euf::snode* rhead = dir_token(eq.m_rhs, fwd);
|
||
if (!lhead || !rhead) continue;
|
||
|
||
// Orientation 1: RHS directional head is var, scan LHS in that
|
||
// direction for ground prefix + matching cycle var.
|
||
if (rhead->is_var() && !lhead->is_var()) {
|
||
euf::snode_vector toks;
|
||
collect_tokens_dir(eq.m_lhs, fwd, toks);
|
||
euf::snode_vector ground_prefix;
|
||
euf::snode* target_var = nullptr;
|
||
for (unsigned i = 0; i < toks.size(); ++i) {
|
||
if (toks[i]->is_var()) { target_var = toks[i]; break; }
|
||
ground_prefix.push_back(toks[i]);
|
||
}
|
||
if (target_var && !ground_prefix.empty() && target_var->id() == rhead->id()) {
|
||
if (fire_gpower_intro(node, eq, rhead, ground_prefix, fwd))
|
||
return true;
|
||
}
|
||
}
|
||
|
||
// Orientation 2: LHS directional head is var, scan RHS analogously.
|
||
if (lhead->is_var() && !rhead->is_var()) {
|
||
euf::snode_vector toks;
|
||
collect_tokens_dir(eq.m_rhs, fwd, toks);
|
||
euf::snode_vector ground_prefix;
|
||
euf::snode* target_var = nullptr;
|
||
for (unsigned i = 0; i < toks.size(); ++i) {
|
||
if (toks[i]->is_var()) { target_var = toks[i]; break; }
|
||
ground_prefix.push_back(toks[i]);
|
||
}
|
||
if (target_var && !ground_prefix.empty() && target_var->id() == lhead->id()) {
|
||
if (fire_gpower_intro(node, eq, lhead, ground_prefix, fwd))
|
||
return true;
|
||
}
|
||
}
|
||
}
|
||
// TODO: Extend to transitive cycles across multiple equations
|
||
// (ZIPT's varDep + HasDepCycle). Currently only self-cycles are detected.
|
||
}
|
||
return false;
|
||
}
|
||
|
||
bool nielsen_graph::fire_gpower_intro(
|
||
nielsen_node* node, str_eq const& eq,
|
||
euf::snode* var, euf::snode_vector const& ground_prefix_orig, bool fwd) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
|
||
// Compress repeated patterns in the ground prefix (mirrors ZIPT's LcpCompressionFull).
|
||
// E.g., [a,b,a,b] has minimal period 2 → use [a,b] as the power base.
|
||
// This ensures we use the minimal repeating unit: x = (ab)^n · suffix
|
||
// instead of x = (abab)^n · suffix.
|
||
euf::snode_vector ground_prefix;
|
||
unsigned n = ground_prefix_orig.size();
|
||
unsigned period = n;
|
||
for (unsigned p = 1; p <= n / 2; ++p) {
|
||
if (n % p != 0) continue;
|
||
bool match = true;
|
||
for (unsigned i = p; i < n && match; ++i)
|
||
match = ground_prefix_orig[i]->id() == ground_prefix_orig[i % p]->id();
|
||
if (match) { period = p; break; }
|
||
}
|
||
for (unsigned i = 0; i < period; ++i) {
|
||
ground_prefix.push_back(ground_prefix_orig[i]);
|
||
}
|
||
|
||
// If the compressed prefix is a single power snode, unwrap it to use
|
||
// its base tokens, avoiding nested powers.
|
||
// E.g., [(ab)^3] → [a, b] so we get (ab)^n instead of ((ab)^3)^n.
|
||
// (mirrors ZIPT: if b.Length == 1 && b is PowerToken pt => b = pt.Base)
|
||
if (ground_prefix.size() == 1 && ground_prefix[0]->is_power()) {
|
||
expr* base_e = get_power_base_expr(ground_prefix[0], seq);
|
||
if (base_e) {
|
||
euf::snode* base_sn = m_sg.mk(base_e);
|
||
if (base_sn) {
|
||
euf::snode_vector base_toks;
|
||
collect_tokens_dir(base_sn, fwd, base_toks);
|
||
if (!base_toks.empty()) {
|
||
ground_prefix.reset();
|
||
ground_prefix.append(base_toks);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
unsigned base_len = ground_prefix.size();
|
||
|
||
// Build base string expression from ground prefix tokens.
|
||
// Each s_char snode's get_expr() is already seq.unit(ch) (a string).
|
||
expr_ref base_str(m);
|
||
for (unsigned i = 0; i < base_len; ++i) {
|
||
expr* tok_expr = ground_prefix[i]->get_expr();
|
||
if (!tok_expr) return false;
|
||
if (i == 0)
|
||
base_str = tok_expr;
|
||
else if (fwd)
|
||
base_str = seq.str.mk_concat(base_str, tok_expr);
|
||
else
|
||
base_str = seq.str.mk_concat(tok_expr, base_str);
|
||
}
|
||
|
||
// Create fresh exponent variable and power expression: base^n
|
||
expr_ref fresh_n = mk_fresh_int_var();
|
||
expr_ref power_expr(seq.str.mk_power(base_str, fresh_n), m);
|
||
euf::snode* power_snode = m_sg.mk(power_expr);
|
||
if (!power_snode) return false;
|
||
|
||
expr* zero = arith.mk_int(0);
|
||
|
||
// Generate children mirroring ZIPT's GetDecompose:
|
||
// P(t0 · t1 · ... · t_{k-1}) = P(t0) | t0·P(t1) | ... | t0·...·t_{k-2}·P(t_{k-1})
|
||
// For char tokens P(c) = {ε}, for power tokens P(u^m) = {u^m', 0 ≤ m' ≤ m}.
|
||
// Child at position i substitutes var → base^n · t0·...·t_{i-1} · P(t_i).
|
||
for (unsigned i = 0; i < base_len; ++i) {
|
||
euf::snode* tok = ground_prefix[i];
|
||
|
||
// Skip char position when preceding token is a power:
|
||
// The power case at i-1 with 0 ≤ m' ≤ exp already covers m' = exp,
|
||
// which produces the same result. Using the original exponent here
|
||
// creates a rigid coupling that causes cycling.
|
||
if (!tok->is_power() && i > 0 && ground_prefix[i - 1]->is_power())
|
||
continue;
|
||
|
||
// Build full-token prefix: ground_prefix[0..i-1]
|
||
euf::snode* prefix_sn = nullptr;
|
||
for (unsigned j = 0; j < i; ++j)
|
||
prefix_sn = (j == 0) ? ground_prefix[0] : dir_concat(m_sg, prefix_sn, ground_prefix[j], fwd);
|
||
|
||
euf::snode* replacement;
|
||
expr_ref fresh_m(m);
|
||
|
||
if (tok->is_power()) {
|
||
// Token is a power u^exp: use fresh m' with 0 ≤ m' ≤ exp
|
||
expr* inner_exp = get_power_exponent(tok);
|
||
expr* inner_base = get_power_base_expr(tok, seq);
|
||
if (inner_exp && inner_base) {
|
||
fresh_m = mk_fresh_int_var();
|
||
expr_ref partial_pow(seq.str.mk_power(inner_base, fresh_m), m);
|
||
euf::snode* partial_sn = m_sg.mk(partial_pow);
|
||
euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, partial_sn, fwd) : partial_sn;
|
||
replacement = dir_concat(m_sg, power_snode, suffix_sn, fwd);
|
||
} else {
|
||
// Fallback: use full token (shouldn't normally happen)
|
||
euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, tok, fwd) : tok;
|
||
replacement = dir_concat(m_sg, power_snode, suffix_sn, fwd);
|
||
}
|
||
} else {
|
||
// Token is a char: P(char) = ε, suffix = just the prefix
|
||
replacement = prefix_sn ? dir_concat(m_sg, power_snode, prefix_sn, fwd) : power_snode;
|
||
}
|
||
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(var, replacement, eq.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
|
||
// Side constraint: n >= 0
|
||
e->add_side_int(mk_int_constraint(fresh_n, zero, int_constraint_kind::ge, eq.m_dep));
|
||
|
||
// Side constraints for fresh partial exponent
|
||
if (fresh_m.get()) {
|
||
expr* inner_exp = get_power_exponent(tok);
|
||
// m' >= 0
|
||
e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq.m_dep));
|
||
// m' <= inner_exp
|
||
e->add_side_int(mk_int_constraint(inner_exp, fresh_m, int_constraint_kind::ge, eq.m_dep));
|
||
}
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modifier: apply_regex_var_split
|
||
// For str_mem x·s ∈ R where x is a variable, split using minterms:
|
||
// (1) x → ε (empty)
|
||
// (2) x → c · x' for each minterm character class c
|
||
// More general than regex_char_split; uses minterm partitioning rather
|
||
// than just extracting concrete characters.
|
||
// mirrors ZIPT's RegexVarSplitModifier
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_regex_var_split(nielsen_node* node) {
|
||
for (str_mem const& mem : node->str_mems()) {
|
||
if (!mem.m_str || !mem.m_regex) continue;
|
||
euf::snode* first = mem.m_str->first();
|
||
if (!first || !first->is_var()) continue;
|
||
|
||
// Compute minterms from the regex
|
||
euf::snode_vector minterms;
|
||
m_sg.compute_minterms(mem.m_regex, minterms);
|
||
if (minterms.empty()) continue;
|
||
|
||
bool created = false;
|
||
|
||
// Branch 1: x → ε (progress)
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(first, m_sg.mk_empty_seq(first->get_sort()), mem.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
created = true;
|
||
}
|
||
|
||
// Branch 2+: for each minterm m_i, x → ?c · x
|
||
// where ?c is a symbolic char constrained by the minterm
|
||
for (euf::snode* mt : minterms) {
|
||
if (mt->is_fail()) continue;
|
||
|
||
// Try Brzozowski derivative with respect to this minterm
|
||
// If the derivative is fail (empty language), skip this minterm
|
||
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, mt);
|
||
if (deriv && deriv->is_fail()) continue;
|
||
|
||
euf::snode* fresh_char = mk_fresh_char_var();
|
||
euf::snode* replacement = m_sg.mk_concat(fresh_char, first);
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, false);
|
||
nielsen_subst s(first, replacement, mem.m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
// Constrain fresh_char to the character class of this minterm.
|
||
// This is the key Parikh pruning step: when x → ?c · x' is
|
||
// generated from minterm m_i, ?c must belong to the character
|
||
// class described by m_i so that str ∈ derivative(R, m_i).
|
||
if (mt->get_expr()) {
|
||
char_set cs = m_parikh->minterm_to_char_set(mt->get_expr());
|
||
if (!cs.is_empty())
|
||
child->add_char_range(fresh_char, cs);
|
||
}
|
||
created = true;
|
||
}
|
||
|
||
if (created)
|
||
return true;
|
||
}
|
||
return false;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modifier: apply_power_split
|
||
// For a variable x facing a power token u^n, branch:
|
||
// (1) x = u^m · prefix(u) with m < n (bounded power prefix)
|
||
// (2) x = u^n · x' (the variable extends past the full power)
|
||
// Generates integer side constraints for the fresh exponent variables.
|
||
// mirrors ZIPT's PowerSplitModifier
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_power_split(nielsen_node* node) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
|
||
euf::snode* power = nullptr;
|
||
euf::snode* var_head = nullptr;
|
||
str_eq const* eq = nullptr;
|
||
bool fwd = true;
|
||
if (!find_power_vs_var(node, power, var_head, eq, fwd))
|
||
return false;
|
||
|
||
SASSERT(power->is_power() && power->num_args() >= 1);
|
||
euf::snode* base = power->arg(0);
|
||
expr* exp_n = get_power_exponent(power);
|
||
expr* zero = arith.mk_int(0);
|
||
|
||
// Branch 1: enumerate all decompositions of the base.
|
||
// x = base^m · prefix_i(base) where 0 <= m < n
|
||
// Uses the same GetDecompose pattern as fire_gpower_intro.
|
||
{
|
||
euf::snode_vector base_toks;
|
||
collect_tokens_dir(base, fwd, base_toks);
|
||
unsigned base_len = base_toks.size();
|
||
expr* base_expr = get_power_base_expr(power, seq);
|
||
if (!base_expr || base_len == 0)
|
||
return false;
|
||
|
||
expr_ref fresh_m = mk_fresh_int_var();
|
||
expr_ref power_m_expr(seq.str.mk_power(base_expr, fresh_m), m);
|
||
euf::snode* power_m_sn = m_sg.mk(power_m_expr);
|
||
if (!power_m_sn)
|
||
return false;
|
||
|
||
for (unsigned i = 0; i < base_len; ++i) {
|
||
euf::snode* tok = base_toks[i];
|
||
|
||
// Skip char position when preceding token is a power:
|
||
// the power case at i-1 with 0 <= m' <= exp already covers m' = exp.
|
||
if (!tok->is_power() && i > 0 && base_toks[i - 1]->is_power())
|
||
continue;
|
||
|
||
// Build full-token prefix: base_toks[0..i-1]
|
||
euf::snode* prefix_sn = nullptr;
|
||
for (unsigned j = 0; j < i; ++j)
|
||
prefix_sn = (j == 0) ? base_toks[0] : dir_concat(m_sg, prefix_sn, base_toks[j], fwd);
|
||
|
||
euf::snode* replacement;
|
||
expr_ref fresh_inner_m(m);
|
||
|
||
if (tok->is_power()) {
|
||
// Token is a power u^exp: decompose with fresh m', 0 <= m' <= exp
|
||
expr* inner_exp = get_power_exponent(tok);
|
||
expr* inner_base_e = get_power_base_expr(tok, seq);
|
||
if (inner_exp && inner_base_e) {
|
||
fresh_inner_m = mk_fresh_int_var();
|
||
expr_ref partial_pow(seq.str.mk_power(inner_base_e, fresh_inner_m), m);
|
||
euf::snode* partial_sn = m_sg.mk(partial_pow);
|
||
euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, partial_sn, fwd) : partial_sn;
|
||
replacement = dir_concat(m_sg, power_m_sn, suffix_sn, fwd);
|
||
} else {
|
||
euf::snode* suffix_sn = prefix_sn ? dir_concat(m_sg, prefix_sn, tok, fwd) : tok;
|
||
replacement = dir_concat(m_sg, power_m_sn, suffix_sn, fwd);
|
||
}
|
||
} else {
|
||
// P(char) = ε, suffix is just the prefix
|
||
replacement = prefix_sn ? dir_concat(m_sg, power_m_sn, prefix_sn, fwd) : power_m_sn;
|
||
}
|
||
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(var_head, replacement, eq->m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
|
||
// m >= 0
|
||
e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq->m_dep));
|
||
// m < n ⟺ n >= m + 1
|
||
if (exp_n) {
|
||
expr_ref m_plus_1(arith.mk_add(fresh_m, arith.mk_int(1)), m);
|
||
e->add_side_int(mk_int_constraint(exp_n, m_plus_1, int_constraint_kind::ge, eq->m_dep));
|
||
}
|
||
|
||
// Inner power constraints: 0 <= m' <= inner_exp
|
||
if (fresh_inner_m.get()) {
|
||
expr* inner_exp = get_power_exponent(tok);
|
||
e->add_side_int(mk_int_constraint(fresh_inner_m, zero, int_constraint_kind::ge, eq->m_dep));
|
||
e->add_side_int(mk_int_constraint(inner_exp, fresh_inner_m, int_constraint_kind::ge, eq->m_dep));
|
||
}
|
||
}
|
||
}
|
||
|
||
// Branch 2: x = u^n · x' (variable extends past full power, non-progress)
|
||
{
|
||
euf::snode* fresh_tail = mk_fresh_var();
|
||
euf::snode* replacement = dir_concat(m_sg, power, fresh_tail, fwd);
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, false);
|
||
nielsen_subst s(var_head, replacement, eq->m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modifier: apply_var_num_unwinding
|
||
// For a power token u^n facing a variable, branch:
|
||
// (1) n = 0 → u^n = ε (replace power with empty)
|
||
// (2) n >= 1 → peel one u from the power
|
||
// Generates integer side constraints for each branch.
|
||
// mirrors ZIPT's VarNumUnwindingModifier
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::apply_var_num_unwinding(nielsen_node* node) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
|
||
euf::snode* power = nullptr;
|
||
euf::snode* var_head = nullptr;
|
||
str_eq const* eq = nullptr;
|
||
bool fwd = true;
|
||
if (!find_power_vs_var(node, power, var_head, eq, fwd))
|
||
return false;
|
||
|
||
SASSERT(power->is_power() && power->num_args() >= 1);
|
||
euf::snode* base = power->arg(0);
|
||
expr* exp_n = get_power_exponent(power);
|
||
expr* zero = arith.mk_int(0);
|
||
expr* one = arith.mk_int(1);
|
||
|
||
// Branch 1: n = 0 → replace u^n with ε (progress)
|
||
// Side constraint: n = 0
|
||
{
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, true);
|
||
nielsen_subst s(power, m_sg.mk_empty_seq(power->get_sort()), eq->m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
if (exp_n)
|
||
e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep));
|
||
}
|
||
|
||
// Branch 2: n >= 1 → peel one u: u^n → u · u^(n-1)
|
||
// Side constraint: n >= 1
|
||
{
|
||
euf::snode* fresh = mk_fresh_var();
|
||
euf::snode* replacement = dir_concat(m_sg, base, fresh, fwd);
|
||
nielsen_node* child = mk_child(node);
|
||
nielsen_edge* e = mk_edge(node, child, false);
|
||
nielsen_subst s(power, replacement, eq->m_dep);
|
||
e->add_subst(s);
|
||
child->apply_subst(m_sg, s);
|
||
if (exp_n)
|
||
e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep));
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
void nielsen_graph::collect_conflict_deps(dep_tracker& deps) const {
|
||
for (nielsen_node const* n : m_nodes) {
|
||
if (n->eval_idx() != m_run_idx)
|
||
continue;
|
||
if (!n->is_currently_conflict())
|
||
continue;
|
||
for (str_eq const& eq : n->str_eqs())
|
||
deps = m_dep_mgr.mk_join(deps, eq.m_dep);
|
||
for (str_mem const& mem : n->str_mems())
|
||
deps = m_dep_mgr.mk_join(deps, mem.m_dep);
|
||
}
|
||
}
|
||
|
||
void nielsen_graph::explain_conflict(unsigned_vector& eq_indices, unsigned_vector& mem_indices) const {
|
||
SASSERT(m_root);
|
||
dep_tracker deps = m_dep_mgr.mk_empty();
|
||
collect_conflict_deps(deps);
|
||
vector<dep_source, false> vs;
|
||
m_dep_mgr.linearize(deps, vs);
|
||
for (dep_source const& d : vs) {
|
||
if (d.m_kind == dep_source::kind::eq)
|
||
eq_indices.push_back(d.index);
|
||
else
|
||
mem_indices.push_back(d.index);
|
||
}
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// nielsen_graph: length constraint generation
|
||
// -----------------------------------------------------------------------
|
||
|
||
expr_ref nielsen_graph::compute_length_expr(euf::snode* n) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
arith_util arith(m);
|
||
|
||
if (n->is_empty())
|
||
return expr_ref(arith.mk_int(0), m);
|
||
|
||
if (n->is_char() || n->is_unit())
|
||
return expr_ref(arith.mk_int(1), m);
|
||
|
||
if (n->is_concat()) {
|
||
expr_ref left = compute_length_expr(n->arg(0));
|
||
expr_ref right = compute_length_expr(n->arg(1));
|
||
return expr_ref(arith.mk_add(left, right), m);
|
||
}
|
||
|
||
// For variables: consult modification counter.
|
||
// mod_count > 0 means the variable has been reused by a non-eliminating
|
||
// substitution; use a fresh integer variable to avoid the circular
|
||
// |x| = 1 + |x| problem.
|
||
if (n->is_var()) {
|
||
unsigned mc = 0;
|
||
m_mod_cnt.find(n->id(), mc);
|
||
if (mc > 0)
|
||
return get_or_create_len_var(n, mc);
|
||
}
|
||
|
||
// for variables at mod_count 0 and other terms, use symbolic (str.len expr)
|
||
return expr_ref(seq.str.mk_length(n->get_expr()), m);
|
||
}
|
||
|
||
void nielsen_graph::generate_length_constraints(vector<length_constraint>& constraints) {
|
||
if (!m_root)
|
||
return;
|
||
|
||
ast_manager& m = m_sg.get_manager();
|
||
uint_set seen_vars;
|
||
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
arith_util arith(m);
|
||
|
||
for (str_eq const& eq : m_root->str_eqs()) {
|
||
if (eq.is_trivial())
|
||
continue;
|
||
|
||
expr_ref len_lhs = compute_length_expr(eq.m_lhs);
|
||
expr_ref len_rhs = compute_length_expr(eq.m_rhs);
|
||
expr_ref len_eq(m.mk_eq(len_lhs, len_rhs), m);
|
||
constraints.push_back(length_constraint(len_eq, eq.m_dep, length_kind::eq, m));
|
||
|
||
// collect variables for non-negativity constraints
|
||
euf::snode_vector tokens;
|
||
eq.m_lhs->collect_tokens(tokens);
|
||
eq.m_rhs->collect_tokens(tokens);
|
||
for (euf::snode* tok : tokens) {
|
||
if (tok->is_var() && !seen_vars.contains(tok->id())) {
|
||
seen_vars.insert(tok->id());
|
||
expr_ref len_var(seq.str.mk_length(tok->get_expr()), m);
|
||
expr_ref ge_zero(arith.mk_ge(len_var, arith.mk_int(0)), m);
|
||
constraints.push_back(length_constraint(ge_zero, eq.m_dep, length_kind::nonneg, m));
|
||
}
|
||
}
|
||
}
|
||
|
||
// Parikh interval reasoning for regex memberships:
|
||
// for each str_mem constraint str in regex, compute length bounds
|
||
// from the regex structure and generate arithmetic constraints.
|
||
for (str_mem const& mem : m_root->str_mems()) {
|
||
expr* re_expr = mem.m_regex->get_expr();
|
||
if (!re_expr || !seq.is_re(re_expr))
|
||
continue;
|
||
|
||
unsigned min_len = 0, max_len = UINT_MAX;
|
||
compute_regex_length_interval(mem.m_regex, min_len, max_len);
|
||
|
||
expr_ref len_str = compute_length_expr(mem.m_str);
|
||
|
||
// generate len(str) >= min_len when min_len > 0
|
||
if (min_len > 0) {
|
||
expr_ref bound(arith.mk_ge(len_str, arith.mk_int(min_len)), m);
|
||
constraints.push_back(length_constraint(bound, mem.m_dep, length_kind::bound, m));
|
||
}
|
||
|
||
// generate len(str) <= max_len when bounded
|
||
if (max_len < UINT_MAX) {
|
||
expr_ref bound(arith.mk_le(len_str, arith.mk_int(max_len)), m);
|
||
constraints.push_back(length_constraint(bound, mem.m_dep, length_kind::bound, m));
|
||
}
|
||
|
||
// generate len(x) >= 0 for variables in the string side
|
||
euf::snode_vector tokens;
|
||
mem.m_str->collect_tokens(tokens);
|
||
for (euf::snode* tok : tokens) {
|
||
if (tok->is_var() && !seen_vars.contains(tok->id())) {
|
||
seen_vars.insert(tok->id());
|
||
expr_ref len_var(seq.str.mk_length(tok->get_expr()), m);
|
||
expr_ref ge_zero(arith.mk_ge(len_var, arith.mk_int(0)), m);
|
||
constraints.push_back(length_constraint(ge_zero, mem.m_dep, length_kind::nonneg, m));
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
void nielsen_graph::compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len) {
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
expr* e = regex->get_expr();
|
||
if (!e || !seq.is_re(e)) {
|
||
min_len = 0;
|
||
max_len = UINT_MAX;
|
||
return;
|
||
}
|
||
min_len = seq.re.min_length(e);
|
||
max_len = seq.re.max_length(e);
|
||
// for empty language, min_length may be UINT_MAX (vacuously true);
|
||
// normalize to avoid generating bogus constraints
|
||
if (min_len > max_len) {
|
||
min_len = 0;
|
||
max_len = 0;
|
||
}
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// int_constraint display
|
||
// -----------------------------------------------------------------------
|
||
|
||
std::ostream& int_constraint::display(std::ostream& out) const {
|
||
if (m_lhs) out << mk_pp(m_lhs, m_lhs.get_manager());
|
||
else out << "null";
|
||
switch (m_kind) {
|
||
case int_constraint_kind::eq: out << " = "; break;
|
||
case int_constraint_kind::le: out << " <= "; break;
|
||
case int_constraint_kind::ge: out << " >= "; break;
|
||
}
|
||
if (m_rhs) out << mk_pp(m_rhs, m_rhs.get_manager());
|
||
else out << "null";
|
||
return out;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Integer feasibility subsolver implementation
|
||
// Uses the injected simple_solver (default: lp_simple_solver).
|
||
// -----------------------------------------------------------------------
|
||
|
||
expr_ref nielsen_graph::int_constraint_to_expr(int_constraint const& ic) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
switch (ic.m_kind) {
|
||
case int_constraint_kind::eq:
|
||
return expr_ref(m.mk_eq(ic.m_lhs, ic.m_rhs), m);
|
||
case int_constraint_kind::le:
|
||
return expr_ref(arith.mk_le(ic.m_lhs, ic.m_rhs), m);
|
||
case int_constraint_kind::ge:
|
||
return expr_ref(arith.mk_ge(ic.m_lhs, ic.m_rhs), m);
|
||
}
|
||
UNREACHABLE();
|
||
return expr_ref(m.mk_true(), m);
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Modification counter: substitution length tracking
|
||
// mirrors ZIPT's LocalInfo.CurrentModificationCnt + NielsenEdge.IncModCount/DecModCount
|
||
// + NielsenNode constructor length assertion logic
|
||
// -----------------------------------------------------------------------
|
||
|
||
expr_ref nielsen_graph::get_or_create_len_var(euf::snode* var, unsigned mod_count) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
SASSERT(var && var->is_var());
|
||
SASSERT(mod_count > 0);
|
||
|
||
auto key = std::make_pair(var->id(), mod_count);
|
||
auto it = m_len_var_cache.find(key);
|
||
if (it != m_len_var_cache.end())
|
||
return expr_ref(it->second, m);
|
||
|
||
// Create a fresh integer variable: len_<varname>_<mod_count>
|
||
arith_util arith(m);
|
||
std::string name = "len!" + std::to_string(var->id()) + "!" + std::to_string(mod_count);
|
||
expr_ref fresh(m.mk_fresh_const(name.c_str(), arith.mk_int()), m);
|
||
m_len_vars.push_back(fresh);
|
||
m_len_var_cache.insert({key, fresh.get()});
|
||
return fresh;
|
||
}
|
||
|
||
void nielsen_graph::add_subst_length_constraints(nielsen_edge* e) {
|
||
auto const& substs = e->subst();
|
||
|
||
// Quick check: any non-eliminating substitutions?
|
||
bool has_non_elim = false;
|
||
for (auto const& s : substs)
|
||
if (!s.is_eliminating()) { has_non_elim = true; break; }
|
||
if (!has_non_elim) return;
|
||
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
|
||
// Step 1: Compute LHS (|x|) for each non-eliminating substitution
|
||
// using current m_mod_cnt (before bumping).
|
||
// Also assert |x|_k >= 0 (mirrors ZIPT's NielsenNode constructor line 172).
|
||
svector<std::pair<unsigned, expr*>> lhs_exprs;
|
||
for (unsigned i = 0; i < substs.size(); ++i) {
|
||
auto const& s = substs[i];
|
||
if (s.is_eliminating()) continue;
|
||
SASSERT(s.m_var && s.m_var->is_var());
|
||
expr_ref lhs = compute_length_expr(s.m_var);
|
||
lhs_exprs.push_back({i, lhs.get()});
|
||
// Assert LHS >= 0
|
||
e->add_side_int(int_constraint(lhs, arith.mk_int(0),
|
||
int_constraint_kind::ge, s.m_dep, m));
|
||
}
|
||
|
||
// Step 2: Bump mod counts for all non-eliminating variables at once.
|
||
for (auto const& s : substs) {
|
||
if (s.is_eliminating()) continue;
|
||
unsigned id = s.m_var->id();
|
||
unsigned prev = 0;
|
||
m_mod_cnt.find(id, prev);
|
||
m_mod_cnt.insert(id, prev + 1);
|
||
}
|
||
|
||
// Step 3: Compute RHS (|u|) with bumped mod counts and add |x| = |u|.
|
||
for (auto const& p : lhs_exprs) {
|
||
unsigned idx = p.first;
|
||
expr* lhs_expr = p.second;
|
||
auto const& s = substs[idx];
|
||
expr_ref rhs = compute_length_expr(s.m_replacement);
|
||
e->add_side_int(int_constraint(lhs_expr, rhs, int_constraint_kind::eq,
|
||
s.m_dep, m));
|
||
|
||
// Assert non-negativity for any fresh length variables in the RHS
|
||
// (variables at mod_count > 0 that are newly created).
|
||
euf::snode_vector tokens;
|
||
s.m_replacement->collect_tokens(tokens);
|
||
for (euf::snode* tok : tokens) {
|
||
if (tok->is_var()) {
|
||
unsigned mc = 0;
|
||
m_mod_cnt.find(tok->id(), mc);
|
||
if (mc > 0) {
|
||
expr_ref len_var = get_or_create_len_var(tok, mc);
|
||
e->add_side_int(int_constraint(len_var, arith.mk_int(0),
|
||
int_constraint_kind::ge, s.m_dep, m));
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Step 4: Restore mod counts (temporary bump for computing RHS only).
|
||
for (auto const& s : substs) {
|
||
if (s.is_eliminating()) continue;
|
||
unsigned id = s.m_var->id();
|
||
unsigned prev = 0;
|
||
m_mod_cnt.find(id, prev);
|
||
SASSERT(prev >= 1);
|
||
if (prev <= 1)
|
||
m_mod_cnt.remove(id);
|
||
else
|
||
m_mod_cnt.insert(id, prev - 1);
|
||
}
|
||
}
|
||
|
||
void nielsen_graph::inc_edge_mod_counts(nielsen_edge* e) {
|
||
for (auto const& s : e->subst()) {
|
||
if (s.is_eliminating()) continue;
|
||
unsigned id = s.m_var->id();
|
||
unsigned prev = 0;
|
||
m_mod_cnt.find(id, prev);
|
||
m_mod_cnt.insert(id, prev + 1);
|
||
}
|
||
}
|
||
|
||
void nielsen_graph::dec_edge_mod_counts(nielsen_edge* e) {
|
||
for (auto const& s : e->subst()) {
|
||
if (s.is_eliminating()) continue;
|
||
unsigned id = s.m_var->id();
|
||
unsigned prev = 0;
|
||
m_mod_cnt.find(id, prev);
|
||
SASSERT(prev >= 1);
|
||
if (prev <= 1)
|
||
m_mod_cnt.remove(id);
|
||
else
|
||
m_mod_cnt.insert(id, prev - 1);
|
||
}
|
||
}
|
||
|
||
void nielsen_graph::assert_node_new_int_constraints(nielsen_node* node) {
|
||
// Assert only the int_constraints that are new to this node (beyond those
|
||
// inherited from its parent via clone_from). The parent's constraints are
|
||
// already present in the enclosing solver scope; asserting them again would
|
||
// be redundant (though harmless). This is called by search_dfs right after
|
||
// simplify_and_init, which is where new constraints are produced.
|
||
for (unsigned i = node->m_parent_ic_count; i < node->int_constraints().size(); ++i)
|
||
m_solver.assert_expr(int_constraint_to_expr(node->int_constraints()[i]));
|
||
}
|
||
|
||
void nielsen_graph::generate_node_length_constraints(nielsen_node* node) {
|
||
if (node->m_node_len_constraints_generated)
|
||
return;
|
||
node->m_node_len_constraints_generated = true;
|
||
|
||
// Skip the root node — its length constraints are already asserted
|
||
// at the base solver level by assert_root_constraints_to_solver().
|
||
if (node == m_root)
|
||
return;
|
||
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
uint_set seen_vars;
|
||
|
||
for (str_eq const& eq : node->str_eqs()) {
|
||
if (eq.is_trivial())
|
||
continue;
|
||
|
||
expr_ref len_lhs = compute_length_expr(eq.m_lhs);
|
||
expr_ref len_rhs = compute_length_expr(eq.m_rhs);
|
||
node->add_int_constraint(int_constraint(len_lhs, len_rhs,
|
||
int_constraint_kind::eq, eq.m_dep, m));
|
||
|
||
// non-negativity for each variable (mod-count-aware)
|
||
euf::snode_vector tokens;
|
||
eq.m_lhs->collect_tokens(tokens);
|
||
eq.m_rhs->collect_tokens(tokens);
|
||
for (euf::snode* tok : tokens) {
|
||
if (tok->is_var() && !seen_vars.contains(tok->id())) {
|
||
seen_vars.insert(tok->id());
|
||
expr_ref len_var = compute_length_expr(tok);
|
||
node->add_int_constraint(int_constraint(len_var, arith.mk_int(0),
|
||
int_constraint_kind::ge, eq.m_dep, m));
|
||
}
|
||
}
|
||
}
|
||
|
||
// Parikh interval bounds for regex memberships at this node
|
||
seq_util& seq = m_sg.get_seq_util();
|
||
for (str_mem const& mem : node->str_mems()) {
|
||
expr* re_expr = mem.m_regex->get_expr();
|
||
if (!re_expr || !seq.is_re(re_expr))
|
||
continue;
|
||
|
||
unsigned min_len = 0, max_len = UINT_MAX;
|
||
compute_regex_length_interval(mem.m_regex, min_len, max_len);
|
||
|
||
expr_ref len_str = compute_length_expr(mem.m_str);
|
||
|
||
if (min_len > 0) {
|
||
node->add_int_constraint(int_constraint(len_str, arith.mk_int(min_len),
|
||
int_constraint_kind::ge, mem.m_dep, m));
|
||
}
|
||
if (max_len < UINT_MAX) {
|
||
node->add_int_constraint(int_constraint(len_str, arith.mk_int(max_len),
|
||
int_constraint_kind::le, mem.m_dep, m));
|
||
}
|
||
|
||
// non-negativity for string-side variables
|
||
euf::snode_vector tokens;
|
||
mem.m_str->collect_tokens(tokens);
|
||
for (euf::snode* tok : tokens) {
|
||
if (tok->is_var() && !seen_vars.contains(tok->id())) {
|
||
seen_vars.insert(tok->id());
|
||
expr_ref len_var = compute_length_expr(tok);
|
||
node->add_int_constraint(int_constraint(len_var, arith.mk_int(0),
|
||
int_constraint_kind::ge, mem.m_dep, m));
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
bool nielsen_graph::check_int_feasibility(nielsen_node* node, svector<nielsen_edge*> const& cur_path) {
|
||
// In incremental mode the solver already holds all path constraints
|
||
// (root length constraints at the base level, edge side_int and node
|
||
// int_constraints pushed/popped as the DFS descends and backtracks).
|
||
// A plain check() is therefore sufficient.
|
||
return m_solver.check() != l_false;
|
||
}
|
||
|
||
bool nielsen_graph::check_lp_le(expr* lhs, expr* rhs,
|
||
nielsen_node* node,
|
||
svector<nielsen_edge*> const& cur_path) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
|
||
// The solver already holds all path constraints incrementally.
|
||
// Temporarily add NOT(lhs <= rhs), i.e. lhs >= rhs + 1.
|
||
// If that is unsat, then lhs <= rhs is entailed.
|
||
expr_ref one(arith.mk_int(1), m);
|
||
expr_ref rhs_plus_one(arith.mk_add(rhs, one), m);
|
||
|
||
m_solver.push();
|
||
m_solver.assert_expr(arith.mk_ge(lhs, rhs_plus_one));
|
||
lbool result = m_solver.check();
|
||
m_solver.pop(1);
|
||
return result == l_false;
|
||
}
|
||
|
||
int_constraint nielsen_graph::mk_int_constraint(expr* lhs, expr* rhs,
|
||
int_constraint_kind kind,
|
||
dep_tracker const& dep) {
|
||
return int_constraint(lhs, rhs, kind, dep, m_sg.get_manager());
|
||
}
|
||
|
||
expr* nielsen_graph::get_power_exponent(euf::snode* power) {
|
||
if (!power || !power->is_power()) return nullptr;
|
||
if (power->num_args() < 2) return nullptr;
|
||
euf::snode* exp_snode = power->arg(1);
|
||
return exp_snode ? exp_snode->get_expr() : nullptr;
|
||
}
|
||
|
||
expr_ref nielsen_graph::mk_fresh_int_var() {
|
||
ast_manager& m = m_sg.get_manager();
|
||
arith_util arith(m);
|
||
std::string name = "n!" + std::to_string(m_fresh_cnt++);
|
||
return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m);
|
||
}
|
||
|
||
bool nielsen_graph::solve_sat_path_ints(model_ref& mdl) {
|
||
mdl = nullptr;
|
||
if (m_sat_path.empty() && (!m_sat_node || m_sat_node->int_constraints().empty()))
|
||
return false;
|
||
|
||
// Re-assert the sat-path constraints into m_solver (which holds only root-level
|
||
// constraints at this point) and extract a model via the injected simple_solver.
|
||
// The sat path was found by DFS which verified feasibility at every step via
|
||
// check_int_feasibility, so all constraints along this path are jointly satisfiable
|
||
// and do not require incremental skipping.
|
||
IF_VERBOSE(1, verbose_stream() << "solve_sat_path_ints: sat_path length=" << m_sat_path.size() << "\n";);
|
||
m_solver.push();
|
||
for (nielsen_edge* e : m_sat_path)
|
||
for (auto const& ic : e->side_int())
|
||
m_solver.assert_expr(int_constraint_to_expr(ic));
|
||
if (m_sat_node)
|
||
for (auto const& ic : m_sat_node->int_constraints())
|
||
m_solver.assert_expr(int_constraint_to_expr(ic));
|
||
lbool result = m_solver.check();
|
||
IF_VERBOSE(1, verbose_stream() << "solve_sat_path_ints result: " << result << "\n";);
|
||
if (result == l_true) {
|
||
m_solver.get_model(mdl);
|
||
IF_VERBOSE(1, if (mdl) {
|
||
ast_manager& m = m_sg.get_manager();
|
||
verbose_stream() << " int_model:\n";
|
||
for (unsigned i = 0; i < mdl->get_num_constants(); ++i) {
|
||
func_decl* fd = mdl->get_constant(i);
|
||
expr* val = mdl->get_const_interp(fd);
|
||
if (val) verbose_stream() << " " << fd->get_name() << " = " << mk_bounded_pp(val, m, 3) << "\n";
|
||
}
|
||
});
|
||
}
|
||
m_solver.pop(1);
|
||
return mdl.get() != nullptr;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Regex widening: overapproximate string and check intersection emptiness
|
||
// Mirrors ZIPT NielsenNode.CheckRegexWidening (NielsenNode.cs:1350-1380)
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::check_regex_widening(nielsen_node const& node,
|
||
euf::snode* str, euf::snode* regex) {
|
||
if (!str || !regex || !m_seq_regex)
|
||
return false;
|
||
// Only apply to ground regexes with non-trivial strings
|
||
if (!regex->is_ground())
|
||
return false;
|
||
|
||
seq_util seq(m_sg.get_manager());
|
||
ast_manager& mgr = m_sg.get_manager();
|
||
|
||
// Build the overapproximation regex for the string.
|
||
// Variables → intersection of their primitive regex constraints (or Σ*)
|
||
// Symbolic chars → re.range from char_ranges (or full_char)
|
||
// Concrete chars → to_re(unit(c))
|
||
euf::snode_vector tokens;
|
||
str->collect_tokens(tokens);
|
||
if (tokens.empty())
|
||
return false;
|
||
|
||
euf::snode* approx = nullptr;
|
||
for (euf::snode* tok : tokens) {
|
||
euf::snode* tok_re = nullptr;
|
||
|
||
if (tok->is_char()) {
|
||
// Concrete character → to_re(unit(c))
|
||
expr* te = tok->get_expr();
|
||
if (!te)
|
||
return false;
|
||
expr_ref tre(seq.re.mk_to_re(te), mgr);
|
||
tok_re = m_sg.mk(tre);
|
||
}
|
||
else if (tok->is_var()) {
|
||
// Variable → intersection of primitive regex constraints, or Σ*
|
||
euf::snode* x_range = m_seq_regex->collect_primitive_regex_intersection(tok, node);
|
||
if (x_range)
|
||
tok_re = x_range;
|
||
else {
|
||
// Unconstrained variable: approximate as Σ*
|
||
sort* str_sort = seq.str.mk_string_sort();
|
||
expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr);
|
||
tok_re = m_sg.mk(all_re);
|
||
}
|
||
}
|
||
else if (tok->is_unit()) {
|
||
// Symbolic char — try to get char_range
|
||
if (node.char_ranges().contains(tok->id())) {
|
||
char_set const& cs = node.char_ranges()[tok->id()];
|
||
if (!cs.is_empty()) {
|
||
// Build union of re.range for each interval
|
||
euf::snode* range_re = nullptr;
|
||
for (auto const& r : cs.ranges()) {
|
||
expr_ref lo(seq.mk_char(r.m_lo), mgr);
|
||
expr_ref hi(seq.mk_char(r.m_hi - 1), mgr);
|
||
expr_ref rng(seq.re.mk_range(
|
||
seq.str.mk_string(zstring(r.m_lo)),
|
||
seq.str.mk_string(zstring(r.m_hi - 1))), mgr);
|
||
euf::snode* rng_sn = m_sg.mk(rng);
|
||
if (!range_re) {
|
||
range_re = rng_sn;
|
||
} else {
|
||
expr_ref u(seq.re.mk_union(range_re->get_expr(), rng_sn->get_expr()), mgr);
|
||
range_re = m_sg.mk(u);
|
||
}
|
||
}
|
||
tok_re = range_re;
|
||
}
|
||
}
|
||
if (!tok_re) {
|
||
// Unconstrained symbolic char: approximate as full_char (single char, any value)
|
||
sort* str_sort = seq.str.mk_string_sort();
|
||
expr_ref fc(seq.re.mk_full_char(seq.re.mk_re(str_sort)), mgr);
|
||
tok_re = m_sg.mk(fc);
|
||
}
|
||
}
|
||
else {
|
||
// Unknown token type — approximate as Σ*
|
||
sort* str_sort = seq.str.mk_string_sort();
|
||
expr_ref all_re(seq.re.mk_full_seq(seq.re.mk_re(str_sort)), mgr);
|
||
tok_re = m_sg.mk(all_re);
|
||
}
|
||
|
||
if (!tok_re)
|
||
return false;
|
||
|
||
if (!approx)
|
||
approx = tok_re;
|
||
else {
|
||
expr* ae = approx->get_expr();
|
||
expr* te = tok_re->get_expr(
|
||
);
|
||
if (!ae || !te)
|
||
return false;
|
||
expr_ref cat(seq.re.mk_concat(ae, te), mgr);
|
||
approx = m_sg.mk(cat);
|
||
}
|
||
}
|
||
|
||
if (!approx)
|
||
return false;
|
||
|
||
// Check intersection(approx, regex) for emptiness
|
||
// Build intersection regex
|
||
expr* ae = approx->get_expr();
|
||
expr* re = regex->get_expr();
|
||
if (!ae || !re)
|
||
return false;
|
||
expr_ref inter(seq.re.mk_inter(ae, re), mgr);
|
||
euf::snode* inter_sn = m_sg.mk(inter);
|
||
if (!inter_sn)
|
||
return false;
|
||
|
||
lbool result = m_seq_regex->is_empty_bfs(inter_sn, 5000);
|
||
return result == l_true;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Leaf-node regex intersection emptiness check
|
||
// Mirrors ZIPT NielsenNode.CheckRegex (NielsenNode.cs:1311-1329)
|
||
// -----------------------------------------------------------------------
|
||
|
||
bool nielsen_graph::check_leaf_regex(nielsen_node const& node) {
|
||
if (!m_seq_regex)
|
||
return true;
|
||
|
||
// Group str_mem constraints by variable (primitive constraints only)
|
||
u_map<ptr_vector<euf::snode>> var_regexes;
|
||
|
||
for (auto const& mem : node.str_mems()) {
|
||
if (!mem.m_str || !mem.m_regex)
|
||
continue;
|
||
if (!mem.is_primitive())
|
||
continue;
|
||
euf::snode* first = mem.m_str->first();
|
||
if (!first || !first->is_var())
|
||
continue;
|
||
auto& list = var_regexes.insert_if_not_there(first->id(), ptr_vector<euf::snode>());
|
||
list.push_back(mem.m_regex);
|
||
}
|
||
|
||
// For each variable with 2+ regex constraints, check intersection non-emptiness
|
||
for (auto& [var_id, regexes] : var_regexes) {
|
||
if (regexes.size() < 2)
|
||
continue;
|
||
lbool result = m_seq_regex->check_intersection_emptiness(regexes, 5000);
|
||
if (result == l_true) {
|
||
// Intersection is empty — infeasible
|
||
return false;
|
||
}
|
||
}
|
||
return true;
|
||
}
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Statistics collection
|
||
// -----------------------------------------------------------------------
|
||
|
||
void nielsen_graph::collect_statistics(::statistics& st) const {
|
||
st.update("nseq solve calls", m_stats.m_num_solve_calls);
|
||
st.update("nseq dfs nodes", m_stats.m_num_dfs_nodes);
|
||
st.update("nseq sat", m_stats.m_num_sat);
|
||
st.update("nseq unsat", m_stats.m_num_unsat);
|
||
st.update("nseq unknown", m_stats.m_num_unknown);
|
||
st.update("nseq simplify clash", m_stats.m_num_simplify_conflict);
|
||
st.update("nseq extensions", m_stats.m_num_extensions);
|
||
st.update("nseq fresh vars", m_stats.m_num_fresh_vars);
|
||
st.update("nseq max depth", m_stats.m_max_depth);
|
||
|
||
// modifier breakdown
|
||
st.update("nseq mod det", m_stats.m_mod_det);
|
||
st.update("nseq mod power epsilon", m_stats.m_mod_power_epsilon);
|
||
st.update("nseq mod num cmp", m_stats.m_mod_num_cmp);
|
||
st.update("nseq mod const num unwind", m_stats.m_mod_const_num_unwinding);
|
||
st.update("nseq mod eq split", m_stats.m_mod_eq_split);
|
||
st.update("nseq mod star intr", m_stats.m_mod_star_intr);
|
||
st.update("nseq mod gpower intr", m_stats.m_mod_gpower_intr);
|
||
st.update("nseq mod const nielsen", m_stats.m_mod_const_nielsen);
|
||
st.update("nseq mod regex char", m_stats.m_mod_regex_char_split);
|
||
st.update("nseq mod regex var", m_stats.m_mod_regex_var_split);
|
||
st.update("nseq mod power split", m_stats.m_mod_power_split);
|
||
st.update("nseq mod var nielsen", m_stats.m_mod_var_nielsen);
|
||
st.update("nseq mod var num unwind", m_stats.m_mod_var_num_unwinding);
|
||
}
|
||
|
||
}
|