3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-07 22:04:53 +00:00
z3/src/smt/seq/seq_nielsen.cpp

1682 lines
64 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*++
Copyright (c) 2026 Microsoft Corporation
Module Name:
seq_nielsen.cpp
Abstract:
Nielsen graph implementation for string constraint solving.
Ports the constraint types and Nielsen graph structures from
ZIPT (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints)
Author:
Nikolaj Bjorner (nbjorner) 2026-03-02
Clemens Eisenhofer 2026-03-02
--*/
#include "smt/seq/seq_nielsen.h"
#include "ast/arith_decl_plugin.h"
#include "ast/ast_pp.h"
#include "util/bit_util.h"
namespace seq {
// -----------------------------------------------
// dep_tracker
// -----------------------------------------------
dep_tracker::dep_tracker(unsigned num_bits) {
unsigned words = (num_bits + 31) / 32;
m_bits.resize(words, 0);
}
dep_tracker::dep_tracker(unsigned num_bits, unsigned set_bit) {
unsigned words = (num_bits + 31) / 32;
m_bits.resize(words, 0);
if (set_bit < num_bits) {
unsigned word_idx = set_bit / 32;
unsigned bit_idx = set_bit % 32;
m_bits[word_idx] = 1u << bit_idx;
}
}
void dep_tracker::merge(dep_tracker const& other) {
if (other.m_bits.empty())
return;
if (m_bits.size() < other.m_bits.size())
m_bits.resize(other.m_bits.size(), 0);
for (unsigned i = 0; i < other.m_bits.size(); ++i)
m_bits[i] |= other.m_bits[i];
}
bool dep_tracker::is_superset(dep_tracker const& other) const {
for (unsigned i = 0; i < other.m_bits.size(); ++i) {
unsigned my_bits = (i < m_bits.size()) ? m_bits[i] : 0;
if ((my_bits & other.m_bits[i]) != other.m_bits[i])
return false;
}
return true;
}
bool dep_tracker::empty() const {
for (unsigned b : m_bits)
if (b != 0) return false;
return true;
}
void dep_tracker::get_set_bits(unsigned_vector& indices) const {
for (unsigned i = 0; i < m_bits.size(); ++i) {
unsigned word = m_bits[i];
while (word != 0) {
unsigned bit = i * 32 + ntz_core(word);
indices.push_back(bit);
word &= word - 1; // clear lowest set bit
}
}
}
// -----------------------------------------------
// str_eq
// -----------------------------------------------
void str_eq::sort() {
if (m_lhs && m_rhs && m_lhs->id() > m_rhs->id())
std::swap(m_lhs, m_rhs);
}
bool str_eq::is_trivial() const {
return m_lhs == m_rhs ||
(m_lhs && m_rhs && m_lhs->is_empty() && m_rhs->is_empty());
}
bool str_eq::contains_var(euf::snode* var) const {
if (!var) return false;
// check if var appears in the token list of lhs or rhs
if (m_lhs) {
euf::snode_vector tokens;
m_lhs->collect_tokens(tokens);
for (euf::snode* t : tokens)
if (t == var) return true;
}
if (m_rhs) {
euf::snode_vector tokens;
m_rhs->collect_tokens(tokens);
for (euf::snode* t : tokens)
if (t == var) return true;
}
return false;
}
// -----------------------------------------------
// str_mem
// -----------------------------------------------
bool str_mem::is_primitive() const {
return m_str && m_str->length() == 1 && m_str->is_var();
}
bool str_mem::contains_var(euf::snode* var) const {
if (!var) return false;
if (m_str) {
euf::snode_vector tokens;
m_str->collect_tokens(tokens);
for (euf::snode* t : tokens)
if (t == var) return true;
}
return false;
}
// -----------------------------------------------
// nielsen_subst
// -----------------------------------------------
bool nielsen_subst::is_eliminating() const {
if (!m_var || !m_replacement) return true;
// check if var appears in replacement
euf::snode_vector tokens;
m_replacement->collect_tokens(tokens);
for (euf::snode* t : tokens)
if (t == m_var) return false;
return true;
}
// -----------------------------------------------
// nielsen_edge
// -----------------------------------------------
nielsen_edge::nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress):
m_src(src), m_tgt(tgt), m_is_progress(is_progress) {
}
// -----------------------------------------------
// nielsen_node
// -----------------------------------------------
nielsen_node::nielsen_node(nielsen_graph* graph, unsigned id):
m_id(id), m_graph(graph), m_is_progress(true) {
}
void nielsen_node::clone_from(nielsen_node const& parent) {
m_str_eq.reset();
m_str_mem.reset();
for (auto const& eq : parent.m_str_eq)
m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep));
for (auto const& mem : parent.m_str_mem)
m_str_mem.push_back(str_mem(mem.m_str, mem.m_regex, mem.m_history, mem.m_id, mem.m_dep));
}
void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) {
if (!s.m_var) return;
for (unsigned i = 0; i < m_str_eq.size(); ++i) {
str_eq& eq = m_str_eq[i];
eq.m_lhs = sg.subst(eq.m_lhs, s.m_var, s.m_replacement);
eq.m_rhs = sg.subst(eq.m_rhs, s.m_var, s.m_replacement);
eq.m_dep.merge(s.m_dep);
eq.sort();
}
for (unsigned i = 0; i < m_str_mem.size(); ++i) {
str_mem& mem = m_str_mem[i];
mem.m_str = sg.subst(mem.m_str, s.m_var, s.m_replacement);
// regex is typically ground, but apply subst for generality
mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement);
mem.m_dep.merge(s.m_dep);
}
}
// -----------------------------------------------
// nielsen_graph
// -----------------------------------------------
nielsen_graph::nielsen_graph(euf::sgraph& sg):
m_sg(sg) {
}
nielsen_graph::~nielsen_graph() {
reset();
}
nielsen_node* nielsen_graph::mk_node() {
unsigned id = m_nodes.size();
nielsen_node* n = alloc(nielsen_node, this, id);
m_nodes.push_back(n);
return n;
}
nielsen_node* nielsen_graph::mk_child(nielsen_node* parent) {
nielsen_node* child = mk_node();
child->clone_from(*parent);
return child;
}
nielsen_edge* nielsen_graph::mk_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress) {
nielsen_edge* e = alloc(nielsen_edge, src, tgt, is_progress);
m_edges.push_back(e);
src->add_outgoing(e);
return e;
}
void nielsen_graph::add_str_eq(euf::snode* lhs, euf::snode* rhs) {
if (!m_root)
m_root = mk_node();
dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
m_root->str_eqs().size());
str_eq eq(lhs, rhs, dep);
eq.sort();
m_root->add_str_eq(eq);
++m_num_input_eqs;
}
void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) {
if (!m_root)
m_root = mk_node();
dep_tracker dep(m_root->str_eqs().size() + m_root->str_mems().size() + 1,
m_root->str_eqs().size() + m_root->str_mems().size());
euf::snode* history = m_sg.mk_empty();
unsigned id = next_mem_id();
m_root->add_str_mem(str_mem(str, regex, history, id, dep));
++m_num_input_mems;
}
void nielsen_graph::inc_run_idx() {
if (m_run_idx == UINT_MAX) {
for (nielsen_node* n : m_nodes)
n->reset_counter();
m_run_idx = 1;
}
else
++m_run_idx;
}
void nielsen_graph::reset() {
for (nielsen_node* n : m_nodes)
dealloc(n);
for (nielsen_edge* e : m_edges)
dealloc(e);
m_nodes.reset();
m_edges.reset();
m_root = nullptr;
m_sat_node = nullptr;
m_sat_path.reset();
m_run_idx = 0;
m_depth_bound = 0;
m_next_mem_id = 0;
m_fresh_cnt = 0;
m_num_input_eqs = 0;
m_num_input_mems = 0;
}
std::ostream& nielsen_graph::display(std::ostream& out) const {
out << "nielsen_graph with " << m_nodes.size() << " nodes, "
<< m_edges.size() << " edges\n";
for (nielsen_node const* n : m_nodes) {
out << " node[" << n->id() << "]";
if (n == m_root)
out << " (root)";
if (n->is_general_conflict())
out << " CONFLICT";
if (n->is_extended())
out << " EXTENDED";
out << "\n";
// display string equalities
for (auto const& eq : n->str_eqs()) {
out << " str_eq: ";
if (eq.m_lhs) out << "lhs[id=" << eq.m_lhs->id() << ",len=" << eq.m_lhs->length() << "]";
else out << "null";
out << " = ";
if (eq.m_rhs) out << "rhs[id=" << eq.m_rhs->id() << ",len=" << eq.m_rhs->length() << "]";
else out << "null";
out << "\n";
}
// display regex memberships
for (auto const& mem : n->str_mems()) {
out << " str_mem[" << mem.m_id << "]: ";
if (mem.m_str) out << "str[id=" << mem.m_str->id() << ",len=" << mem.m_str->length() << "]";
else out << "null";
out << " in ";
if (mem.m_regex) out << "re[id=" << mem.m_regex->id() << "]";
else out << "null";
out << "\n";
}
// display outgoing edges
for (nielsen_edge const* e : n->outgoing()) {
out << " -> node[" << e->tgt()->id() << "]";
if (e->is_progress()) out << " (progress)";
for (auto const& s : e->subst()) {
out << " {";
if (s.m_var) out << "var[" << s.m_var->id() << "]";
out << " -> ";
if (s.m_replacement) out << "repl[" << s.m_replacement->id() << ",len=" << s.m_replacement->length() << "]";
else out << "eps";
out << "}";
}
out << "\n";
}
if (n->backedge())
out << " backedge -> node[" << n->backedge()->id() << "]\n";
}
return out;
}
// -----------------------------------------------------------------------
// nielsen_node: simplify_and_init
// -----------------------------------------------------------------------
simplify_result nielsen_node::simplify_and_init(nielsen_graph& g) {
euf::sgraph& sg = g.sg();
bool changed = true;
while (changed) {
changed = false;
// pass 1: remove trivially satisfied equalities
unsigned wi = 0;
for (unsigned i = 0; i < m_str_eq.size(); ++i) {
str_eq& eq = m_str_eq[i];
if (eq.is_trivial())
continue;
m_str_eq[wi++] = eq;
}
if (wi < m_str_eq.size()) {
m_str_eq.shrink(wi);
changed = true;
}
// pass 2: detect symbol clashes and empty-propagation
for (str_eq& eq : m_str_eq) {
if (!eq.m_lhs || !eq.m_rhs)
continue;
// both sides start with a concrete character: check match
if (eq.m_lhs->is_char() && eq.m_rhs->is_char()) {
if (eq.m_lhs->id() != eq.m_rhs->id()) {
// symbol clash
m_is_general_conflict = true;
m_reason = backtrack_reason::symbol_clash;
return simplify_result::conflict;
}
// same char: drop from both sides
eq.m_lhs = sg.drop_first(eq.m_lhs);
eq.m_rhs = sg.drop_first(eq.m_rhs);
changed = true;
continue;
}
// one side empty, the other not empty => conflict or substitution
if (eq.m_lhs->is_empty() && !eq.m_rhs->is_empty()) {
euf::snode_vector tokens;
eq.m_rhs->collect_tokens(tokens);
bool all_vars_or_opaque = true;
bool has_char = false;
for (euf::snode* t : tokens) {
if (t->is_char()) has_char = true;
else if (!t->is_var() && t->kind() != euf::snode_kind::s_other) {
all_vars_or_opaque = false; break;
}
}
if (has_char || !all_vars_or_opaque) {
m_is_general_conflict = true;
m_reason = backtrack_reason::symbol_clash;
return simplify_result::conflict;
}
for (euf::snode* t : tokens) {
if (t->is_var()) {
nielsen_subst s(t, sg.mk_empty(), eq.m_dep);
apply_subst(sg, s);
changed = true;
}
}
continue;
}
if (eq.m_rhs->is_empty() && !eq.m_lhs->is_empty()) {
euf::snode_vector tokens;
eq.m_lhs->collect_tokens(tokens);
bool all_vars_or_opaque = true;
bool has_char = false;
for (euf::snode* t : tokens) {
if (t->is_char()) has_char = true;
else if (!t->is_var() && t->kind() != euf::snode_kind::s_other) {
all_vars_or_opaque = false; break;
}
}
if (has_char || !all_vars_or_opaque) {
m_is_general_conflict = true;
m_reason = backtrack_reason::symbol_clash;
return simplify_result::conflict;
}
for (euf::snode* t : tokens) {
if (t->is_var()) {
nielsen_subst s(t, sg.mk_empty(), eq.m_dep);
apply_subst(sg, s);
changed = true;
}
}
continue;
}
// prefix matching: lhs and rhs both start with the same char => cancel
{
euf::snode_vector lhs_toks, rhs_toks;
eq.m_lhs->collect_tokens(lhs_toks);
eq.m_rhs->collect_tokens(rhs_toks);
unsigned prefix = 0;
while (prefix < lhs_toks.size() && prefix < rhs_toks.size() &&
lhs_toks[prefix]->is_char() && rhs_toks[prefix]->is_char()) {
if (lhs_toks[prefix]->id() != rhs_toks[prefix]->id()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::symbol_clash;
return simplify_result::conflict;
}
++prefix;
}
if (prefix > 0) {
eq.m_lhs = sg.drop_left(eq.m_lhs, prefix);
eq.m_rhs = sg.drop_left(eq.m_rhs, prefix);
changed = true;
}
}
}
}
// consume leading concrete characters from str_mem via Brzozowski derivatives
for (str_mem& mem : m_str_mem) {
if (!mem.m_str || !mem.m_regex)
continue;
while (mem.m_str && !mem.m_str->is_empty()) {
euf::snode* first = mem.m_str->first();
if (!first || !first->is_char())
break;
euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, first);
if (!deriv) break;
if (deriv->is_fail()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::regex;
return simplify_result::conflict;
}
mem.m_str = sg.drop_first(mem.m_str);
mem.m_regex = deriv;
changed = true;
}
}
// check for regex memberships that are immediately infeasible
for (str_mem& mem : m_str_mem) {
if (!mem.m_str || !mem.m_regex)
continue;
if (mem.m_str->is_empty() && !mem.m_regex->is_nullable()) {
m_is_general_conflict = true;
m_reason = backtrack_reason::regex;
return simplify_result::conflict;
}
}
// remove satisfied str_mem constraints (ε ∈ nullable regex)
{
unsigned wi = 0;
for (unsigned i = 0; i < m_str_mem.size(); ++i) {
str_mem& mem = m_str_mem[i];
if (mem.m_str && mem.m_str->is_empty() && mem.m_regex && mem.m_regex->is_nullable())
continue; // satisfied, drop
m_str_mem[wi++] = mem;
}
if (wi < m_str_mem.size()) {
m_str_mem.shrink(wi);
changed = true;
}
}
if (is_satisfied())
return simplify_result::satisfied;
return simplify_result::proceed;
}
bool nielsen_node::is_satisfied() const {
for (str_eq const& eq : m_str_eq)
if (!eq.is_trivial()) return false;
return m_str_mem.empty();
}
bool nielsen_node::has_opaque_terms() const {
auto is_opaque = [](euf::snode* n) { return n && n->kind() == euf::snode_kind::s_other; };
for (str_eq const& eq : m_str_eq) {
if (eq.is_trivial()) continue;
if (is_opaque(eq.m_lhs) || is_opaque(eq.m_rhs)) return true;
euf::snode_vector toks;
if (eq.m_lhs) { eq.m_lhs->collect_tokens(toks); for (auto* t : toks) if (is_opaque(t)) return true; toks.reset(); }
if (eq.m_rhs) { eq.m_rhs->collect_tokens(toks); for (auto* t : toks) if (is_opaque(t)) return true; }
}
for (str_mem const& mem : m_str_mem) {
if (!mem.m_str) continue;
if (is_opaque(mem.m_str)) return true;
euf::snode_vector toks;
mem.m_str->collect_tokens(toks);
for (auto* t : toks) if (is_opaque(t)) return true;
}
return false;
}
// -----------------------------------------------------------------------
// nielsen_graph: search
// -----------------------------------------------------------------------
nielsen_graph::search_result nielsen_graph::solve() {
if (!m_root)
return search_result::sat;
++m_stats.m_num_solve_calls;
m_sat_node = nullptr;
m_sat_path.reset();
// Iterative deepening: start at depth 3, increment by 1 on each failure.
// m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it.
m_depth_bound = 3;
while (true) {
if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth)
break;
inc_run_idx();
svector<nielsen_edge*> cur_path;
search_result r = search_dfs(m_root, 0, cur_path);
if (r == search_result::sat) {
++m_stats.m_num_sat;
return r;
}
if (r == search_result::unsat) {
++m_stats.m_num_unsat;
return r;
}
// depth limit hit increment bound by 1 and retry
if (m_max_search_depth > 0 && m_depth_bound >= m_max_search_depth)
break;
++m_depth_bound;
}
++m_stats.m_num_unknown;
return search_result::unknown;
}
nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, unsigned depth, svector<nielsen_edge*>& cur_path) {
++m_stats.m_num_dfs_nodes;
if (depth > m_stats.m_max_depth)
m_stats.m_max_depth = depth;
// cycle/revisit detection: if already visited this run, return cached status.
// mirrors ZIPT's NielsenNode.GraphExpansion() evalIdx check.
if (node->eval_idx() == m_run_idx) {
if (node->is_satisfied()) {
m_sat_node = node;
m_sat_path = cur_path;
return search_result::sat;
}
if (node->is_currently_conflict())
return search_result::unsat;
return search_result::unknown;
}
node->set_eval_idx(m_run_idx);
// simplify constraints (idempotent after first call)
simplify_result sr = node->simplify_and_init(*this);
if (sr == simplify_result::conflict) {
++m_stats.m_num_simplify_conflict;
node->set_general_conflict(true);
return search_result::unsat;
}
if (sr == simplify_result::satisfied || node->is_satisfied()) {
m_sat_node = node;
m_sat_path = cur_path;
return search_result::sat;
}
// depth bound check
if (depth >= m_depth_bound)
return search_result::unknown;
// generate extensions only once per node; children persist across runs
if (!node->is_extended()) {
bool ext = generate_extensions(node);
if (!ext) {
node->set_extended(true);
// No extensions could be generated. If the node still has
// unsatisfied constraints with opaque (s_other) terms that
// we cannot decompose, report unknown rather than unsat
// to avoid unsoundness.
if (node->has_opaque_terms())
return search_result::unknown;
node->set_reason(backtrack_reason::children_failed);
return search_result::unsat;
}
node->set_extended(true);
++m_stats.m_num_extensions;
}
// explore children
bool any_unknown = false;
for (nielsen_edge* e : node->outgoing()) {
cur_path.push_back(e);
search_result r = search_dfs(e->tgt(), depth + 1, cur_path);
if (r == search_result::sat)
return search_result::sat;
cur_path.pop_back();
if (r == search_result::unknown)
any_unknown = true;
}
// If no children exist and the node has opaque terms, report unknown
if (node->outgoing().empty() && node->has_opaque_terms())
return search_result::unknown;
if (!any_unknown) {
node->set_reason(backtrack_reason::children_failed);
return search_result::unsat;
}
return search_result::unknown;
}
simplify_result nielsen_graph::simplify_node(nielsen_node* node) {
return node->simplify_and_init(*this);
}
bool nielsen_graph::apply_det_modifier(nielsen_node* node) {
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial())
continue;
if (!eq.m_lhs || !eq.m_rhs)
continue;
euf::snode_vector lhs_toks, rhs_toks;
eq.m_lhs->collect_tokens(lhs_toks);
eq.m_rhs->collect_tokens(rhs_toks);
if (lhs_toks.empty() || rhs_toks.empty())
continue;
euf::snode* lhead = lhs_toks[0];
euf::snode* rhead = rhs_toks[0];
// same-head variable cancellation: x·A = x·B → A = B
if (lhead->is_var() && lhead->id() == rhead->id()) {
euf::snode* ltail = m_sg.drop_first(eq.m_lhs);
euf::snode* rtail = m_sg.drop_first(eq.m_rhs);
nielsen_node* child = mk_child(node);
// replace the equation with the tails
for (str_eq& ceq : child->str_eqs()) {
if (ceq.m_lhs == eq.m_lhs && ceq.m_rhs == eq.m_rhs) {
ceq.m_lhs = ltail;
ceq.m_rhs = rtail;
break;
}
}
mk_edge(node, child, true);
return true;
}
// variable definition: x = t where x ∉ vars(t) → subst x → t
if (lhead->is_var() && eq.m_rhs->is_empty()) {
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
return true;
}
if (rhead->is_var() && eq.m_lhs->is_empty()) {
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
return true;
}
}
return false;
}
bool nielsen_graph::apply_const_nielsen(nielsen_node* node) {
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial())
continue;
if (!eq.m_lhs || !eq.m_rhs)
continue;
euf::snode_vector lhs_toks, rhs_toks;
eq.m_lhs->collect_tokens(lhs_toks);
eq.m_rhs->collect_tokens(rhs_toks);
if (lhs_toks.empty() || rhs_toks.empty())
continue;
euf::snode* lhead = lhs_toks[0];
euf::snode* rhead = rhs_toks[0];
// char·A = y·B → branch 1: y→ε, branch 2: y→char·fresh
if (lhead->is_char() && rhead->is_var()) {
// branch 1: y → ε (progress)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// branch 2: y → char·fresh (progress)
{
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(lhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(rhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
// x·A = char·B → branch 1: x→ε, branch 2: x→char·fresh
if (rhead->is_char() && lhead->is_var()) {
// branch 1: x → ε (progress)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// branch 2: x → char·fresh (progress)
{
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(rhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
}
return false;
}
bool nielsen_graph::apply_var_nielsen(nielsen_node* node) {
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial())
continue;
if (!eq.m_lhs || !eq.m_rhs)
continue;
euf::snode_vector lhs_toks, rhs_toks;
eq.m_lhs->collect_tokens(lhs_toks);
eq.m_rhs->collect_tokens(rhs_toks);
if (lhs_toks.empty() || rhs_toks.empty())
continue;
euf::snode* lhead = lhs_toks[0];
euf::snode* rhead = rhs_toks[0];
if (!lhead->is_var() || !rhead->is_var())
continue;
if (lhead->id() == rhead->id())
continue;
// x·A = y·B where x,y are distinct variables (classic Nielsen)
// child 1: x → ε (progress)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// child 2: x → y·x' (progress)
{
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(rhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// child 3: y → x·y' (progress)
{
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(lhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(rhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
return false;
}
bool nielsen_graph::apply_eq_split(nielsen_node* node) {
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial())
continue;
if (!eq.m_lhs || !eq.m_rhs)
continue;
euf::snode_vector lhs_toks, rhs_toks;
eq.m_lhs->collect_tokens(lhs_toks);
eq.m_rhs->collect_tokens(rhs_toks);
if (lhs_toks.empty() || rhs_toks.empty())
continue;
euf::snode* lhead = lhs_toks[0];
euf::snode* rhead = rhs_toks[0];
if (!lhead->is_var() || !rhead->is_var())
continue;
// x·A = y·B where x,y are distinct variables
// child 1: x → ε (progress)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// child 2: x → y·z_fresh (non-progress, x starts with y)
if (lhead->id() != rhead->id()) {
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(rhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(lhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// child 3: y → x·z_fresh (non-progress, y starts with x)
if (lhead->id() != rhead->id()) {
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(lhead, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(rhead, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
return false;
}
// -----------------------------------------------------------------------
// nielsen_graph: mk_fresh_var
// -----------------------------------------------------------------------
euf::snode* nielsen_graph::mk_fresh_var() {
++m_stats.m_num_fresh_vars;
std::string name = "v!" + std::to_string(m_fresh_cnt++);
return m_sg.mk_var(symbol(name.c_str()));
}
// -----------------------------------------------------------------------
// nielsen_graph: apply_regex_char_split
// -----------------------------------------------------------------------
void nielsen_graph::collect_first_chars(euf::snode* re, euf::snode_vector& chars) {
if (!re)
return;
// to_re(s): the first character of the string s
if (re->is_to_re()) {
euf::snode* body = re->arg(0);
if (body && !body->is_empty()) {
euf::snode* first = body->first();
if (first && first->is_char()) {
bool dup = false;
for (euf::snode* c : chars)
if (c == first) { dup = true; break; }
if (!dup)
chars.push_back(first);
}
// Handle string literals (classified as s_other in sgraph):
// extract the first character from the zstring and create a char snode.
else if (first && first->get_expr()) {
seq_util& seq = m_sg.get_seq_util();
zstring s;
if (seq.str.is_string(first->get_expr(), s) && s.length() > 0) {
euf::snode* ch = m_sg.mk_char(s[0]);
bool dup = false;
for (euf::snode* c : chars)
if (c == ch) { dup = true; break; }
if (!dup)
chars.push_back(ch);
}
}
}
return;
}
// full character set (.): use 'a' as representative
if (re->is_full_char()) {
euf::snode* ch = m_sg.mk_char('a');
bool dup = false;
for (euf::snode* c : chars)
if (c == ch) { dup = true; break; }
if (!dup)
chars.push_back(ch);
return;
}
// re.range(lo, hi): use lo as representative
if (re->get_expr()) {
seq_util& seq = m_sg.get_seq_util();
expr* lo = nullptr, *hi = nullptr;
if (seq.re.is_range(re->get_expr(), lo, hi) && lo) {
unsigned ch_val = 'a';
if (seq.is_const_char(lo, ch_val)) {
euf::snode* ch = m_sg.mk_char(ch_val);
bool dup = false;
for (euf::snode* c : chars)
if (c == ch) { dup = true; break; }
if (!dup)
chars.push_back(ch);
}
return;
}
}
// fail, full_seq: no concrete first chars to extract
if (re->is_fail() || re->is_full_seq())
return;
// recurse into children (handles union, concat, star, loop, etc.)
for (unsigned i = 0; i < re->num_args(); ++i)
collect_first_chars(re->arg(i), chars);
}
bool nielsen_graph::apply_regex_char_split(nielsen_node* node) {
for (str_mem const& mem : node->str_mems()) {
if (!mem.m_str || !mem.m_regex)
continue;
// find the first token of the string side
euf::snode* first = mem.m_str->first();
if (!first || !first->is_var())
continue;
// collect concrete characters from the regex
euf::snode_vector chars;
collect_first_chars(mem.m_regex, chars);
// If no concrete characters found, fall through to apply_regex_var_split
if (chars.empty())
continue;
bool created = false;
// for each character c with non-fail derivative:
// child: x → c · fresh_var
for (euf::snode* ch : chars) {
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, ch);
if (!deriv || deriv->is_fail())
continue;
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(ch, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(first, replacement, mem.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
created = true;
}
// x → ε branch (variable is empty)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(first, m_sg.mk_empty(), mem.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
created = true;
}
if (created)
return true;
}
return false;
}
bool nielsen_graph::generate_extensions(nielsen_node *node) {
// Modifier priority chain mirrors ZIPT's ModifierBase.TypeOrder.
// The first modifier that produces edges is used and returned immediately.
// Priority 1: deterministic modifiers (single child, always progress)
if (apply_det_modifier(node))
return ++m_stats.m_mod_det, true;
// Priority 2: PowerEpsilon - power → ε via base=ε or n=0
if (apply_power_epsilon(node))
return ++m_stats.m_mod_power_epsilon, true;
// Priority 3: NumCmp - length comparison branching for power tokens
if (apply_num_cmp(node))
return ++m_stats.m_mod_num_cmp, true;
// Priority 4: ConstNumUnwinding - power vs constant: n=0 or peel
if (apply_const_num_unwinding(node))
return ++m_stats.m_mod_const_num_unwinding, true;
// Priority 5: EqSplit - var vs var (branching, 3 children)
if (apply_eq_split(node))
return ++m_stats.m_mod_eq_split, true;
// Priority 6: StarIntr - stabilizer introduction for regex cycles
if (apply_star_intr(node))
return ++m_stats.m_mod_star_intr, true;
// Priority 7: GPowerIntr - generalized power introduction
if (apply_gpower_intr(node))
return ++m_stats.m_mod_gpower_intr, true;
// Priority 8: ConstNielsen - char vs var (2 children)
if (apply_const_nielsen(node))
return ++m_stats.m_mod_const_nielsen, true;
// Priority 9: RegexCharSplit - split str_mem by first-position chars
if (apply_regex_char_split(node))
return ++m_stats.m_mod_regex_char_split, true;
// Priority 10: RegexVarSplit - split str_mem by minterms
if (apply_regex_var_split(node))
return ++m_stats.m_mod_regex_var_split, true;
// Priority 11: PowerSplit - power unwinding with bounded prefix
if (apply_power_split(node))
return ++m_stats.m_mod_power_split, true;
// Priority 12: VarNielsen - var vs var, all progress (classic Nielsen)
if (apply_var_nielsen(node))
return ++m_stats.m_mod_var_nielsen, true;
// Priority 13: VarNumUnwinding - variable power unwinding
if (apply_var_num_unwinding(node))
return ++m_stats.m_mod_var_num_unwinding, true;
return false;
}
// -----------------------------------------------------------------------
// Helper: find a power token in any str_eq
// -----------------------------------------------------------------------
euf::snode* nielsen_graph::find_power_token(nielsen_node* node) const {
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial()) continue;
if (!eq.m_lhs || !eq.m_rhs) continue;
euf::snode_vector toks;
eq.m_lhs->collect_tokens(toks);
for (euf::snode* t : toks)
if (t->is_power()) return t;
toks.reset();
eq.m_rhs->collect_tokens(toks);
for (euf::snode* t : toks)
if (t->is_power()) return t;
}
return nullptr;
}
// -----------------------------------------------------------------------
// Helper: find a power token facing a constant (char) head
// Returns true if found, sets power, other_head, eq_out.
// -----------------------------------------------------------------------
bool nielsen_graph::find_power_vs_const(nielsen_node* node,
euf::snode*& power,
euf::snode*& other_head,
str_eq const*& eq_out) const {
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial()) continue;
if (!eq.m_lhs || !eq.m_rhs) continue;
euf::snode* lhead = eq.m_lhs->first();
euf::snode* rhead = eq.m_rhs->first();
if (lhead && lhead->is_power() && rhead && rhead->is_char()) {
power = lhead; other_head = rhead; eq_out = &eq; return true;
}
if (rhead && rhead->is_power() && lhead && lhead->is_char()) {
power = rhead; other_head = lhead; eq_out = &eq; return true;
}
}
return false;
}
// -----------------------------------------------------------------------
// Helper: find a power token facing a variable head
// -----------------------------------------------------------------------
bool nielsen_graph::find_power_vs_var(nielsen_node* node,
euf::snode*& power,
euf::snode*& var_head,
str_eq const*& eq_out) const {
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial()) continue;
if (!eq.m_lhs || !eq.m_rhs) continue;
euf::snode* lhead = eq.m_lhs->first();
euf::snode* rhead = eq.m_rhs->first();
if (lhead && lhead->is_power() && rhead && rhead->is_var()) {
power = lhead; var_head = rhead; eq_out = &eq; return true;
}
if (rhead && rhead->is_power() && lhead && lhead->is_var()) {
power = rhead; var_head = lhead; eq_out = &eq; return true;
}
}
return false;
}
// -----------------------------------------------------------------------
// Modifier: apply_power_epsilon
// For a power token u^n in an equation, branch:
// (1) base u → ε (base is empty, so u^n = ε)
// (2) u^n → ε (the power is zero, replace power with empty)
// mirrors ZIPT's PowerEpsilonModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_power_epsilon(nielsen_node* node) {
euf::snode* power = find_power_token(node);
if (!power)
return false;
SASSERT(power->is_power() && power->num_args() >= 1);
euf::snode* base = power->arg(0);
dep_tracker dep;
for (str_eq const& eq : node->str_eqs())
if (!eq.is_trivial()) { dep = eq.m_dep; break; }
// Branch 1: base → ε (if base is a variable, substitute it to empty)
// This makes u^n = ε^n = ε for any n.
if (base->is_var()) {
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(base, m_sg.mk_empty(), dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// Branch 2: replace the power token itself with ε (n = 0 semantics)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(power, m_sg.mk_empty(), dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
// -----------------------------------------------------------------------
// Modifier: apply_num_cmp
// For equations involving two power tokens u^m and u^n with the same base,
// branch on the numeric relationship: m < n vs n <= m.
// Approximated as: (1) replace u^m with ε, (2) replace u^n with ε.
// mirrors ZIPT's NumCmpModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_num_cmp(nielsen_node* node) {
// Look for two power tokens with the same base on opposite sides
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial()) continue;
if (!eq.m_lhs || !eq.m_rhs) continue;
euf::snode* lhead = eq.m_lhs->first();
euf::snode* rhead = eq.m_rhs->first();
if (!lhead || !rhead) continue;
if (!lhead->is_power() || !rhead->is_power()) continue;
if (lhead->num_args() < 1 || rhead->num_args() < 1) continue;
// same base: compare the two powers
if (lhead->arg(0)->id() != rhead->arg(0)->id()) continue;
// Branch 1: m < n → peel from lhs power (replace lhead with ε)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// Branch 2: n <= m → peel from rhs power (replace rhead with ε)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
return false;
}
// -----------------------------------------------------------------------
// Modifier: apply_const_num_unwinding
// For a power token u^n facing a constant (char) head,
// branch: (1) n = 0 → u^n = ε, (2) n >= 1 → peel one u from power.
// mirrors ZIPT's ConstNumUnwindingModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_const_num_unwinding(nielsen_node* node) {
euf::snode* power = nullptr;
euf::snode* other_head = nullptr;
str_eq const* eq = nullptr;
if (!find_power_vs_const(node, power, other_head, eq))
return false;
SASSERT(power->is_power() && power->num_args() >= 1);
euf::snode* base = power->arg(0);
// Branch 1: n = 0 → replace power with ε (progress)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// Branch 2: n >= 1 → peel one u: replace u^n with u · u^n'
// Approximated by prepending the base and keeping a fresh power variable
{
euf::snode* fresh_power = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(base, fresh_power);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(power, replacement, eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
// -----------------------------------------------------------------------
// Modifier: apply_star_intr
// Star introduction: for a str_mem x·s ∈ R where a cycle is detected
// (backedge exists), introduce a stabilizer constraint x ∈ base*.
// Creates a child that adds x ∈ base* membership and constrains
// the remainder not to start with base.
// mirrors ZIPT's StarIntrModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_star_intr(nielsen_node* node) {
// Only fire if a backedge was set (cycle detected)
if (!node->backedge())
return false;
// Look for a str_mem with a variable-headed string
for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) {
str_mem const& mem = node->str_mems()[mi];
if (!mem.m_str || !mem.m_regex) continue;
euf::snode* first = mem.m_str->first();
if (!first || !first->is_var()) continue;
// Introduce a fresh variable z constrained by z ∈ R*,
// replacing x → z·fresh_post. This breaks the cycle because
// z is a new variable that won't trigger star_intr again.
euf::snode* x = first;
euf::snode* z = mk_fresh_var();
euf::snode* fresh_post = mk_fresh_var();
euf::snode* str_tail = m_sg.drop_first(mem.m_str);
// Build z ∈ R* membership: the star of the current regex
seq_util& seq = m_sg.get_seq_util();
expr* re_expr = mem.m_regex->get_expr();
if (!re_expr)
continue;
expr_ref star_re(seq.re.mk_star(re_expr), seq.get_manager());
euf::snode* star_sn = m_sg.mk(star_re);
if (!star_sn)
continue;
nielsen_node* child = mk_child(node);
// Add membership: z ∈ R* (stabilizer constraint)
child->add_str_mem(str_mem(z, star_sn, mem.m_history, next_mem_id(), mem.m_dep));
// Add remaining membership: fresh_post · tail ∈ R
euf::snode* post_tail = str_tail->is_empty() ? fresh_post : m_sg.mk_concat(fresh_post, str_tail);
child->add_str_mem(str_mem(post_tail, mem.m_regex, mem.m_history, next_mem_id(), mem.m_dep));
// Substitute x → z · fresh_post
nielsen_edge* e = mk_edge(node, child, false);
euf::snode* replacement = m_sg.mk_concat(z, fresh_post);
nielsen_subst s(x, replacement, mem.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
// Do not re-set backedge — z is fresh, so star_intr won't fire again
return true;
}
return false;
}
// -----------------------------------------------------------------------
// Modifier: apply_gpower_intr
// Generalized power introduction: for a variable x matched against a
// ground repeated pattern, introduce x = base^n · prefix with fresh n.
// Approximated: for each non-trivial equation with a variable head vs
// a ground concatenation, introduce power decomposition.
// mirrors ZIPT's GPowerIntrModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_gpower_intr(nielsen_node* node) {
for (str_eq const& eq : node->str_eqs()) {
if (eq.is_trivial()) continue;
if (!eq.m_lhs || !eq.m_rhs) continue;
euf::snode* var_side = nullptr;
euf::snode* ground_side = nullptr;
// One side must be a single variable, other side must be ground
euf::snode* lhead = eq.m_lhs->first();
euf::snode* rhead = eq.m_rhs->first();
if (lhead && lhead->is_var() && eq.m_lhs->length() == 1 && eq.m_rhs->is_ground()) {
var_side = lhead;
ground_side = eq.m_rhs;
}
else if (rhead && rhead->is_var() && eq.m_rhs->length() == 1 && eq.m_lhs->is_ground()) {
var_side = rhead;
ground_side = eq.m_lhs;
}
else continue;
if (!ground_side || ground_side->is_empty()) continue;
if (ground_side->length() < 2) continue; // need a repeated pattern
// Extract the first token as the "base" for power introduction
euf::snode* base_char = ground_side->first();
if (!base_char || !base_char->is_char()) continue;
// Check if the ground side has a repeated leading character
euf::snode_vector toks;
ground_side->collect_tokens(toks);
unsigned repeat_len = 0;
for (unsigned i = 0; i < toks.size(); ++i) {
if (toks[i]->id() == base_char->id())
++repeat_len;
else break;
}
if (repeat_len < 2) continue; // need at least 2 repetitions
// Introduce: x = base^n · fresh_suffix
// Branch with side constraint n >= 0
euf::snode* fresh_suffix = mk_fresh_var();
euf::snode* fresh_power = mk_fresh_var(); // represents base^n
euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(var_side, replacement, eq.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
return true;
}
return false;
}
// -----------------------------------------------------------------------
// Modifier: apply_regex_var_split
// For str_mem x·s ∈ R where x is a variable, split using minterms:
// (1) x → ε (empty)
// (2) x → c · x' for each minterm character class c
// More general than regex_char_split; uses minterm partitioning rather
// than just extracting concrete characters.
// mirrors ZIPT's RegexVarSplitModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_regex_var_split(nielsen_node* node) {
for (str_mem const& mem : node->str_mems()) {
if (!mem.m_str || !mem.m_regex) continue;
euf::snode* first = mem.m_str->first();
if (!first || !first->is_var()) continue;
// Compute minterms from the regex
euf::snode_vector minterms;
m_sg.compute_minterms(mem.m_regex, minterms);
if (minterms.empty()) continue;
bool created = false;
// Branch 1: x → ε (progress)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(first, m_sg.mk_empty(), mem.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
created = true;
}
// Branch 2+: for each minterm m_i, x → fresh_char · x'
// where fresh_char is constrained by the minterm
for (euf::snode* mt : minterms) {
if (mt->is_fail()) continue;
// Try Brzozowski derivative with respect to this minterm
// If the derivative is fail (empty language), skip this minterm
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, mt);
if (deriv && deriv->is_fail()) continue;
euf::snode* fresh_var = mk_fresh_var();
euf::snode* fresh_char = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(fresh_char, fresh_var);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(first, replacement, mem.m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
created = true;
}
if (created)
return true;
}
return false;
}
// -----------------------------------------------------------------------
// Modifier: apply_power_split
// For a variable x facing a power token u^n, branch:
// (1) x = u^m · prefix(u) with m < n (bounded power prefix)
// (2) x = u^n · x (the variable extends past the full power)
// Approximated since we lack integer constraint infrastructure.
// mirrors ZIPT's PowerSplitModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_power_split(nielsen_node* node) {
euf::snode* power = nullptr;
euf::snode* var_head = nullptr;
str_eq const* eq = nullptr;
if (!find_power_vs_var(node, power, var_head, eq))
return false;
SASSERT(power->is_power() && power->num_args() >= 1);
euf::snode* base = power->arg(0);
// Branch 1: x = base^m · prefix where m < n
// Approximated: x = fresh_power_var · fresh_suffix
{
euf::snode* fresh_power = mk_fresh_var();
euf::snode* fresh_suffix = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(var_head, replacement, eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// Branch 2: x = u^n · x (variable extends past full power, non-progress)
{
euf::snode* replacement = m_sg.mk_concat(base, var_head);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(var_head, replacement, eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
// -----------------------------------------------------------------------
// Modifier: apply_var_num_unwinding
// For a power token u^n facing a variable, branch:
// (1) n = 0 → u^n = ε (replace power with empty)
// (2) n >= 1 → peel one u from the power
// mirrors ZIPT's VarNumUnwindingModifier
// -----------------------------------------------------------------------
bool nielsen_graph::apply_var_num_unwinding(nielsen_node* node) {
euf::snode* power = nullptr;
euf::snode* var_head = nullptr;
str_eq const* eq = nullptr;
if (!find_power_vs_var(node, power, var_head, eq))
return false;
SASSERT(power->is_power() && power->num_args() >= 1);
euf::snode* base = power->arg(0);
// Branch 1: n = 0 → replace u^n with ε (progress)
{
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, true);
nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
// Branch 2: n >= 1 → peel one u: u^n → u · u^(n-1)
// Approximated: replace u^n with base · fresh_var
{
euf::snode* fresh = mk_fresh_var();
euf::snode* replacement = m_sg.mk_concat(base, fresh);
nielsen_node* child = mk_child(node);
nielsen_edge* e = mk_edge(node, child, false);
nielsen_subst s(power, replacement, eq->m_dep);
e->add_subst(s);
child->apply_subst(m_sg, s);
}
return true;
}
void nielsen_graph::collect_conflict_deps(dep_tracker& deps) const {
for (nielsen_node const* n : m_nodes) {
if (n->eval_idx() != m_run_idx)
continue;
if (!n->is_currently_conflict())
continue;
for (str_eq const& eq : n->str_eqs())
deps.merge(eq.m_dep);
for (str_mem const& mem : n->str_mems())
deps.merge(mem.m_dep);
}
}
void nielsen_graph::explain_conflict(unsigned_vector& eq_indices, unsigned_vector& mem_indices) const {
SASSERT(m_root);
dep_tracker deps;
collect_conflict_deps(deps);
unsigned_vector bits;
deps.get_set_bits(bits);
for (unsigned b : bits) {
if (b < m_num_input_eqs)
eq_indices.push_back(b);
else
mem_indices.push_back(b - m_num_input_eqs);
}
}
// -----------------------------------------------------------------------
// nielsen_graph: length constraint generation
// -----------------------------------------------------------------------
expr_ref nielsen_graph::compute_length_expr(euf::snode* n) {
ast_manager& m = m_sg.get_manager();
seq_util& seq = m_sg.get_seq_util();
arith_util arith(m);
if (n->is_empty())
return expr_ref(arith.mk_int(0), m);
if (n->is_char() || n->is_unit())
return expr_ref(arith.mk_int(1), m);
if (n->is_concat()) {
expr_ref left = compute_length_expr(n->arg(0));
expr_ref right = compute_length_expr(n->arg(1));
return expr_ref(arith.mk_add(left, right), m);
}
// for variables and other terms, use symbolic (str.len expr)
return expr_ref(seq.str.mk_length(n->get_expr()), m);
}
void nielsen_graph::generate_length_constraints(vector<length_constraint>& constraints) {
if (!m_root)
return;
ast_manager& m = m_sg.get_manager();
uint_set seen_vars;
seq_util& seq = m_sg.get_seq_util();
arith_util arith(m);
for (str_eq const& eq : m_root->str_eqs()) {
if (eq.is_trivial())
continue;
expr_ref len_lhs = compute_length_expr(eq.m_lhs);
expr_ref len_rhs = compute_length_expr(eq.m_rhs);
expr_ref len_eq(m.mk_eq(len_lhs, len_rhs), m);
constraints.push_back(length_constraint(len_eq, eq.m_dep, length_kind::eq, m));
// collect variables for non-negativity constraints
euf::snode_vector tokens;
eq.m_lhs->collect_tokens(tokens);
eq.m_rhs->collect_tokens(tokens);
for (euf::snode* tok : tokens) {
if (tok->is_var() && !seen_vars.contains(tok->id())) {
seen_vars.insert(tok->id());
expr_ref len_var(seq.str.mk_length(tok->get_expr()), m);
expr_ref ge_zero(arith.mk_ge(len_var, arith.mk_int(0)), m);
constraints.push_back(length_constraint(ge_zero, eq.m_dep, length_kind::nonneg, m));
}
}
}
// Parikh interval reasoning for regex memberships:
// for each str_mem constraint str in regex, compute length bounds
// from the regex structure and generate arithmetic constraints.
for (str_mem const& mem : m_root->str_mems()) {
expr* re_expr = mem.m_regex->get_expr();
if (!re_expr || !seq.is_re(re_expr))
continue;
unsigned min_len = 0, max_len = UINT_MAX;
compute_regex_length_interval(mem.m_regex, min_len, max_len);
expr_ref len_str = compute_length_expr(mem.m_str);
// generate len(str) >= min_len when min_len > 0
if (min_len > 0) {
expr_ref bound(arith.mk_ge(len_str, arith.mk_int(min_len)), m);
constraints.push_back(length_constraint(bound, mem.m_dep, length_kind::bound, m));
}
// generate len(str) <= max_len when bounded
if (max_len < UINT_MAX) {
expr_ref bound(arith.mk_le(len_str, arith.mk_int(max_len)), m);
constraints.push_back(length_constraint(bound, mem.m_dep, length_kind::bound, m));
}
// generate len(x) >= 0 for variables in the string side
euf::snode_vector tokens;
mem.m_str->collect_tokens(tokens);
for (euf::snode* tok : tokens) {
if (tok->is_var() && !seen_vars.contains(tok->id())) {
seen_vars.insert(tok->id());
expr_ref len_var(seq.str.mk_length(tok->get_expr()), m);
expr_ref ge_zero(arith.mk_ge(len_var, arith.mk_int(0)), m);
constraints.push_back(length_constraint(ge_zero, mem.m_dep, length_kind::nonneg, m));
}
}
}
}
void nielsen_graph::compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len) {
seq_util& seq = m_sg.get_seq_util();
expr* e = regex->get_expr();
if (!e || !seq.is_re(e)) {
min_len = 0;
max_len = UINT_MAX;
return;
}
min_len = seq.re.min_length(e);
max_len = seq.re.max_length(e);
// for empty language, min_length may be UINT_MAX (vacuously true);
// normalize to avoid generating bogus constraints
if (min_len > max_len) {
min_len = 0;
max_len = 0;
}
}
}