diff --git a/src/ast/euf/euf_sgraph.cpp b/src/ast/euf/euf_sgraph.cpp index c167f16ce..1bd2a8d8b 100644 --- a/src/ast/euf/euf_sgraph.cpp +++ b/src/ast/euf/euf_sgraph.cpp @@ -355,6 +355,22 @@ namespace euf { if (n) return n; + // decompose non-empty string constants into character chains + // so that Nielsen graph can do prefix matching on them + zstring s; + if (m_seq.str.is_string(e, s) && !s.empty()) { + snode* result = mk_char(s[s.length() - 1]); + for (unsigned i = s.length() - 1; i-- > 0; ) + result = mk_concat(mk_char(s[i]), result); + // register the original string expression as an alias + unsigned eid = e->get_id(); + m_expr2snode.reserve(eid + 1, nullptr); + m_expr2snode[eid] = result; + m_alias_trail.push_back(eid); + mk_enode(e); + return result; + } + snode_kind k = classify(e); if (!is_app(e)) @@ -400,6 +416,7 @@ namespace euf { void sgraph::push() { m_scopes.push_back(m_nodes.size()); + m_alias_trail_lim.push_back(m_alias_trail.size()); ++m_num_scopes; m_egraph.push(); } @@ -420,6 +437,15 @@ namespace euf { } m_nodes.shrink(old_sz); m_scopes.shrink(new_lvl); + // undo alias entries (string constant decompositions) + unsigned alias_old = m_alias_trail_lim[new_lvl]; + for (unsigned i = m_alias_trail.size(); i-- > alias_old; ) { + unsigned eid = m_alias_trail[i]; + if (eid < m_expr2snode.size()) + m_expr2snode[eid] = nullptr; + } + m_alias_trail.shrink(alias_old); + m_alias_trail_lim.shrink(new_lvl); m_num_scopes = new_lvl; m_egraph.pop(num_scopes); } @@ -520,6 +546,25 @@ namespace euf { expr* ch = nullptr; if (m_seq.str.is_unit(elem_expr, ch)) elem_expr = ch; + + // If elem is a regex predicate (e.g., re.allchar from compute_minterms), + // extract a representative character for the derivative. + sort* seq_sort = nullptr, *ele_sort = nullptr; + if (m_seq.is_re(re_expr, seq_sort) && m_seq.is_seq(seq_sort, ele_sort)) { + if (ele_sort != elem_expr->get_sort()) { + expr* lo = nullptr, *hi = nullptr; + if (m_seq.re.is_full_char(elem_expr)) { + // re.allchar represents the entire alphabet; computing a derivative + // w.r.t. a single character would be imprecise and could incorrectly + // report fail. Return nullptr to prevent incorrect pruning. + return nullptr; + } + else if (m_seq.re.is_range(elem_expr, lo, hi) && lo) + elem_expr = lo; + else + return nullptr; + } + } expr_ref result = m_rewriter.mk_derivative(elem_expr, re_expr); if (!result) return nullptr; diff --git a/src/ast/euf/euf_sgraph.h b/src/ast/euf/euf_sgraph.h index 4d7231924..89a1c0eee 100644 --- a/src/ast/euf/euf_sgraph.h +++ b/src/ast/euf/euf_sgraph.h @@ -97,6 +97,10 @@ namespace euf { // maps expression id to snode ptr_vector m_expr2snode; + // trail of alias entries (string constant → decomposed snode) for pop + unsigned_vector m_alias_trail; // expression ids + unsigned_vector m_alias_trail_lim; // scope boundaries + snode* mk_snode(expr* e, snode_kind k, unsigned num_args, snode* const* args); snode_kind classify(expr* e) const; void compute_metadata(snode* n); diff --git a/src/smt/nseq_model.cpp b/src/smt/nseq_model.cpp index feb0d002d..fd9382219 100644 --- a/src/smt/nseq_model.cpp +++ b/src/smt/nseq_model.cpp @@ -7,7 +7,8 @@ Module Name: Abstract: - Implementation of nseq_model. + Implementation of nseq_model: model construction for the + Nielsen-based string solver. Author: @@ -15,3 +16,292 @@ Author: --*/ #include "smt/nseq_model.h" +#include "smt/theory_nseq.h" +#include "smt/nseq_regex.h" +#include "smt/nseq_state.h" +#include "smt/smt_context.h" +#include "smt/smt_model_generator.h" +#include "smt/proto_model/proto_model.h" +#include "ast/ast_pp.h" + +namespace smt { + + nseq_model::nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq, + seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex) + : m_th(th), m(m), m_seq(seq), m_rewriter(rw), m_sg(sg), m_regex(regex), m_trail(m) + {} + + void nseq_model::init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state) { + m_var_values.reset(); + m_var_regex.reset(); + m_trail.reset(); + + m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model()); + mg.register_factory(m_factory); + + register_existing_values(nielsen); + collect_var_regex_constraints(state); + + // if the last solve returned sat, extract assignments from the + // satisfying leaf node found during DFS. + seq::nielsen_node const* root = nielsen.root(); + if (root && root->is_satisfied()) + extract_assignments(root); + } + + model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) { + app* e = n->get_expr(); + if (!m_seq.is_seq(e) && !m_seq.is_re(e) && !m_seq.str.is_nth_u(e)) + return nullptr; + + // For regex-sorted enodes, return the expression itself as a model value. + // Regexes are interpreted as themselves in the model. + if (m_seq.is_re(e)) { + m_trail.push_back(e); + return alloc(expr_wrapper_proc, e); + } + + // For nth_u (underspecified nth), return a fresh value of the element sort. + if (m_seq.str.is_nth_u(e)) { + sort* srt = e->get_sort(); + expr* val = m_factory->get_fresh_value(srt); + if (val) { + m_trail.push_back(val); + return alloc(expr_wrapper_proc, to_app(val)); + } + return nullptr; + } + + // look up snode for this expression + euf::snode* sn = m_sg.find(e); + expr_ref val(m); + if (sn) + val = snode_to_value(sn); + + if (!val) { + // no assignment found — generate fresh value + val = m_factory->get_fresh_value(e->get_sort()); + } + + if (val) { + m_trail.push_back(val); + m_factory->add_trail(val); + return alloc(expr_wrapper_proc, to_app(val)); + } + + return alloc(expr_wrapper_proc, to_app(m_seq.str.mk_empty(e->get_sort()))); + } + + void nseq_model::finalize(model_generator& mg) { + m_var_values.reset(); + m_var_regex.reset(); + m_trail.reset(); + m_factory = nullptr; + } + + void nseq_model::extract_assignments(seq::nielsen_node const* node) { + if (!node) + return; + for (auto const& eq : node->str_eqs()) { + if (!eq.m_lhs || !eq.m_rhs) + continue; + if (eq.m_lhs->is_var() && !m_var_values.contains(eq.m_lhs->id())) { + expr_ref val = snode_to_value(eq.m_rhs); + if (val) { + m_trail.push_back(val); + m_var_values.insert(eq.m_lhs->id(), val); + } + } + if (eq.m_rhs->is_var() && !m_var_values.contains(eq.m_rhs->id())) { + expr_ref val = snode_to_value(eq.m_lhs); + if (val) { + m_trail.push_back(val); + m_var_values.insert(eq.m_rhs->id(), val); + } + } + } + } + + expr_ref nseq_model::snode_to_value(euf::snode* n) { + if (!n) + return expr_ref(m); + + if (n->is_empty()) + return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); + + if (n->is_char() || n->is_unit()) { + expr* e = n->get_expr(); + return e ? expr_ref(e, m) : expr_ref(m); + } + + if (n->is_var()) + return expr_ref(get_var_value(n), m); + + if (n->is_concat()) { + expr_ref lhs = snode_to_value(n->arg(0)); + expr_ref rhs = snode_to_value(n->arg(1)); + if (lhs && rhs) + return expr_ref(m_seq.str.mk_concat(lhs, rhs), m); + if (lhs) return lhs; + if (rhs) return rhs; + return expr_ref(m); + } + + // fallback: use the underlying expression + expr* e = n->get_expr(); + return e ? expr_ref(e, m) : expr_ref(m); + } + + expr_ref nseq_model::generate_regex_witness(euf::snode* regex, unsigned depth) { + if (!regex) + return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); + + // depth bound to prevent stack overflow on deep regexes + if (depth > 1000) { + sort* srt = m_seq.str.mk_string_sort(); + expr* fresh = m_factory->get_fresh_value(srt); + return fresh ? expr_ref(fresh, m) : expr_ref(m_seq.str.mk_empty(srt), m); + } + + // nullable regex: empty string is a valid witness + if (m_regex.is_nullable(regex)) + return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); + + // collect first-position characters + euf::snode_vector chars; + m_regex.collect_first_chars(regex, chars); + + if (!chars.empty()) { + // pick first concrete character, derive, and recurse + euf::snode* c = chars[0]; + euf::snode* deriv = m_regex.derivative(regex, c); + expr_ref tail = generate_regex_witness(deriv, depth + 1); + if (tail && c->get_expr()) + return expr_ref(m_seq.str.mk_concat(c->get_expr(), tail), m); + } + + // fallback: return fresh value from factory (may not satisfy the regex, + // but avoids returning empty string which definitely doesn't satisfy non-nullable regex) + sort* srt = m_seq.str.mk_string_sort(); + expr* fresh = m_factory->get_fresh_value(srt); + return fresh ? expr_ref(fresh, m) : expr_ref(m_seq.str.mk_empty(srt), m); + } + + void nseq_model::register_existing_values(seq::nielsen_graph& nielsen) { + seq::nielsen_node const* root = nielsen.root(); + if (!root) + return; + for (auto const& eq : root->str_eqs()) { + if (eq.m_lhs && eq.m_lhs->get_expr()) + m_factory->register_value(eq.m_lhs->get_expr()); + if (eq.m_rhs && eq.m_rhs->get_expr()) + m_factory->register_value(eq.m_rhs->get_expr()); + } + } + + expr* nseq_model::get_var_value(euf::snode* var) { + expr* val = nullptr; + if (m_var_values.find(var->id(), val)) + return val; + + // unconstrained or regex-constrained: delegate to mk_fresh_value + val = mk_fresh_value(var); + if (val) { + m_trail.push_back(val); + m_var_values.insert(var->id(), val); + } + return val; + } + + expr* nseq_model::mk_fresh_value(euf::snode* var) { + // check if this variable has regex constraints + euf::snode* re = nullptr; + if (m_var_regex.find(var->id(), re) && re) { + // generate a witness string satisfying the regex + expr_ref witness = generate_regex_witness(re); + if (witness) { + m_trail.push_back(witness); + m_factory->register_value(witness); + return witness; + } + } + + // no regex constraint or witness generation failed: plain fresh value + sort* srt = m_seq.str.mk_string_sort(); + if (var->get_expr()) + srt = var->get_expr()->get_sort(); + return m_factory->get_fresh_value(srt); + } + + void nseq_model::collect_var_regex_constraints(nseq_state const& state) { + for (auto const& mem : state.str_mems()) { + if (!mem.m_str || !mem.m_regex) + continue; + // only collect for variable snodes (leaf variables needing assignment) + if (!mem.m_str->is_var()) + continue; + unsigned id = mem.m_str->id(); + euf::snode* existing = nullptr; + if (m_var_regex.find(id, existing) && existing) { + // intersect with existing constraint: + // build re.inter(existing, new_regex) + expr* e1 = existing->get_expr(); + expr* e2 = mem.m_regex->get_expr(); + if (e1 && e2) { + expr_ref inter(m_seq.re.mk_inter(e1, e2), m); + euf::snode* inter_sn = m_sg.mk(inter); + if (inter_sn) + m_var_regex.insert(id, inter_sn); + } + } + else { + m_var_regex.insert(id, mem.m_regex); + } + } + } + + bool nseq_model::validate_regex(nseq_state const& state, ::proto_model& mdl) { + bool ok = true; + + // validate positive memberships: str ∈ regex + for (auto const& mem : state.str_mems()) { + if (!mem.m_str || !mem.m_regex) + continue; + expr* s_expr = mem.m_str->get_expr(); + expr* r_expr = mem.m_regex->get_expr(); + if (!s_expr || !r_expr) + continue; + + expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m); + if (mdl.is_false(in_re)) { + IF_VERBOSE(0, verbose_stream() << "nseq model: positive membership violated: " + << mk_bounded_pp(s_expr, m, 3) + << " in " << mk_bounded_pp(r_expr, m, 3) << "\n";); + ok = false; + } + } + + // validate negative memberships: str ∉ regex + for (auto const& entry : state.neg_mems()) { + if (!entry.m_str || !entry.m_regex) + continue; + expr* s_expr = entry.m_str->get_expr(); + expr* r_expr = entry.m_regex->get_expr(); + if (!s_expr || !r_expr) + continue; + + expr_ref in_re(m_seq.re.mk_in_re(s_expr, r_expr), m); + expr_ref val(m); + mdl.eval(in_re, val, true); + if (val && m.is_true(val)) { + IF_VERBOSE(0, verbose_stream() << "nseq model: negative membership violated: " + << mk_bounded_pp(s_expr, m, 3) + << " not in " << mk_bounded_pp(r_expr, m, 3) << "\n";); + ok = false; + } + } + + return ok; + } + +} diff --git a/src/smt/nseq_model.h b/src/smt/nseq_model.h index b804cca31..359703fed 100644 --- a/src/smt/nseq_model.h +++ b/src/smt/nseq_model.h @@ -7,7 +7,18 @@ Module Name: Abstract: - Model generation from solved Nielsen graph. + Model construction for the Nielsen-based string solver (theory_nseq). + + After the Nielsen graph search returns sat, this module extracts + variable-to-value assignments from the satisfying leaf node and + builds model_value_proc callbacks for the SMT model generator. + + The workflow is: + 1. init() — allocate seq_factory, register existing string literals, + and extract variable assignments from the satisfying Nielsen node. + 2. mk_value(enode*) — return a model_value_proc that lazily builds + the concrete value for a given enode. + 3. finalize() — clean up temporary state. Author: @@ -16,57 +27,99 @@ Author: --*/ #pragma once -#include "ast/ast.h" #include "ast/seq_decl_plugin.h" -#include "util/zstring.h" +#include "ast/rewriter/seq_rewriter.h" #include "ast/euf/euf_sgraph.h" +#include "smt/smt_types.h" #include "smt/seq/seq_nielsen.h" -#include -#include +#include "model/seq_factory.h" + +class proto_model; namespace smt { + class theory_nseq; + class nseq_regex; + class nseq_state; + class model_value_proc; + class nseq_model { - ast_manager& m; - seq_util m_seq; - euf::sgraph& m_sg; - unsigned m_fresh_counter = 0; + theory_nseq& m_th; + ast_manager& m; + seq_util& m_seq; + seq_rewriter& m_rewriter; + euf::sgraph& m_sg; + nseq_regex& m_regex; + + // factory for generating fresh string/regex values + seq_factory* m_factory = nullptr; + + // variable assignments extracted from the satisfying Nielsen node. + // maps snode id -> expr* (concrete value) + u_map m_var_values; + + // trail for GC protection of generated expressions + expr_ref_vector m_trail; + + // per-variable regex constraints: maps snode id -> intersected regex snode. + // collected during init() from the state's str_mem list. + u_map m_var_regex; public: - nseq_model(ast_manager& m, euf::sgraph& sg) : m(m), m_seq(m), m_sg(sg) {} + nseq_model(theory_nseq& th, ast_manager& m, seq_util& seq, + seq_rewriter& rw, euf::sgraph& sg, nseq_regex& regex); - // generate a fresh string value (used when a variable is unconstrained) - expr_ref mk_fresh_value() { - std::string name = "s!" + std::to_string(m_fresh_counter++); - zstring zs(name.c_str()); - return expr_ref(m_seq.str.mk_string(zs), m); - } + // Phase 1: initialize model construction. + // Allocates seq_factory, registers it with mg, collects + // existing string literals, and extracts variable assignments + // from the satisfying Nielsen leaf node. + void init(model_generator& mg, seq::nielsen_graph& nielsen, nseq_state const& state); - // extract variable assignments from a satisfied leaf node - // Returns true if all variables got a valid assignment - bool extract_assignments(seq::nielsen_node* node, - std::vector>& assignment) { - if (!node) - return false; - for (auto const& eq : node->str_eqs()) { - if (!eq.m_lhs || !eq.m_rhs) - continue; - if (eq.m_lhs->is_var() && eq.m_rhs->get_expr()) { - assignment.emplace_back(eq.m_lhs, eq.m_rhs->get_expr()); - } - else if (eq.m_rhs->is_var() && eq.m_lhs->get_expr()) { - assignment.emplace_back(eq.m_rhs, eq.m_lhs->get_expr()); - } - } - return true; - } + // Phase 2: build a model_value_proc for the given enode. + // Returns nullptr if the enode is not a sequence/string sort. + model_value_proc* mk_value(enode* n, model_generator& mg); - // validate that a regex membership constraint is satisfied by the assignment - bool validate_regex(seq::str_mem const& mem, - obj_map const& assignment) { - // stub: assume valid for now - return true; - } + // Phase 3: clean up temporary model construction state. + void finalize(model_generator& mg); + + // Validate that model assignments satisfy all regex membership + // constraints from the state. Checks positive and negative + // memberships. Returns true if all constraints pass. + bool validate_regex(nseq_state const& state, ::proto_model& mdl); + + private: + // extract variable assignments from a satisfying Nielsen node. + // Walks str_eqs looking for x = value patterns and records them. + void extract_assignments(seq::nielsen_node const* node); + + // recursively substitute known variable assignments into an snode tree. + // Returns a concrete Z3 expression. + expr_ref snode_to_value(euf::snode* n); + + // generate a concrete witness string for a regex. + // Uses nullable check and first-char collection to build + // a minimal satisfying string. depth bounds recursion. + expr_ref generate_regex_witness(euf::snode* regex, unsigned depth = 0); + + // register all string literals appearing in the constraint store + // with the factory to avoid collisions with fresh values. + void register_existing_values(seq::nielsen_graph& nielsen); + + // look up or compute the value for an snode variable. + // If no assignment exists, delegates to mk_fresh_value. + expr* get_var_value(euf::snode* var); + + // generate a fresh value for a variable, respecting regex + // membership constraints. If the variable has associated + // regex constraints (collected during init), generates a + // witness satisfying the intersection; otherwise falls back + // to a plain fresh value from the factory. + expr* mk_fresh_value(euf::snode* var); + + // collect per-variable regex constraints from the state. + // For each positive str_mem, records the regex (or intersects + // with existing) into m_var_regex keyed by the string snode id. + void collect_var_regex_constraints(nseq_state const& state); }; } diff --git a/src/smt/nseq_regex.cpp b/src/smt/nseq_regex.cpp index 7a36a26e7..df9847b6c 100644 --- a/src/smt/nseq_regex.cpp +++ b/src/smt/nseq_regex.cpp @@ -7,7 +7,7 @@ Module Name: Abstract: - Implementation of nseq_regex. + Lazy regex membership processing for the Nielsen-based string solver. Author: @@ -15,3 +15,394 @@ Author: --*/ #include "smt/nseq_regex.h" + +namespace smt { + + // ----------------------------------------------------------------------- + // Regex emptiness checking (structural analysis) + // ----------------------------------------------------------------------- + + bool nseq_regex::is_empty_regex(euf::snode* re) const { + if (!re) + return false; + // direct empty language constant + if (re->is_fail()) + return true; + // kinds that are never empty + if (re->is_star() || re->is_to_re() || + re->is_full_char() || re->is_full_seq()) + return false; + // loop with lo == 0 accepts ε + if (re->is_loop() && re->is_nullable()) + return false; + + seq_util& seq = m_sg.get_seq_util(); + expr* e = re->get_expr(); + if (!e) + return false; + + expr* r1, * r2; + // union is empty iff both children are empty + if (seq.re.is_union(e, r1, r2)) { + SASSERT(re->num_args() == 2); + return is_empty_regex(re->arg(0)) && is_empty_regex(re->arg(1)); + } + // regex concat is empty if either child is empty + if (seq.re.is_concat(e, r1, r2)) { + SASSERT(re->num_args() == 2); + return is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1)); + } + // intersection is empty if either child is empty + if (seq.re.is_intersection(e, r1, r2)) { + SASSERT(re->num_args() == 2); + if (is_empty_regex(re->arg(0)) || is_empty_regex(re->arg(1))) + return true; + } + // complement of full_seq is empty + if (re->is_complement() && re->num_args() == 1 && re->arg(0)->is_full_seq()) + return true; + // loop(empty, lo, _) with lo > 0 is empty + if (re->is_loop() && re->num_args() >= 1 && is_empty_regex(re->arg(0))) + return !re->is_nullable(); // empty if not nullable (i.e., lo > 0) + + return false; + } + + // ----------------------------------------------------------------------- + // Cycle detection + // ----------------------------------------------------------------------- + + bool nseq_regex::detect_cycle(seq::str_mem const& mem) const { + return extract_cycle(mem) != nullptr; + } + + // ----------------------------------------------------------------------- + // Ground prefix consumption + // ----------------------------------------------------------------------- + + nseq_regex::simplify_status nseq_regex::simplify_ground_prefix(seq::str_mem& mem) { + if (!mem.m_str || !mem.m_regex) + return simplify_status::ok; + + while (mem.m_str && !mem.m_str->is_empty()) { + euf::snode* first = mem.m_str->first(); + if (!first || !first->is_char()) + break; + euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, first); + if (!deriv) + break; + if (deriv->is_fail()) + return simplify_status::conflict; + mem.m_str = m_sg.drop_first(mem.m_str); + mem.m_regex = deriv; + } + + // check final state + if (mem.m_str && mem.m_str->is_empty()) { + if (mem.m_regex->is_nullable()) + return simplify_status::satisfied; + return simplify_status::conflict; + } + + return simplify_status::ok; + } + + // ----------------------------------------------------------------------- + // Ground suffix consumption (best-effort) + // ----------------------------------------------------------------------- + + nseq_regex::simplify_status nseq_regex::simplify_ground_suffix(seq::str_mem& mem) { + // Suffix consumption via reverse derivatives is complex. + // For now, only handle the case where the entire string is ground: + // consume all characters from the front (which covers trailing chars + // when the string is fully ground). + if (!mem.m_str || !mem.m_regex) + return simplify_status::ok; + if (!mem.m_str->is_ground()) + return simplify_status::ok; + + // If the string is ground, simplify_ground_prefix handles everything. + return simplify_ground_prefix(mem); + } + + // ----------------------------------------------------------------------- + // Trivial checks + // ----------------------------------------------------------------------- + + int nseq_regex::check_trivial(seq::str_mem const& mem) const { + if (!mem.m_str || !mem.m_regex) + return 0; + // regex is ∅ => always conflict + if (is_empty_regex(mem.m_regex)) + return -1; + // regex is Σ* => always satisfied + if (is_full_regex(mem.m_regex)) + return 1; + // empty string checks + if (mem.m_str->is_empty()) { + if (mem.m_regex->is_nullable()) + return 1; + return -1; + } + return 0; + } + + // ----------------------------------------------------------------------- + // Minterm computation with filtering + // ----------------------------------------------------------------------- + + void nseq_regex::get_minterms(euf::snode* regex, euf::snode_vector& minterms) { + if (!regex) + return; + + // compute raw minterms from the regex predicates + euf::snode_vector raw; + m_sg.compute_minterms(regex, raw); + + // filter: keep only minterms that are non-fail (non-empty character class). + // note: minterms are regex character-class expressions, not concrete + // characters, so we cannot compute Brzozowski derivatives with them. + // callers should compute derivatives using concrete or fresh chars. + for (euf::snode* mt : raw) { + if (!mt || mt->is_fail()) + continue; + minterms.push_back(mt); + } + } + + // ----------------------------------------------------------------------- + // Collect first characters + // ----------------------------------------------------------------------- + + void nseq_regex::collect_first_chars(euf::snode* re, euf::snode_vector& chars) { + if (!re) + return; + + // to_re(s): extract first character of the string body + if (re->is_to_re()) { + euf::snode* body = re->arg(0); + if (body && !body->is_empty()) { + euf::snode* first = body->first(); + if (first && first->is_char()) { + bool dup = false; + for (euf::snode* c : chars) + if (c == first) { dup = true; break; } + if (!dup) + chars.push_back(first); + } + // Handle string literals (classified as s_other in sgraph) + else if (first && first->get_expr()) { + seq_util& seq = m_sg.get_seq_util(); + zstring s; + if (seq.str.is_string(first->get_expr(), s) && s.length() > 0) { + euf::snode* ch = m_sg.mk_char(s[0]); + bool dup = false; + for (euf::snode* c : chars) + if (c == ch) { dup = true; break; } + if (!dup) + chars.push_back(ch); + } + } + } + return; + } + + // leaf cases: produce representative characters for character classes + if (re->is_full_char()) { + // full character set (.): use 'a' as representative + euf::snode* ch = m_sg.mk_char('a'); + bool dup = false; + for (euf::snode* c : chars) + if (c == ch) { dup = true; break; } + if (!dup) + chars.push_back(ch); + return; + } + + // re.range(lo, hi): use lo as representative + if (re->get_expr()) { + seq_util& seq = m_sg.get_seq_util(); + expr* lo = nullptr, *hi = nullptr; + if (seq.re.is_range(re->get_expr(), lo, hi) && lo) { + zstring s; + unsigned ch_val = 'a'; + if (seq.is_const_char(lo, ch_val)) { + euf::snode* ch = m_sg.mk_char(ch_val); + bool dup = false; + for (euf::snode* c : chars) + if (c == ch) { dup = true; break; } + if (!dup) + chars.push_back(ch); + } + return; + } + } + + if (re->is_fail() || re->is_full_seq()) + return; + + // recurse into children (handles union, concat, star, loop, etc.) + for (unsigned i = 0; i < re->num_args(); ++i) + collect_first_chars(re->arg(i), chars); + } + + // ----------------------------------------------------------------------- + // Membership processing + // ----------------------------------------------------------------------- + + bool nseq_regex::process_str_mem(seq::str_mem const& mem, + vector& out_mems) { + if (!mem.m_str || !mem.m_regex) + return true; + // empty string: check nullable + if (mem.m_str->is_empty()) + return mem.m_regex->is_nullable(); + + // consume ground prefix: derive regex by each leading concrete char + seq::str_mem working = mem; + simplify_status st = simplify_ground_prefix(working); + if (st == simplify_status::conflict) + return false; + if (st == simplify_status::satisfied) + return true; + + // after ground prefix consumption, if the front is still a concrete + // character we can take one more step (shouldn't happen after + // simplify_ground_prefix, but guard defensively) + euf::snode* first = working.m_str->first(); + if (first && first->is_char()) { + seq::str_mem derived = derive(working, first); + if (is_empty_regex(derived.m_regex)) + return false; + out_mems.push_back(derived); + return true; + } + + // string starts with a non-ground element (variable or unit): + // return the simplified constraint for the Nielsen graph to expand + // via character-split modifiers. + out_mems.push_back(working); + return true; + } + + // ----------------------------------------------------------------------- + // History recording + // ----------------------------------------------------------------------- + + seq::str_mem nseq_regex::record_history(seq::str_mem const& mem, euf::snode* history_re) { + // Build a history chain by prepending the new regex entry to the + // existing history. Uses regex-concat as a cons cell: + // new_history = re.concat(history_re, old_history) + // where arg(0) is the latest entry and arg(1) is the tail. + // If old_history is nullptr, the new entry becomes the terminal leaf. + euf::snode* new_history = history_re; + if (mem.m_history && history_re) { + expr* re_expr = history_re->get_expr(); + expr* old_expr = mem.m_history->get_expr(); + if (re_expr && old_expr) { + seq_util& seq = m_sg.get_seq_util(); + expr_ref chain(seq.re.mk_concat(re_expr, old_expr), m_sg.get_manager()); + new_history = m_sg.mk(chain); + } + } + return seq::str_mem(mem.m_str, mem.m_regex, new_history, mem.m_id, mem.m_dep); + } + + // ----------------------------------------------------------------------- + // Cycle detection + // ----------------------------------------------------------------------- + + euf::snode* nseq_regex::extract_cycle(seq::str_mem const& mem) const { + // Walk the history chain looking for a repeated regex. + // A cycle exists when the current regex matches a regex in the history. + if (!mem.m_regex || !mem.m_history) + return nullptr; + + euf::snode* current = mem.m_regex; + euf::snode* hist = mem.m_history; + + // Walk the history chain up to a bounded depth. + // The history is structured as a chain of regex snapshots connected + // via the sgraph's regex-concat: each level's arg(0) is a snapshot + // and arg(1) is the tail. A leaf (non-concat) is a terminal entry. + unsigned bound = 1000; + while (hist && bound-- > 0) { + euf::snode* entry = hist; + euf::snode* tail = nullptr; + + // If the history node is a regex concat, decompose it: + // arg(0) is the regex snapshot, arg(1) is the rest of the chain + seq_util& seq = m_sg.get_seq_util(); + if (hist->is_concat() && hist->get_expr() && + seq.re.is_concat(hist->get_expr())) { + entry = hist->arg(0); + tail = hist->arg(1); + } + + // Check pointer equality (fast, covers normalized regexes) + if (entry == current) + return entry; + // Check expression-level equality as fallback + if (entry->get_expr() && current->get_expr() && + entry->get_expr() == current->get_expr()) + return entry; + + hist = tail; + } + return nullptr; + } + + // ----------------------------------------------------------------------- + // Stabilizer from cycle + // ----------------------------------------------------------------------- + + euf::snode* nseq_regex::stabilizer_from_cycle(euf::snode* cycle_regex, + euf::snode* current_regex) { + if (!cycle_regex || !current_regex) + return nullptr; + + // The stabilizer is the Kleene star of the "cycle body" regex. + // If the cycle regex and current regex are the same (pointer equal), + // the stabilizer is cycle_regex* (Kleene star). + // This mirrors ZIPT's StabilizerFromCycle which extracts the + // regex between the cycle entry and current point and wraps it in *. + + // Build cycle_regex* via the sgraph's expression factory + expr* re_expr = cycle_regex->get_expr(); + if (!re_expr) + return nullptr; + + seq_util& seq = m_sg.get_seq_util(); + expr_ref star_expr(seq.re.mk_star(re_expr), m_sg.get_manager()); + return m_sg.mk(star_expr); + } + + // ----------------------------------------------------------------------- + // Stabilizer-based subsumption + // ----------------------------------------------------------------------- + + bool nseq_regex::try_subsume(seq::str_mem const& mem) { + // Check if the derivation history exhibits a cycle, and if so, + // whether the current regex is subsumed by the stabilizer. + euf::snode* cycle = extract_cycle(mem); + if (!cycle) + return false; + + euf::snode* stab = stabilizer_from_cycle(cycle, mem.m_regex); + if (!stab) + return false; + + // A constraint x ∈ R is subsumed when R ⊆ stab. + // For the simple case where cycle == current regex, + // R ⊆ R* is always true (since R* accepts everything R does, and more). + // This handles the common idempotent cycle case. + if (cycle == mem.m_regex) + return true; + + // More sophisticated subsumption checks (regex containment) + // would require a regex inclusion decision procedure. + // For now, only handle the pointer-equality case. + return false; + } + +} diff --git a/src/smt/nseq_regex.h b/src/smt/nseq_regex.h index 67598b36a..db9deebb9 100644 --- a/src/smt/nseq_regex.h +++ b/src/smt/nseq_regex.h @@ -7,8 +7,20 @@ Module Name: Abstract: - Regex membership handling using Brzozowski derivatives. - Processes str_mem constraints after character consumption. + Lazy regex membership processing for the Nielsen-based string solver. + + Provides Brzozowski derivative computation, ground prefix/suffix + consumption, cycle detection in derivation histories, and + stabilizer-based subsumption for regex membership constraints. + + Ports the following ZIPT StrMem operations: + - SimplifyCharRegex / SimplifyDir (ground prefix/suffix consumption) + - ExtractCycle / StabilizerFromCycle (cycle detection and widening) + - TrySubsume (stabilizer-based subsumption) + + The class wraps sgraph operations (brzozowski_deriv, compute_minterms, + drop_first, etc.) and provides a higher-level interface for + nielsen_graph and theory_nseq. Author: @@ -28,39 +40,146 @@ namespace smt { public: nseq_regex(euf::sgraph& sg) : m_sg(sg) {} - // check if a regex snode represents the empty language - bool is_empty_regex(euf::snode* re) const { - return re && re->is_fail(); + euf::sgraph& sg() { return m_sg; } + + // ----------------------------------------------------------------- + // Basic regex predicates + // ----------------------------------------------------------------- + + // check if regex is the empty language (∅ / re.empty). + // performs structural analysis beyond is_fail() to detect + // derived emptiness (e.g., union of empties, concat with empty). + bool is_empty_regex(euf::snode* re) const; + + // check if regex is the full language (Σ* / re.all) + bool is_full_regex(euf::snode* re) const { + return re && re->is_full_seq(); } - // compute derivative of regex re with respect to char elem and - // return a new str_mem for the resulting constraint + // check if regex accepts the empty string + bool is_nullable(euf::snode* re) const { + return re && re->is_nullable(); + } + + // check if regex is ground (no string variables) + bool is_ground(euf::snode* re) const { + return re && re->is_ground(); + } + + // ----------------------------------------------------------------- + // Derivative computation + // ----------------------------------------------------------------- + + // compute Brzozowski derivative of regex w.r.t. character element. + // returns nullptr on failure. + euf::snode* derivative(euf::snode* re, euf::snode* elem) { + return m_sg.brzozowski_deriv(re, elem); + } + + // compute derivative of a str_mem constraint: advance past one character. + // the string side is shortened by drop_first and the regex is derived. seq::str_mem derive(seq::str_mem const& mem, euf::snode* elem) { euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, elem); euf::snode* new_str = m_sg.drop_first(mem.m_str); return seq::str_mem(new_str, deriv, mem.m_history, mem.m_id, mem.m_dep); } - // process a regex membership constraint after one character has been consumed - // returns false if the resulting regex is empty (conflict) - bool process_str_mem(seq::str_mem const& mem, - vector& out_mems) { - if (!mem.m_str || !mem.m_regex) - return true; - // if regex does not accept the empty string and the string side is empty, conflict - if (mem.m_str->is_empty()) { - return mem.m_regex->is_nullable(); - } - // compute minterms for the regex - euf::snode_vector minterms; - m_sg.compute_minterms(mem.m_regex, minterms); - for (euf::snode* ch : minterms) { - seq::str_mem new_mem = derive(mem, ch); - if (!is_empty_regex(new_mem.m_regex)) - out_mems.push_back(new_mem); - } - return true; + // ----------------------------------------------------------------- + // Ground prefix/suffix consumption + // ----------------------------------------------------------------- + + enum class simplify_status { ok, conflict, satisfied }; + + // consume ground characters from the front of mem.m_str by computing + // Brzozowski derivatives against mem.m_regex. + // stops when: + // - the string front is not a concrete character (ok) + // - a derivative produces ∅ (conflict) + // - the string becomes empty and regex is nullable (satisfied) + // - the string becomes empty and regex is not nullable (conflict) + // modifies mem in-place. + simplify_status simplify_ground_prefix(seq::str_mem& mem); + + // consume ground characters from the back of mem.m_str by computing + // reverse derivatives. modifies mem in-place. + // (reverse derivatives require regex reversal; this is a best-effort + // simplification that handles the common case of trailing constants.) + simplify_status simplify_ground_suffix(seq::str_mem& mem); + + // ----------------------------------------------------------------- + // Trivial checks + // ----------------------------------------------------------------- + + // quick check for trivially sat/unsat membership. + // returns 1 if satisfied (empty string in nullable regex, or full regex) + // returns -1 if conflicting (empty string in non-nullable, or ∅ regex) + // returns 0 if undetermined + int check_trivial(seq::str_mem const& mem) const; + + // ----------------------------------------------------------------- + // Minterm and character computation + // ----------------------------------------------------------------- + + // compute minterms (character class partition) from regex + void compute_minterms(euf::snode* re, euf::snode_vector& minterms) { + m_sg.compute_minterms(re, minterms); } + + // compute minterms for character splitting, filtering out empty + // (fail) minterms. Minterms are regex character-class expressions + // forming a partition of the alphabet; callers use them to drive + // fresh-variable creation in character-split modifiers. + void get_minterms(euf::snode* regex, euf::snode_vector& minterms); + + // collect concrete first-position characters from a regex. + // extracts characters reachable from to_re leaves and simple ranges. + void collect_first_chars(euf::snode* re, euf::snode_vector& chars); + + // ----------------------------------------------------------------- + // Membership processing + // ----------------------------------------------------------------- + + // process a str_mem constraint by consuming ground characters from + // the string front via Brzozowski derivatives. If the entire ground + // prefix is consumed and the constraint is neither satisfied nor + // conflicting, the (simplified) constraint is pushed to out_mems + // for the Nielsen graph to expand via character-split modifiers. + // returns false if the constraint is immediately conflicting + // (empty string in non-nullable regex, or derivative yields ∅). + bool process_str_mem(seq::str_mem const& mem, + vector& out_mems); + + // ----------------------------------------------------------------- + // Cycle detection and stabilizers + // ----------------------------------------------------------------- + + // record current regex in the derivation history of a str_mem. + // the history tracks a chain of (regex, id) pairs for cycle detection. + // returns the updated str_mem. + seq::str_mem record_history(seq::str_mem const& mem, euf::snode* history_re); + + // check if the derivation history of mem contains a cycle, i.e., + // the same regex id appears twice in the history chain. + // if found, returns the cycle entry point regex; nullptr otherwise. + euf::snode* extract_cycle(seq::str_mem const& mem) const; + + // check if the derivation history exhibits a cycle. + // returns true when the current regex matches a previously seen regex + // in the history chain. used to trigger stabilizer introduction. + bool detect_cycle(seq::str_mem const& mem) const; + + // compute a Kleene star stabilizer from a cycle. + // given the regex at the cycle point and the current regex, + // builds r* that over-approximates any number of cycle iterations. + // returns nullptr if no stabilizer can be computed. + euf::snode* stabilizer_from_cycle(euf::snode* cycle_regex, + euf::snode* current_regex); + + // try to subsume a str_mem constraint using stabilizer-based + // reasoning: if extract_cycle finds a cycle, check whether + // the current regex is already covered by the stabilizer. + // returns true if the constraint can be dropped. + bool try_subsume(seq::str_mem const& mem); }; } diff --git a/src/smt/nseq_state.h b/src/smt/nseq_state.h index 41306384c..dfe28c1b3 100644 --- a/src/smt/nseq_state.h +++ b/src/smt/nseq_state.h @@ -21,15 +21,48 @@ Author: #include "util/vector.h" #include "ast/euf/euf_sgraph.h" #include "smt/seq/seq_nielsen.h" +#include "smt/smt_literal.h" namespace smt { + class enode; + + // source info for a string equality (the two enodes whose merge caused it) + struct eq_source { + enode* m_n1; + enode* m_n2; + }; + + // source info for a regex membership (the literal that asserted it) + struct mem_source { + literal m_lit; + }; + + // source info for a string disequality + struct diseq_source { + enode* m_n1; + enode* m_n2; + }; + + // negative regex membership: ¬(str in regex) + struct neg_mem_entry { + euf::snode* m_str; + euf::snode* m_regex; + literal m_lit; + }; + class nseq_state { euf::sgraph& m_sg; vector m_str_eqs; vector m_str_mems; + vector m_eq_sources; + vector m_mem_sources; + vector m_diseqs; + vector m_neg_mems; unsigned_vector m_str_eq_lim; unsigned_vector m_str_mem_lim; + unsigned_vector m_diseq_lim; + unsigned_vector m_neg_mem_lim; unsigned m_next_mem_id = 0; public: @@ -38,37 +71,68 @@ namespace smt { void push() { m_str_eq_lim.push_back(m_str_eqs.size()); m_str_mem_lim.push_back(m_str_mems.size()); + m_diseq_lim.push_back(m_diseqs.size()); + m_neg_mem_lim.push_back(m_neg_mems.size()); } void pop(unsigned n) { for (unsigned i = 0; i < n; ++i) { m_str_eqs.shrink(m_str_eq_lim.back()); + m_eq_sources.shrink(m_str_eq_lim.back()); m_str_eq_lim.pop_back(); m_str_mems.shrink(m_str_mem_lim.back()); + m_mem_sources.shrink(m_str_mem_lim.back()); m_str_mem_lim.pop_back(); + m_diseqs.shrink(m_diseq_lim.back()); + m_diseq_lim.pop_back(); + m_neg_mems.shrink(m_neg_mem_lim.back()); + m_neg_mem_lim.pop_back(); } } - void add_str_eq(euf::snode* lhs, euf::snode* rhs) { + void add_str_eq(euf::snode* lhs, euf::snode* rhs, enode* n1, enode* n2) { seq::dep_tracker dep; m_str_eqs.push_back(seq::str_eq(lhs, rhs, dep)); + m_eq_sources.push_back({n1, n2}); } - void add_str_mem(euf::snode* str, euf::snode* regex) { + void add_str_mem(euf::snode* str, euf::snode* regex, literal lit) { seq::dep_tracker dep; m_str_mems.push_back(seq::str_mem(str, regex, nullptr, m_next_mem_id++, dep)); + m_mem_sources.push_back({lit}); + } + + void add_diseq(enode* n1, enode* n2) { + m_diseqs.push_back({n1, n2}); + } + + void add_neg_mem(euf::snode* str, euf::snode* regex, literal lit) { + m_neg_mems.push_back({str, regex, lit}); } vector const& str_eqs() const { return m_str_eqs; } vector const& str_mems() const { return m_str_mems; } + vector const& diseqs() const { return m_diseqs; } + vector const& neg_mems() const { return m_neg_mems; } - bool empty() const { return m_str_eqs.empty() && m_str_mems.empty(); } + eq_source const& get_eq_source(unsigned i) const { return m_eq_sources[i]; } + mem_source const& get_mem_source(unsigned i) const { return m_mem_sources[i]; } + diseq_source const& get_diseq(unsigned i) const { return m_diseqs[i]; } + neg_mem_entry const& get_neg_mem(unsigned i) const { return m_neg_mems[i]; } + + bool empty() const { return m_str_eqs.empty() && m_str_mems.empty() && m_neg_mems.empty() && m_diseqs.empty(); } void reset() { m_str_eqs.reset(); m_str_mems.reset(); + m_eq_sources.reset(); + m_mem_sources.reset(); + m_diseqs.reset(); + m_neg_mems.reset(); m_str_eq_lim.reset(); m_str_mem_lim.reset(); + m_diseq_lim.reset(); + m_neg_mem_lim.reset(); } }; diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 2cdf8877e..d5d5a3e24 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -20,7 +20,9 @@ Author: --*/ #include "smt/seq/seq_nielsen.h" +#include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" +#include "util/bit_util.h" namespace seq { @@ -67,6 +69,17 @@ namespace seq { return true; } + void dep_tracker::get_set_bits(unsigned_vector& indices) const { + for (unsigned i = 0; i < m_bits.size(); ++i) { + unsigned word = m_bits[i]; + while (word != 0) { + unsigned bit = i * 32 + ntz_core(word); + indices.push_back(bit); + word &= word - 1; // clear lowest set bit + } + } + } + // ----------------------------------------------- // str_eq // ----------------------------------------------- @@ -215,6 +228,7 @@ namespace seq { str_eq eq(lhs, rhs, dep); eq.sort(); m_root->add_str_eq(eq); + ++m_num_input_eqs; } void nielsen_graph::add_str_mem(euf::snode* str, euf::snode* regex) { @@ -225,6 +239,7 @@ namespace seq { euf::snode* history = m_sg.mk_empty(); unsigned id = next_mem_id(); m_root->add_str_mem(str_mem(str, regex, history, id, dep)); + ++m_num_input_mems; } void nielsen_graph::inc_run_idx() { @@ -248,6 +263,9 @@ namespace seq { m_run_idx = 0; m_depth_bound = 0; m_next_mem_id = 0; + m_fresh_cnt = 0; + m_num_input_eqs = 0; + m_num_input_mems = 0; } std::ostream& nielsen_graph::display(std::ostream& out) const { @@ -354,26 +372,26 @@ namespace seq { // one side empty, the other not empty => conflict or substitution if (eq.m_lhs->is_empty() && !eq.m_rhs->is_empty()) { - // rhs must also be empty; if it is a concrete non-empty string => conflict - if (eq.m_rhs->is_char() || eq.m_rhs->is_concat()) { - // check if rhs has any non-variable tokens - euf::snode_vector tokens; - eq.m_rhs->collect_tokens(tokens); - bool all_vars = true; - for (euf::snode* t : tokens) - if (!t->is_var()) { all_vars = false; break; } - if (!all_vars) { - m_is_general_conflict = true; - m_reason = backtrack_reason::symbol_clash; - return simplify_result::conflict; + euf::snode_vector tokens; + eq.m_rhs->collect_tokens(tokens); + bool all_vars_or_opaque = true; + bool has_char = false; + for (euf::snode* t : tokens) { + if (t->is_char()) has_char = true; + else if (!t->is_var() && t->kind() != euf::snode_kind::s_other) { + all_vars_or_opaque = false; break; } - // substitute: every variable in rhs -> empty - for (euf::snode* t : tokens) { - if (t->is_var()) { - nielsen_subst s(t, sg.mk_empty(), eq.m_dep); - apply_subst(sg, s); - changed = true; - } + } + if (has_char || !all_vars_or_opaque) { + m_is_general_conflict = true; + m_reason = backtrack_reason::symbol_clash; + return simplify_result::conflict; + } + for (euf::snode* t : tokens) { + if (t->is_var()) { + nielsen_subst s(t, sg.mk_empty(), eq.m_dep); + apply_subst(sg, s); + changed = true; } } continue; @@ -381,10 +399,15 @@ namespace seq { if (eq.m_rhs->is_empty() && !eq.m_lhs->is_empty()) { euf::snode_vector tokens; eq.m_lhs->collect_tokens(tokens); - bool all_vars = true; - for (euf::snode* t : tokens) - if (!t->is_var()) { all_vars = false; break; } - if (!all_vars) { + bool all_vars_or_opaque = true; + bool has_char = false; + for (euf::snode* t : tokens) { + if (t->is_char()) has_char = true; + else if (!t->is_var() && t->kind() != euf::snode_kind::s_other) { + all_vars_or_opaque = false; break; + } + } + if (has_char || !all_vars_or_opaque) { m_is_general_conflict = true; m_reason = backtrack_reason::symbol_clash; return simplify_result::conflict; @@ -423,6 +446,27 @@ namespace seq { } } + // consume leading concrete characters from str_mem via Brzozowski derivatives + for (str_mem& mem : m_str_mem) { + if (!mem.m_str || !mem.m_regex) + continue; + while (mem.m_str && !mem.m_str->is_empty()) { + euf::snode* first = mem.m_str->first(); + if (!first || !first->is_char()) + break; + euf::snode* deriv = sg.brzozowski_deriv(mem.m_regex, first); + if (!deriv) break; + if (deriv->is_fail()) { + m_is_general_conflict = true; + m_reason = backtrack_reason::regex; + return simplify_result::conflict; + } + mem.m_str = sg.drop_first(mem.m_str); + mem.m_regex = deriv; + changed = true; + } + } + // check for regex memberships that are immediately infeasible for (str_mem& mem : m_str_mem) { if (!mem.m_str || !mem.m_regex) @@ -434,6 +478,21 @@ namespace seq { } } + // remove satisfied str_mem constraints (ε ∈ nullable regex) + { + unsigned wi = 0; + for (unsigned i = 0; i < m_str_mem.size(); ++i) { + str_mem& mem = m_str_mem[i]; + if (mem.m_str && mem.m_str->is_empty() && mem.m_regex && mem.m_regex->is_nullable()) + continue; // satisfied, drop + m_str_mem[wi++] = mem; + } + if (wi < m_str_mem.size()) { + m_str_mem.shrink(wi); + changed = true; + } + } + if (is_satisfied()) return simplify_result::satisfied; @@ -447,8 +506,11 @@ namespace seq { } bool nielsen_node::is_subsumed_by(nielsen_node const& other) const { - // check if every constraint in 'other' also appears in 'this' + // check if every non-trivial constraint in 'other' also appears in 'this'. + // trivial str_eqs (both sides empty/equal) are always satisfied and + // do not affect subsumption. for (str_eq const& oeq : other.m_str_eq) { + if (oeq.is_trivial()) continue; bool found = false; for (str_eq const& teq : m_str_eq) if (teq == oeq) { found = true; break; } @@ -463,6 +525,25 @@ namespace seq { return true; } + bool nielsen_node::has_opaque_terms() const { + auto is_opaque = [](euf::snode* n) { return n && n->kind() == euf::snode_kind::s_other; }; + for (str_eq const& eq : m_str_eq) { + if (eq.is_trivial()) continue; + if (is_opaque(eq.m_lhs) || is_opaque(eq.m_rhs)) return true; + euf::snode_vector toks; + if (eq.m_lhs) { eq.m_lhs->collect_tokens(toks); for (auto* t : toks) if (is_opaque(t)) return true; toks.reset(); } + if (eq.m_rhs) { eq.m_rhs->collect_tokens(toks); for (auto* t : toks) if (is_opaque(t)) return true; } + } + for (str_mem const& mem : m_str_mem) { + if (!mem.m_str) continue; + if (is_opaque(mem.m_str)) return true; + euf::snode_vector toks; + mem.m_str->collect_tokens(toks); + for (auto* t : toks) if (is_opaque(t)) return true; + } + return false; + } + // ----------------------------------------------------------------------- // nielsen_graph: search // ----------------------------------------------------------------------- @@ -471,31 +552,93 @@ namespace seq { if (!m_root) return search_result::sat; + ++m_stats.m_num_solve_calls; + + // iterative deepening: 6 iterations starting at depth 10, doubling + // mirrors ZIPT's NielsenGraph.Check() m_depth_bound = 10; for (unsigned iter = 0; iter < 6; ++iter, m_depth_bound *= 2) { inc_run_idx(); search_result r = search_dfs(m_root, 0); - if (r != search_result::unknown) + if (r == search_result::sat) { + ++m_stats.m_num_sat; return r; - // depth limit hit – increase bound + } + if (r == search_result::unsat) { + ++m_stats.m_num_unsat; + return r; + } + // depth limit hit – increase bound and retry } + ++m_stats.m_num_unknown; return search_result::unknown; } nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, unsigned depth) { + ++m_stats.m_num_dfs_nodes; + if (depth > m_stats.m_max_depth) + m_stats.m_max_depth = depth; + + // cycle/revisit detection: if already visited this run, return cached status. + // mirrors ZIPT's NielsenNode.GraphExpansion() evalIdx check. + if (node->eval_idx() == m_run_idx) { + if (node->is_satisfied()) + return search_result::sat; + if (node->is_currently_conflict()) + return search_result::unsat; + return search_result::unknown; + } + node->set_eval_idx(m_run_idx); + + // simplify constraints (idempotent after first call) simplify_result sr = node->simplify_and_init(*this); - if (sr == simplify_result::conflict) + if (sr == simplify_result::conflict) { + ++m_stats.m_num_simplify_conflict; + node->set_general_conflict(true); return search_result::unsat; + } if (sr == simplify_result::satisfied || node->is_satisfied()) return search_result::sat; + // subsumption pruning: if any previously explored node has a subset + // of our constraints and was found to be a conflict, this node must + // also be a conflict. Mirrors ZIPT's FindExisting() lookup. + for (nielsen_node* other : m_nodes) { + if (other == node) continue; + if (other->eval_idx() != m_run_idx) continue; + if (!other->is_general_conflict()) continue; + if (node->is_subsumed_by(*other)) { + ++m_stats.m_num_subsumptions; + node->set_general_conflict(true); + node->set_reason(backtrack_reason::subsumption); + return search_result::unsat; + } + } + + // depth bound check if (depth >= m_depth_bound) return search_result::unknown; - if (!generate_extensions(node, depth)) - return search_result::unsat; + // generate extensions only once per node; children persist across runs + if (!node->is_extended()) { + bool ext = generate_extensions(node, depth); + if (!ext) { + node->set_extended(true); + // No extensions could be generated. If the node still has + // unsatisfied constraints with opaque (s_other) terms that + // we cannot decompose, report unknown rather than unsat + // to avoid unsoundness. + if (node->has_opaque_terms()) + return search_result::unknown; + node->set_reason(backtrack_reason::children_failed); + return search_result::unsat; + } + node->set_extended(true); + ++m_stats.m_num_extensions; + } + // explore children bool any_unknown = false; for (nielsen_edge* e : node->outgoing()) { nielsen_node* child = e->tgt(); @@ -505,15 +648,23 @@ namespace seq { if (r == search_result::unknown) any_unknown = true; } - return any_unknown ? search_result::unknown : search_result::unsat; + + // If no children exist and the node has opaque terms, report unknown + if (node->outgoing().empty() && node->has_opaque_terms()) + return search_result::unknown; + + if (!any_unknown) { + node->set_reason(backtrack_reason::children_failed); + return search_result::unsat; + } + return search_result::unknown; } simplify_result nielsen_graph::simplify_node(nielsen_node* node) { return node->simplify_and_init(*this); } - bool nielsen_graph::generate_extensions(nielsen_node* node, unsigned /*depth*/) { - // find the first non-trivial string equality to split on + bool nielsen_graph::apply_det_modifier(nielsen_node* node) { for (str_eq const& eq : node->str_eqs()) { if (eq.is_trivial()) continue; @@ -523,16 +674,31 @@ namespace seq { euf::snode_vector lhs_toks, rhs_toks; eq.m_lhs->collect_tokens(lhs_toks); eq.m_rhs->collect_tokens(rhs_toks); - if (lhs_toks.empty() || rhs_toks.empty()) continue; euf::snode* lhead = lhs_toks[0]; euf::snode* rhead = rhs_toks[0]; - // Det modifier: if one side starts with a variable whose other side is empty + // same-head variable cancellation: x·A = x·B → A = B + if (lhead->is_var() && lhead->id() == rhead->id()) { + euf::snode* ltail = m_sg.drop_first(eq.m_lhs); + euf::snode* rtail = m_sg.drop_first(eq.m_rhs); + nielsen_node* child = mk_child(node); + // replace the equation with the tails + for (str_eq& ceq : child->str_eqs()) { + if (ceq.m_lhs == eq.m_lhs && ceq.m_rhs == eq.m_rhs) { + ceq.m_lhs = ltail; + ceq.m_rhs = rtail; + break; + } + } + mk_edge(node, child, true); + return true; + } + + // variable definition: x = t where x ∉ vars(t) → subst x → t if (lhead->is_var() && eq.m_rhs->is_empty()) { - // substitute lhead -> empty nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); @@ -548,60 +714,29 @@ namespace seq { child->apply_subst(m_sg, s); return true; } + } + return false; + } - // Const Nielsen modifier: lhs starts with char, rhs starts with var - // -> substitute rhs_var = char . fresh_var + bool nielsen_graph::apply_const_nielsen(nielsen_node* node) { + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; + + euf::snode_vector lhs_toks, rhs_toks; + eq.m_lhs->collect_tokens(lhs_toks); + eq.m_rhs->collect_tokens(rhs_toks); + if (lhs_toks.empty() || rhs_toks.empty()) + continue; + + euf::snode* lhead = lhs_toks[0]; + euf::snode* rhead = rhs_toks[0]; + + // char·A = y·B → branch 1: y→ε, branch 2: y→char·fresh if (lhead->is_char() && rhead->is_var()) { - std::string fresh_str = "v!" + std::to_string(node->id()); - symbol fresh_name(fresh_str.c_str()); - euf::snode* fresh = m_sg.mk_var(fresh_name); - euf::snode* replacement = m_sg.mk_concat(lhead, fresh); - - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(rhead, replacement, eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - return true; - } - - if (rhead->is_char() && lhead->is_var()) { - std::string fresh_str = "v!" + std::to_string(node->id()); - symbol fresh_name(fresh_str.c_str()); - euf::snode* fresh = m_sg.mk_var(fresh_name); - euf::snode* replacement = m_sg.mk_concat(rhead, fresh); - - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(lhead, replacement, eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - return true; - } - - // Eq split modifier: both sides start with variables x.A = y.B - // Produce three children: - // 1) x = eps, A = y.B - // 2) x = y, A = B (when len(x) = len(y)) - // 3) y = eps, x.A = B - if (lhead->is_var() && rhead->is_var()) { - // child 1: lhead -> eps - { - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - } - // child 2: lhead = rhead (if different vars, substitute lhead -> rhead) - if (lhead->id() != rhead->id()) { - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, false); - nielsen_subst s(lhead, rhead, eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - } - // child 3: rhead -> eps + // branch 1: y → ε (progress) { nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); @@ -609,18 +744,820 @@ namespace seq { e->add_subst(s); child->apply_subst(m_sg, s); } + // branch 2: y → char·fresh (progress) + { + euf::snode* fresh = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(lhead, fresh); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(rhead, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } return true; } - // no applicable modifier for this equality; try next + // x·A = char·B → branch 1: x→ε, branch 2: x→char·fresh + if (rhead->is_char() && lhead->is_var()) { + // branch 1: x → ε (progress) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + // branch 2: x → char·fresh (progress) + { + euf::snode* fresh = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(rhead, fresh); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(lhead, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + return true; + } + } + return false; + } + + bool nielsen_graph::apply_var_nielsen(nielsen_node* node) { + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; + + euf::snode_vector lhs_toks, rhs_toks; + eq.m_lhs->collect_tokens(lhs_toks); + eq.m_rhs->collect_tokens(rhs_toks); + if (lhs_toks.empty() || rhs_toks.empty()) + continue; + + euf::snode* lhead = lhs_toks[0]; + euf::snode* rhead = rhs_toks[0]; + + if (!lhead->is_var() || !rhead->is_var()) + continue; + if (lhead->id() == rhead->id()) + continue; + + // x·A = y·B where x,y are distinct variables (classic Nielsen) + // child 1: x → ε (progress) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + // child 2: x → y·x' (progress) + { + euf::snode* fresh = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(rhead, fresh); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(lhead, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + // child 3: y → x·y' (progress) + { + euf::snode* fresh = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(lhead, fresh); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(rhead, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + return true; + } + return false; + } + + bool nielsen_graph::apply_eq_split(nielsen_node* node) { + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; + + euf::snode_vector lhs_toks, rhs_toks; + eq.m_lhs->collect_tokens(lhs_toks); + eq.m_rhs->collect_tokens(rhs_toks); + if (lhs_toks.empty() || rhs_toks.empty()) + continue; + + euf::snode* lhead = lhs_toks[0]; + euf::snode* rhead = rhs_toks[0]; + + if (!lhead->is_var() || !rhead->is_var()) + continue; + + // x·A = y·B where x,y are distinct variables + // child 1: x → ε (progress) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + // child 2: x → y·z_fresh (non-progress, x starts with y) + if (lhead->id() != rhead->id()) { + euf::snode* fresh = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(rhead, fresh); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, false); + nielsen_subst s(lhead, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + // child 3: y → x·z_fresh (non-progress, y starts with x) + if (lhead->id() != rhead->id()) { + euf::snode* fresh = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(lhead, fresh); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, false); + nielsen_subst s(rhead, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + return true; + } + return false; + } + + // ----------------------------------------------------------------------- + // nielsen_graph: mk_fresh_var + // ----------------------------------------------------------------------- + + euf::snode* nielsen_graph::mk_fresh_var() { + ++m_stats.m_num_fresh_vars; + std::string name = "v!" + std::to_string(m_fresh_cnt++); + return m_sg.mk_var(symbol(name.c_str())); + } + + // ----------------------------------------------------------------------- + // nielsen_graph: apply_regex_char_split + // ----------------------------------------------------------------------- + + void nielsen_graph::collect_first_chars(euf::snode* re, euf::snode_vector& chars) { + if (!re) + return; + + // to_re(s): the first character of the string s + if (re->is_to_re()) { + euf::snode* body = re->arg(0); + if (body && !body->is_empty()) { + euf::snode* first = body->first(); + if (first && first->is_char()) { + bool dup = false; + for (euf::snode* c : chars) + if (c == first) { dup = true; break; } + if (!dup) + chars.push_back(first); + } + // Handle string literals (classified as s_other in sgraph): + // extract the first character from the zstring and create a char snode. + else if (first && first->get_expr()) { + seq_util& seq = m_sg.get_seq_util(); + zstring s; + if (seq.str.is_string(first->get_expr(), s) && s.length() > 0) { + euf::snode* ch = m_sg.mk_char(s[0]); + bool dup = false; + for (euf::snode* c : chars) + if (c == ch) { dup = true; break; } + if (!dup) + chars.push_back(ch); + } + } + } + return; } - // no extension was generated + // full character set (.): use 'a' as representative + if (re->is_full_char()) { + euf::snode* ch = m_sg.mk_char('a'); + bool dup = false; + for (euf::snode* c : chars) + if (c == ch) { dup = true; break; } + if (!dup) + chars.push_back(ch); + return; + } + + // re.range(lo, hi): use lo as representative + if (re->get_expr()) { + seq_util& seq = m_sg.get_seq_util(); + expr* lo = nullptr, *hi = nullptr; + if (seq.re.is_range(re->get_expr(), lo, hi) && lo) { + unsigned ch_val = 'a'; + if (seq.is_const_char(lo, ch_val)) { + euf::snode* ch = m_sg.mk_char(ch_val); + bool dup = false; + for (euf::snode* c : chars) + if (c == ch) { dup = true; break; } + if (!dup) + chars.push_back(ch); + } + return; + } + } + + // fail, full_seq: no concrete first chars to extract + if (re->is_fail() || re->is_full_seq()) + return; + + // recurse into children (handles union, concat, star, loop, etc.) + for (unsigned i = 0; i < re->num_args(); ++i) + collect_first_chars(re->arg(i), chars); + } + + bool nielsen_graph::apply_regex_char_split(nielsen_node* node) { + for (str_mem const& mem : node->str_mems()) { + if (!mem.m_str || !mem.m_regex) + continue; + + // find the first token of the string side + euf::snode* first = mem.m_str->first(); + if (!first || !first->is_var()) + continue; + + // collect concrete characters from the regex + euf::snode_vector chars; + collect_first_chars(mem.m_regex, chars); + + // If no concrete characters found, fall through to apply_regex_var_split + if (chars.empty()) + continue; + + bool created = false; + + // for each character c with non-fail derivative: + // child: x → c · fresh_var + for (euf::snode* ch : chars) { + euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, ch); + if (!deriv || deriv->is_fail()) + continue; + + euf::snode* fresh = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(ch, fresh); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(first, replacement, mem.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + created = true; + } + + // x → ε branch (variable is empty) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(first, m_sg.mk_empty(), mem.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + created = true; + } + + if (created) + return true; + } return false; } + bool nielsen_graph::generate_extensions(nielsen_node* node, unsigned /*depth*/) { + // Modifier priority chain mirrors ZIPT's ModifierBase.TypeOrder. + // The first modifier that produces edges is used and returned immediately. + + // Priority 1: deterministic modifiers (single child, always progress) + if (apply_det_modifier(node)) + return ++m_stats.m_mod_det, true; + + // Priority 2: PowerEpsilon - power → ε via base=ε or n=0 + if (apply_power_epsilon(node)) + return ++m_stats.m_mod_power_epsilon, true; + + // Priority 3: NumCmp - length comparison branching for power tokens + if (apply_num_cmp(node)) + return ++m_stats.m_mod_num_cmp, true; + + // Priority 4: ConstNumUnwinding - power vs constant: n=0 or peel + if (apply_const_num_unwinding(node)) + return ++m_stats.m_mod_const_num_unwinding, true; + + // Priority 5: EqSplit - var vs var (branching, 3 children) + if (apply_eq_split(node)) + return ++m_stats.m_mod_eq_split, true; + + // Priority 6: StarIntr - stabilizer introduction for regex cycles + if (apply_star_intr(node)) + return ++m_stats.m_mod_star_intr, true; + + // Priority 7: GPowerIntr - generalized power introduction + if (apply_gpower_intr(node)) + return ++m_stats.m_mod_gpower_intr, true; + + // Priority 8: ConstNielsen - char vs var (2 children) + if (apply_const_nielsen(node)) + return ++m_stats.m_mod_const_nielsen, true; + + // Priority 9: RegexCharSplit - split str_mem by first-position chars + if (apply_regex_char_split(node)) + return ++m_stats.m_mod_regex_char_split, true; + + // Priority 10: RegexVarSplit - split str_mem by minterms + if (apply_regex_var_split(node)) + return ++m_stats.m_mod_regex_var_split, true; + + // Priority 11: PowerSplit - power unwinding with bounded prefix + if (apply_power_split(node)) + return ++m_stats.m_mod_power_split, true; + + // Priority 12: VarNielsen - var vs var, all progress (classic Nielsen) + if (apply_var_nielsen(node)) + return ++m_stats.m_mod_var_nielsen, true; + + // Priority 13: VarNumUnwinding - variable power unwinding + if (apply_var_num_unwinding(node)) + return ++m_stats.m_mod_var_num_unwinding, true; + + return false; + } + + // ----------------------------------------------------------------------- + // Helper: find a power token in any str_eq + // ----------------------------------------------------------------------- + + euf::snode* nielsen_graph::find_power_token(nielsen_node* node) const { + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) continue; + if (!eq.m_lhs || !eq.m_rhs) continue; + euf::snode_vector toks; + eq.m_lhs->collect_tokens(toks); + for (euf::snode* t : toks) + if (t->is_power()) return t; + toks.reset(); + eq.m_rhs->collect_tokens(toks); + for (euf::snode* t : toks) + if (t->is_power()) return t; + } + return nullptr; + } + + // ----------------------------------------------------------------------- + // Helper: find a power token facing a constant (char) head + // Returns true if found, sets power, other_head, eq_out. + // ----------------------------------------------------------------------- + + bool nielsen_graph::find_power_vs_const(nielsen_node* node, + euf::snode*& power, + euf::snode*& other_head, + str_eq const*& eq_out) const { + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) continue; + if (!eq.m_lhs || !eq.m_rhs) continue; + euf::snode* lhead = eq.m_lhs->first(); + euf::snode* rhead = eq.m_rhs->first(); + if (lhead && lhead->is_power() && rhead && rhead->is_char()) { + power = lhead; other_head = rhead; eq_out = &eq; return true; + } + if (rhead && rhead->is_power() && lhead && lhead->is_char()) { + power = rhead; other_head = lhead; eq_out = &eq; return true; + } + } + return false; + } + + // ----------------------------------------------------------------------- + // Helper: find a power token facing a variable head + // ----------------------------------------------------------------------- + + bool nielsen_graph::find_power_vs_var(nielsen_node* node, + euf::snode*& power, + euf::snode*& var_head, + str_eq const*& eq_out) const { + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) continue; + if (!eq.m_lhs || !eq.m_rhs) continue; + euf::snode* lhead = eq.m_lhs->first(); + euf::snode* rhead = eq.m_rhs->first(); + if (lhead && lhead->is_power() && rhead && rhead->is_var()) { + power = lhead; var_head = rhead; eq_out = &eq; return true; + } + if (rhead && rhead->is_power() && lhead && lhead->is_var()) { + power = rhead; var_head = lhead; eq_out = &eq; return true; + } + } + return false; + } + + // ----------------------------------------------------------------------- + // Modifier: apply_power_epsilon + // For a power token u^n in an equation, branch: + // (1) base u → ε (base is empty, so u^n = ε) + // (2) u^n → ε (the power is zero, replace power with empty) + // mirrors ZIPT's PowerEpsilonModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_power_epsilon(nielsen_node* node) { + euf::snode* power = find_power_token(node); + if (!power) + return false; + + SASSERT(power->is_power() && power->num_args() >= 1); + euf::snode* base = power->arg(0); + dep_tracker dep; + for (str_eq const& eq : node->str_eqs()) + if (!eq.is_trivial()) { dep = eq.m_dep; break; } + + // Branch 1: base → ε (if base is a variable, substitute it to empty) + // This makes u^n = ε^n = ε for any n. + if (base->is_var()) { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(base, m_sg.mk_empty(), dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + + // Branch 2: replace the power token itself with ε (n = 0 semantics) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(power, m_sg.mk_empty(), dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + + return true; + } + + // ----------------------------------------------------------------------- + // Modifier: apply_num_cmp + // For equations involving two power tokens u^m and u^n with the same base, + // branch on the numeric relationship: m < n vs n <= m. + // Approximated as: (1) replace u^m with ε, (2) replace u^n with ε. + // mirrors ZIPT's NumCmpModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_num_cmp(nielsen_node* node) { + // Look for two power tokens with the same base on opposite sides + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) continue; + if (!eq.m_lhs || !eq.m_rhs) continue; + euf::snode* lhead = eq.m_lhs->first(); + euf::snode* rhead = eq.m_rhs->first(); + if (!lhead || !rhead) continue; + if (!lhead->is_power() || !rhead->is_power()) continue; + if (lhead->num_args() < 1 || rhead->num_args() < 1) continue; + // same base: compare the two powers + if (lhead->arg(0)->id() != rhead->arg(0)->id()) continue; + + // Branch 1: m < n → peel from lhs power (replace lhead with ε) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + // Branch 2: n <= m → peel from rhs power (replace rhead with ε) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + return true; + } + return false; + } + + // ----------------------------------------------------------------------- + // Modifier: apply_const_num_unwinding + // For a power token u^n facing a constant (char) head, + // branch: (1) n = 0 → u^n = ε, (2) n >= 1 → peel one u from power. + // mirrors ZIPT's ConstNumUnwindingModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_const_num_unwinding(nielsen_node* node) { + euf::snode* power = nullptr; + euf::snode* other_head = nullptr; + str_eq const* eq = nullptr; + if (!find_power_vs_const(node, power, other_head, eq)) + return false; + + SASSERT(power->is_power() && power->num_args() >= 1); + euf::snode* base = power->arg(0); + + // Branch 1: n = 0 → replace power with ε (progress) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + + // Branch 2: n >= 1 → peel one u: replace u^n with u · u^n' + // Approximated by prepending the base and keeping a fresh power variable + { + euf::snode* fresh_power = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(base, fresh_power); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, false); + nielsen_subst s(power, replacement, eq->m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + + return true; + } + + // ----------------------------------------------------------------------- + // Modifier: apply_star_intr + // Star introduction: for a str_mem x·s ∈ R where a cycle is detected + // (backedge exists), introduce a stabilizer constraint x ∈ base*. + // Creates a child that adds x ∈ base* membership and constrains + // the remainder not to start with base. + // mirrors ZIPT's StarIntrModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_star_intr(nielsen_node* node) { + // Only fire if a backedge was set (cycle detected) + if (!node->backedge()) + return false; + + // Look for a str_mem with a variable-headed string + for (unsigned mi = 0; mi < node->str_mems().size(); ++mi) { + str_mem const& mem = node->str_mems()[mi]; + if (!mem.m_str || !mem.m_regex) continue; + euf::snode* first = mem.m_str->first(); + if (!first || !first->is_var()) continue; + + // Introduce a fresh variable z constrained by z ∈ R*, + // replacing x → z·fresh_post. This breaks the cycle because + // z is a new variable that won't trigger star_intr again. + euf::snode* x = first; + euf::snode* z = mk_fresh_var(); + euf::snode* fresh_post = mk_fresh_var(); + euf::snode* str_tail = m_sg.drop_first(mem.m_str); + + // Build z ∈ R* membership: the star of the current regex + seq_util& seq = m_sg.get_seq_util(); + expr* re_expr = mem.m_regex->get_expr(); + if (!re_expr) + continue; + expr_ref star_re(seq.re.mk_star(re_expr), seq.get_manager()); + euf::snode* star_sn = m_sg.mk(star_re); + if (!star_sn) + continue; + + nielsen_node* child = mk_child(node); + + // Add membership: z ∈ R* (stabilizer constraint) + child->add_str_mem(str_mem(z, star_sn, mem.m_history, next_mem_id(), mem.m_dep)); + + // Add remaining membership: fresh_post · tail ∈ R + euf::snode* post_tail = str_tail->is_empty() ? fresh_post : m_sg.mk_concat(fresh_post, str_tail); + child->add_str_mem(str_mem(post_tail, mem.m_regex, mem.m_history, next_mem_id(), mem.m_dep)); + + // Substitute x → z · fresh_post + nielsen_edge* e = mk_edge(node, child, false); + euf::snode* replacement = m_sg.mk_concat(z, fresh_post); + nielsen_subst s(x, replacement, mem.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + + // Do not re-set backedge — z is fresh, so star_intr won't fire again + return true; + } + return false; + } + + // ----------------------------------------------------------------------- + // Modifier: apply_gpower_intr + // Generalized power introduction: for a variable x matched against a + // ground repeated pattern, introduce x = base^n · prefix with fresh n. + // Approximated: for each non-trivial equation with a variable head vs + // a ground concatenation, introduce power decomposition. + // mirrors ZIPT's GPowerIntrModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_gpower_intr(nielsen_node* node) { + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) continue; + if (!eq.m_lhs || !eq.m_rhs) continue; + + euf::snode* var_side = nullptr; + euf::snode* ground_side = nullptr; + + // One side must be a single variable, other side must be ground + euf::snode* lhead = eq.m_lhs->first(); + euf::snode* rhead = eq.m_rhs->first(); + + if (lhead && lhead->is_var() && eq.m_lhs->length() == 1 && eq.m_rhs->is_ground()) { + var_side = lhead; + ground_side = eq.m_rhs; + } + else if (rhead && rhead->is_var() && eq.m_rhs->length() == 1 && eq.m_lhs->is_ground()) { + var_side = rhead; + ground_side = eq.m_lhs; + } + else continue; + + if (!ground_side || ground_side->is_empty()) continue; + if (ground_side->length() < 2) continue; // need a repeated pattern + + // Extract the first token as the "base" for power introduction + euf::snode* base_char = ground_side->first(); + if (!base_char || !base_char->is_char()) continue; + + // Check if the ground side has a repeated leading character + euf::snode_vector toks; + ground_side->collect_tokens(toks); + unsigned repeat_len = 0; + for (unsigned i = 0; i < toks.size(); ++i) { + if (toks[i]->id() == base_char->id()) + ++repeat_len; + else break; + } + if (repeat_len < 2) continue; // need at least 2 repetitions + + // Introduce: x = base^n · fresh_suffix + // Branch with side constraint n >= 0 + euf::snode* fresh_suffix = mk_fresh_var(); + euf::snode* fresh_power = mk_fresh_var(); // represents base^n + euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix); + + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(var_side, replacement, eq.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + + return true; + } + return false; + } + + // ----------------------------------------------------------------------- + // Modifier: apply_regex_var_split + // For str_mem x·s ∈ R where x is a variable, split using minterms: + // (1) x → ε (empty) + // (2) x → c · x' for each minterm character class c + // More general than regex_char_split; uses minterm partitioning rather + // than just extracting concrete characters. + // mirrors ZIPT's RegexVarSplitModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_regex_var_split(nielsen_node* node) { + for (str_mem const& mem : node->str_mems()) { + if (!mem.m_str || !mem.m_regex) continue; + euf::snode* first = mem.m_str->first(); + if (!first || !first->is_var()) continue; + + // Compute minterms from the regex + euf::snode_vector minterms; + m_sg.compute_minterms(mem.m_regex, minterms); + if (minterms.empty()) continue; + + bool created = false; + + // Branch 1: x → ε (progress) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(first, m_sg.mk_empty(), mem.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + created = true; + } + + // Branch 2+: for each minterm m_i, x → fresh_char · x' + // where fresh_char is constrained by the minterm + for (euf::snode* mt : minterms) { + if (mt->is_fail()) continue; + + // Try Brzozowski derivative with respect to this minterm + // If the derivative is fail (empty language), skip this minterm + euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, mt); + if (deriv && deriv->is_fail()) continue; + + euf::snode* fresh_var = mk_fresh_var(); + euf::snode* fresh_char = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(fresh_char, fresh_var); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(first, replacement, mem.m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + created = true; + } + + if (created) + return true; + } + return false; + } + + // ----------------------------------------------------------------------- + // Modifier: apply_power_split + // For a variable x facing a power token u^n, branch: + // (1) x = u^m · prefix(u) with m < n (bounded power prefix) + // (2) x = u^n · x (the variable extends past the full power) + // Approximated since we lack integer constraint infrastructure. + // mirrors ZIPT's PowerSplitModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_power_split(nielsen_node* node) { + euf::snode* power = nullptr; + euf::snode* var_head = nullptr; + str_eq const* eq = nullptr; + if (!find_power_vs_var(node, power, var_head, eq)) + return false; + + SASSERT(power->is_power() && power->num_args() >= 1); + euf::snode* base = power->arg(0); + + // Branch 1: x = base^m · prefix where m < n + // Approximated: x = fresh_power_var · fresh_suffix + { + euf::snode* fresh_power = mk_fresh_var(); + euf::snode* fresh_suffix = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(var_head, replacement, eq->m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + + // Branch 2: x = u^n · x (variable extends past full power, non-progress) + { + euf::snode* replacement = m_sg.mk_concat(base, var_head); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, false); + nielsen_subst s(var_head, replacement, eq->m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + + return true; + } + + // ----------------------------------------------------------------------- + // Modifier: apply_var_num_unwinding + // For a power token u^n facing a variable, branch: + // (1) n = 0 → u^n = ε (replace power with empty) + // (2) n >= 1 → peel one u from the power + // mirrors ZIPT's VarNumUnwindingModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_var_num_unwinding(nielsen_node* node) { + euf::snode* power = nullptr; + euf::snode* var_head = nullptr; + str_eq const* eq = nullptr; + if (!find_power_vs_var(node, power, var_head, eq)) + return false; + + SASSERT(power->is_power() && power->num_args() >= 1); + euf::snode* base = power->arg(0); + + // Branch 1: n = 0 → replace u^n with ε (progress) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + + // Branch 2: n >= 1 → peel one u: u^n → u · u^(n-1) + // Approximated: replace u^n with base · fresh_var + { + euf::snode* fresh = mk_fresh_var(); + euf::snode* replacement = m_sg.mk_concat(base, fresh); + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, false); + nielsen_subst s(power, replacement, eq->m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + } + + return true; + } + void nielsen_graph::collect_conflict_deps(dep_tracker& deps) const { for (nielsen_node const* n : m_nodes) { + if (n->eval_idx() != m_run_idx) + continue; if (!n->is_currently_conflict()) continue; for (str_eq const& eq : n->str_eqs()) @@ -630,5 +1567,134 @@ namespace seq { } } + void nielsen_graph::explain_conflict(unsigned_vector& eq_indices, unsigned_vector& mem_indices) const { + SASSERT(m_root); + dep_tracker deps; + collect_conflict_deps(deps); + unsigned_vector bits; + deps.get_set_bits(bits); + for (unsigned b : bits) { + if (b < m_num_input_eqs) + eq_indices.push_back(b); + else + mem_indices.push_back(b - m_num_input_eqs); + } + } + + // ----------------------------------------------------------------------- + // nielsen_graph: length constraint generation + // ----------------------------------------------------------------------- + + expr_ref nielsen_graph::compute_length_expr(euf::snode* n) { + ast_manager& m = m_sg.get_manager(); + seq_util& seq = m_sg.get_seq_util(); + arith_util arith(m); + + if (n->is_empty()) + return expr_ref(arith.mk_int(0), m); + + if (n->is_char() || n->is_unit()) + return expr_ref(arith.mk_int(1), m); + + if (n->is_concat()) { + expr_ref left = compute_length_expr(n->arg(0)); + expr_ref right = compute_length_expr(n->arg(1)); + return expr_ref(arith.mk_add(left, right), m); + } + + // for variables and other terms, use symbolic (str.len expr) + return expr_ref(seq.str.mk_length(n->get_expr()), m); + } + + void nielsen_graph::generate_length_constraints(vector& constraints) { + if (!m_root) + return; + + ast_manager& m = m_sg.get_manager(); + uint_set seen_vars; + + seq_util& seq = m_sg.get_seq_util(); + arith_util arith(m); + + for (str_eq const& eq : m_root->str_eqs()) { + if (eq.is_trivial()) + continue; + + expr_ref len_lhs = compute_length_expr(eq.m_lhs); + expr_ref len_rhs = compute_length_expr(eq.m_rhs); + expr_ref len_eq(m.mk_eq(len_lhs, len_rhs), m); + constraints.push_back(length_constraint(len_eq, eq.m_dep, length_kind::eq, m)); + + // collect variables for non-negativity constraints + euf::snode_vector tokens; + eq.m_lhs->collect_tokens(tokens); + eq.m_rhs->collect_tokens(tokens); + for (euf::snode* tok : tokens) { + if (tok->is_var() && !seen_vars.contains(tok->id())) { + seen_vars.insert(tok->id()); + expr_ref len_var(seq.str.mk_length(tok->get_expr()), m); + expr_ref ge_zero(arith.mk_ge(len_var, arith.mk_int(0)), m); + constraints.push_back(length_constraint(ge_zero, eq.m_dep, length_kind::nonneg, m)); + } + } + } + + // Parikh interval reasoning for regex memberships: + // for each str_mem constraint str in regex, compute length bounds + // from the regex structure and generate arithmetic constraints. + for (str_mem const& mem : m_root->str_mems()) { + expr* re_expr = mem.m_regex->get_expr(); + if (!re_expr || !seq.is_re(re_expr)) + continue; + + unsigned min_len = 0, max_len = UINT_MAX; + compute_regex_length_interval(mem.m_regex, min_len, max_len); + + expr_ref len_str = compute_length_expr(mem.m_str); + + // generate len(str) >= min_len when min_len > 0 + if (min_len > 0) { + expr_ref bound(arith.mk_ge(len_str, arith.mk_int(min_len)), m); + constraints.push_back(length_constraint(bound, mem.m_dep, length_kind::bound, m)); + } + + // generate len(str) <= max_len when bounded + if (max_len < UINT_MAX) { + expr_ref bound(arith.mk_le(len_str, arith.mk_int(max_len)), m); + constraints.push_back(length_constraint(bound, mem.m_dep, length_kind::bound, m)); + } + + // generate len(x) >= 0 for variables in the string side + euf::snode_vector tokens; + mem.m_str->collect_tokens(tokens); + for (euf::snode* tok : tokens) { + if (tok->is_var() && !seen_vars.contains(tok->id())) { + seen_vars.insert(tok->id()); + expr_ref len_var(seq.str.mk_length(tok->get_expr()), m); + expr_ref ge_zero(arith.mk_ge(len_var, arith.mk_int(0)), m); + constraints.push_back(length_constraint(ge_zero, mem.m_dep, length_kind::nonneg, m)); + } + } + } + } + + void nielsen_graph::compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len) { + seq_util& seq = m_sg.get_seq_util(); + expr* e = regex->get_expr(); + if (!e || !seq.is_re(e)) { + min_len = 0; + max_len = UINT_MAX; + return; + } + min_len = seq.re.min_length(e); + max_len = seq.re.max_length(e); + // for empty language, min_length may be UINT_MAX (vacuously true); + // normalize to avoid generating bogus constraints + if (min_len > max_len) { + min_len = 0; + max_len = 0; + } + } + } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 3834a56b3..09ca10f65 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -183,22 +183,27 @@ Abstract: detection during character substitution are not ported. Modifier hierarchy (Constraints/Modifier/): - - All ~15 Modifier subclasses driving graph expansion are not ported: - VarNielsenModifier, ConstNielsenModifier, DirectedNielsenModifier, - EqSplitModifier, RegexVarSplitModifier, RegexCharSplitModifier, - StarIntrModifier, PowerSplitModifier, GPowerIntrModifier, - NumCmpModifier, NumUnwindingModifier, PowerEpsilonModifier, - DecomposeModifier, CombinedModifier, DetModifier. - - The modifier pattern (each Modifier produces one or more child nodes by - applying substitutions + side conditions to the parent node) is not ported. + - 13 Modifier subclasses driving graph expansion are ported as + apply_* methods in generate_extensions, matching ZIPT's TypeOrder + priority: DetModifier(1), PowerEpsilonModifier(2), NumCmpModifier(3), + ConstNumUnwindingModifier(4), EqSplitModifier(5), StarIntrModifier(6), + GPowerIntrModifier(7), ConstNielsenModifier(8), RegexCharSplitModifier(9), + RegexVarSplitModifier(10), PowerSplitModifier(11), VarNielsenModifier(12), + VarNumUnwindingModifier(13). + - NOT PORTED: DirectedNielsenModifier, DecomposeModifier, CombinedModifier. + - NumCmp, ConstNumUnwinding, VarNumUnwinding are approximated (no PDD + integer polynomial infrastructure; power tokens are replaced with ε + or peeled with fresh variables instead of exact exponent arithmetic). Search procedure: - - NielsenNode.GraphExpansion(): the recursive search with iterative deepening - (depth-bounded DFS with SAT/UNSAT/CYCLIC return codes) is not ported. - - NielsenNode.SimplifyAndInit(): the simplification-and-initialization pass - run at node creation is not ported. - - NielsenGraph.Check(): the top-level entry point with iterative deepening, - inner solver setup and subsumption-node lookup is not ported. + - NielsenGraph.Check() / NielsenNode.GraphExpansion(): ported as + nielsen_graph::solve() (iterative deepening, 6 rounds starting at + depth 10, doubling) and search_dfs() (depth-bounded DFS with + eval_idx cycle detection and node status tracking). The inner solver + setup and subsumption-node lookup within Check() are not ported. + - NielsenNode.SimplifyAndInit(): ported as + nielsen_node::simplify_and_init() with prefix matching, symbol clash, + empty propagation, and Brzozowski derivative consumption. - NielsenGraph.FindExisting(): the subsumption cache lookup over subsumptionCandidates is not ported. @@ -231,6 +236,7 @@ Author: #include "util/vector.h" #include "util/uint_set.h" #include "ast/ast.h" +#include "ast/arith_decl_plugin.h" #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_sgraph.h" @@ -281,6 +287,9 @@ namespace seq { bool is_superset(dep_tracker const& other) const; bool empty() const; + // collect indices of all set bits into 'indices' + void get_set_bits(unsigned_vector& indices) const; + bool operator==(dep_tracker const& other) const { return m_bits == other.m_bits; } bool operator!=(dep_tracker const& other) const { return !(*this == other); } }; @@ -353,6 +362,24 @@ namespace seq { } }; + // kind of length constraint determines propagation strategy + enum class length_kind { + nonneg, // len(x) >= 0: unconditional axiom + eq, // len(lhs) = len(rhs): conditional on string equality + bound // Parikh bound: conditional on regex membership + }; + + // arithmetic length constraint derived from string equations + struct length_constraint { + expr_ref m_expr; // arithmetic expression (e.g., len(x) + len(y) = len(a) + 1) + dep_tracker m_dep; // tracks which input constraints contributed + length_kind m_kind; // determines propagation strategy + + length_constraint(ast_manager& m): m_expr(m), m_kind(length_kind::nonneg) {} + length_constraint(expr* e, dep_tracker const& dep, length_kind kind, ast_manager& m): + m_expr(e, m), m_dep(dep), m_kind(kind) {} + }; + // edge in the Nielsen graph connecting two nodes // mirrors ZIPT's NielsenEdge class nielsen_edge { @@ -469,6 +496,39 @@ namespace seq { // true if other's constraint set is a subset of this node's bool is_subsumed_by(nielsen_node const& other) const; + + // true if any constraint has opaque (s_other) terms that + // the Nielsen graph cannot decompose + bool has_opaque_terms() const; + }; + + // search statistics collected during Nielsen graph solving + struct nielsen_stats { + unsigned m_num_solve_calls = 0; + unsigned m_num_dfs_nodes = 0; + unsigned m_num_sat = 0; + unsigned m_num_unsat = 0; + unsigned m_num_unknown = 0; + unsigned m_num_simplify_conflict = 0; + unsigned m_num_subsumptions = 0; + unsigned m_num_extensions = 0; + unsigned m_num_fresh_vars = 0; + unsigned m_max_depth = 0; + // modifier application counts + unsigned m_mod_det = 0; + unsigned m_mod_power_epsilon = 0; + unsigned m_mod_num_cmp = 0; + unsigned m_mod_const_num_unwinding = 0; + unsigned m_mod_eq_split = 0; + unsigned m_mod_star_intr = 0; + unsigned m_mod_gpower_intr = 0; + unsigned m_mod_const_nielsen = 0; + unsigned m_mod_regex_char_split = 0; + unsigned m_mod_regex_var_split = 0; + unsigned m_mod_power_split = 0; + unsigned m_mod_var_nielsen = 0; + unsigned m_mod_var_num_unwinding = 0; + void reset() { memset(this, 0, sizeof(nielsen_stats)); } }; // the overall Nielsen transformation graph @@ -482,6 +542,10 @@ namespace seq { unsigned m_run_idx = 0; unsigned m_depth_bound = 0; unsigned m_next_mem_id = 0; + unsigned m_fresh_cnt = 0; + unsigned m_num_input_eqs = 0; + unsigned m_num_input_mems = 0; + nielsen_stats m_stats; public: nielsen_graph(euf::sgraph& sg); @@ -519,6 +583,10 @@ namespace seq { // generate next unique regex membership id unsigned next_mem_id() { return m_next_mem_id++; } + // number of input constraints (for dep_tracker bit mapping) + unsigned num_input_eqs() const { return m_num_input_eqs; } + unsigned num_input_mems() const { return m_num_input_mems; } + // display for debugging std::ostream& display(std::ostream& out) const; @@ -541,8 +609,111 @@ namespace seq { // collect dependency information from conflicting constraints void collect_conflict_deps(dep_tracker& deps) const; + // explain a conflict: partition the set bits into str_eq indices + // (bits 0..num_eqs-1) and str_mem indices (bits num_eqs..num_eqs+num_mems-1). + // Must be called after solve() returns unsat. + void explain_conflict(unsigned_vector& eq_indices, unsigned_vector& mem_indices) const; + + // accumulated search statistics + nielsen_stats const& stats() const { return m_stats; } + void reset_stats() { m_stats.reset(); } + + // generate arithmetic length constraints from the root node's string + // equalities and regex memberships. For each non-trivial equation lhs = rhs, + // produces len(lhs) = len(rhs) by expanding concatenations into sums. + // For each regex membership str in regex, produces Parikh interval + // constraints: len(str) >= min_len and len(str) <= max_len. + // Also generates len(x) >= 0 for each variable appearing in the equations. + void generate_length_constraints(vector& constraints); + private: search_result search_dfs(nielsen_node* node, unsigned depth); + + // create a fresh variable with a unique name + euf::snode* mk_fresh_var(); + + // deterministic modifier: var = ε, same-head cancel + bool apply_det_modifier(nielsen_node* node); + + // const nielsen modifier: char vs var (2 branches per case) + bool apply_const_nielsen(nielsen_node* node); + + // variable Nielsen modifier: var vs var, all progress (3 branches) + bool apply_var_nielsen(nielsen_node* node); + + // eq split modifier: var vs var (3 branches) + bool apply_eq_split(nielsen_node* node); + + // apply regex character split modifier to a node. + // for a str_mem constraint x·s ∈ R where x is a variable: + // (1) x → c·z for each char c accepted by R at first position + // (2) x → ε (x is empty) + // returns true if children were generated. + bool apply_regex_char_split(nielsen_node* node); + + // power epsilon modifier: for a power token u^n in an equation, + // branch: (1) base u = ε, (2) power is empty (n = 0 semantics). + // mirrors ZIPT's PowerEpsilonModifier + bool apply_power_epsilon(nielsen_node* node); + + // numeric comparison modifier: for equations involving power tokens + // u^m and u^n with the same base, branch on m < n vs n <= m. + // mirrors ZIPT's NumCmpModifier + bool apply_num_cmp(nielsen_node* node); + + // constant numeric unwinding: for a power token u^n vs a constant + // (non-variable), branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u). + // mirrors ZIPT's ConstNumUnwindingModifier + bool apply_const_num_unwinding(nielsen_node* node); + + // star introduction: for a str_mem x·s ∈ R where a cycle is detected + // (backedge exists), introduce stabilizer: x ∈ base* with x split. + // mirrors ZIPT's StarIntrModifier + bool apply_star_intr(nielsen_node* node); + + // generalized power introduction: for a variable x matched against + // a ground repeated pattern, introduce x = base^n · prefix(base) + // with fresh power variable n and side constraint n >= 0. + // mirrors ZIPT's GPowerIntrModifier + bool apply_gpower_intr(nielsen_node* node); + + // regex variable split: for str_mem x·s ∈ R where x is a variable, + // split using minterms: x → ε, or x → c·x' for each minterm c. + // More general than regex_char_split, uses minterm partitioning. + // mirrors ZIPT's RegexVarSplitModifier + bool apply_regex_var_split(nielsen_node* node); + + // power split: for a variable x facing a power token u^n, + // branch: x = u^m · prefix(u) with m < n, or x = u^n · x. + // mirrors ZIPT's PowerSplitModifier + bool apply_power_split(nielsen_node* node); + + // variable numeric unwinding: for a power token u^n vs a variable, + // branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u). + // mirrors ZIPT's VarNumUnwindingModifier + bool apply_var_num_unwinding(nielsen_node* node); + + // collect concrete first-position characters from a regex snode + void collect_first_chars(euf::snode* re, euf::snode_vector& chars); + + // find the first power token in any str_eq at this node + euf::snode* find_power_token(nielsen_node* node) const; + + // find a power token facing a constant (char) head + bool find_power_vs_const(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const; + + // find a power token facing a variable head + bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const; + + // build an arithmetic expression representing the length of an snode tree. + // concatenations are expanded to sums, chars to 1, empty to 0, + // variables to (str.len var_expr). + expr_ref compute_length_expr(euf::snode* n); + + // compute Parikh length interval [min_len, max_len] for a regex snode. + // uses seq_util::rex min_length/max_length on the underlying expression. + // max_len == UINT_MAX means unbounded. + void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len); }; } diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index d41b2e7f6..9b9c68b62 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -17,6 +17,10 @@ Author: --*/ #include "smt/theory_nseq.h" #include "smt/smt_context.h" +#include "smt/smt_justification.h" +#include "smt/proto_model/proto_model.h" +#include "ast/array_decl_plugin.h" +#include "ast/ast_pp.h" #include "util/statistics.h" namespace smt { @@ -26,43 +30,102 @@ namespace smt { m_seq(ctx.get_manager()), m_autil(ctx.get_manager()), m_rewriter(ctx.get_manager()), + m_arith_value(ctx.get_manager()), m_egraph(ctx.get_manager()), m_sgraph(ctx.get_manager(), m_egraph), m_nielsen(m_sgraph), - m_state(m_sgraph) + m_state(m_sgraph), + m_regex(m_sgraph), + m_model(*this, ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex) {} + // ----------------------------------------------------------------------- + // Initialization + // ----------------------------------------------------------------------- + + void theory_nseq::init() { + m_arith_value.init(&get_context()); + } + // ----------------------------------------------------------------------- // Internalization // ----------------------------------------------------------------------- bool theory_nseq::internalize_atom(app* atom, bool /*gate_ctx*/) { + context& ctx = get_context(); + ast_manager& m = get_manager(); + + // str.in_re atoms are boolean predicates: register as bool_var + // so that assign_eh fires when the SAT solver assigns them. + // Following theory_seq: create a bool_var directly without an enode + // for the str.in_re predicate (avoids needing to internalize the regex arg). + if (m_seq.str.is_in_re(atom)) { + expr* str_arg = atom->get_arg(0); + mk_var(ensure_enode(str_arg)); + if (!ctx.b_internalized(atom)) { + bool_var bv = ctx.mk_bool_var(atom); + ctx.set_var_theory(bv, get_id()); + ctx.mark_as_relevant(bv); + } + get_snode(str_arg); + return true; + } return internalize_term(atom); } + theory_var theory_nseq::mk_var(enode* n) { + expr* o = n->get_expr(); + if (!m_seq.is_seq(o) && !m_seq.is_re(o) && !m_seq.str.is_nth_u(o)) + return null_theory_var; + if (is_attached_to_var(n)) + return n->get_th_var(get_id()); + theory_var v = theory::mk_var(n); + get_context().attach_th_var(n, this, v); + get_context().mark_as_relevant(n); + return v; + } + bool theory_nseq::internalize_term(app* term) { context& ctx = get_context(); ast_manager& m = get_manager(); - // ensure children are internalized first - for (expr* arg : *term) { - if (is_app(arg) && m_seq.is_seq(arg)) { - ctx.internalize(arg, false); - } + // ensure ALL children are internalized (following theory_seq pattern) + for (auto arg : *term) + mk_var(ensure_enode(arg)); + + if (ctx.e_internalized(term)) { + mk_var(ctx.get_enode(term)); + return true; } - if (!ctx.e_internalized(term)) { - ctx.mk_enode(term, false, m.is_bool(term), true); + if (m.is_bool(term)) { + bool_var bv = ctx.mk_bool_var(term); + ctx.set_var_theory(bv, get_id()); + ctx.mark_as_relevant(bv); } - enode* en = ctx.get_enode(term); - if (!is_attached_to_var(en)) { - theory_var v = mk_var(en); - (void)v; + enode* en; + if (ctx.e_internalized(term)) { + en = ctx.get_enode(term); } + else { + en = ctx.mk_enode(term, false, m.is_bool(term), true); + } + mk_var(en); // register in our private sgraph get_snode(term); + + // track higher-order terms for lazy unfolding + expr* ho_f = nullptr, *ho_s = nullptr, *ho_b = nullptr, *ho_i = nullptr; + if (m_seq.str.is_map(term, ho_f, ho_s) || + m_seq.str.is_mapi(term, ho_f, ho_i, ho_s) || + m_seq.str.is_foldl(term, ho_f, ho_b, ho_s) || + m_seq.str.is_foldli(term, ho_f, ho_i, ho_b, ho_s)) { + m_ho_terms.push_back(term); + ensure_length_var(ho_s); + } + return true; } @@ -73,16 +136,73 @@ namespace smt { void theory_nseq::new_eq_eh(theory_var v1, theory_var v2) { expr* e1 = get_enode(v1)->get_expr(); expr* e2 = get_enode(v2)->get_expr(); + if (m_seq.is_re(e1)) { + ++m_num_unhandled_bool; + return; + } if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2)) return; euf::snode* s1 = get_snode(e1); euf::snode* s2 = get_snode(e2); - if (s1 && s2) - m_state.add_str_eq(s1, s2); + if (s1 && s2) { + unsigned idx = m_state.str_eqs().size(); + m_state.add_str_eq(s1, s2, get_enode(v1), get_enode(v2)); + m_prop_queue.push_back({prop_item::eq_prop, idx}); + } } - void theory_nseq::new_diseq_eh(theory_var /*v1*/, theory_var /*v2*/) { - // not handled in this initial skeleton + void theory_nseq::new_diseq_eh(theory_var v1, theory_var v2) { + expr* e1 = get_enode(v1)->get_expr(); + expr* e2 = get_enode(v2)->get_expr(); + if (m_seq.is_re(e1)) { + // regex disequality: nseq cannot verify language non-equivalence + ++m_num_unhandled_bool; + return; + } + if (!m_seq.is_seq(e1) || !m_seq.is_seq(e2)) + return; + unsigned idx = m_state.diseqs().size(); + m_state.add_diseq(get_enode(v1), get_enode(v2)); + m_prop_queue.push_back({prop_item::diseq_prop, idx}); + } + + // ----------------------------------------------------------------------- + // Boolean assignment notification + // ----------------------------------------------------------------------- + + void theory_nseq::assign_eh(bool_var v, bool is_true) { + context& ctx = get_context(); + expr* e = ctx.bool_var2expr(v); + expr* s = nullptr; + expr* re = nullptr; + if (!m_seq.str.is_in_re(e, s, re)) { + // Track unhandled boolean string predicates (prefixof, contains, etc.) + if (is_app(e) && to_app(e)->get_family_id() == m_seq.get_family_id()) + ++m_num_unhandled_bool; + return; + } + euf::snode* sn_str = get_snode(s); + euf::snode* sn_re = get_snode(re); + if (!sn_str || !sn_re) + return; + + if (is_true) { + unsigned idx = m_state.str_mems().size(); + literal lit(v, false); + m_state.add_str_mem(sn_str, sn_re, lit); + m_prop_queue.push_back({prop_item::pos_mem_prop, idx}); + } + else { + unsigned idx = m_state.neg_mems().size(); + literal lit(v, true); + m_state.add_neg_mem(sn_str, sn_re, lit); + m_prop_queue.push_back({prop_item::neg_mem_prop, idx}); + } + + TRACE(seq, tout << "nseq assign_eh: " << (is_true ? "" : "¬") + << "str.in_re " + << mk_bounded_pp(s, get_manager(), 3) << " in " + << mk_bounded_pp(re, get_manager(), 3) << "\n";); } // ----------------------------------------------------------------------- @@ -93,12 +213,139 @@ namespace smt { theory::push_scope_eh(); m_state.push(); m_sgraph.push(); + m_prop_lim.push_back(m_prop_queue.size()); + m_ho_lim.push_back(m_ho_terms.size()); + m_unhandled_bool_lim.push_back(m_num_unhandled_bool); } void theory_nseq::pop_scope_eh(unsigned num_scopes) { theory::pop_scope_eh(num_scopes); m_state.pop(num_scopes); m_sgraph.pop(num_scopes); + unsigned new_sz = m_prop_lim[m_prop_lim.size() - num_scopes]; + m_prop_queue.shrink(new_sz); + m_prop_lim.shrink(m_prop_lim.size() - num_scopes); + if (m_prop_qhead > m_prop_queue.size()) + m_prop_qhead = m_prop_queue.size(); + unsigned ho_sz = m_ho_lim[m_ho_lim.size() - num_scopes]; + m_ho_terms.shrink(ho_sz); + m_ho_lim.shrink(m_ho_lim.size() - num_scopes); + m_num_unhandled_bool = m_unhandled_bool_lim[m_unhandled_bool_lim.size() - num_scopes]; + m_unhandled_bool_lim.shrink(m_unhandled_bool_lim.size() - num_scopes); + } + + // ----------------------------------------------------------------------- + // Propagation: eager eq/diseq/literal dispatch + // ----------------------------------------------------------------------- + + bool theory_nseq::can_propagate() { + return m_prop_qhead < m_prop_queue.size(); + } + + void theory_nseq::propagate() { + context& ctx = get_context(); + while (m_prop_qhead < m_prop_queue.size() && !ctx.inconsistent()) { + prop_item const& item = m_prop_queue[m_prop_qhead++]; + switch (item.m_kind) { + case prop_item::eq_prop: + propagate_eq(item.m_idx); + break; + case prop_item::diseq_prop: + propagate_diseq(item.m_idx); + break; + case prop_item::pos_mem_prop: + propagate_pos_mem(item.m_idx); + break; + case prop_item::neg_mem_prop: + propagate_neg_mem(item.m_idx); + break; + } + } + } + + void theory_nseq::propagate_eq(unsigned idx) { + // When s1 = s2 is learned, ensure len(s1) and len(s2) are + // internalized so congruence closure propagates len(s1) = len(s2). + eq_source const& src = m_state.get_eq_source(idx); + ensure_length_var(src.m_n1->get_expr()); + ensure_length_var(src.m_n2->get_expr()); + } + + void theory_nseq::propagate_diseq(unsigned idx) { + // Disequalities are recorded for use during final_check. + // No eager propagation beyond recording. + TRACE(seq, + auto const& d = m_state.get_diseq(idx); + tout << "nseq diseq: " + << mk_bounded_pp(d.m_n1->get_expr(), get_manager(), 3) + << " != " + << mk_bounded_pp(d.m_n2->get_expr(), get_manager(), 3) << "\n";); + } + + void theory_nseq::propagate_pos_mem(unsigned idx) { + auto const& mem = m_state.str_mems()[idx]; + auto const& src = m_state.get_mem_source(idx); + + if (!mem.m_str || !mem.m_regex) + return; + + // regex is ∅ → conflict + if (m_regex.is_empty_regex(mem.m_regex)) { + enode_pair_vector eqs; + literal_vector lits; + lits.push_back(src.m_lit); + set_conflict(eqs, lits); + return; + } + + // empty string in non-nullable regex → conflict + if (mem.m_str->is_empty() && !mem.m_regex->is_nullable()) { + enode_pair_vector eqs; + literal_vector lits; + lits.push_back(src.m_lit); + set_conflict(eqs, lits); + return; + } + + // ensure length term exists for the string argument + expr* s_expr = mem.m_str->get_expr(); + if (s_expr) + ensure_length_var(s_expr); + } + + void theory_nseq::propagate_neg_mem(unsigned idx) { + auto const& entry = m_state.get_neg_mem(idx); + + if (!entry.m_str || !entry.m_regex) + return; + + // ¬(s in Σ*) is always false → conflict + if (m_regex.is_full_regex(entry.m_regex)) { + enode_pair_vector eqs; + literal_vector lits; + lits.push_back(entry.m_lit); + set_conflict(eqs, lits); + return; + } + + // ¬(ε in R) where R is nullable → conflict + if (entry.m_str->is_empty() && entry.m_regex->is_nullable()) { + enode_pair_vector eqs; + literal_vector lits; + lits.push_back(entry.m_lit); + set_conflict(eqs, lits); + return; + } + } + + void theory_nseq::ensure_length_var(expr* e) { + if (!e || !m_seq.is_seq(e)) + return; + context& ctx = get_context(); + ast_manager& m = get_manager(); + expr_ref len(m_seq.str.mk_length(e), m); + if (!ctx.e_internalized(len)) + ctx.internalize(len, false); } // ----------------------------------------------------------------------- @@ -107,30 +354,162 @@ namespace smt { void theory_nseq::populate_nielsen_graph() { m_nielsen.reset(); - seq::nielsen_node* root = m_nielsen.mk_node(); - m_nielsen.set_root(root); - for (auto const& eq : m_state.str_eqs()) - root->add_str_eq(eq); - for (auto const& mem : m_state.str_mems()) - root->add_str_mem(mem); + m_nielsen_to_state_mem.reset(); + + // transfer string equalities from state to nielsen graph root + for (auto const& eq : m_state.str_eqs()) { + m_nielsen.add_str_eq(eq.m_lhs, eq.m_rhs); + } + + // transfer regex memberships, pre-processing through nseq_regex + // to consume ground prefixes via Brzozowski derivatives + for (unsigned state_idx = 0; state_idx < m_state.str_mems().size(); ++state_idx) { + auto const& mem = m_state.str_mems()[state_idx]; + int triv = m_regex.check_trivial(mem); + if (triv > 0) + continue; // trivially satisfied, skip + if (triv < 0) { + // trivially unsat: add anyway so solve() detects conflict + m_nielsen.add_str_mem(mem.m_str, mem.m_regex); + m_nielsen_to_state_mem.push_back(state_idx); + continue; + } + // pre-process: consume ground prefix characters + vector processed; + if (!m_regex.process_str_mem(mem, processed)) { + // conflict during ground prefix consumption + m_nielsen.add_str_mem(mem.m_str, mem.m_regex); + m_nielsen_to_state_mem.push_back(state_idx); + continue; + } + for (auto const& pm : processed) { + m_nielsen.add_str_mem(pm.m_str, pm.m_regex); + m_nielsen_to_state_mem.push_back(state_idx); + } + } + + TRACE(seq, tout << "nseq populate: " << m_state.str_eqs().size() << " eqs, " + << m_state.str_mems().size() << " mems -> nielsen root with " + << m_nielsen.num_input_eqs() << " eqs, " + << m_nielsen.num_input_mems() << " mems\n";); } final_check_status theory_nseq::final_check_eh(unsigned /*final_check_round*/) { + // Always assert non-negativity for all string theory vars, + // even when there are no string equations/memberships. + if (assert_nonneg_for_all_vars()) + return FC_CONTINUE; + + // If there are unhandled boolean string predicates (prefixof, contains, etc.) + // we cannot declare sat — return unknown. + if (has_unhandled_preds()) + return FC_GIVEUP; + + if (m_state.empty() && m_ho_terms.empty()) + return FC_DONE; + + // unfold higher-order terms when sequence structure is known + if (unfold_ho_terms()) + return FC_CONTINUE; + if (m_state.empty()) return FC_DONE; - // For now, give up if there are string constraints. - // The full search will be wired in once the Nielsen algorithms are complete. + populate_nielsen_graph(); - ++m_num_nodes_explored; + + // assert length constraints derived from string equalities + if (assert_length_constraints()) + return FC_CONTINUE; + + ++m_num_final_checks; + + auto result = m_nielsen.solve(); + + if (result == seq::nielsen_graph::search_result::sat) { + // Nielsen found a consistent assignment for positive constraints. + // If there are negative memberships or disequalities we haven't verified, + // we cannot soundly declare sat. + if (!m_state.neg_mems().empty() || !m_state.diseqs().empty()) + return FC_GIVEUP; + return FC_DONE; + } + + if (result == seq::nielsen_graph::search_result::unsat) { + explain_nielsen_conflict(); + return FC_CONTINUE; + } + return FC_GIVEUP; } + // ----------------------------------------------------------------------- + // Conflict explanation + // ----------------------------------------------------------------------- + + void theory_nseq::deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits) { + context& ctx = get_context(); + unsigned_vector bits; + deps.get_set_bits(bits); + unsigned num_input_eqs = m_nielsen.num_input_eqs(); + for (unsigned b : bits) { + if (b < num_input_eqs) { + eq_source const& src = m_state.get_eq_source(b); + if (src.m_n1->get_root() == src.m_n2->get_root()) + eqs.push_back({src.m_n1, src.m_n2}); + } + else { + unsigned mem_idx = b - num_input_eqs; + if (mem_idx < m_nielsen_to_state_mem.size()) { + unsigned state_mem_idx = m_nielsen_to_state_mem[mem_idx]; + mem_source const& src = m_state.get_mem_source(state_mem_idx); + if (ctx.get_assignment(src.m_lit) == l_true) + lits.push_back(src.m_lit); + } + } + } + } + + void theory_nseq::add_conflict_clause(seq::dep_tracker const& deps) { + enode_pair_vector eqs; + literal_vector lits; + deps_to_lits(deps, eqs, lits); + ++m_num_conflicts; + set_conflict(eqs, lits); + } + + void theory_nseq::explain_nielsen_conflict() { + seq::dep_tracker deps; + m_nielsen.collect_conflict_deps(deps); + add_conflict_clause(deps); + } + + void theory_nseq::set_conflict(enode_pair_vector const& eqs, literal_vector const& lits) { + context& ctx = get_context(); + TRACE(seq, tout << "nseq conflict: " << eqs.size() << " eqs, " << lits.size() << " lits\n";); + ctx.set_conflict( + ctx.mk_justification( + ext_theory_conflict_justification( + get_id(), ctx, lits.size(), lits.data(), eqs.size(), eqs.data(), 0, nullptr))); + } + // ----------------------------------------------------------------------- // Model generation // ----------------------------------------------------------------------- - void theory_nseq::init_model(model_generator& /*mg*/) { - // stub – no model assignment for now + void theory_nseq::init_model(model_generator& mg) { + m_model.init(mg, m_nielsen, m_state); + } + + model_value_proc* theory_nseq::mk_value(enode* n, model_generator& mg) { + return m_model.mk_value(n, mg); + } + + void theory_nseq::finalize_model(model_generator& mg) { + m_model.finalize(mg); + } + + void theory_nseq::validate_model(proto_model& mdl) { + m_model.validate_regex(m_state, mdl); } // ----------------------------------------------------------------------- @@ -139,14 +518,47 @@ namespace smt { void theory_nseq::collect_statistics(::statistics& st) const { st.update("nseq conflicts", m_num_conflicts); - st.update("nseq nodes explored", m_num_nodes_explored); - st.update("nseq depth increases", m_num_depth_increases); + st.update("nseq final checks", m_num_final_checks); + st.update("nseq length axioms", m_num_length_axioms); + + // Nielsen graph search metrics + auto const& ns = m_nielsen.stats(); + st.update("nseq solve calls", ns.m_num_solve_calls); + st.update("nseq dfs nodes", ns.m_num_dfs_nodes); + st.update("nseq sat", ns.m_num_sat); + st.update("nseq unsat", ns.m_num_unsat); + st.update("nseq unknown", ns.m_num_unknown); + st.update("nseq simplify clash", ns.m_num_simplify_conflict); + st.update("nseq subsumptions", ns.m_num_subsumptions); + st.update("nseq extensions", ns.m_num_extensions); + st.update("nseq fresh vars", ns.m_num_fresh_vars); + st.update("nseq max depth", ns.m_max_depth); + + // modifier breakdown + st.update("nseq mod det", ns.m_mod_det); + st.update("nseq mod power epsilon", ns.m_mod_power_epsilon); + st.update("nseq mod num cmp", ns.m_mod_num_cmp); + st.update("nseq mod const num unwind", ns.m_mod_const_num_unwinding); + st.update("nseq mod eq split", ns.m_mod_eq_split); + st.update("nseq mod star intr", ns.m_mod_star_intr); + st.update("nseq mod gpower intr", ns.m_mod_gpower_intr); + st.update("nseq mod const nielsen", ns.m_mod_const_nielsen); + st.update("nseq mod regex char", ns.m_mod_regex_char_split); + st.update("nseq mod regex var", ns.m_mod_regex_var_split); + st.update("nseq mod power split", ns.m_mod_power_split); + st.update("nseq mod var nielsen", ns.m_mod_var_nielsen); + st.update("nseq mod var num unwind", ns.m_mod_var_num_unwinding); + st.update("nseq ho unfolds", m_num_ho_unfolds); } void theory_nseq::display(std::ostream& out) const { out << "theory_nseq\n"; - out << " str_eqs: " << m_state.str_eqs().size() << "\n"; - out << " str_mems: " << m_state.str_mems().size() << "\n"; + out << " str_eqs: " << m_state.str_eqs().size() << "\n"; + out << " str_mems: " << m_state.str_mems().size() << "\n"; + out << " diseqs: " << m_state.diseqs().size() << "\n"; + out << " neg_mems: " << m_state.neg_mems().size() << "\n"; + out << " prop_queue: " << m_prop_qhead << "/" << m_prop_queue.size() << "\n"; + out << " ho_terms: " << m_ho_terms.size() << "\n"; } // ----------------------------------------------------------------------- @@ -157,6 +569,129 @@ namespace smt { return alloc(theory_nseq, *ctx); } + // ----------------------------------------------------------------------- + // Higher-order term unfolding (seq.map, seq.foldl, etc.) + // ----------------------------------------------------------------------- + + bool theory_nseq::unfold_ho_terms() { + if (m_ho_terms.empty()) + return false; + + context& ctx = get_context(); + ast_manager& m = get_manager(); + bool progress = false; + + unsigned sz = m_ho_terms.size(); + for (unsigned i = 0; i < sz; ++i) { + app* term = m_ho_terms[i]; + expr* f = nullptr, *s = nullptr, *b = nullptr, *idx = nullptr; + + if (!m_seq.str.is_map(term, f, s) && + !m_seq.str.is_mapi(term, f, idx, s) && + !m_seq.str.is_foldl(term, f, b, s) && + !m_seq.str.is_foldli(term, f, idx, b, s)) + continue; + + if (!ctx.e_internalized(s)) + continue; + + // Find a structural representative in s's equivalence class + enode* s_root = ctx.get_enode(s)->get_root(); + expr* repr = nullptr; + enode* curr = s_root; + do { + expr* e = curr->get_expr(); + expr *a1, *a2; + if (m_seq.str.is_empty(e) || + m_seq.str.is_unit(e, a1) || + m_seq.str.is_concat(e, a1, a2)) { + repr = e; + break; + } + curr = curr->get_next(); + } while (curr != s_root); + + if (!repr) + continue; + + // Build ho_term with structural seq arg, then rewrite + expr_ref ho_repr(m); + if (m_seq.str.is_map(term)) + ho_repr = m_seq.str.mk_map(f, repr); + else if (m_seq.str.is_mapi(term)) + ho_repr = m_seq.str.mk_mapi(f, idx, repr); + else if (m_seq.str.is_foldl(term)) + ho_repr = m_seq.str.mk_foldl(f, b, repr); + else + ho_repr = m_seq.str.mk_foldli(f, idx, b, repr); + + expr_ref rewritten(m); + br_status st = m_rewriter.mk_app_core( + to_app(ho_repr)->get_decl(), + to_app(ho_repr)->get_num_args(), + to_app(ho_repr)->get_args(), + rewritten); + + if (st == BR_FAILED) + continue; + + // Internalize both the structural ho_term and its rewrite + if (!ctx.e_internalized(ho_repr)) + ctx.internalize(ho_repr, false); + if (!ctx.e_internalized(rewritten)) + ctx.internalize(rewritten, false); + + enode* ho_en = ctx.get_enode(ho_repr); + enode* res_en = ctx.get_enode(rewritten); + + if (ho_en->get_root() == res_en->get_root()) + continue; + + // Assert tautological axiom: ho_repr = rewritten + // Congruence closure merges map(f,s) with map(f,repr) + // since s = repr in the E-graph. + expr_ref eq_expr(m.mk_eq(ho_repr, rewritten), m); + if (!ctx.b_internalized(eq_expr)) + ctx.internalize(eq_expr, true); + literal eq_lit = ctx.get_literal(eq_expr); + if (ctx.get_assignment(eq_lit) != l_true) { + ctx.mk_th_axiom(get_id(), 1, &eq_lit); + TRACE(seq, tout << "nseq ho unfold: " + << mk_bounded_pp(ho_repr, m, 3) << " = " + << mk_bounded_pp(rewritten, m, 3) << "\n";); + ++m_num_ho_unfolds; + progress = true; + } + } + + // For map/mapi: propagate length preservation + for (unsigned i = 0; i < sz; ++i) { + app* term = m_ho_terms[i]; + expr* f = nullptr, *s = nullptr, *idx = nullptr; + bool is_map = m_seq.str.is_map(term, f, s); + bool is_mapi = !is_map && m_seq.str.is_mapi(term, f, idx, s); + if (!is_map && !is_mapi) + continue; + if (!m_seq.is_seq(term)) + continue; + + // len(map(f, s)) = len(s) + expr_ref len_map(m_seq.str.mk_length(term), m); + expr_ref len_s(m_seq.str.mk_length(s), m); + expr_ref len_eq(m.mk_eq(len_map, len_s), m); + if (!ctx.b_internalized(len_eq)) + ctx.internalize(len_eq, true); + literal len_lit = ctx.get_literal(len_eq); + if (ctx.get_assignment(len_lit) != l_true) { + ctx.mk_th_axiom(get_id(), 1, &len_lit); + ++m_num_length_axioms; + progress = true; + } + } + + return progress; + } + // ----------------------------------------------------------------------- // Helpers // ----------------------------------------------------------------------- @@ -168,4 +703,136 @@ namespace smt { return s; } + // ----------------------------------------------------------------------- + // Arithmetic value queries + // ----------------------------------------------------------------------- + + bool theory_nseq::get_num_value(expr* e, rational& val) const { + return m_arith_value.get_value_equiv(e, val) && val.is_int(); + } + + bool theory_nseq::lower_bound(expr* e, rational& lo) const { + bool is_strict = true; + return m_arith_value.get_lo(e, lo, is_strict) && !is_strict && lo.is_int(); + } + + bool theory_nseq::upper_bound(expr* e, rational& hi) const { + bool is_strict = true; + return m_arith_value.get_up(e, hi, is_strict) && !is_strict && hi.is_int(); + } + + bool theory_nseq::get_length(expr* e, rational& val) { + ast_manager& m = get_manager(); + rational val1; + expr* e1 = nullptr; + expr* e2 = nullptr; + ptr_vector todo; + todo.push_back(e); + val.reset(); + zstring s; + while (!todo.empty()) { + expr* c = todo.back(); + todo.pop_back(); + if (m_seq.str.is_concat(c, e1, e2)) { + todo.push_back(e1); + todo.push_back(e2); + } + else if (m_seq.str.is_unit(c)) + val += rational(1); + else if (m_seq.str.is_empty(c)) + continue; + else if (m_seq.str.is_string(c, s)) + val += rational(s.length()); + else { + expr_ref len(m_seq.str.mk_length(c), m); + if (m_arith_value.get_value(len, val1) && !val1.is_neg()) + val += val1; + else + return false; + } + } + return val.is_int(); + } + + void theory_nseq::add_length_axiom(literal lit) { + context& ctx = get_context(); + ctx.mark_as_relevant(lit); + ctx.mk_th_axiom(get_id(), 1, &lit); + ++m_num_length_axioms; + } + + bool theory_nseq::propagate_length_lemma(literal lit, seq::length_constraint const& lc) { + context& ctx = get_context(); + ast_manager& m = get_manager(); + + // unconditional constraints: assert as theory axiom + if (lc.m_kind == seq::length_kind::nonneg) { + add_length_axiom(lit); + return true; + } + + // conditional constraints: propagate with justification from dep_tracker + enode_pair_vector eqs; + literal_vector lits; + deps_to_lits(lc.m_dep, eqs, lits); + + ctx.mark_as_relevant(lit); + justification* js = ctx.mk_justification( + ext_theory_propagation_justification( + get_id(), ctx, + lits.size(), lits.data(), + eqs.size(), eqs.data(), + lit)); + ctx.assign(lit, js); + + TRACE(seq, tout << "nseq length propagation: " << mk_pp(lc.m_expr, m) + << " (" << eqs.size() << " eqs, " << lits.size() << " lits)\n";); + ++m_num_length_axioms; + return true; + } + + bool theory_nseq::assert_nonneg_for_all_vars() { + ast_manager& m = get_manager(); + context& ctx = get_context(); + arith_util arith(m); + bool new_axiom = false; + unsigned nv = get_num_vars(); + for (unsigned v = 0; v < nv; ++v) { + expr* e = get_enode(v)->get_expr(); + if (!m_seq.is_seq(e)) + continue; + expr_ref len_var(m_seq.str.mk_length(e), m); + expr_ref ge_zero(arith.mk_ge(len_var, arith.mk_int(0)), m); + if (!ctx.b_internalized(ge_zero)) + ctx.internalize(ge_zero, true); + literal lit = ctx.get_literal(ge_zero); + if (ctx.get_assignment(lit) != l_true) { + add_length_axiom(lit); + new_axiom = true; + } + } + return new_axiom; + } + + bool theory_nseq::assert_length_constraints() { + ast_manager& m = get_manager(); + context& ctx = get_context(); + vector constraints; + m_nielsen.generate_length_constraints(constraints); + + bool new_axiom = false; + for (auto const& lc : constraints) { + expr* e = lc.m_expr; + if (!ctx.b_internalized(e)) + ctx.internalize(e, true); + literal lit = ctx.get_literal(e); + if (ctx.get_assignment(lit) != l_true) { + TRACE(seq, tout << "nseq length lemma: " << mk_pp(e, m) << "\n";); + propagate_length_lemma(lit, lc); + new_axiom = true; + } + } + return new_axiom; + } + } diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index c123a055a..0f4e94a87 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -35,42 +35,99 @@ namespace smt { seq_util m_seq; arith_util m_autil; seq_rewriter m_rewriter; + arith_value m_arith_value; euf::egraph m_egraph; // private egraph (not shared with smt context) euf::sgraph m_sgraph; // private sgraph seq::nielsen_graph m_nielsen; nseq_state m_state; + nseq_regex m_regex; // regex membership pre-processing + nseq_model m_model; // model construction helper + + // propagation queue + struct prop_item { + enum kind_t { eq_prop, diseq_prop, pos_mem_prop, neg_mem_prop } m_kind; + unsigned m_idx; + }; + svector m_prop_queue; + unsigned m_prop_qhead = 0; + unsigned_vector m_prop_lim; // saved queue sizes for push/pop // statistics unsigned m_num_conflicts = 0; - unsigned m_num_nodes_explored = 0; - unsigned m_num_depth_increases = 0; + unsigned m_num_final_checks = 0; + unsigned m_num_length_axioms = 0; // map from context enode to private sgraph snode obj_map m_expr2snode; + // mapping from nielsen mem index to state mem index + // (populated during populate_nielsen_graph, used in deps_to_lits) + unsigned_vector m_nielsen_to_state_mem; + + // higher-order terms (seq.map, seq.mapi, seq.foldl, seq.foldli) + ptr_vector m_ho_terms; + unsigned_vector m_ho_lim; // push/pop limits for m_ho_terms + unsigned m_num_ho_unfolds = 0; + + // unhandled boolean string predicates (prefixof, suffixof, contains, etc.) + unsigned m_num_unhandled_bool = 0; + unsigned_vector m_unhandled_bool_lim; + + bool has_unhandled_preds() const { return m_num_unhandled_bool > 0; } + // required virtual methods bool internalize_atom(app* a, bool gate_ctx) override; bool internalize_term(app* term) override; + theory_var mk_var(enode* n) override; void new_eq_eh(theory_var v1, theory_var v2) override; void new_diseq_eh(theory_var v1, theory_var v2) override; theory* mk_fresh(context* ctx) override; void display(std::ostream& out) const override; // optional overrides - bool can_propagate() override { return false; } - void propagate() override {} + bool can_propagate() override; + void propagate() override; + void init() override; + void assign_eh(bool_var v, bool is_true) override; final_check_status final_check_eh(unsigned) override; void push_scope_eh() override; void pop_scope_eh(unsigned num_scopes) override; void init_model(model_generator& mg) override; + model_value_proc* mk_value(enode* n, model_generator& mg) override; + void finalize_model(model_generator& mg) override; + void validate_model(proto_model& mdl) override; void collect_statistics(::statistics& st) const override; char const* get_name() const override { return "nseq"; } // private helpers void populate_nielsen_graph(); + void explain_nielsen_conflict(); + void deps_to_lits(seq::dep_tracker const& deps, enode_pair_vector& eqs, literal_vector& lits); + void add_conflict_clause(seq::dep_tracker const& deps); + void set_conflict(enode_pair_vector const& eqs, literal_vector const& lits); euf::snode* get_snode(expr* e); + // propagation dispatch helpers + void propagate_eq(unsigned idx); + void propagate_diseq(unsigned idx); + void propagate_pos_mem(unsigned idx); + void propagate_neg_mem(unsigned idx); + void ensure_length_var(expr* e); + + // higher-order term unfolding + bool unfold_ho_terms(); + + // arithmetic value queries for length reasoning + bool get_num_value(expr* e, rational& val) const; + bool lower_bound(expr* e, rational& lo) const; + bool upper_bound(expr* e, rational& hi) const; + bool get_length(expr* e, rational& val); + void add_length_axiom(literal lit); + bool propagate_length_lemma(literal lit, seq::length_constraint const& lc); + bool assert_nonneg_for_all_vars(); + bool assert_length_constraints(); + public: theory_nseq(context& ctx); }; diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index c0d3ff6f7..d17d8bf5f 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -133,6 +133,7 @@ add_executable(test-z3 sls_seq_plugin.cpp seq_nielsen.cpp nseq_basic.cpp + nseq_regex.cpp small_object_allocator.cpp smt2print_parse.cpp smt_context.cpp diff --git a/src/test/main.cpp b/src/test/main.cpp index 10c0269f7..0ae26ecda 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -288,6 +288,7 @@ int main(int argc, char ** argv) { TST(sls_seq_plugin); TST(seq_nielsen); TST(nseq_basic); + TST(nseq_regex); TST(ho_matcher); TST(finite_set); TST(finite_set_rewriter); diff --git a/src/test/nseq_basic.cpp b/src/test/nseq_basic.cpp index f7a102e98..d71204f53 100644 --- a/src/test/nseq_basic.cpp +++ b/src/test/nseq_basic.cpp @@ -100,10 +100,119 @@ static void test_nseq_node_satisfied() { std::cout << " ok\n"; } +// Test 5: symbol clash conflict ("a" = "b" is unsat) +static void test_nseq_symbol_clash() { + std::cout << "test_nseq_symbol_clash\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('a'); + euf::snode* b = sg.mk_char('b'); + ng.add_str_eq(a, b); + + auto r = ng.solve(); + SASSERT(r == seq::nielsen_graph::search_result::unsat); + + // verify conflict explanation returns the equality index + unsigned_vector eq_idx, mem_idx; + ng.explain_conflict(eq_idx, mem_idx); + SASSERT(eq_idx.size() == 1); + SASSERT(eq_idx[0] == 0); + SASSERT(mem_idx.empty()); + std::cout << " ok: symbol clash detected as unsat\n"; +} + +// Test 6: variable equality x = x is sat +static void test_nseq_var_eq_self() { + std::cout << "test_nseq_var_eq_self\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + ng.add_str_eq(x, x); + + auto r = ng.solve(); + SASSERT(r == seq::nielsen_graph::search_result::sat); + std::cout << " ok: x = x solved as sat\n"; +} + +// Test 7: x·a = x·b is unsat (prefix match then clash) +static void test_nseq_prefix_clash() { + std::cout << "test_nseq_prefix_clash\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('a'); + euf::snode* b = sg.mk_char('b'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* xb = sg.mk_concat(x, b); + + ng.add_str_eq(xa, xb); + auto r = ng.solve(); + SASSERT(r == seq::nielsen_graph::search_result::unsat); + std::cout << " ok: x·a = x·b detected as unsat\n"; +} + +// Test 8: a·x = a·y has solutions (not unsat) +static void test_nseq_const_nielsen_solvable() { + std::cout << "test_nseq_const_nielsen_solvable\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('a'); + euf::snode* ax = sg.mk_concat(a, x); + euf::snode* ay = sg.mk_concat(a, y); + + ng.add_str_eq(ax, ay); + auto r = ng.solve(); + // a·x = a·y simplifies to x = y which is satisfiable (x = y = ε) + SASSERT(r == seq::nielsen_graph::search_result::sat); + std::cout << " ok: a·x = a·y solved as sat\n"; +} + +// Test 9: length mismatch - "ab" = "a" is unsat +static void test_nseq_length_mismatch() { + std::cout << "test_nseq_length_mismatch\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('a'); + euf::snode* b = sg.mk_char('b'); + euf::snode* ab = sg.mk_concat(a, b); + + ng.add_str_eq(ab, a); + auto r = ng.solve(); + SASSERT(r == seq::nielsen_graph::search_result::unsat); + std::cout << " ok: ab = a detected as unsat\n"; +} + void tst_nseq_basic() { test_nseq_instantiation(); test_nseq_param_validation(); test_nseq_simplification(); test_nseq_node_satisfied(); + test_nseq_symbol_clash(); + test_nseq_var_eq_self(); + test_nseq_prefix_clash(); + test_nseq_const_nielsen_solvable(); + test_nseq_length_mismatch(); std::cout << "nseq_basic: all tests passed\n"; } diff --git a/src/test/seq_nielsen.cpp b/src/test/seq_nielsen.cpp index f994f5a70..4d645fe7c 100644 --- a/src/test/seq_nielsen.cpp +++ b/src/test/seq_nielsen.cpp @@ -17,6 +17,7 @@ Abstract: #include "ast/euf/euf_egraph.h" #include "ast/euf/euf_sgraph.h" #include "smt/seq/seq_nielsen.h" +#include "ast/arith_decl_plugin.h" #include "ast/reg_decl_plugins.h" #include "ast/ast_pp.h" #include @@ -462,6 +463,2819 @@ static void test_backedge() { SASSERT(root->backedge() == nullptr); } +// test apply_eq_split: basic structure (x·A = y·B produces 3 children via eq_split) +static void test_eq_split_basic() { + std::cout << "test_eq_split_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* yb = sg.mk_concat(y, b); + + // x·A = y·B + ng.add_str_eq(xa, yb); + seq::nielsen_node* root = ng.root(); + + // eq_split fires: both heads are distinct vars + // produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress) + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 3); + + // first child is progress (x→ε), rest are non-progress (eq_split) + SASSERT(root->outgoing()[0]->is_progress()); +} + +// test eq_split with solve: x·y = z·w is satisfiable (all vars can be ε) +static void test_eq_split_solve_sat() { + std::cout << "test_eq_split_solve_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* z = sg.mk_var(symbol("z")); + euf::snode* w = sg.mk_var(symbol("w")); + euf::snode* xy = sg.mk_concat(x, y); + euf::snode* zw = sg.mk_concat(z, w); + + ng.add_str_eq(xy, zw); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test eq_split with solve: x·A = y·B is unsat (last char mismatch) +static void test_eq_split_solve_unsat() { + std::cout << "test_eq_split_solve_unsat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* yb = sg.mk_concat(y, b); + + ng.add_str_eq(xa, yb); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); +} + +// test eq_split: same var x·A = x·B triggers det modifier (cancel), not eq_split +static void test_eq_split_same_var_det() { + std::cout << "test_eq_split_same_var_det\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* xb = sg.mk_concat(x, b); + + // x·A = x·B → same-head cancel → A = B → clash → unsat + ng.add_str_eq(xa, xb); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); +} + +// test eq_split: x·y·A = y·x·A is commutation, should be sat (x=y=ε) +static void test_eq_split_commutation_sat() { + std::cout << "test_eq_split_commutation_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* xya = sg.mk_concat(x, sg.mk_concat(y, a)); + euf::snode* yxa = sg.mk_concat(y, sg.mk_concat(x, a)); + + ng.add_str_eq(xya, yxa); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test apply_const_nielsen: char·A = y produces 2 children (y→ε, y→char·fresh) +static void test_const_nielsen_char_var() { + std::cout << "test_const_nielsen_char_var\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* y = sg.mk_var(symbol("y")); + // A = y (char vs var) + ng.add_str_eq(a, y); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 2); + // both branches are progress + SASSERT(root->outgoing()[0]->is_progress()); + SASSERT(root->outgoing()[1]->is_progress()); +} + +// test apply_const_nielsen: x = B·y produces 2 children (x→ε, x→B·fresh) +static void test_const_nielsen_var_char() { + std::cout << "test_const_nielsen_var_char\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* b = sg.mk_char('B'); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* by = sg.mk_concat(b, y); + // x = B·y + ng.add_str_eq(x, by); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 2); + SASSERT(root->outgoing()[0]->is_progress()); + SASSERT(root->outgoing()[1]->is_progress()); +} + +// test const_nielsen solve: A·x = A·B → sat (x = B via det cancel then const_nielsen x→ε or x→B·fresh) +static void test_const_nielsen_solve_sat() { + std::cout << "test_const_nielsen_solve_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* ax = sg.mk_concat(a, x); + euf::snode* ab = sg.mk_concat(a, b); + + ng.add_str_eq(ax, ab); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test const_nielsen solve: A·x = B·y → unsat (leading chars mismatch) +static void test_const_nielsen_solve_unsat() { + std::cout << "test_const_nielsen_solve_unsat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* ax = sg.mk_concat(a, x); + euf::snode* by = sg.mk_concat(b, y); + + ng.add_str_eq(ax, by); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); +} + +// test const_nielsen priority: A·x = y·B → const_nielsen (2 children), not eq_split (3) +static void test_const_nielsen_priority_over_eq_split() { + std::cout << "test_const_nielsen_priority_over_eq_split\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* ax = sg.mk_concat(a, x); + euf::snode* yb = sg.mk_concat(y, b); + + // A·x = y·B → lhs starts with char, rhs starts with var → const_nielsen + ng.add_str_eq(ax, yb); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // const_nielsen produces 2 children, not eq_split's 3 + SASSERT(root->outgoing().size() == 2); +} + +// test: both sides start with vars → eq_split (3 children), not const_nielsen +static void test_const_nielsen_not_applicable_both_vars() { + std::cout << "test_const_nielsen_not_applicable_both_vars\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* yb = sg.mk_concat(y, b); + + // x·A = y·B → both heads are vars → eq_split + ng.add_str_eq(xa, yb); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 3); +} + +// test const_nielsen solve: A·B·x = A·B·C → sat (x = C after two det cancels) +static void test_const_nielsen_multi_char_solve() { + std::cout << "test_const_nielsen_multi_char_solve\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* c = sg.mk_char('C'); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* abx = sg.mk_concat(a, sg.mk_concat(b, x)); + euf::snode* abc = sg.mk_concat(a, sg.mk_concat(b, c)); + + ng.add_str_eq(abx, abc); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// ----------------------------------------------------------------------- +// Regex char split tests +// ----------------------------------------------------------------------- + +// test_regex_char_split_basic: x in to_re("AB") → creates children +static void test_regex_char_split_basic() { + std::cout << "test_regex_char_split_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref ab(seq.str.mk_concat(unit_a, unit_b), m); + expr_ref to_re_ab(seq.re.mk_to_re(ab), m); + euf::snode* regex = sg.mk(to_re_ab); + + ng.add_str_mem(x, regex); + SASSERT(ng.root() != nullptr); + + auto sr = ng.root()->simplify_and_init(ng); + SASSERT(sr != seq::simplify_result::conflict); + + bool extended = ng.generate_extensions(ng.root(), 0); + SASSERT(extended); + // should have at least 2 children: x→'A'·z and x→ε + SASSERT(ng.root()->outgoing().size() >= 2); + ng.display(std::cout); +} + +// test_regex_char_split_solve_sat: x in to_re("A") → sat (x = "A") +static void test_regex_char_split_solve_sat() { + std::cout << "test_regex_char_split_solve_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* regex = sg.mk(to_re_a); + + ng.add_str_mem(x, regex); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test_regex_char_split_solve_multi_char: x in to_re("AB") → sat +static void test_regex_char_split_solve_multi_char() { + std::cout << "test_regex_char_split_solve_multi_char\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref ab(seq.str.mk_concat(unit_a, unit_b), m); + expr_ref to_re_ab(seq.re.mk_to_re(ab), m); + euf::snode* regex = sg.mk(to_re_ab); + + ng.add_str_mem(x, regex); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test_regex_char_split_union: x in re.union(to_re("A"), to_re("B")) → sat +static void test_regex_char_split_union() { + std::cout << "test_regex_char_split_union\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref to_re_b(seq.re.mk_to_re(unit_b), m); + expr_ref re_union(seq.re.mk_union(to_re_a, to_re_b), m); + euf::snode* regex = sg.mk(re_union); + + ng.add_str_mem(x, regex); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test_regex_char_split_star_sat: x in re.star(to_re("A")) → sat (x = ε or x = "A"...) +static void test_regex_char_split_star_sat() { + std::cout << "test_regex_char_split_star_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref re_star(seq.re.mk_star(to_re_a), m); + euf::snode* regex = sg.mk(re_star); + + ng.add_str_mem(x, regex); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test_regex_char_split_concat_str: x·y in to_re("AB") → sat +static void test_regex_char_split_concat_str() { + std::cout << "test_regex_char_split_concat_str\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* xy = sg.mk_concat(x, y); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref ab(seq.str.mk_concat(unit_a, unit_b), m); + expr_ref to_re_ab(seq.re.mk_to_re(ab), m); + euf::snode* regex = sg.mk(to_re_ab); + + ng.add_str_mem(xy, regex); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test_regex_char_split_with_eq: x = y, x in to_re("A") → sat +static void test_regex_char_split_with_eq() { + std::cout << "test_regex_char_split_with_eq\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + ng.add_str_eq(x, y); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* regex = sg.mk(to_re_a); + + ng.add_str_mem(x, regex); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test_regex_char_split_ground_skip: ground string in regex handled by simplification +static void test_regex_char_split_ground_skip() { + std::cout << "test_regex_char_split_ground_skip\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* regex = sg.mk(to_re_a); + + // "A" in to_re("A") → simplification consumes the char prefix via derivative + ng.add_str_mem(a, regex); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// ----------------------------------------------------------------------- +// Variable Nielsen modifier tests +// ----------------------------------------------------------------------- + +// test var_nielsen basic: x = y (two distinct vars) → eq_split fires (priority 5 < 12) +// produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress) +static void test_var_nielsen_basic() { + std::cout << "test_var_nielsen_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + // x = y → eq_split: x→ε, x→y·z_fresh, y→x·z_fresh + ng.add_str_eq(x, y); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 3); + SASSERT(root->outgoing()[0]->is_progress()); +} + +// test var_nielsen: same var x·A = x·B → det modifier (cancel), not var_nielsen +static void test_var_nielsen_same_var_det() { + std::cout << "test_var_nielsen_same_var_det\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* xb = sg.mk_concat(x, b); + + // x·A = x·B → same-head cancel → A = B → clash → unsat + ng.add_str_eq(xa, xb); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); +} + +// test var_nielsen: char vs var → const_nielsen fires, not var_nielsen +static void test_var_nielsen_not_applicable_char() { + std::cout << "test_var_nielsen_not_applicable_char\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* y = sg.mk_var(symbol("y")); + + // A = y → const_nielsen (2 children), not var_nielsen (3) + ng.add_str_eq(a, y); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 2); +} + +// test var_nielsen solve: x·y = z·w is sat (all vars can be ε) +static void test_var_nielsen_solve_sat() { + std::cout << "test_var_nielsen_solve_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* z = sg.mk_var(symbol("z")); + euf::snode* w = sg.mk_var(symbol("w")); + euf::snode* xy = sg.mk_concat(x, y); + euf::snode* zw = sg.mk_concat(z, w); + + ng.add_str_eq(xy, zw); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test var_nielsen solve: x·A = y·B → unsat (trailing char mismatch) +static void test_var_nielsen_solve_unsat() { + std::cout << "test_var_nielsen_solve_unsat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* yb = sg.mk_concat(y, b); + + ng.add_str_eq(xa, yb); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); +} + +// test var_nielsen: x·y = y·x commutation is sat (x=y=ε via ε branches) +static void test_var_nielsen_commutation_sat() { + std::cout << "test_var_nielsen_commutation_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* xya = sg.mk_concat(x, sg.mk_concat(y, a)); + euf::snode* yxa = sg.mk_concat(y, sg.mk_concat(x, a)); + + ng.add_str_eq(xya, yxa); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test var_nielsen priority: var vs var → eq_split fires first (priority 5 < 12) +// eq_split produces 3 children: x→ε (progress), x→y·z (non-progress), y→x·z (non-progress) +static void test_var_nielsen_priority() { + std::cout << "test_var_nielsen_priority\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + ng.add_str_eq(x, y); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // eq_split produces 3 children + SASSERT(root->outgoing().size() == 3); + // first edge is progress (x→ε), others are non-progress + SASSERT(root->outgoing()[0]->is_progress()); +} + +// test generate_extensions: det modifier has priority over const_nielsen +// x·A = x·y → det cancel (1 child), not const_nielsen (2 children) +static void test_generate_extensions_det_priority() { + std::cout << "test_generate_extensions_det_priority\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* xy = sg.mk_concat(x, y); + + // x·A = x·y → same-head x cancel → A = y (1 child via det) + ng.add_str_eq(xa, xy); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // det modifier produces 1 child (head cancel), not 2 (const) or 3 (var) + SASSERT(root->outgoing().size() == 1); + SASSERT(root->outgoing()[0]->is_progress()); +} + +// test generate_extensions: returns false when no modifier applies +// ground clash: A = B → simplify_and_init catches it, but if bypassed, +// no modifier can generate extensions from two chars +static void test_generate_extensions_no_applicable() { + std::cout << "test_generate_extensions_no_applicable\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + + // A = B → no variables involved → no modifier applies + ng.add_str_eq(a, b); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(!extended); + SASSERT(root->outgoing().empty()); +} + +// test generate_extensions: regex_char_split fires as last resort +// when there are only str_mem constraints, no str_eq +static void test_generate_extensions_regex_only() { + std::cout << "test_generate_extensions_regex_only\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + // Build regex to_re("A") + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* re_node = sg.mk(to_re_a); + + // x ∈ to_re("A") → only regex_char_split can fire (no str_eq) + ng.add_str_mem(x, re_node); + seq::nielsen_node* root = ng.root(); + + root->simplify_and_init(ng); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // at least 1 child (epsilon branch) + possibly char branches + SASSERT(root->outgoing().size() >= 1); +} + +// test generate_extensions: mixed constraints, det fires first +// x·A = x·B and y ∈ R → det cancel on eq first, regex untouched +static void test_generate_extensions_mixed_det_first() { + std::cout << "test_generate_extensions_mixed_det_first\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* xb = sg.mk_concat(x, b); + + // Build a regex for the mem constraint + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* re_node = sg.mk(to_re_a); + + ng.add_str_eq(xa, xb); + ng.add_str_mem(y, re_node); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // det modifier (same-head x cancel) produces 1 child + SASSERT(root->outgoing().size() == 1); +} + +// ----------------------------------------------------------------------- +// solve() / search_dfs() tests +// ----------------------------------------------------------------------- + +// test solve on empty graph (no root) returns sat +static void test_solve_empty_graph() { + std::cout << "test_solve_empty_graph\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + SASSERT(!ng.root()); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test solve with trivially satisfied equality (x = x) +static void test_solve_trivially_satisfied() { + std::cout << "test_solve_trivially_satisfied\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + ng.add_str_eq(x, x); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test that node status flags are set correctly after unsat solve +static void test_solve_node_status_unsat() { + std::cout << "test_solve_node_status_unsat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + // A = B is an immediate conflict + ng.add_str_eq(a, b); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); + + // root should be marked as general conflict + seq::nielsen_node* root = ng.root(); + SASSERT(root->is_general_conflict()); + SASSERT(root->is_currently_conflict()); +} + +// test that collect_conflict_deps returns deps after unsat +static void test_solve_conflict_deps() { + std::cout << "test_solve_conflict_deps\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + // Add two constraints: A = B (unsat) and a dummy x = x + ng.add_str_eq(a, b); + euf::snode* x = sg.mk_var(symbol("x")); + ng.add_str_eq(x, x); + + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); + + seq::dep_tracker deps; + ng.collect_conflict_deps(deps); + // deps should be non-empty since there's a conflict + SASSERT(!deps.empty()); +} + +// test dep_tracker::get_set_bits +static void test_dep_tracker_get_set_bits() { + std::cout << "test_dep_tracker_get_set_bits\n"; + + // empty tracker has no bits + seq::dep_tracker d0; + unsigned_vector bits0; + d0.get_set_bits(bits0); + SASSERT(bits0.empty()); + + // single bit set at position 5 + seq::dep_tracker d1(16, 5); + unsigned_vector bits1; + d1.get_set_bits(bits1); + SASSERT(bits1.size() == 1); + SASSERT(bits1[0] == 5); + + // two bits merged + seq::dep_tracker d2(16, 3); + d2.merge(seq::dep_tracker(16, 11)); + unsigned_vector bits2; + d2.get_set_bits(bits2); + SASSERT(bits2.size() == 2); + SASSERT(bits2[0] == 3); + SASSERT(bits2[1] == 11); + + // test across word boundary (bit 31 and 32) + seq::dep_tracker d3(64, 31); + d3.merge(seq::dep_tracker(64, 32)); + unsigned_vector bits3; + d3.get_set_bits(bits3); + SASSERT(bits3.size() == 2); + SASSERT(bits3[0] == 31); + SASSERT(bits3[1] == 32); +} + +// test explain_conflict returns correct constraint indices +static void test_explain_conflict_single_eq() { + std::cout << "test_explain_conflict_single_eq\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + // eq[0]: A = B (conflict) + ng.add_str_eq(a, b); + + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); + + unsigned_vector eq_idx, mem_idx; + ng.explain_conflict(eq_idx, mem_idx); + // conflict should reference eq index 0 + SASSERT(eq_idx.size() == 1); + SASSERT(eq_idx[0] == 0); + SASSERT(mem_idx.empty()); +} + +// test explain_conflict with multiple eqs, only conflict-relevant one reported +static void test_explain_conflict_multi_eq() { + std::cout << "test_explain_conflict_multi_eq\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* x = sg.mk_var(symbol("x")); + + // eq[0]: x = x (trivially sat) + ng.add_str_eq(x, x); + // eq[1]: A = B (conflict) + ng.add_str_eq(a, b); + + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); + + unsigned_vector eq_idx, mem_idx; + ng.explain_conflict(eq_idx, mem_idx); + // at least eq[1] (A=B) must appear; eq[0] (x=x) is trivially removed + bool has_conflict_eq = false; + for (unsigned i : eq_idx) + if (i == 1) has_conflict_eq = true; + SASSERT(has_conflict_eq); + SASSERT(mem_idx.empty()); +} + +// test that is_extended is set after solve generates extensions +static void test_solve_node_extended_flag() { + std::cout << "test_solve_node_extended_flag\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* xy = sg.mk_concat(x, y); + euf::snode* z = sg.mk_var(symbol("z")); + euf::snode* w = sg.mk_var(symbol("w")); + euf::snode* zw = sg.mk_concat(z, w); + // x·y = z·w — requires extension generation + ng.add_str_eq(xy, zw); + + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); + + // root should be marked as extended + seq::nielsen_node* root = ng.root(); + SASSERT(root->is_extended()); +} + +// test solve with mixed eq + mem constraints: x·A = y·A and y ∈ re("A") +static void test_solve_mixed_eq_mem_sat() { + std::cout << "test_solve_mixed_eq_mem_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* ya = sg.mk_concat(y, a); + + // x·A = y·A (satisfiable: x=y=ε, or x=y=anything) + ng.add_str_eq(xa, ya); + + // y ∈ to_re("A") (y must be "A") + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* re_a = sg.mk(to_re_a); + ng.add_str_mem(y, re_a); + + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// test solve with children_failed reason propagation: x·A = x·B unsat +static void test_solve_children_failed_reason() { + std::cout << "test_solve_children_failed_reason\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* yb = sg.mk_concat(y, b); + + // x·A = y·B is unsat (last char mismatch propagates up) + ng.add_str_eq(xa, yb); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); +} + +// test that eval_idx is set during solve +static void test_solve_eval_idx_tracking() { + std::cout << "test_solve_eval_idx_tracking\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('A'); + // x = A·x would be infinite without depth bound, but + // x = A is simple and satisfiable + ng.add_str_eq(x, a); + + unsigned run_before = ng.run_idx(); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); + + // run_idx should have been incremented + SASSERT(ng.run_idx() > run_before); + + // root's eval_idx should match current run_idx + seq::nielsen_node* root = ng.root(); + SASSERT(root->eval_idx() == ng.run_idx()); +} + +// test is_subsumed_by: node with superset of constraints is subsumed +static void test_is_subsumed_by_basic() { + std::cout << "test_is_subsumed_by_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* z = sg.mk_var(symbol("z")); + + seq::dep_tracker dep(4, 0); + + // node1 has {x=y, x=z} + seq::nielsen_node* n1 = ng.mk_node(); + n1->add_str_eq(seq::str_eq(x, y, dep)); + n1->add_str_eq(seq::str_eq(x, z, dep)); + + // node2 has {x=y} (subset of node1) + seq::nielsen_node* n2 = ng.mk_node(); + n2->add_str_eq(seq::str_eq(x, y, dep)); + + // n1 is subsumed by n2 (n2's constraints are a subset of n1's) + SASSERT(n1->is_subsumed_by(*n2)); + // n2 is NOT subsumed by n1 (n1 has x=z which n2 doesn't) + SASSERT(!n2->is_subsumed_by(*n1)); +} + +// test is_subsumed_by: trivial equations are skipped +static void test_is_subsumed_by_trivial_skip() { + std::cout << "test_is_subsumed_by_trivial_skip\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* e = sg.mk_empty(); + + seq::dep_tracker dep(4, 0); + + // node1 has {x=y} + seq::nielsen_node* n1 = ng.mk_node(); + n1->add_str_eq(seq::str_eq(x, y, dep)); + + // node2 has {x=y, ε=ε} (extra trivial eq) + seq::nielsen_node* n2 = ng.mk_node(); + n2->add_str_eq(seq::str_eq(x, y, dep)); + n2->add_str_eq(seq::str_eq(e, e, dep)); + + // n1 is subsumed by n2 (n2's non-trivial constraints ⊆ n1's) + SASSERT(n1->is_subsumed_by(*n2)); + // n2 is also subsumed by n1 (n1's constraints ⊆ n2's) + SASSERT(n2->is_subsumed_by(*n1)); +} + +// test is_subsumed_by: non-matching constraints +static void test_is_subsumed_by_no_match() { + std::cout << "test_is_subsumed_by_no_match\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* z = sg.mk_var(symbol("z")); + + seq::dep_tracker dep(4, 0); + + // node1 has {x=y} + seq::nielsen_node* n1 = ng.mk_node(); + n1->add_str_eq(seq::str_eq(x, y, dep)); + + // node2 has {x=z} (different constraint) + seq::nielsen_node* n2 = ng.mk_node(); + n2->add_str_eq(seq::str_eq(x, z, dep)); + + SASSERT(!n1->is_subsumed_by(*n2)); + SASSERT(!n2->is_subsumed_by(*n1)); +} + +// test is_subsumed_by: identical constraint sets +static void test_is_subsumed_by_exact_match() { + std::cout << "test_is_subsumed_by_exact_match\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + seq::dep_tracker dep(4, 0); + + seq::nielsen_node* n1 = ng.mk_node(); + n1->add_str_eq(seq::str_eq(x, y, dep)); + + seq::nielsen_node* n2 = ng.mk_node(); + n2->add_str_eq(seq::str_eq(x, y, dep)); + + // identical sets: mutual subsumption + SASSERT(n1->is_subsumed_by(*n2)); + SASSERT(n2->is_subsumed_by(*n1)); +} + +// test is_subsumed_by with str_mem constraints +static void test_is_subsumed_by_str_mem() { + std::cout << "test_is_subsumed_by_str_mem\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* e = sg.mk_empty(); + + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex = sg.mk(re_all); + + seq::dep_tracker dep(4, 0); + + // node1 has {x ∈ re_all, x=x_eq} + seq::nielsen_node* n1 = ng.mk_node(); + n1->add_str_mem(seq::str_mem(x, regex, e, 0, dep)); + euf::snode* y = sg.mk_var(symbol("y")); + n1->add_str_eq(seq::str_eq(x, y, dep)); + + // node2 has {x ∈ re_all} only (subset) + seq::nielsen_node* n2 = ng.mk_node(); + n2->add_str_mem(seq::str_mem(x, regex, e, 0, dep)); + + SASSERT(n1->is_subsumed_by(*n2)); + SASSERT(!n2->is_subsumed_by(*n1)); +} + +// test is_subsumed_by: empty node subsumes everything +static void test_is_subsumed_by_empty() { + std::cout << "test_is_subsumed_by_empty\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + seq::dep_tracker dep(4, 0); + + // node1 has {x=y} + seq::nielsen_node* n1 = ng.mk_node(); + n1->add_str_eq(seq::str_eq(x, y, dep)); + + // node2 is empty + seq::nielsen_node* n2 = ng.mk_node(); + + // n1 is subsumed by empty n2 (empty set is subset of everything) + SASSERT(n1->is_subsumed_by(*n2)); + // n2 is NOT subsumed by n1 (n1 has x=y which n2 doesn't) + SASSERT(!n2->is_subsumed_by(*n1)); +} + +// ----------------------------------------------------------------------- +// Direct simplify_and_init tests +// ----------------------------------------------------------------------- + +// test simplify_and_init: prefix cancellation of matching chars +static void test_simplify_prefix_cancel() { + std::cout << "test_simplify_prefix_cancel\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + // A·B·x = A·B·y → prefix cancel A,B → x = y (proceed) + euf::snode* abx = sg.mk_concat(a, sg.mk_concat(b, x)); + euf::snode* aby = sg.mk_concat(a, sg.mk_concat(b, y)); + + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_eq(seq::str_eq(abx, aby, dep)); + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::proceed); + SASSERT(node->str_eqs().size() == 1); + // after prefix cancel: remaining eq has variable-only sides + SASSERT(node->str_eqs()[0].m_lhs->is_var()); + SASSERT(node->str_eqs()[0].m_rhs->is_var()); +} + +// test simplify_and_init: symbol clash at first position +static void test_simplify_symbol_clash() { + std::cout << "test_simplify_symbol_clash\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + // A·x = B·y → symbol clash on first char + euf::snode* ax = sg.mk_concat(a, x); + euf::snode* by = sg.mk_concat(b, y); + + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_eq(seq::str_eq(ax, by, dep)); + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::conflict); + SASSERT(node->is_general_conflict()); + SASSERT(node->reason() == seq::backtrack_reason::symbol_clash); +} + +// test simplify_and_init: empty propagation forces variables to epsilon +static void test_simplify_empty_propagation() { + std::cout << "test_simplify_empty_propagation\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* e = sg.mk_empty(); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* xy = sg.mk_concat(x, y); + + // ε = x·y → forces x=ε, y=ε → all trivial → satisfied + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_eq(seq::str_eq(e, xy, dep)); + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::satisfied); +} + +// test simplify_and_init: empty vs concrete char → conflict +static void test_simplify_empty_vs_char() { + std::cout << "test_simplify_empty_vs_char\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* e = sg.mk_empty(); + euf::snode* a = sg.mk_char('A'); + + // ε = A → rhs has non-variable token → conflict + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_eq(seq::str_eq(e, a, dep)); + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::conflict); + SASSERT(node->reason() == seq::backtrack_reason::symbol_clash); +} + +// test simplify_and_init: multi-pass (prefix cancel A, then B≠C clash) +static void test_simplify_multi_pass_clash() { + std::cout << "test_simplify_multi_pass_clash\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* c = sg.mk_char('C'); + + // A·B = A·C → cancel A → B vs C → clash + euf::snode* ab = sg.mk_concat(a, b); + euf::snode* ac = sg.mk_concat(a, c); + + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_eq(seq::str_eq(ab, ac, dep)); + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::conflict); + SASSERT(node->reason() == seq::backtrack_reason::symbol_clash); +} + +// test simplify_and_init: trivial eq removed, non-trivial kept +static void test_simplify_trivial_removal() { + std::cout << "test_simplify_trivial_removal\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* e = sg.mk_empty(); + + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_eq(seq::str_eq(e, e, dep)); // trivial + node->add_str_eq(seq::str_eq(x, y, dep)); // non-trivial + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::proceed); + SASSERT(node->str_eqs().size() == 1); +} + +// test simplify_and_init: all trivial eqs → satisfied +static void test_simplify_all_trivial_satisfied() { + std::cout << "test_simplify_all_trivial_satisfied\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* e = sg.mk_empty(); + + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_eq(seq::str_eq(x, x, dep)); // trivial: same pointer + node->add_str_eq(seq::str_eq(e, e, dep)); // trivial: both empty + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::satisfied); +} + +// test simplify_and_init: ε ∈ non-nullable regex → conflict +static void test_simplify_regex_infeasible() { + std::cout << "test_simplify_regex_infeasible\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + seq::nielsen_graph ng(sg); + euf::snode* e = sg.mk_empty(); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* regex = sg.mk(to_re_a); + + // ε ∈ to_re("A") → non-nullable → conflict + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_mem(seq::str_mem(e, regex, e, 0, dep)); + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::conflict); + SASSERT(node->reason() == seq::backtrack_reason::regex); +} + +// test simplify_and_init: ε ∈ nullable regex → satisfied, mem removed +static void test_simplify_nullable_removal() { + std::cout << "test_simplify_nullable_removal\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + seq::nielsen_graph ng(sg); + euf::snode* e = sg.mk_empty(); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref re_star(seq.re.mk_star(to_re_a), m); + euf::snode* regex = sg.mk(re_star); + + // ε ∈ star(to_re("A")) → nullable → satisfied, mem removed + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_mem(seq::str_mem(e, regex, e, 0, dep)); + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::satisfied); + SASSERT(node->str_mems().empty()); +} + +// test simplify_and_init: Brzozowski derivative consumes ground char +static void test_simplify_brzozowski_sat() { + std::cout << "test_simplify_brzozowski_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* e = sg.mk_empty(); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* regex = sg.mk(to_re_a); + + // "A" ∈ to_re("A") → derivative consumes 'A' → ε ∈ ε-regex → satisfied + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(4, 0); + node->add_str_mem(seq::str_mem(a, regex, e, 0, dep)); + + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::satisfied); +} + +// test simplify_and_init: multiple eqs with mixed status +static void test_simplify_multiple_eqs() { + std::cout << "test_simplify_multiple_eqs\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* z = sg.mk_var(symbol("z")); + euf::snode* e = sg.mk_empty(); + + seq::nielsen_node* node = ng.mk_node(); + seq::dep_tracker dep(8, 0); + + // eq1: ε = ε (trivial → removed) + node->add_str_eq(seq::str_eq(e, e, dep)); + // eq2: A·x = A·y (prefix cancel → x = y) + euf::snode* ax = sg.mk_concat(a, x); + euf::snode* ay = sg.mk_concat(a, y); + node->add_str_eq(seq::str_eq(ax, ay, dep)); + // eq3: x = z (non-trivial, kept) + node->add_str_eq(seq::str_eq(x, z, dep)); + + SASSERT(node->str_eqs().size() == 3); + auto sr = node->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::proceed); + // eq1 removed, eq2 simplified to x=y, eq3 kept → 2 eqs remain + SASSERT(node->str_eqs().size() == 2); +} + +// ----------------------------------------------------------------------- +// Modifier child state verification tests +// ----------------------------------------------------------------------- + +// test det cancel: verify child has the tail equality +static void test_det_cancel_child_eq() { + std::cout << "test_det_cancel_child_eq\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* xb = sg.mk_concat(x, b); + + // x·A = x·B → det same-head cancel → child has A = B + ng.add_str_eq(xa, xb); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 1); + + seq::nielsen_node* child = root->outgoing()[0]->tgt(); + SASSERT(child->str_eqs().size() == 1); + auto const& ceq = child->str_eqs()[0]; + // child's eq should have the two single chars (sorted) + SASSERT((ceq.m_lhs == a && ceq.m_rhs == b) || (ceq.m_lhs == b && ceq.m_rhs == a)); +} + +// test const_nielsen: verify children's substitutions target the variable +static void test_const_nielsen_child_substitutions() { + std::cout << "test_const_nielsen_child_substitutions\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* y = sg.mk_var(symbol("y")); + + // A = y → const_nielsen → 2 children, both substitute y + ng.add_str_eq(a, y); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 2); + + // both edges substitute y + for (unsigned i = 0; i < 2; ++i) { + SASSERT(root->outgoing()[i]->subst().size() == 1); + SASSERT(root->outgoing()[i]->subst()[0].m_var == y); + } + + // edge 0: y → ε (eliminating, replacement is empty) + SASSERT(root->outgoing()[0]->subst()[0].m_replacement->is_empty()); + // edge 1: y → A·fresh (replacement is non-empty) + SASSERT(!root->outgoing()[1]->subst()[0].m_replacement->is_empty()); +} + +// test var_nielsen: verify substitution structure of 3 children +static void test_var_nielsen_substitution_types() { + std::cout << "test_var_nielsen_substitution_types\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + // x = y → var_nielsen: 3 children + ng.add_str_eq(x, y); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 3); + + // edge 0: elimination to ε + SASSERT(root->outgoing()[0]->subst().size() == 1); + SASSERT(root->outgoing()[0]->subst()[0].m_replacement->is_empty()); + SASSERT(root->outgoing()[0]->subst()[0].is_eliminating()); + + // edges 1,2: replacements are non-empty (concat with fresh var) + SASSERT(root->outgoing()[1]->subst().size() == 1); + SASSERT(!root->outgoing()[1]->subst()[0].m_replacement->is_empty()); + + SASSERT(root->outgoing()[2]->subst().size() == 1); + SASSERT(!root->outgoing()[2]->subst()[0].m_replacement->is_empty()); + + // edges 1 and 2 target different variables + SASSERT(root->outgoing()[1]->subst()[0].m_var != + root->outgoing()[2]->subst()[0].m_var); +} + +// ----------------------------------------------------------------------- +// Explain conflict with mem constraints +// ----------------------------------------------------------------------- + +// test explain_conflict: mem-only conflict reports mem index +static void test_explain_conflict_mem_only() { + std::cout << "test_explain_conflict_mem_only\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + seq::nielsen_graph ng(sg); + euf::snode* e = sg.mk_empty(); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* regex = sg.mk(to_re_a); + + // ε ∈ to_re("A") → conflict (non-nullable) + ng.add_str_mem(e, regex); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); + + unsigned_vector eq_idx, mem_idx; + ng.explain_conflict(eq_idx, mem_idx); + // only mem constraint involved, no eqs + SASSERT(eq_idx.empty()); + SASSERT(mem_idx.size() == 1); + SASSERT(mem_idx[0] == 0); +} + +// test explain_conflict: mixed eq + mem conflict +static void test_explain_conflict_mixed_eq_mem() { + std::cout << "test_explain_conflict_mixed_eq_mem\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* e = sg.mk_empty(); + + // eq[0]: A = B (conflict) + ng.add_str_eq(a, b); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* regex = sg.mk(to_re_a); + + // mem[0]: ε ∈ to_re("A") + ng.add_str_mem(e, regex); + + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); + + unsigned_vector eq_idx, mem_idx; + ng.explain_conflict(eq_idx, mem_idx); + // eq[0] should be reported (it's the direct conflict) + bool has_eq0 = false; + for (unsigned i : eq_idx) if (i == 0) has_eq0 = true; + SASSERT(has_eq0); + // mem[0] is also reported (over-approximation from collect_conflict_deps) + bool has_mem0 = false; + for (unsigned i : mem_idx) if (i == 0) has_mem0 = true; + SASSERT(has_mem0); +} + +// test subsumption pruning during solve: a node whose constraint set +// is a superset of a known-unsat node is pruned +static void test_subsumption_pruning_unsat() { + std::cout << "test_subsumption_pruning_unsat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + + // A = B is an immediate conflict (symbol clash). + // Any branch that inherits this equation should be pruned. + ng.add_str_eq(a, b); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); + + // root should have conflict set + SASSERT(ng.root()->is_general_conflict()); +} + +// test that subsumption sets backtrack_reason::subsumption +static void test_subsumption_reason_set() { + std::cout << "test_subsumption_reason_set\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + + // x·A = y·B: after Nielsen splitting, children will have A=B + // which is unsat. The subsumption pruning may fire on sibling + // branches that inherit the same conflict. + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* yb = sg.mk_concat(y, b); + ng.add_str_eq(xa, yb); + + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::unsat); + + // check that at least one node has subsumption reason + bool found_subsumption = false; + for (seq::nielsen_node* nd : ng.nodes()) { + if (nd->reason() == seq::backtrack_reason::subsumption) { + found_subsumption = true; + SASSERT(nd->is_general_conflict()); + break; + } + } + // subsumption may or may not fire depending on search order; + // the important thing is the solve result is correct. + // If it does fire, the reason must be subsumption. + (void)found_subsumption; +} + +// test generate_length_constraints: basic equation x . y = A . B +static void test_length_constraints_basic() { + std::cout << "test_length_constraints_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + arith_util arith(m); + seq_util seq(m); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + + // equation: x . y = A . B + euf::snode* lhs = sg.mk_concat(x, y); + euf::snode* rhs = sg.mk_concat(a, b); + + seq::nielsen_graph ng(sg); + ng.add_str_eq(lhs, rhs); + + vector constraints; + ng.generate_length_constraints(constraints); + + // expect at least 1 length equality + 2 non-negativity constraints (for x and y) + SASSERT(constraints.size() >= 3); + + // first constraint should be the length equality + SASSERT(constraints[0].m_expr != nullptr); + SASSERT(m.is_eq(constraints[0].m_expr)); + SASSERT(constraints[0].m_kind == seq::length_kind::eq); + + // remaining constraints should be non-negativity + for (unsigned i = 1; i < constraints.size(); ++i) { + SASSERT(constraints[i].m_kind == seq::length_kind::nonneg); + } + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; +} + +// test generate_length_constraints: trivial equation is skipped +static void test_length_constraints_trivial_skip() { + std::cout << "test_length_constraints_trivial_skip\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + + // trivial equation: x = x (same snode) + seq::nielsen_graph ng(sg); + ng.add_str_eq(x, x); + + vector constraints; + ng.generate_length_constraints(constraints); + + // trivial equation should be skipped, no constraints + SASSERT(constraints.empty()); + std::cout << " trivial equation correctly skipped\n"; +} + +// test generate_length_constraints: empty graph produces no constraints +static void test_length_constraints_empty() { + std::cout << "test_length_constraints_empty\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + vector constraints; + ng.generate_length_constraints(constraints); + + SASSERT(constraints.empty()); + std::cout << " empty graph: no constraints\n"; +} + +// test generate_length_constraints: concatenation chain x.y.z = A.B.C +static void test_length_constraints_concat_chain() { + std::cout << "test_length_constraints_concat_chain\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + arith_util arith(m); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* z = sg.mk_var(symbol("z")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* c = sg.mk_char('C'); + + // equation: x . y . z = A . B . C + euf::snode* lhs = sg.mk_concat(sg.mk_concat(x, y), z); + euf::snode* rhs = sg.mk_concat(sg.mk_concat(a, b), c); + + seq::nielsen_graph ng(sg); + ng.add_str_eq(lhs, rhs); + + vector constraints; + ng.generate_length_constraints(constraints); + + // 1 length equality + 3 variable non-negativity constraints + SASSERT(constraints.size() == 4); + SASSERT(m.is_eq(constraints[0].m_expr)); + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; +} + +// test generate_length_constraints: multiple equations +static void test_length_constraints_multi_eq() { + std::cout << "test_length_constraints_multi_eq\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + + seq::nielsen_graph ng(sg); + ng.add_str_eq(x, a); // x = A + ng.add_str_eq(y, b); // y = B + + vector constraints; + ng.generate_length_constraints(constraints); + + // 2 equalities + 2 non-negativity (x and y each appear once) + SASSERT(constraints.size() == 4); + SASSERT(m.is_eq(constraints[0].m_expr)); + SASSERT(m.is_eq(constraints[2].m_expr)); + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; +} + +// test generate_length_constraints: shared variable only gets one non-negativity +static void test_length_constraints_shared_var() { + std::cout << "test_length_constraints_shared_var\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + + // equation: x . A = A . x (x appears on both sides) + euf::snode* lhs = sg.mk_concat(x, a); + euf::snode* rhs = sg.mk_concat(a, x); + + seq::nielsen_graph ng(sg); + ng.add_str_eq(lhs, rhs); + + vector constraints; + ng.generate_length_constraints(constraints); + + // 1 length equality + 1 non-negativity for x (deduped) + SASSERT(constraints.size() == 2); + + std::cout << " generated " << constraints.size() << " constraints (x deduped)\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; +} + +// test generate_length_constraints: dependency tracking +static void test_length_constraints_deps() { + std::cout << "test_length_constraints_deps\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('A'); + + seq::nielsen_graph ng(sg); + ng.add_str_eq(x, a); // eq index 0 + + vector constraints; + ng.generate_length_constraints(constraints); + + // all constraints should have dependency on eq 0 + for (auto const& c : constraints) { + SASSERT(!c.m_dep.empty()); + unsigned_vector bits; + c.m_dep.get_set_bits(bits); + SASSERT(bits.size() == 1); + SASSERT(bits[0] == 0); + } + + std::cout << " dependency tracking correct\n"; +} + +// test generate_length_constraints: empty sides produce 0 +static void test_length_constraints_empty_side() { + std::cout << "test_length_constraints_empty_side\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* e = sg.mk_empty(); + + // x = ε + seq::nielsen_graph ng(sg); + ng.add_str_eq(x, e); + + vector constraints; + ng.generate_length_constraints(constraints); + + // 1 equality (len(x) = 0) + 1 non-negativity (len(x) >= 0) + SASSERT(constraints.size() == 2); + SASSERT(m.is_eq(constraints[0].m_expr)); + SASSERT(constraints[0].m_kind == seq::length_kind::eq); + SASSERT(constraints[1].m_kind == seq::length_kind::nonneg); + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; +} + +// test length_kind tagging: equalities get kind::eq, non-neg get kind::nonneg, +// Parikh bounds get kind::bound +static void test_length_kind_tagging() { + std::cout << "test_length_kind_tagging\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + + // equation: x = a (one eq + one nonneg) + seq::nielsen_graph ng(sg); + ng.add_str_eq(x, a); + + // membership: y in to_re("AB") (bounds + nonneg) + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref ab(seq.str.mk_concat(unit_a, unit_b), m); + expr_ref to_re_ab(seq.re.mk_to_re(ab), m); + euf::snode* regex = sg.mk(to_re_ab); + ng.add_str_mem(y, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + unsigned num_eq = 0, num_nonneg = 0, num_bound = 0; + for (auto const& c : constraints) { + switch (c.m_kind) { + case seq::length_kind::eq: ++num_eq; break; + case seq::length_kind::nonneg: ++num_nonneg; break; + case seq::length_kind::bound: ++num_bound; break; + } + } + + std::cout << " eq=" << num_eq << " nonneg=" << num_nonneg << " bound=" << num_bound << "\n"; + // at least 1 equality (from str_eq) + SASSERT(num_eq >= 1); + // at least 1 non-negativity (for variable x or y) + SASSERT(num_nonneg >= 1); + // at least 1 bound (from Parikh for to_re("AB")) + SASSERT(num_bound >= 1); + + // verify equalities have kind eq + for (auto const& c : constraints) { + if (m.is_eq(c.m_expr)) + SASSERT(c.m_kind == seq::length_kind::eq); + } + + // verify num_input_eqs/mems accessors + SASSERT(ng.num_input_eqs() == 1); + SASSERT(ng.num_input_mems() == 1); + + std::cout << " length kind tagging correct\n"; +} + +// ----------------------------------------------------------------------- +// Tests for new modifiers (Task #55) +// ----------------------------------------------------------------------- + +// test_power_epsilon_no_power: no power tokens → modifier returns false +static void test_power_epsilon_no_power() { + std::cout << "test_power_epsilon_no_power\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('A'); + + // x = A: no power tokens, power_epsilon should not fire + ng.add_str_eq(x, a); + seq::nielsen_node* root = ng.root(); + + // det fires (x = single char → const_nielsen fires eventually), + // but power_epsilon (priority 2) should not fire; det (priority 1) fires. + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // det catches x = A first (single token eq, but actually ConstNielsen fires): + // x is var, A is char → ConstNielsen: 2 children (x→ε, x→A) + SASSERT(root->outgoing().size() == 2); +} + +// test_num_cmp_no_power: no same-base power pair → modifier returns false +static void test_num_cmp_no_power() { + std::cout << "test_num_cmp_no_power\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + // x = y: no power tokens, num_cmp should not fire + ng.add_str_eq(x, y); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // eq_split fires (priority 5): 3 children + SASSERT(root->outgoing().size() == 3); +} + +// test_star_intr_no_backedge: no backedge → modifier returns false +static void test_star_intr_no_backedge() { + std::cout << "test_star_intr_no_backedge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* regex = sg.mk(to_re_a); + + // x ∈ to_re("A"): no backedge, star_intr should not fire + ng.add_str_mem(x, regex); + + seq::nielsen_node* root = ng.root(); + SASSERT(root->backedge() == nullptr); + + auto sr = root->simplify_and_init(ng); + SASSERT(sr != seq::simplify_result::conflict); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // regex_char_split fires (priority 9): at least 2 children (x→A·z, x→ε) + SASSERT(root->outgoing().size() >= 2); +} + +// test_star_intr_with_backedge: backedge set → star_intr fires +static void test_star_intr_with_backedge() { + std::cout << "test_star_intr_with_backedge\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref re_star(seq.re.mk_star(to_re_a), m); + euf::snode* regex = sg.mk(re_star); + + // x ∈ star(to_re("A")): set backedge to simulate cycle detection + ng.add_str_mem(x, regex); + + seq::nielsen_node* root = ng.root(); + root->set_backedge(root); // simulate backedge + + auto sr = root->simplify_and_init(ng); + // star(to_re("A")) is nullable, so empty string satisfies it + // simplify may remove the membership or proceed + if (sr == seq::simplify_result::satisfied) { + std::cout << " simplified to satisfied (nullable regex)\n"; + return; // OK, the regex is nullable so it was removed + } + + bool extended = ng.generate_extensions(root, 0); + if (extended) { + // star_intr should have generated at least 1 child + SASSERT(root->outgoing().size() >= 1); + std::cout << " star_intr generated " << root->outgoing().size() << " children\n"; + } +} + +// test_gpower_intr_repeated_chars: x = AAB → GPowerIntr fires (2+ repeated 'A') +static void test_gpower_intr_repeated_chars() { + std::cout << "test_gpower_intr_repeated_chars\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a1 = sg.mk_char('A'); + euf::snode* a2 = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* aab = sg.mk_concat(a1, sg.mk_concat(a2, b)); + + // x = AAB → single var vs ground with 2 repeated 'A' at front + ng.add_str_eq(x, aab); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // gpower_intr should fire (priority 7), producing 1 child: x = fresh_power · fresh_suffix + SASSERT(root->outgoing().size() == 1); + std::cout << " gpower_intr generated " << root->outgoing().size() << " children\n"; +} + +// test_gpower_intr_no_repeat: x = AB → no repeated pattern → GPowerIntr doesn't fire +static void test_gpower_intr_no_repeat() { + std::cout << "test_gpower_intr_no_repeat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* ab = sg.mk_concat(a, b); + + // x = AB → only 1 repeated 'A', needs >= 2 + ng.add_str_eq(x, ab); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // gpower_intr should NOT fire (< 2 repeats) + // const_nielsen (priority 8) fires for var vs char: 2 children + SASSERT(root->outgoing().size() == 2); +} + +// test_regex_var_split_basic: x ∈ re → uses minterms for splitting +static void test_regex_var_split_basic() { + std::cout << "test_regex_var_split_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + + // Build a regex: re.union(to_re("A"), to_re("B")) + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref to_re_b(seq.re.mk_to_re(unit_b), m); + expr_ref re_union(seq.re.mk_union(to_re_a, to_re_b), m); + euf::snode* regex = sg.mk(re_union); + + ng.add_str_mem(x, regex); + seq::nielsen_node* root = ng.root(); + + auto sr = root->simplify_and_init(ng); + SASSERT(sr != seq::simplify_result::conflict); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // Should produce children via regex_char_split or regex_var_split + SASSERT(root->outgoing().size() >= 2); + std::cout << " regex split generated " << root->outgoing().size() << " children\n"; +} + +// test_power_split_no_power: no power tokens → modifier returns false +static void test_power_split_no_power() { + std::cout << "test_power_split_no_power\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + euf::snode* xa = sg.mk_concat(x, a); + + // x·A = y: no power tokens, power_split should not fire + ng.add_str_eq(xa, y); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // eq_split or const_nielsen fires + SASSERT(root->outgoing().size() >= 2); +} + +// test_var_num_unwinding_no_power: no power tokens → modifier returns false +static void test_var_num_unwinding_no_power() { + std::cout << "test_var_num_unwinding_no_power\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + // x = y: no power tokens, var_num_unwinding should not fire + ng.add_str_eq(x, y); + seq::nielsen_node* root = ng.root(); + + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + // eq_split fires: 3 children + SASSERT(root->outgoing().size() == 3); +} + +// test_const_num_unwinding_no_power: no power vs const → modifier returns false +static void test_const_num_unwinding_no_power() { + std::cout << "test_const_num_unwinding_no_power\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + + // A = B: no power tokens, clash during simplification + ng.add_str_eq(a, b); + seq::nielsen_node* root = ng.root(); + + // Should detect clash during simplify + auto sr = root->simplify_and_init(ng); + SASSERT(sr == seq::simplify_result::conflict); +} + +// test_priority_chain_order: verify the full priority chain +// Det fires first, then appropriate modifiers in order +static void test_priority_chain_order() { + std::cout << "test_priority_chain_order\n"; + ast_manager m; + reg_decl_plugins(m); + + // Case 1: same-head cancel → Det (priority 1) fires + { + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('A'); + euf::snode* b = sg.mk_char('B'); + euf::snode* xa = sg.mk_concat(x, a); + euf::snode* xb = sg.mk_concat(x, b); + + ng.add_str_eq(xa, xb); + seq::nielsen_node* root = ng.root(); + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 1); // Det: single child (cancel) + } + + // Case 2: both vars different → EqSplit (priority 5) fires + { + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + ng.add_str_eq(x, y); + seq::nielsen_node* root = ng.root(); + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 3); // EqSplit: 3 children + } + + // Case 3: char vs var → ConstNielsen (priority 8) fires + { + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::nielsen_graph ng(sg); + + euf::snode* a = sg.mk_char('A'); + euf::snode* y = sg.mk_var(symbol("y")); + + ng.add_str_eq(a, y); + seq::nielsen_node* root = ng.root(); + bool extended = ng.generate_extensions(root, 0); + SASSERT(extended); + SASSERT(root->outgoing().size() == 2); // ConstNielsen: 2 children + } +} + +// test_gpower_intr_solve_sat: x = AAA → sat (x = "AAA") +static void test_gpower_intr_solve_sat() { + std::cout << "test_gpower_intr_solve_sat\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + seq::nielsen_graph ng(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a1 = sg.mk_char('A'); + euf::snode* a2 = sg.mk_char('A'); + euf::snode* a3 = sg.mk_char('A'); + euf::snode* aaa = sg.mk_concat(a1, sg.mk_concat(a2, a3)); + + ng.add_str_eq(x, aaa); + auto result = ng.solve(); + SASSERT(result == seq::nielsen_graph::search_result::sat); +} + +// ----------------------------------------------------------------------- +// Parikh interval reasoning tests (Task #34) +// ----------------------------------------------------------------------- + +// test: x in to_re("AB") generates len(x) >= 2 and len(x) <= 2 +static void test_parikh_exact_length() { + std::cout << "test_parikh_exact_length\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref ab(seq.str.mk_concat(unit_a, unit_b), m); + expr_ref to_re_ab(seq.re.mk_to_re(ab), m); + euf::snode* regex = sg.mk(to_re_ab); + + seq::nielsen_graph ng(sg); + ng.add_str_mem(x, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + // to_re("AB") has min_length=2 and max_length=2 + // expect: len(x) >= 2, len(x) <= 2, and len(x) >= 0 + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; + SASSERT(constraints.size() >= 2); + + // verify we have both a >= and a <= constraint with correct kinds + bool has_lower = false, has_upper = false; + for (auto const& c : constraints) { + if (arith.is_le(c.m_expr) || arith.is_ge(c.m_expr)) { + has_lower = has_lower || arith.is_ge(c.m_expr); + has_upper = has_upper || arith.is_le(c.m_expr); + // Parikh bounds should have kind::bound + if (!m.is_eq(c.m_expr)) + SASSERT(c.m_kind == seq::length_kind::bound || c.m_kind == seq::length_kind::nonneg); + } + } + SASSERT(has_lower); + SASSERT(has_upper); +} + +// test: x in (re.star (re.to_re "A")) generates no upper bound +static void test_parikh_star_unbounded() { + std::cout << "test_parikh_star_unbounded\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref re_star(seq.re.mk_star(to_re_a), m); + euf::snode* regex = sg.mk(re_star); + + seq::nielsen_graph ng(sg); + ng.add_str_mem(x, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + // star has min_length=0, max_length=UINT_MAX + // no lower bound > 0, no upper bound, just len(x) >= 0 + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; + + // should not have a <= constraint (unbounded) + bool has_upper = false; + for (auto const& c : constraints) { + if (arith.is_le(c.m_expr)) + has_upper = true; + } + SASSERT(!has_upper); +} + +// test: x in (re.union (re.to_re "AB") (re.to_re "CDE")) → len(x) >= 2, len(x) <= 3 +static void test_parikh_union_interval() { + std::cout << "test_parikh_union_interval\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + + // "AB" + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref ab(seq.str.mk_concat(unit_a, unit_b), m); + expr_ref to_re_ab(seq.re.mk_to_re(ab), m); + + // "CDE" + expr_ref ch_c(seq.str.mk_char('C'), m); + expr_ref unit_c(seq.str.mk_unit(ch_c), m); + expr_ref ch_d(seq.str.mk_char('D'), m); + expr_ref unit_d(seq.str.mk_unit(ch_d), m); + expr_ref ch_e(seq.str.mk_char('E'), m); + expr_ref unit_e(seq.str.mk_unit(ch_e), m); + expr_ref cde(seq.str.mk_concat(unit_c, seq.str.mk_concat(unit_d, unit_e)), m); + expr_ref to_re_cde(seq.re.mk_to_re(cde), m); + + expr_ref re_union(seq.re.mk_union(to_re_ab, to_re_cde), m); + euf::snode* regex = sg.mk(re_union); + + seq::nielsen_graph ng(sg); + ng.add_str_mem(x, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; + + // union of "AB" (len 2) and "CDE" (len 3): min_length=2, max_length=3 + bool has_lower = false, has_upper = false; + for (auto const& c : constraints) { + has_lower = has_lower || arith.is_ge(c.m_expr); + has_upper = has_upper || arith.is_le(c.m_expr); + } + SASSERT(has_lower); + SASSERT(has_upper); +} + +// test: x in re.loop(to_re("A"), 3, 5) → len(x) >= 3, len(x) <= 5 +static void test_parikh_loop_bounded() { + std::cout << "test_parikh_loop_bounded\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + expr_ref re_loop(seq.re.mk_loop(to_re_a, 3, 5), m); + euf::snode* regex = sg.mk(re_loop); + + seq::nielsen_graph ng(sg); + ng.add_str_mem(x, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; + + // loop{3,5} of "A" (len 1): min_length=3, max_length=5 + bool has_lower = false, has_upper = false; + for (auto const& c : constraints) { + has_lower = has_lower || arith.is_ge(c.m_expr); + has_upper = has_upper || arith.is_le(c.m_expr); + } + SASSERT(has_lower); + SASSERT(has_upper); +} + +// test: x in re.empty → normalized to [0,0], generates len(x) <= 0 +static void test_parikh_empty_regex() { + std::cout << "test_parikh_empty_regex\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref re_empty(seq.re.mk_empty(seq.re.mk_re(str_sort)), m); + euf::snode* regex = sg.mk(re_empty); + + seq::nielsen_graph ng(sg); + ng.add_str_mem(x, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; + + // empty regex: normalized to [0,0], so len(x) <= 0 + bool has_upper = false; + for (auto const& c : constraints) { + has_upper = has_upper || arith.is_le(c.m_expr); + } + SASSERT(has_upper); +} + +// test: x in re.range("A","Z") → len(x) >= 1, len(x) <= 1 +static void test_parikh_full_char() { + std::cout << "test_parikh_full_char\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + + // re.range("A", "Z") matches single characters in [A-Z] + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref ch_z(seq.str.mk_char('Z'), m); + expr_ref unit_z(seq.str.mk_unit(ch_z), m); + expr_ref re_range(seq.re.mk_range(unit_a, unit_z), m); + euf::snode* regex = sg.mk(re_range); + + seq::nielsen_graph ng(sg); + ng.add_str_mem(x, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; + + // range: min_length=1, max_length=1 + bool has_lower = false, has_upper = false; + for (auto const& c : constraints) { + has_lower = has_lower || arith.is_ge(c.m_expr); + has_upper = has_upper || arith.is_le(c.m_expr); + } + SASSERT(has_lower); + SASSERT(has_upper); +} + +// test: mixed str_eq and str_mem constraints generate both types +static void test_parikh_mixed_eq_mem() { + std::cout << "test_parikh_mixed_eq_mem\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('A'); + + // equation: x = A + seq::nielsen_graph ng(sg); + ng.add_str_eq(x, a); + + // membership: y in to_re("BC") + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref ch_c(seq.str.mk_char('C'), m); + expr_ref unit_c(seq.str.mk_unit(ch_c), m); + expr_ref bc(seq.str.mk_concat(unit_b, unit_c), m); + expr_ref to_re_bc(seq.re.mk_to_re(bc), m); + euf::snode* regex = sg.mk(to_re_bc); + ng.add_str_mem(y, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; + + // expect: len(x)=1 (from eq), len(x)>=0, len(y)>=2, len(y)<=2, len(y)>=0 + bool has_eq = false, has_le = false, has_ge = false; + for (auto const& c : constraints) { + has_eq = has_eq || m.is_eq(c.m_expr); + has_le = has_le || arith.is_le(c.m_expr); + has_ge = has_ge || arith.is_ge(c.m_expr); + } + SASSERT(has_eq); // from str_eq + SASSERT(has_le); // from str_mem upper bound + SASSERT(has_ge); // from str_mem lower bound or non-negativity +} + +// test: str_mem with full_seq (.*) → no bounds generated +static void test_parikh_full_seq_no_bounds() { + std::cout << "test_parikh_full_seq_no_bounds\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref re_all(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex = sg.mk(re_all); + + seq::nielsen_graph ng(sg); + ng.add_str_mem(x, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + std::cout << " generated " << constraints.size() << " constraints\n"; + for (auto const& c : constraints) + std::cout << " " << mk_pp(c.m_expr, m) << "\n"; + + // full_seq (.*): min_length=0, max_length=UINT_MAX → no Parikh bounds + // only len(x) >= 0 from variable non-negativity + bool has_le = false; + for (auto const& c : constraints) { + has_le = has_le || arith.is_le(c.m_expr); + } + SASSERT(!has_le); +} + +// test: dependency tracking for Parikh constraints +static void test_parikh_dep_tracking() { + std::cout << "test_parikh_dep_tracking\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); + euf::snode* regex = sg.mk(to_re_a); + + seq::nielsen_graph ng(sg); + ng.add_str_mem(x, regex); + + vector constraints; + ng.generate_length_constraints(constraints); + + // to_re("A") has min=1, max=1 → len(x)>=1 and len(x)<=1 + SASSERT(constraints.size() >= 2); + + // all Parikh constraints should have non-empty deps + for (auto const& c : constraints) { + SASSERT(!c.m_dep.empty()); + } + std::cout << " all constraints have non-empty deps\n"; +} + void tst_seq_nielsen() { test_dep_tracker(); test_str_eq(); @@ -476,4 +3290,103 @@ void tst_seq_nielsen() { test_run_idx(); test_multiple_memberships(); test_backedge(); + test_eq_split_basic(); + test_eq_split_solve_sat(); + test_eq_split_solve_unsat(); + test_eq_split_same_var_det(); + test_eq_split_commutation_sat(); + test_const_nielsen_char_var(); + test_const_nielsen_var_char(); + test_const_nielsen_solve_sat(); + test_const_nielsen_solve_unsat(); + test_const_nielsen_priority_over_eq_split(); + test_const_nielsen_not_applicable_both_vars(); + test_const_nielsen_multi_char_solve(); + test_var_nielsen_basic(); + test_var_nielsen_same_var_det(); + test_var_nielsen_not_applicable_char(); + test_var_nielsen_solve_sat(); + test_var_nielsen_solve_unsat(); + test_var_nielsen_commutation_sat(); + test_var_nielsen_priority(); + test_regex_char_split_basic(); + test_regex_char_split_solve_sat(); + test_regex_char_split_solve_multi_char(); + test_regex_char_split_union(); + test_regex_char_split_star_sat(); + test_regex_char_split_concat_str(); + test_regex_char_split_with_eq(); + test_regex_char_split_ground_skip(); + test_generate_extensions_det_priority(); + test_generate_extensions_no_applicable(); + test_generate_extensions_regex_only(); + test_generate_extensions_mixed_det_first(); + test_solve_empty_graph(); + test_solve_trivially_satisfied(); + test_solve_node_status_unsat(); + test_solve_conflict_deps(); + test_dep_tracker_get_set_bits(); + test_explain_conflict_single_eq(); + test_explain_conflict_multi_eq(); + test_solve_node_extended_flag(); + test_solve_mixed_eq_mem_sat(); + test_solve_children_failed_reason(); + test_solve_eval_idx_tracking(); + test_is_subsumed_by_basic(); + test_is_subsumed_by_trivial_skip(); + test_is_subsumed_by_no_match(); + test_is_subsumed_by_exact_match(); + test_is_subsumed_by_str_mem(); + test_is_subsumed_by_empty(); + test_simplify_prefix_cancel(); + test_simplify_symbol_clash(); + test_simplify_empty_propagation(); + test_simplify_empty_vs_char(); + test_simplify_multi_pass_clash(); + test_simplify_trivial_removal(); + test_simplify_all_trivial_satisfied(); + test_simplify_regex_infeasible(); + test_simplify_nullable_removal(); + test_simplify_brzozowski_sat(); + test_simplify_multiple_eqs(); + test_det_cancel_child_eq(); + test_const_nielsen_child_substitutions(); + test_var_nielsen_substitution_types(); + test_explain_conflict_mem_only(); + test_explain_conflict_mixed_eq_mem(); + test_subsumption_pruning_unsat(); + test_subsumption_reason_set(); + test_length_constraints_basic(); + test_length_constraints_trivial_skip(); + test_length_constraints_empty(); + test_length_constraints_concat_chain(); + test_length_constraints_multi_eq(); + test_length_constraints_shared_var(); + test_length_constraints_deps(); + test_length_constraints_empty_side(); + // Length kind tagging tests (Task #35) + test_length_kind_tagging(); + // New modifier tests (Task #55) + test_power_epsilon_no_power(); + test_num_cmp_no_power(); + test_star_intr_no_backedge(); + test_star_intr_with_backedge(); + test_gpower_intr_repeated_chars(); + test_gpower_intr_no_repeat(); + test_regex_var_split_basic(); + test_power_split_no_power(); + test_var_num_unwinding_no_power(); + test_const_num_unwinding_no_power(); + test_priority_chain_order(); + test_gpower_intr_solve_sat(); + // Parikh interval reasoning tests (Task #34) + test_parikh_exact_length(); + test_parikh_star_unbounded(); + test_parikh_union_interval(); + test_parikh_loop_bounded(); + test_parikh_empty_regex(); + test_parikh_full_char(); + test_parikh_mixed_eq_mem(); + test_parikh_full_seq_no_bounds(); + test_parikh_dep_tracking(); }