diff --git a/scripts/compare_seq_solvers.py b/scripts/compare_seq_solvers.py index 205f13c60..9519791b5 100644 --- a/scripts/compare_seq_solvers.py +++ b/scripts/compare_seq_solvers.py @@ -14,13 +14,14 @@ and reports: """ import argparse +import re import subprocess import sys import time from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path -TIMEOUT = 5 # seconds +DEFAULT_TIMEOUT = 5 # seconds COMMON_ARGS = ["model_validate=true"] SOLVERS = { @@ -29,27 +30,72 @@ SOLVERS = { } -def run_z3(z3_bin: str, smt_file: Path, solver_arg: str) -> tuple[str, float]: +_STATUS_RE = re.compile(r'\(\s*set-info\s+:status\s+(sat|unsat|unknown)\s*\)') + + +def read_smtlib_status(smt_file: Path) -> str: + """Read the expected status from the SMT-LIB (set-info :status ...) directive. + Returns 'sat', 'unsat', or 'unknown'. + """ + try: + text = smt_file.read_text(encoding="utf-8", errors="replace") + m = _STATUS_RE.search(text) + if m: + return m.group(1) + except OSError: + pass + return "unknown" + + +def determine_status(res_nseq: str, res_seq: str, smtlib_status: str) -> str: + """Determine the ground-truth status of a problem. + Priority: if both solvers agree on sat/unsat, use that; otherwise if one + solver gives sat/unsat, use that; otherwise fall back to the SMT-LIB + annotation; otherwise 'unknown'. + """ + definite = {"sat", "unsat"} + if res_nseq in definite and res_nseq == res_seq: + return res_nseq + if res_nseq in definite and res_seq not in definite: + return res_nseq + if res_seq in definite and res_nseq not in definite: + return res_seq + # Disagreement (sat vs unsat) — fall back to SMTLIB annotation + if res_nseq in definite and res_seq in definite and res_nseq != res_seq: + if smtlib_status in definite: + return smtlib_status + return "unknown" + # Neither solver gave a definite answer + if smtlib_status in definite: + return smtlib_status + return "unknown" + + +def run_z3(z3_bin: str, smt_file: Path, solver_arg: str, timeout_s: int = DEFAULT_TIMEOUT) -> tuple[str, float]: """Run z3 on a file with the given solver argument. Returns (result, elapsed) where result is 'sat', 'unsat', 'unknown', or 'timeout'/'error'. """ - cmd = [z3_bin, solver_arg] + COMMON_ARGS + [str(smt_file)] + timeout_ms = timeout_s * 1000 + cmd = [z3_bin, f"-t:{timeout_ms}", solver_arg] + COMMON_ARGS + [str(smt_file)] start = time.monotonic() try: proc = subprocess.run( cmd, capture_output=True, text=True, - timeout=TIMEOUT, + timeout=timeout_s + 5, # subprocess grace period beyond Z3's own timeout ) elapsed = time.monotonic() - start output = proc.stdout.strip() # Extract first meaningful line (sat/unsat/unknown) for line in output.splitlines(): line = line.strip() - if line in ("sat", "unsat", "unknown"): + if line in ("sat", "unsat"): return line, elapsed - return "unknown", elapsed + if line == "unknown": + # Z3 returns "unknown" when it hits -t: limit — treat as timeout + return "timeout", elapsed + return "timeout", elapsed except subprocess.TimeoutExpired: elapsed = time.monotonic() - start return "timeout", elapsed @@ -77,17 +123,21 @@ def classify(res_nseq: str, res_seq: str) -> str: return "diverge" -def process_file(z3_bin: str, smt_file: Path) -> dict: - res_nseq, t_nseq = run_z3(z3_bin, smt_file, SOLVERS["nseq"]) - res_seq, t_seq = run_z3(z3_bin, smt_file, SOLVERS["seq"]) +def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT) -> dict: + res_nseq, t_nseq = run_z3(z3_bin, smt_file, SOLVERS["nseq"], timeout_s) + res_seq, t_seq = run_z3(z3_bin, smt_file, SOLVERS["seq"], timeout_s) cat = classify(res_nseq, res_seq) + smtlib_status = read_smtlib_status(smt_file) + status = determine_status(res_nseq, res_seq, smtlib_status) return { - "file": smt_file, - "nseq": res_nseq, - "seq": res_seq, - "t_nseq": t_nseq, - "t_seq": t_seq, - "category": cat, + "file": smt_file, + "nseq": res_nseq, + "seq": res_seq, + "t_nseq": t_nseq, + "t_seq": t_seq, + "category": cat, + "smtlib_status": smtlib_status, + "status": status, } @@ -97,10 +147,13 @@ def main(): parser.add_argument("--z3", required=True, metavar="PATH", help="Path to z3 binary") parser.add_argument("--ext", default=".smt2", help="File extension to search for (default: .smt2)") parser.add_argument("--jobs", type=int, default=4, help="Parallel workers (default: 4)") + parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT, metavar="SEC", + help=f"Per-solver timeout in seconds (default: {DEFAULT_TIMEOUT})") parser.add_argument("--csv", metavar="FILE", help="Also write results to a CSV file") args = parser.parse_args() z3_bin = args.z3 + timeout_s = args.timeout root = Path(args.path) if not root.exists(): @@ -112,11 +165,11 @@ def main(): print(f"No {args.ext} files found under {root}", file=sys.stderr) sys.exit(1) - print(f"Found {len(files)} files. Running with {args.jobs} parallel workers …\n") + print(f"Found {len(files)} files. Running with {args.jobs} parallel workers, timeout={timeout_s}s …\n") results = [] with ThreadPoolExecutor(max_workers=args.jobs) as pool: - futures = {pool.submit(process_file, z3_bin, f): f for f in files} + futures = {pool.submit(process_file, z3_bin, f, timeout_s): f for f in files} done = 0 for fut in as_completed(futures): done += 1 @@ -138,24 +191,45 @@ def main(): categories.setdefault(r["category"], []).append(r) print("\n" + "="*70) - print("SUMMARY") - print("="*70) - + print("TOTALS") for cat, items in categories.items(): - if not items: - continue + print(f" {cat:40s}: {len(items)}") + print(f"{'='*70}") + + # ── Per-solver timeout & divergence file lists ───────────────────────── + nseq_timeouts = [r for r in results if r["nseq"] == "timeout"] + seq_timeouts = [r for r in results if r["seq"] == "timeout"] + both_to = categories["both_timeout"] + diverged = categories["diverge"] + + def _print_file_list(label: str, items: list[dict]): print(f"\n{'─'*70}") - print(f" {cat.upper().replace('_', ' ')} ({len(items)} files)") + print(f" {label} ({len(items)} files)") print(f"{'─'*70}") for r in sorted(items, key=lambda x: x["file"]): print(f" {r['file']}") - if cat not in ("both_timeout", "both_agree"): - print(f" nseq={r['nseq']:8s} ({r['t_nseq']:.1f}s) seq={r['seq']:8s} ({r['t_seq']:.1f}s)") - print(f"\n{'='*70}") - print(f"TOTALS") - for cat, items in categories.items(): - print(f" {cat:40s}: {len(items)}") + if nseq_timeouts: + _print_file_list("NSEQ TIMES OUT", nseq_timeouts) + if seq_timeouts: + _print_file_list("SEQ TIMES OUT", seq_timeouts) + if both_to: + _print_file_list("BOTH TIME OUT", both_to) + if diverged: + _print_file_list("DIVERGE (sat vs unsat)", diverged) + + print() + + # ── Problem status statistics ──────────────────────────────────────────── + status_counts = {"sat": 0, "unsat": 0, "unknown": 0} + for r in results: + status_counts[r["status"]] = status_counts.get(r["status"], 0) + 1 + + print(f"\nPROBLEM STATUS (total {len(results)} files)") + print(f"{'─'*40}") + print(f" {'sat':12s}: {status_counts['sat']:5d} ({100*status_counts['sat']/len(results):.1f}%)") + print(f" {'unsat':12s}: {status_counts['unsat']:5d} ({100*status_counts['unsat']/len(results):.1f}%)") + print(f" {'unknown':12s}: {status_counts['unknown']:5d} ({100*status_counts['unknown']/len(results):.1f}%)") print(f"{'='*70}\n") # ── Optional CSV output ─────────────────────────────────────────────────── @@ -163,7 +237,7 @@ def main(): import csv csv_path = Path(args.csv) with csv_path.open("w", newline="", encoding="utf-8") as f: - writer = csv.DictWriter(f, fieldnames=["file", "nseq", "seq", "t_nseq", "t_seq", "category"]) + writer = csv.DictWriter(f, fieldnames=["file", "nseq", "seq", "t_nseq", "t_seq", "category", "smtlib_status", "status"]) writer.writeheader() for r in sorted(results, key=lambda x: x["file"]): writer.writerow({**r, "file": str(r["file"])}) diff --git a/src/smt/nseq_context_solver.h b/src/smt/nseq_context_solver.h new file mode 100644 index 000000000..5b2c9263a --- /dev/null +++ b/src/smt/nseq_context_solver.h @@ -0,0 +1,69 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + nseq_context_solver.h + +Abstract: + + context_solver: concrete implementation of seq::simple_solver + that delegates arithmetic feasibility checks to an smt::kernel + configured with seq.solver = "seq_len". + +Author: + + Nikolaj Bjorner (nbjorner) 2026-03-10 + +--*/ +#pragma once + +#include "smt/seq/seq_nielsen.h" +#include "smt/smt_kernel.h" +#include "params/smt_params.h" + +namespace smt { + + /** + * Concrete simple_solver that wraps smt::kernel. + * Initializes the kernel with seq.solver = "seq_len" so that + * sequence length constraints are handled by theory_seq_len. + */ + class context_solver : public seq::simple_solver { + smt_params m_params; // must be declared before m_kernel + smt::kernel m_kernel; + + static smt_params make_seq_len_params() { + smt_params p; + p.m_string_solver = symbol("seq_len"); + return p; + } + + public: + context_solver(ast_manager& m) : + m_params(make_seq_len_params()), + m_kernel(m, m_params) {} + + lbool check() override { + return m_kernel.check(); + } + + void assert_expr(expr* e) override { + // std::cout << "Asserting: " << mk_pp(e, m_kernel.m()) << std::endl; + m_kernel.assert_expr(e); + } + + void push() override { + m_kernel.push(); + } + + void pop(unsigned num_scopes) override { + m_kernel.pop(num_scopes); + } + + void get_model(model_ref& mdl) override { + m_kernel.get_model(mdl); + } + }; + +} diff --git a/src/smt/nseq_model.cpp b/src/smt/nseq_model.cpp index c56862c6f..8eb8d5a59 100644 --- a/src/smt/nseq_model.cpp +++ b/src/smt/nseq_model.cpp @@ -35,6 +35,8 @@ namespace smt { m_var_values.reset(); m_var_regex.reset(); m_trail.reset(); + m_int_model = nullptr; + m_mg = &mg; m_factory = alloc(seq_factory, m, m_th.get_family_id(), mg.get_model()); mg.register_factory(m_factory); @@ -42,12 +44,12 @@ namespace smt { register_existing_values(nielsen); collect_var_regex_constraints(state); + // solve integer constraints from the sat_path FIRST so that + // m_int_model is available when snode_to_value evaluates power exponents + nielsen.solve_sat_path_ints(m_int_model); + // extract variable assignments from the satisfying leaf's substitution path - seq::nielsen_node const* sat = nielsen.sat_node(); - IF_VERBOSE(1, verbose_stream() << "nseq model init: sat_node=" << (sat ? "set" : "null") - << " path_len=" << nielsen.sat_path().size() << "\n";); extract_assignments(nielsen.sat_path()); - IF_VERBOSE(1, verbose_stream() << "nseq model: m_var_values has " << m_var_values.size() << " entries\n";); } model_value_proc* nseq_model::mk_value(enode* n, model_generator& mg) { @@ -103,6 +105,8 @@ namespace smt { m_var_values.reset(); m_var_regex.reset(); m_trail.reset(); + m_int_model = nullptr; + m_mg = nullptr; m_factory = nullptr; } @@ -175,6 +179,94 @@ namespace smt { return expr_ref(m); } + if (n->is_power()) { + SASSERT(n->num_args() == 2); + // Evaluate the base and exponent to produce a concrete string. + // The base is a string snode; the exponent is an integer expression + // whose value comes from the sat_path integer model. + expr_ref base_val = snode_to_value(n->arg(0)); + if (!base_val) + return expr_ref(m); + + euf::snode* exp_snode = n->arg(1); + expr* exp_expr = exp_snode ? exp_snode->get_expr() : nullptr; + rational exp_val; + arith_util arith(m); + + // Try to evaluate exponent: first check if it's a numeral, + // then try the int model from sat_path constraints, + // finally fall back to the proto_model from model_generator. + if (exp_expr && arith.is_numeral(exp_expr, exp_val)) { + // already concrete + } else if (exp_expr && m_int_model.get()) { + expr_ref result(m); + if (m_int_model->eval_expr(exp_expr, result, true) && arith.is_numeral(result, exp_val)) { + // evaluated from int model + } else if (m_mg) { + proto_model& pm = m_mg->get_model(); + if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) { + // evaluated from proto_model + } else { + exp_val = rational(0); + } + } else { + exp_val = rational(0); + } + } else if (exp_expr && m_mg) { + expr_ref result(m); + proto_model& pm = m_mg->get_model(); + if (pm.eval(exp_expr, result, true) && arith.is_numeral(result, exp_val)) { + // evaluated from proto_model + } else { + exp_val = rational(0); + } + } else { + exp_val = rational(0); + } + + if (exp_val.is_neg()) + exp_val = rational(0); + + // Build the repeated string: base^exp_val + if (exp_val.is_zero()) + return expr_ref(m_seq.str.mk_empty(m_seq.str.mk_string_sort()), m); + if (exp_val.is_one()) + return base_val; + + // For small exponents, concatenate directly; for large ones, + // build a concrete string constant to avoid enormous AST chains + // that cause cleanup_expr to diverge. + unsigned n_val = exp_val.get_unsigned(); + constexpr unsigned POWER_EXPAND_LIMIT = 1000; + if (n_val > POWER_EXPAND_LIMIT) { + // Try to extract a concrete character from the base (seq.unit(c)) + // and build a string literal directly (O(1) AST node). + unsigned ch = 0; + expr* unit_arg = nullptr; + if (m_seq.str.is_unit(base_val, unit_arg) && m_seq.is_const_char(unit_arg, ch)) { + svector buf(n_val, ch); + zstring result(buf.size(), buf.data()); + return expr_ref(m_seq.str.mk_string(result), m); + } + // Also handle if base is already a string constant + zstring base_str; + if (m_seq.str.is_string(base_val, base_str) && base_str.length() > 0) { + svector buf; + for (unsigned i = 0; i < n_val; ++i) + for (unsigned j = 0; j < base_str.length(); ++j) + buf.push_back(base_str[j]); + zstring result(buf.size(), buf.data()); + return expr_ref(m_seq.str.mk_string(result), m); + } + // Fallback: cap exponent to avoid divergence + n_val = POWER_EXPAND_LIMIT; + } + expr_ref acc(base_val); + for (unsigned i = 1; i < n_val; ++i) + acc = m_seq.str.mk_concat(acc, base_val); + return acc; + } + // fallback: use the underlying expression expr* e = n->get_expr(); return e ? expr_ref(e, m) : expr_ref(m); diff --git a/src/smt/nseq_model.h b/src/smt/nseq_model.h index cd5d13082..14baa40cf 100644 --- a/src/smt/nseq_model.h +++ b/src/smt/nseq_model.h @@ -61,6 +61,10 @@ namespace smt { // trail for GC protection of generated expressions expr_ref_vector m_trail; + // integer variable model from sat_path constraints + model_ref m_int_model; + model_generator* m_mg = nullptr; + // per-variable regex constraints: maps snode id -> intersected regex snode. // collected during init() from the state's str_mem list. u_map m_var_regex; diff --git a/src/smt/seq/CMakeLists.txt b/src/smt/seq/CMakeLists.txt index db63e4c6f..e6ed34c34 100644 --- a/src/smt/seq/CMakeLists.txt +++ b/src/smt/seq/CMakeLists.txt @@ -1,5 +1,6 @@ z3_add_component(smt_seq SOURCES + seq_parikh.cpp seq_nielsen.cpp COMPONENT_DEPENDENCIES euf diff --git a/src/smt/seq/seq_nielsen.cpp b/src/smt/seq/seq_nielsen.cpp index 195b2fa67..be28183f8 100644 --- a/src/smt/seq/seq_nielsen.cpp +++ b/src/smt/seq/seq_nielsen.cpp @@ -20,9 +20,10 @@ Author: --*/ #include "smt/seq/seq_nielsen.h" +#include "smt/seq/seq_parikh.h" #include "ast/arith_decl_plugin.h" #include "ast/ast_pp.h" -#include "math/lp/lar_solver.h" +#include "ast/rewriter/th_rewriter.h" #include "util/hashtable.h" #include #include @@ -30,178 +31,15 @@ Author: namespace seq { - // ----------------------------------------------- - // lp_simple_solver - // Concrete simple_solver built on top of lp::lar_solver. - // This is the default back-end used by nielsen_graph when no - // custom solver is provided. It translates each asserted - // arithmetic expression into LP bounds / terms and delegates - // feasibility checking to lp::lar_solver::find_feasible_solution(). - // ----------------------------------------------- - - class lp_simple_solver : public simple_solver { - ast_manager& m; - scoped_ptr m_lp; - u_map m_expr2lpvar; - unsigned m_ext_cnt = 0; - - // Stack of assertion counts for push/pop. - svector m_scope_stack; - expr_ref_vector m_asserted; - - public: - lp_simple_solver(ast_manager& m_) : m(m_), m_asserted(m_) {} - - lbool check() override { - // Rebuild LP solver from current assertions - m_lp = alloc(lp::lar_solver); - m_expr2lpvar.reset(); - m_ext_cnt = 0; - - arith_util arith(m); - for (expr* e : m_asserted) - add_to_lp(e, arith); - - lp::lp_status status = m_lp->find_feasible_solution(); - return (status == lp::lp_status::INFEASIBLE) ? l_false : l_true; - } - - void assert_expr(expr* e) override { - m_asserted.push_back(e); - } - - void push() override { - m_scope_stack.push_back(m_asserted.size()); - } - - void pop(unsigned num_scopes) override { - for (unsigned i = 0; i < num_scopes && !m_scope_stack.empty(); ++i) { - unsigned sz = m_scope_stack.back(); - m_scope_stack.pop_back(); - m_asserted.shrink(sz); - } - } - - private: - lp::lpvar ensure_var(expr* e, arith_util& arith) { - if (!e) return lp::null_lpvar; - unsigned eid = e->get_id(); - lp::lpvar j; - if (m_expr2lpvar.find(eid, j)) - return j; - - rational val; - if (arith.is_numeral(e, val)) { - j = m_lp->add_var(m_ext_cnt++, true); - m_lp->add_var_bound(j, lp::EQ, lp::mpq(val)); - m_expr2lpvar.insert(eid, j); - return j; - } - expr* e1 = nullptr, *e2 = nullptr; - if (arith.is_add(e, e1, e2)) { - lp::lpvar j1 = ensure_var(e1, arith); - lp::lpvar j2 = ensure_var(e2, arith); - if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(1), j1}); - coeffs.push_back({lp::mpq(1), j2}); - j = m_lp->add_term(coeffs, m_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - if (arith.is_sub(e, e1, e2)) { - lp::lpvar j1 = ensure_var(e1, arith); - lp::lpvar j2 = ensure_var(e2, arith); - if (j1 == lp::null_lpvar || j2 == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(1), j1}); - coeffs.push_back({lp::mpq(-1), j2}); - j = m_lp->add_term(coeffs, m_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - if (arith.is_mul(e, e1, e2)) { - rational coeff; - if (arith.is_numeral(e1, coeff)) { - lp::lpvar jx = ensure_var(e2, arith); - if (jx == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(coeff), jx}); - j = m_lp->add_term(coeffs, m_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - if (arith.is_numeral(e2, coeff)) { - lp::lpvar jx = ensure_var(e1, arith); - if (jx == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(coeff), jx}); - j = m_lp->add_term(coeffs, m_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - } - if (arith.is_uminus(e, e1)) { - lp::lpvar jx = ensure_var(e1, arith); - if (jx == lp::null_lpvar) return lp::null_lpvar; - vector> coeffs; - coeffs.push_back({lp::mpq(-1), jx}); - j = m_lp->add_term(coeffs, m_ext_cnt++); - m_expr2lpvar.insert(eid, j); - return j; - } - // Leaf: fresh LP integer variable - j = m_lp->add_var(m_ext_cnt++, true); - m_expr2lpvar.insert(eid, j); - return j; - } - - void add_to_lp(expr* e, arith_util& arith) { - // Expected forms: (= lhs rhs), (<= lhs rhs), (>= lhs rhs) - expr* lhs = nullptr, *rhs = nullptr; - rational val; - - auto add_var_bound = [&](lp::lpvar j, lp::lconstraint_kind kind, lp::mpq const& bound) { - if (j != lp::null_lpvar) - m_lp->add_var_bound(j, kind, bound); - }; - - auto add_diff_bound = [&](expr* a, expr* b, lp::lconstraint_kind kind) { - lp::lpvar ja = ensure_var(a, arith); - lp::lpvar jb = ensure_var(b, arith); - if (ja == lp::null_lpvar || jb == lp::null_lpvar) return; - vector> coeffs; - coeffs.push_back({lp::mpq(1), ja}); - coeffs.push_back({lp::mpq(-1), jb}); - lp::lpvar term = m_lp->add_term(coeffs, m_ext_cnt++); - m_lp->add_var_bound(term, kind, lp::mpq(0)); - }; - - if (m.is_eq(e, lhs, rhs) && arith.is_int(lhs)) { - if (arith.is_numeral(rhs, val)) - add_var_bound(ensure_var(lhs, arith), lp::EQ, lp::mpq(val)); - else if (arith.is_numeral(lhs, val)) - add_var_bound(ensure_var(rhs, arith), lp::EQ, lp::mpq(val)); - else - add_diff_bound(lhs, rhs, lp::EQ); - } else if (arith.is_le(e, lhs, rhs)) { - if (arith.is_numeral(rhs, val)) - add_var_bound(ensure_var(lhs, arith), lp::LE, lp::mpq(val)); - else if (arith.is_numeral(lhs, val)) - add_var_bound(ensure_var(rhs, arith), lp::GE, lp::mpq(val)); - else - add_diff_bound(lhs, rhs, lp::LE); - } else if (arith.is_ge(e, lhs, rhs)) { - if (arith.is_numeral(rhs, val)) - add_var_bound(ensure_var(lhs, arith), lp::GE, lp::mpq(val)); - else if (arith.is_numeral(lhs, val)) - add_var_bound(ensure_var(rhs, arith), lp::LE, lp::mpq(val)); - else - add_diff_bound(lhs, rhs, lp::GE); - } - // other shapes are silently ignored - } - }; + // Normalize an arithmetic expression using th_rewriter. + // Simplifies e.g. (n - 1 + 1) to n, preventing unbounded growth + // of power exponents during unwind/merge cycles. + static expr_ref normalize_arith(ast_manager& m, expr* e) { + expr_ref result(e, m); + th_rewriter rw(m); + rw(result); + return result; + } // ----------------------------------------------- // str_eq @@ -290,6 +128,8 @@ namespace seq { m_int_constraints.reset(); m_char_diseqs.reset(); m_char_ranges.reset(); + m_var_lb.reset(); + m_var_ub.reset(); for (auto const& eq : parent.m_str_eq) m_str_eq.push_back(str_eq(eq.m_lhs, eq.m_rhs, eq.m_dep)); for (auto const& mem : parent.m_str_mem) @@ -307,6 +147,11 @@ namespace seq { for (auto const& kv : parent.m_char_ranges) { m_char_ranges.insert(kv.m_key, kv.m_value.clone()); } + // clone per-variable integer bounds + for (auto const& kv : parent.m_var_lb) + m_var_lb.insert(kv.m_key, kv.m_value); + for (auto const& kv : parent.m_var_ub) + m_var_ub.insert(kv.m_key, kv.m_value); } void nielsen_node::apply_subst(euf::sgraph& sg, nielsen_subst const& s) { @@ -325,6 +170,8 @@ namespace seq { mem.m_regex = sg.subst(mem.m_regex, s.m_var, s.m_replacement); mem.m_dep |= s.m_dep; } + // VarBoundWatcher: propagate bounds on s.m_var to variables in s.m_replacement + watch_var_bounds(s); } void nielsen_node::add_char_range(euf::snode* sym_char, char_set const& range) { @@ -356,6 +203,185 @@ namespace seq { existing.push_back(other); } + // ----------------------------------------------- + // nielsen_node: IntBounds methods + // mirrors ZIPT's AddLowerIntBound / AddHigherIntBound + // ----------------------------------------------- + + unsigned nielsen_node::var_lb(euf::snode* var) const { + if (!var) return 0; + unsigned v = 0; + m_var_lb.find(var->id(), v); + return v; + } + + unsigned nielsen_node::var_ub(euf::snode* var) const { + if (!var) return UINT_MAX; + unsigned v = UINT_MAX; + m_var_ub.find(var->id(), v); + return v; + } + + bool nielsen_node::add_lower_int_bound(euf::snode* var, unsigned lb, dep_tracker const& dep) { + if (!var || !var->is_var()) return false; + unsigned id = var->id(); + // check against existing lower bound + unsigned cur_lb = 0; + m_var_lb.find(id, cur_lb); + if (lb <= cur_lb) return false; // no tightening + m_var_lb.insert(id, lb); + // conflict if lb > current upper bound + unsigned cur_ub = UINT_MAX; + m_var_ub.find(id, cur_ub); + if (lb > cur_ub) { + m_is_general_conflict = true; + m_reason = backtrack_reason::arithmetic; + return true; + } + // add int_constraint: len(var) >= lb + ast_manager& m = m_graph->sg().get_manager(); + seq_util& seq = m_graph->sg().get_seq_util(); + arith_util arith(m); + expr_ref len_var(seq.str.mk_length(var->get_expr()), m); + expr_ref bound(arith.mk_int(lb), m); + m_int_constraints.push_back(int_constraint(len_var, bound, int_constraint_kind::ge, dep, m)); + return true; + } + + bool nielsen_node::add_upper_int_bound(euf::snode* var, unsigned ub, dep_tracker const& dep) { + if (!var || !var->is_var()) return false; + unsigned id = var->id(); + // check against existing upper bound + unsigned cur_ub = UINT_MAX; + m_var_ub.find(id, cur_ub); + if (ub >= cur_ub) return false; // no tightening + m_var_ub.insert(id, ub); + // conflict if current lower bound > ub + unsigned cur_lb = 0; + m_var_lb.find(id, cur_lb); + if (cur_lb > ub) { + m_is_general_conflict = true; + m_reason = backtrack_reason::arithmetic; + return true; + } + // add int_constraint: len(var) <= ub + ast_manager& m = m_graph->sg().get_manager(); + seq_util& seq = m_graph->sg().get_seq_util(); + arith_util arith(m); + expr_ref len_var(seq.str.mk_length(var->get_expr()), m); + expr_ref bound(arith.mk_int(ub), m); + m_int_constraints.push_back(int_constraint(len_var, bound, int_constraint_kind::le, dep, m)); + return true; + } + + // VarBoundWatcher: after applying substitution s, propagate bounds on s.m_var + // to variables in s.m_replacement. + // If s.m_var has bounds [lo, hi], and the replacement decomposes into + // const_len concrete chars plus a list of variable tokens, then: + // - for a single variable y: lo-const_len <= len(y) <= hi-const_len + // - for multiple variables: each gets an upper bound hi-const_len + // Mirrors ZIPT's VarBoundWatcher mechanism. + void nielsen_node::watch_var_bounds(nielsen_subst const& s) { + if (!s.m_var) return; + unsigned id = s.m_var->id(); + unsigned lo = 0, hi = UINT_MAX; + m_var_lb.find(id, lo); + m_var_ub.find(id, hi); + if (lo == 0 && hi == UINT_MAX) return; // no bounds to propagate + + // decompose replacement into constant length + variable tokens + if (!s.m_replacement) return; + euf::snode_vector tokens; + s.m_replacement->collect_tokens(tokens); + + unsigned const_len = 0; + euf::snode_vector var_tokens; + for (euf::snode* t : tokens) { + if (t->is_char() || t->is_unit()) { + ++const_len; + } else if (t->is_var()) { + var_tokens.push_back(t); + } else { + // power or unknown token: cannot propagate simply, abort + return; + } + } + + if (var_tokens.empty()) { + // all concrete: check if const_len is within [lo, hi] + if (const_len < lo || (hi != UINT_MAX && const_len > hi)) { + m_is_general_conflict = true; + m_reason = backtrack_reason::arithmetic; + } + return; + } + + if (var_tokens.size() == 1) { + euf::snode* y = var_tokens[0]; + // lo <= const_len + len(y) => len(y) >= lo - const_len (if lo > const_len) + if (lo > const_len) + add_lower_int_bound(y, lo - const_len, s.m_dep); + // const_len + len(y) <= hi => len(y) <= hi - const_len + if (hi != UINT_MAX) { + if (const_len > hi) { + m_is_general_conflict = true; + m_reason = backtrack_reason::arithmetic; + } else { + add_upper_int_bound(y, hi - const_len, s.m_dep); + } + } + } else { + // multiple variables: propagate upper bound to each + // (each variable contributes >= 0, so each <= hi - const_len) + if (hi != UINT_MAX) { + if (const_len > hi) { + m_is_general_conflict = true; + m_reason = backtrack_reason::arithmetic; + return; + } + unsigned each_ub = hi - const_len; + for (euf::snode* y : var_tokens) + add_upper_int_bound(y, each_ub, s.m_dep); + } + } + } + + // Initialize per-variable Parikh bounds from this node's regex memberships. + // For each str_mem (str ∈ regex) with bounded regex length [min_len, max_len], + // calls add_lower/upper_int_bound for the primary string variable (if str is + // a single variable) or stores a bound on the length expression otherwise. + void nielsen_node::init_var_bounds_from_mems() { + for (str_mem const& mem : m_str_mem) { + if (!mem.m_str || !mem.m_regex) continue; + unsigned min_len = 0, max_len = UINT_MAX; + m_graph->compute_regex_length_interval(mem.m_regex, min_len, max_len); + if (min_len == 0 && max_len == UINT_MAX) continue; + + // if str is a single variable, apply bounds directly + if (mem.m_str->is_var()) { + if (min_len > 0) + add_lower_int_bound(mem.m_str, min_len, mem.m_dep); + if (max_len < UINT_MAX) + add_upper_int_bound(mem.m_str, max_len, mem.m_dep); + } else { + // str is a concatenation or other term: add as general int_constraints + ast_manager& m = m_graph->sg().get_manager(); + arith_util arith(m); + expr_ref len_str = m_graph->compute_length_expr(mem.m_str); + if (min_len > 0) { + expr_ref bound(arith.mk_int(min_len), m); + m_int_constraints.push_back( + int_constraint(len_str, bound, int_constraint_kind::ge, mem.m_dep, m)); + } + if (max_len < UINT_MAX) { + expr_ref bound(arith.mk_int(max_len), m); + m_int_constraints.push_back( + int_constraint(len_str, bound, int_constraint_kind::le, mem.m_dep, m)); + } + } + } + } + void nielsen_node::apply_char_subst(euf::sgraph& sg, char_subst const& s) { if (!s.m_var) return; @@ -415,23 +441,15 @@ namespace seq { // nielsen_graph // ----------------------------------------------- - nielsen_graph::nielsen_graph(euf::sgraph& sg): - m_sg(sg) { - // Create the default LP-based solver - m_owned_solver = alloc(lp_simple_solver, sg.get_manager()); - m_solver = m_owned_solver.get(); - } - - nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver* solver): + nielsen_graph::nielsen_graph(euf::sgraph& sg, simple_solver& solver): m_sg(sg), - m_solver(solver) { - if (!m_solver) { - m_owned_solver = alloc(lp_simple_solver, sg.get_manager()); - m_solver = m_owned_solver.get(); - } + m_solver(solver), + m_parikh(alloc(seq_parikh, sg)) { + m_len_vars(sg.get_manager()) { } nielsen_graph::~nielsen_graph() { + dealloc(m_parikh); reset(); } @@ -445,6 +463,7 @@ namespace seq { nielsen_node* nielsen_graph::mk_child(nielsen_node* parent) { nielsen_node* child = mk_node(); child->clone_from(*parent); + child->m_parent_ic_count = parent->int_constraints().size(); return child; } @@ -503,6 +522,10 @@ namespace seq { m_fresh_cnt = 0; m_num_input_eqs = 0; m_num_input_mems = 0; + m_root_constraints_asserted = false; + m_mod_cnt.reset(); + m_len_var_cache.clear(); + m_len_vars.reset(); } std::ostream& nielsen_graph::display(std::ostream& out) const { @@ -595,10 +618,84 @@ namespace seq { return result; } - // Helper: render an snode as a human-readable label. + // Helper: render an arithmetic/integer expression in infix HTML notation. + // Recognises +, -, *, unary minus, numerals, str.len, and named constants; + // falls back to HTML-escaped mk_pp for anything else. + static std::string arith_expr_html(expr* e, ast_manager& m) { + if (!e) return "null"; + arith_util arith(m); + seq_util seq(m); + rational val; + if (arith.is_numeral(e, val)) + return val.to_string(); + if (!is_app(e)) { + std::ostringstream os; + os << mk_pp(e, m); + return dot_html_escape(os.str()); + } + app* a = to_app(e); + expr* x, * y; + if (arith.is_add(e)) { + std::string r = arith_expr_html(a->get_arg(0), m); + for (unsigned i = 1; i < a->get_num_args(); ++i) { + expr* arg = a->get_arg(i); + // render (+ x (- y)) as "x - y" and (+ x (- n)) as "x - n" + expr* neg_inner; + rational neg_val; + if (arith.is_uminus(arg, neg_inner)) { + r += " − "; // minus sign + r += arith_expr_html(neg_inner, m); + } else if (arith.is_numeral(arg, neg_val) && neg_val.is_neg()) { + r += " − "; // minus sign + r += (-neg_val).to_string(); + } else { + r += " + "; + r += arith_expr_html(arg, m); + } + } + return r; + } + if (arith.is_sub(e, x, y)) + return arith_expr_html(x, m) + " − " + arith_expr_html(y, m); + if (arith.is_uminus(e, x)) + return "−" + arith_expr_html(x, m); + if (arith.is_mul(e)) { + std::string r; + for (unsigned i = 0; i < a->get_num_args(); ++i) { + if (i > 0) r += " · "; // middle dot + r += arith_expr_html(a->get_arg(i), m); + } + return r; + } + if (seq.str.is_length(e, x)) { + return "|" + dot_html_escape(to_app(x)->get_decl()->get_name().str()) + "|"; + } + // named constant (fresh variable like n!0) + if (a->get_num_args() == 0) + return dot_html_escape(a->get_decl()->get_name().str()); + // fallback + std::ostringstream os; + os << mk_pp(e, m); + return dot_html_escape(os.str()); + } + + // Helper: render an int_constraint as an HTML string for DOT edge labels. + static std::string int_constraint_html(int_constraint const& ic, ast_manager& m) { + std::string r = arith_expr_html(ic.m_lhs, m); + switch (ic.m_kind) { + case int_constraint_kind::eq: r += " = "; break; + case int_constraint_kind::le: r += " ≤ "; break; // ≤ + case int_constraint_kind::ge: r += " ≥ "; break; // ≥ + } + r += arith_expr_html(ic.m_rhs, m); + return r; + } + + // Helper: render an snode as an HTML label for DOT output. // Groups consecutive s_char tokens into quoted strings, renders s_var by name, - // and falls back to mk_pp for other token kinds. - static std::string snode_label(euf::snode const* n, ast_manager& m) { + // shows s_power with superscripts, s_unit by its inner expression, + // and falls back to mk_pp (HTML-escaped) for other token kinds. + static std::string snode_label_html(euf::snode const* n, ast_manager& m) { if (!n) return "null"; seq_util seq(m); @@ -617,7 +714,7 @@ namespace seq { auto flush_chars = [&]() { if (char_acc.empty()) return; if (!first) result += " + "; - result += "\"" + char_acc + "\""; + result += "\"" + dot_html_escape(char_acc) + "\""; first = false; char_acc.clear(); }; @@ -651,11 +748,31 @@ namespace seq { if (!e) { result += "#" + std::to_string(tok->id()); } else if (tok->is_var()) { - result += to_app(e)->get_decl()->get_name().str(); + result += dot_html_escape(to_app(e)->get_decl()->get_name().str()); + } else if (tok->is_unit()) { + // seq.unit with non-literal character: show the character expression + expr* ch = to_app(e)->get_arg(0); + if (is_app(ch)) { + result += dot_html_escape(to_app(ch)->get_decl()->get_name().str()); + } else { + std::ostringstream os; + os << mk_pp(ch, m); + result += dot_html_escape(os.str()); + } + } else if (tok->is_power()) { + // seq.power(base, n): render as basen + std::string base_html = snode_label_html(tok->arg(0), m); + if (tok->arg(0)->length() > 1) + base_html = "(" + base_html + ")"; + result += base_html; + result += ""; + expr* exp_expr = to_app(e)->get_arg(1); + result += arith_expr_html(exp_expr, m); + result += ""; } else { std::ostringstream os; os << mk_pp(e, m); - result += os.str(); + result += dot_html_escape(os.str()); } } flush_chars(); @@ -668,17 +785,17 @@ namespace seq { // string equalities for (auto const& eq : m_str_eq) { if (!any) { out << "Cnstr:
"; any = true; } - out << dot_html_escape(snode_label(eq.m_lhs, m)) + out << snode_label_html(eq.m_lhs, m) << " = " - << dot_html_escape(snode_label(eq.m_rhs, m)) + << snode_label_html(eq.m_rhs, m) << "
"; } // regex memberships for (auto const& mem : m_str_mem) { if (!any) { out << "Cnstr:
"; any = true; } - out << dot_html_escape(snode_label(mem.m_str, m)) + out << snode_label_html(mem.m_str, m) << " ∈ " - << dot_html_escape(snode_label(mem.m_regex, m)) + << snode_label_html(mem.m_regex, m) << "
"; } // character ranges @@ -695,6 +812,11 @@ namespace seq { out << "?" << kv.m_key << " ≠ ?" << d->id() << "
"; } } + // integer constraints + for (auto const& ic : m_int_constraints) { + if (!any) { out << "Cnstr:
"; any = true; } + out << int_constraint_html(ic, m) << "
"; + } if (!any) out << "⊤"; // ⊤ (trivially satisfied) @@ -755,7 +877,7 @@ namespace seq { // --- nodes --- for (nielsen_node const* n : m_nodes) { - out << "\t" << n->id() << " [label=<" + out << " " << n->id() << " [label=<" << n->id() << ": "; n->display_html(out, m); // append conflict reason if this is a direct conflict @@ -779,16 +901,16 @@ namespace seq { // --- edges --- for (nielsen_node const* n : m_nodes) { for (nielsen_edge const* e : n->outgoing()) { - out << "\t" << n->id() << " -> " << e->tgt()->id() << " [label=<"; + out << " " << n->id() << " -> " << e->tgt()->id() << " [label=<"; // edge label: substitutions joined by
bool first = true; for (auto const& s : e->subst()) { if (!first) out << "
"; first = false; - out << dot_html_escape(snode_label(s.m_var, m)) + out << snode_label_html(s.m_var, m) << " → " // mapping arrow - << dot_html_escape(snode_label(s.m_replacement, m)); + << snode_label_html(s.m_replacement, m); } for (auto const& cs : e->char_substs()) { if (!first) out << "
"; @@ -796,6 +918,34 @@ namespace seq { out << "?" << cs.m_var->id() << " → ?" << cs.m_val->id(); } + // side constraints: string equalities + for (auto const* eq : e->side_str_eq()) { + if (!first) out << "
"; + first = false; + out << "" + << snode_label_html(eq->m_lhs, m) + << " = " + << snode_label_html(eq->m_rhs, m) + << ""; + } + // side constraints: regex memberships + for (auto const* mem : e->side_str_mem()) { + if (!first) out << "
"; + first = false; + out << "" + << snode_label_html(mem->m_str, m) + << " ∈ " + << snode_label_html(mem->m_regex, m) + << ""; + } + // side constraints: integer equalities/inequalities + for (auto const& ic : e->side_int()) { + if (!first) out << "
"; + first = false; + out << "" + << int_constraint_html(ic, m) + << ""; + } out << ">"; // colour @@ -812,7 +962,7 @@ namespace seq { // backedge as dotted arrow if (n->backedge()) - out << "\t" << n->id() << " -> " << n->backedge()->id() + out << " " << n->id() << " -> " << n->backedge()->id() << " [style=dotted];\n"; } @@ -834,31 +984,329 @@ namespace seq { dep_tracker const& dep, bool& changed) { euf::snode_vector tokens; non_empty_side->collect_tokens(tokens); - bool all_vars_or_opaque = true; + bool all_eliminable = true; bool has_char = false; for (euf::snode* t : tokens) { if (t->is_char()) has_char = true; - else if (!t->is_var() && t->kind() != euf::snode_kind::s_other) { - all_vars_or_opaque = false; break; + else if (!t->is_var() && !t->is_power() && t->kind() != euf::snode_kind::s_other) { + all_eliminable = false; break; } } - if (has_char || !all_vars_or_opaque) { + if (has_char || !all_eliminable) { m_is_general_conflict = true; m_reason = backtrack_reason::symbol_clash; return true; } + ast_manager& m = sg.get_manager(); + arith_util arith(m); for (euf::snode* t : tokens) { if (t->is_var()) { nielsen_subst s(t, sg.mk_empty(), dep); apply_subst(sg, s); changed = true; } + else if (t->is_power()) { + // Power equated to empty → exponent must be 0. + expr* e = t->get_expr(); + expr* pow_exp = (e && is_app(e) && to_app(e)->get_num_args() >= 2) + ? to_app(e)->get_arg(1) : nullptr; + if (pow_exp) { + expr* zero = arith.mk_numeral(rational(0), true); + m_int_constraints.push_back( + int_constraint(pow_exp, zero, int_constraint_kind::eq, dep, m)); + } + nielsen_subst s(t, sg.mk_empty(), dep); + apply_subst(sg, s); + changed = true; + } } return false; } - simplify_result nielsen_node::simplify_and_init(nielsen_graph& g) { + // ----------------------------------------------------------------------- + // Power simplification helpers (mirrors ZIPT's MergePowers, + // SimplifyPowerElim/CommPower, SimplifyPowerSingle) + // ----------------------------------------------------------------------- + + // Check if exponent b equals exponent a + diff for some rational constant diff. + // Uses syntactic matching on Z3 expression structure: pointer equality + // detects shared sub-expressions created during ConstNumUnwinding. + static bool get_const_power_diff(expr* b, expr* a, arith_util& arith, rational& diff) { + if (a == b) { diff = rational(0); return true; } + // b = (+ a k) ? + if (arith.is_add(b) && to_app(b)->get_num_args() == 2) { + rational val; + if (to_app(b)->get_arg(0) == a && arith.is_numeral(to_app(b)->get_arg(1), val)) + { diff = val; return true; } + if (to_app(b)->get_arg(1) == a && arith.is_numeral(to_app(b)->get_arg(0), val)) + { diff = val; return true; } + } + // a = (+ b k) → diff = -k + if (arith.is_add(a) && to_app(a)->get_num_args() == 2) { + rational val; + if (to_app(a)->get_arg(0) == b && arith.is_numeral(to_app(a)->get_arg(1), val)) + { diff = -val; return true; } + if (to_app(a)->get_arg(1) == b && arith.is_numeral(to_app(a)->get_arg(0), val)) + { diff = -val; return true; } + } + // b = (- a k) → diff = -k + if (arith.is_sub(b) && to_app(b)->get_num_args() == 2) { + rational val; + if (to_app(b)->get_arg(0) == a && arith.is_numeral(to_app(b)->get_arg(1), val)) + { diff = -val; return true; } + } + // a = (- b k) → diff = k + if (arith.is_sub(a) && to_app(a)->get_num_args() == 2) { + rational val; + if (to_app(a)->get_arg(0) == b && arith.is_numeral(to_app(a)->get_arg(1), val)) + { diff = val; return true; } + } + return false; + } + + // Get the base expression of a power snode. + static expr* get_power_base_expr(euf::snode* power) { + if (!power || !power->is_power()) return nullptr; + expr* e = power->get_expr(); + return (e && is_app(e) && to_app(e)->get_num_args() >= 2) ? to_app(e)->get_arg(0) : nullptr; + } + + // Get the exponent expression of a power snode. + static expr* get_power_exp_expr(euf::snode* power) { + if (!power || !power->is_power()) return nullptr; + expr* e = power->get_expr(); + return (e && is_app(e) && to_app(e)->get_num_args() >= 2) ? to_app(e)->get_arg(1) : nullptr; + } + + // Merge adjacent tokens with the same power base on one side of an equation. + // Handles: char(c) · power(c^e) → power(c^(e+1)), + // power(c^e) · char(c) → power(c^(e+1)), + // power(c^e1) · power(c^e2) → power(c^(e1+e2)). + // Returns new snode if merging happened, nullptr otherwise. + static euf::snode* merge_adjacent_powers(euf::sgraph& sg, euf::snode* side) { + if (!side || side->is_empty() || side->is_token()) + return nullptr; + + euf::snode_vector tokens; + side->collect_tokens(tokens); + if (tokens.size() < 2) return nullptr; + + ast_manager& m = sg.get_manager(); + arith_util arith(m); + seq_util seq(m); + + bool merged = false; + euf::snode_vector result; + + unsigned i = 0; + while (i < tokens.size()) { + euf::snode* tok = tokens[i]; + + // Case 1: current is a power token — absorb following same-base tokens. + // Skip at leading position (i == 0) to keep exponents small: CommPower + // cross-side cancellation works better with unmerged leading powers + // (e.g., w^k trivially ≤ 1+k, but w^(2k) vs 1+k requires k ≥ 1). + if (tok->is_power() && i > 0) { + expr* base_e = get_power_base_expr(tok); + expr* exp_acc = get_power_exp_expr(tok); + if (!base_e || !exp_acc) { result.push_back(tok); ++i; continue; } + + bool local_merged = false; + unsigned j = i + 1; + while (j < tokens.size()) { + euf::snode* next = tokens[j]; + if (next->is_power()) { + expr* nb = get_power_base_expr(next); + if (nb == base_e) { + exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(next)); + local_merged = true; ++j; continue; + } + } + if (next->is_char() && next->get_expr() == base_e) { + exp_acc = arith.mk_add(exp_acc, arith.mk_int(1)); + local_merged = true; ++j; continue; + } + break; + } + if (local_merged) { + merged = true; + expr_ref norm_exp = normalize_arith(m, exp_acc); + expr_ref new_pow(seq.str.mk_power(base_e, norm_exp), m); + result.push_back(sg.mk(new_pow)); + } else { + result.push_back(tok); + } + i = j; + continue; + } + + // Case 2: current is a char — check if next is a same-base power. + // Skip at leading position (i == 0) to avoid undoing power unwinding: + // unwind produces u · u^(n-1); merging it back to u^n creates an infinite cycle. + if (i > 0 && tok->is_char() && tok->get_expr() && i + 1 < tokens.size()) { + euf::snode* next = tokens[i + 1]; + if (next->is_power() && get_power_base_expr(next) == tok->get_expr()) { + expr* base_e = tok->get_expr(); + // Use same arg order as Case 1: add(exp, 1), not add(1, exp), + // so that merging "c · c^e" and "c^e · c" both produce add(e, 1) + // and the resulting power expression is hash-consed identically. + expr* exp_acc = arith.mk_add(get_power_exp_expr(next), arith.mk_int(1)); + unsigned j = i + 2; + while (j < tokens.size()) { + euf::snode* further = tokens[j]; + if (further->is_power() && get_power_base_expr(further) == base_e) { + exp_acc = arith.mk_add(exp_acc, get_power_exp_expr(further)); + ++j; continue; + } + if (further->is_char() && further->get_expr() == base_e) { + exp_acc = arith.mk_add(exp_acc, arith.mk_int(1)); + ++j; continue; + } + break; + } + merged = true; + expr_ref norm_exp = normalize_arith(m, exp_acc); + expr_ref new_pow(seq.str.mk_power(base_e, norm_exp), m); + result.push_back(sg.mk(new_pow)); + i = j; + continue; + } + } + + result.push_back(tok); + ++i; + } + + if (!merged) return nullptr; + + // Rebuild snode from merged token list + euf::snode* rebuilt = sg.mk_empty(); + for (unsigned k = 0; k < result.size(); ++k) { + rebuilt = (k == 0) ? result[k] : sg.mk_concat(rebuilt, result[k]); + } + return rebuilt; + } + + // Simplify constant-exponent powers: base^0 → ε, base^1 → base. + // Returns new snode if any simplification happened, nullptr otherwise. + static euf::snode* simplify_const_powers(euf::sgraph& sg, euf::snode* side) { + if (!side || side->is_empty()) return nullptr; + + euf::snode_vector tokens; + side->collect_tokens(tokens); + + ast_manager& m = sg.get_manager(); + arith_util arith(m); + + bool simplified = false; + euf::snode_vector result; + + for (euf::snode* tok : tokens) { + if (tok->is_power()) { + expr* exp_e = get_power_exp_expr(tok); + rational val; + if (exp_e && arith.is_numeral(exp_e, val)) { + if (val.is_zero()) { + // base^0 → ε (skip this token entirely) + simplified = true; + continue; + } + if (val.is_one()) { + // base^1 → base + euf::snode* base_sn = tok->arg(0); + if (base_sn) { + result.push_back(base_sn); + simplified = true; + continue; + } + } + } + } + result.push_back(tok); + } + + if (!simplified) return nullptr; + + if (result.empty()) return sg.mk_empty(); + euf::snode* rebuilt = result[0]; + for (unsigned k = 1; k < result.size(); ++k) + rebuilt = sg.mk_concat(rebuilt, result[k]); + return rebuilt; + } + + // CommPower: count how many times a power's base pattern appears in + // the prefix of the other side. Mirrors ZIPT's CommPower helper. + // Returns (count_expr, num_tokens_consumed). count_expr is nullptr + // when no complete base-pattern match is found. + static std::pair comm_power( + euf::snode* base_sn, euf::snode* side, ast_manager& m) { + arith_util arith(m); + euf::snode_vector base_tokens, side_tokens; + base_sn->collect_tokens(base_tokens); + side->collect_tokens(side_tokens); + if (base_tokens.empty() || side_tokens.empty()) + return {expr_ref(nullptr, m), 0}; + + expr* sum = nullptr; + unsigned pos = 0; + expr* last_stable_sum = nullptr; + unsigned last_stable_idx = 0; + + unsigned i = 0; + for (; i < side_tokens.size(); i++) { + euf::snode* t = side_tokens[i]; + if (pos == 0) { + last_stable_idx = i; + last_stable_sum = sum; + } + // Case 1: direct token match with base pattern + if (pos < base_tokens.size() && t == base_tokens[pos]) { + pos++; + if (pos >= base_tokens.size()) { + pos = 0; + sum = sum ? arith.mk_add(sum, arith.mk_int(1)) + : arith.mk_int(1); + } + continue; + } + // Case 2: power token whose base matches our base pattern (at pos == 0) + if (pos == 0 && t->is_power()) { + euf::snode* pow_base = t->arg(0); + if (pow_base) { + euf::snode_vector pb_tokens; + pow_base->collect_tokens(pb_tokens); + if (pb_tokens.size() == base_tokens.size()) { + bool match = true; + for (unsigned j = 0; j < pb_tokens.size() && match; j++) + match = (pb_tokens[j] == base_tokens[j]); + if (match) { + expr* pow_exp = get_power_exp_expr(t); + if (pow_exp) { + sum = sum ? arith.mk_add(sum, pow_exp) : pow_exp; + continue; + } + } + } + } + } + break; + } + // After loop: i = break index or side_tokens.size() + if (pos == 0) { + last_stable_idx = i; + last_stable_sum = sum; + } + return {expr_ref(last_stable_sum, m), last_stable_idx}; + } + + simplify_result nielsen_node::simplify_and_init(nielsen_graph& g, svector const& cur_path) { + if (m_is_extended) + return simplify_result::proceed; + euf::sgraph& sg = g.sg(); + ast_manager& m = sg.get_manager(); + arith_util arith(m); + seq_util seq(m); bool changed = true; while (changed) { @@ -941,6 +1389,192 @@ namespace seq { changed = true; } } + + // pass 2b: power-character prefix inconsistency + // (mirrors ZIPT's SimplifyDir power unit case + IsPrefixConsistent) + // When one side starts with a power u^n whose base starts with + // char a, and the other side starts with a different char b, + // the power exponent must be 0. + // Creates a single deterministic child with the substitution and + // constraint as edge labels so they appear in the graph. + // Guard: skip if we already created a child (re-entry via iterative deepening). + if (!eq.is_trivial() && eq.m_lhs && eq.m_rhs) { + euf::snode* lh = eq.m_lhs->first(); + euf::snode* rh = eq.m_rhs->first(); + for (int dir = 0; dir < 2; dir++) { + euf::snode* pow_head = (dir == 0) ? lh : rh; + euf::snode* other_head = (dir == 0) ? rh : lh; + if (!pow_head || !pow_head->is_power() || !other_head || !other_head->is_char()) + continue; + euf::snode* base_sn = pow_head->arg(0); + if (!base_sn) continue; + euf::snode* base_first = base_sn->first(); + if (!base_first || !base_first->is_char()) continue; + if (base_first->id() == other_head->id()) continue; + // base starts with different char → create child with exp=0 + power→ε + nielsen_node* child = g.mk_child(this); + nielsen_edge* e = g.mk_edge(this, child, true); + nielsen_subst s(pow_head, sg.mk_empty(), eq.m_dep); + e->add_subst(s); + child->apply_subst(sg, s); + expr* pow_exp = get_power_exp_expr(pow_head); + if (pow_exp) { + expr* zero = arith.mk_numeral(rational(0), true); + e->add_side_int(g.mk_int_constraint( + pow_exp, zero, int_constraint_kind::eq, eq.m_dep)); + } + set_extended(true); + return simplify_result::proceed; + } + } + } + + // pass 3: power simplification (mirrors ZIPT's LcpCompression + + // SimplifyPowerElim + SimplifyPowerSingle) + for (str_eq& eq : m_str_eq) { + if (eq.is_trivial() || !eq.m_lhs || !eq.m_rhs) + continue; + + // 3a: simplify constant-exponent powers (base^0 → ε, base^1 → base) + if (euf::snode* s = simplify_const_powers(sg, eq.m_lhs)) + { eq.m_lhs = s; changed = true; } + if (euf::snode* s = simplify_const_powers(sg, eq.m_rhs)) + { eq.m_rhs = s; changed = true; } + + // 3b: merge adjacent same-base tokens into combined powers + if (euf::snode* s = merge_adjacent_powers(sg, eq.m_lhs)) + { eq.m_lhs = s; changed = true; } + if (euf::snode* s = merge_adjacent_powers(sg, eq.m_rhs)) + { eq.m_rhs = s; changed = true; } + + // 3c: CommPower-based power elimination — when one side starts + // with a power w^p, count base-pattern occurrences c on the + // other side's prefix. If we can determine the ordering between + // p and c, cancel the matched portion. Mirrors ZIPT's + // SimplifyPowerElim calling CommPower. + if (!eq.m_lhs || !eq.m_rhs) continue; + for (int dir = 0; dir < 2; dir++) { + euf::snode*& pow_side = (dir == 0) ? eq.m_lhs : eq.m_rhs; + euf::snode*& other_side = (dir == 0) ? eq.m_rhs : eq.m_lhs; + if (!pow_side || !other_side) continue; + euf::snode* first_tok = pow_side->first(); + if (!first_tok || !first_tok->is_power()) continue; + euf::snode* base_sn = first_tok->arg(0); + expr* pow_exp = get_power_exp_expr(first_tok); + if (!base_sn || !pow_exp) continue; + + auto [count, consumed] = comm_power(base_sn, other_side, m); + if (!count.get() || consumed == 0) continue; + + expr_ref norm_count = normalize_arith(m, count); + bool pow_le_count = false, count_le_pow = false; + rational diff; + if (get_const_power_diff(norm_count, pow_exp, arith, diff)) { + count_le_pow = diff.is_nonpos(); + pow_le_count = diff.is_nonneg(); + } else if (!cur_path.empty()) { + pow_le_count = g.check_lp_le(pow_exp, norm_count, this, cur_path); + count_le_pow = g.check_lp_le(norm_count, pow_exp, this, cur_path); + } + if (!pow_le_count && !count_le_pow) continue; + + pow_side = sg.drop_first(pow_side); + other_side = sg.drop_left(other_side, consumed); + expr* base_e = get_power_base_expr(first_tok); + if (pow_le_count && count_le_pow) { + // equal: both cancel completely + } else if (pow_le_count) { + // pow <= count: remainder goes to other_side + expr_ref rem = normalize_arith(m, arith.mk_sub(norm_count, pow_exp)); + expr_ref pw(seq.str.mk_power(base_e, rem), m); + other_side = sg.mk_concat(sg.mk(pw), other_side); + } else { + // count <= pow: remainder goes to pow_side + expr_ref rem = normalize_arith(m, arith.mk_sub(pow_exp, norm_count)); + expr_ref pw(seq.str.mk_power(base_e, rem), m); + pow_side = sg.mk_concat(sg.mk(pw), pow_side); + } + changed = true; + break; + } + + // After any change in passes 3a–3c, restart the while loop + // before running 3d/3e. This lets 3a simplify new constant- + // exponent powers (e.g. base^1 → base created by 3c) before + // 3e attempts LP-based elimination which would introduce a + // needless fresh variable. + if (changed) + continue; + + // 3d: power prefix elimination — when both sides start with a + // power of the same base, cancel the common power prefix. + // (Subsumed by 3c for many cases, but handles same-base-power + // pairs that CommPower may miss when both leading tokens are powers.) + if (!eq.m_lhs || !eq.m_rhs) continue; + euf::snode* lh = eq.m_lhs->first(); + euf::snode* rh = eq.m_rhs->first(); + if (lh && rh && lh->is_power() && rh->is_power()) { + expr* lb = get_power_base_expr(lh); + expr* rb = get_power_base_expr(rh); + if (lb && rb && lb == rb) { + expr* lp = get_power_exp_expr(lh); + expr* rp = get_power_exp_expr(rh); + rational diff; + if (lp && rp && get_const_power_diff(rp, lp, arith, diff)) { + // rp = lp + diff (constant difference) + eq.m_lhs = sg.drop_first(eq.m_lhs); + eq.m_rhs = sg.drop_first(eq.m_rhs); + if (diff.is_pos()) { + // rp > lp: prepend base^diff to RHS + expr_ref de(arith.mk_int(diff), m); + expr_ref pw(seq.str.mk_power(lb, de), m); + eq.m_rhs = sg.mk_concat(sg.mk(pw), eq.m_rhs); + } else if (diff.is_neg()) { + // lp > rp: prepend base^(-diff) to LHS + expr_ref de(arith.mk_int(-diff), m); + expr_ref pw(seq.str.mk_power(lb, de), m); + eq.m_lhs = sg.mk_concat(sg.mk(pw), eq.m_lhs); + } + // diff == 0: both powers cancel completely + changed = true; + } + // 3e: LP-aware power prefix elimination + // When the exponent difference is non-constant, query the + // LP solver to determine which exponent is larger, and + // cancel deterministically. This avoids the branching + // that NumCmp would otherwise create, matching ZIPT's + // SimplifyPowerElim that calls node.IsLe()/node.IsLt(). + else if (lp && rp && !cur_path.empty()) { + bool lp_le_rp = g.check_lp_le(lp, rp, this, cur_path); + bool rp_le_lp = g.check_lp_le(rp, lp, this, cur_path); + if (lp_le_rp || rp_le_lp) { + expr* smaller_exp = lp_le_rp ? lp : rp; + expr* larger_exp = lp_le_rp ? rp : lp; + eq.m_lhs = sg.drop_first(eq.m_lhs); + eq.m_rhs = sg.drop_first(eq.m_rhs); + if (lp_le_rp && rp_le_lp) { + // both ≤ → equal → both cancel completely + // Record the equality constraint so the model knows lp = rp. + add_int_constraint(g.mk_int_constraint(lp, rp, int_constraint_kind::eq, eq.m_dep)); + } else { + // strictly less: create diff power d = larger - smaller ≥ 1 + expr_ref d(g.mk_fresh_int_var()); + expr_ref one_e(arith.mk_int(1), m); + expr_ref zero_e(arith.mk_int(0), m); + expr_ref d_plus_smaller(arith.mk_add(d, smaller_exp), m); + // d ≥ 1 (strict inequality) + add_int_constraint(g.mk_int_constraint(d, one_e, int_constraint_kind::ge, eq.m_dep)); + // d + smaller = larger + add_int_constraint(g.mk_int_constraint(d_plus_smaller, larger_exp, int_constraint_kind::eq, eq.m_dep)); + expr_ref pw(seq.str.mk_power(lb, d), m); + euf::snode*& larger_side = lp_le_rp ? eq.m_rhs : eq.m_lhs; + larger_side = sg.mk_concat(sg.mk(pw), larger_side); + } + changed = true; + } + } + } + } } } @@ -987,6 +1621,215 @@ namespace seq { if (wi < m_str_mem.size()) m_str_mem.shrink(wi); + // IntBounds initialization: derive per-variable Parikh length bounds from + // remaining regex memberships and add to m_int_constraints. + init_var_bounds_from_mems(); + + // pass 5: variable-character look-ahead substitution + // (mirrors ZIPT's StrEq.SimplifyFinal) + // When one side starts with a variable x and the other with chars, + // look ahead to find how many leading chars can be deterministically + // assigned to x without splitting, by checking prefix consistency. + // Guard: skip if we already created a child (re-entry via iterative deepening). + for (str_eq& eq : m_str_eq) { + if (eq.is_trivial() || !eq.m_lhs || !eq.m_rhs) + continue; + + // Orient: var_side starts with a variable, char_side starts with a char + euf::snode* var_side = nullptr; + euf::snode* char_side = nullptr; + euf::snode* lhead = eq.m_lhs->first(); + euf::snode* rhead = eq.m_rhs->first(); + if (!lhead || !rhead) continue; + + if (lhead->is_var() && rhead->is_char()) { + var_side = eq.m_lhs; + char_side = eq.m_rhs; + } else if (rhead->is_var() && lhead->is_char()) { + var_side = eq.m_rhs; + char_side = eq.m_lhs; + } else { + continue; + } + + euf::snode_vector var_toks, char_toks; + var_side->collect_tokens(var_toks); + char_side->collect_tokens(char_toks); + if (var_toks.size() <= 1 || char_toks.empty()) + continue; + + euf::snode* var_node = var_toks[0]; + SASSERT(var_node->is_var()); + + // For increasing prefix lengths i (chars from char_side), + // check if x → char_toks[0..i-1] · x would be consistent by + // comparing tokens after x on the var_side against the shifted + // char_side tokens. + // Mirrors ZIPT's SimplifyFinal loop: when prefix i is proven + // inconsistent (char clash), we continue to i+1. When we reach + // a prefix that is consistent or indeterminate, we stop. + // The final i is the substitution length: x → char_toks[0..i-1] · x. + // If ALL prefixes are inconsistent, i equals the full leading-char + // count and we still substitute (x must be at least that long). + unsigned i = 0; + for (; i < char_toks.size() && char_toks[i]->is_char(); ++i) { + unsigned j1 = 1; // index into var_toks (after the variable) + unsigned j2 = i; // index into char_toks (after copied prefix) + bool failed = false; + + while (j1 < var_toks.size() && j2 < char_toks.size()) { + euf::snode* st1 = var_toks[j1]; + euf::snode* st2 = char_toks[j2]; + + if (!st2->is_char()) + break; // can't compare against non-char — indeterminate + + if (st1->is_char()) { + if (st1->id() == st2->id()) { + j1++; + j2++; + continue; + } + failed = true; + break; + } + + if (st1->id() != var_node->id()) + break; // different variable/power — indeterminate + + // st1 is the same variable x again — compare against + // the chars we would copy (char_toks[0..i-1]) + bool inner_indet = false; + for (unsigned l = 0; j2 < char_toks.size() && l < i; ++l) { + st2 = char_toks[j2]; + if (!st2->is_char()) { + inner_indet = true; + break; + } + if (st2->id() == char_toks[l]->id()) { + j2++; + continue; + } + failed = true; + break; + } + if (inner_indet || failed) break; + j1++; + } + + if (failed) + continue; // prefix i is inconsistent — try longer + break; // prefix i is consistent/indeterminate — stop + } + + if (i == 0) + continue; + + // Divergence guard (mirrors ZIPT's HasDepCycle + power skip): + // Check whether the next named variable or power token on the + // char_side (past the char prefix) would create a dependency + // cycle or involve a power (which would cause infinite unwinding). + + // Step 1: find the first variable or power past the char prefix + euf::snode* next_var = nullptr; + for (unsigned k = i; k < char_toks.size(); ++k) { + euf::snode* t = char_toks[k]; + if (t->is_power()) { + // Power token → skip this equation (would cause divergence) + next_var = nullptr; + goto skip_eq; + } + if (t->is_var()) { + next_var = t; + break; + } + } + + // Step 2: if there is a variable, check for Nielsen dependency cycle + if (next_var) { + // Build Nielsen dependency graph: for each equation, if one side + // starts with variable x, then x depends on the first variable + // on the other side. (Mirrors ZIPT's GetNielsenDep.) + // Then check if there's a path from next_var back to var_node. + // Use a u_map to represent edges: var_id → first_dep_var_id. + u_map dep_edges; // var snode id → first dependent var snode id + for (str_eq const& other_eq : m_str_eq) { + if (other_eq.is_trivial()) continue; + if (!other_eq.m_lhs || !other_eq.m_rhs) continue; + + euf::snode* lh2 = other_eq.m_lhs->first(); + euf::snode* rh2 = other_eq.m_rhs->first(); + if (!lh2 || !rh2) continue; + + // Orient: head_var leads one side, scan other side for first var + auto record_dep = [&](euf::snode* head_var, euf::snode* other_side) { + euf::snode_vector other_toks; + other_side->collect_tokens(other_toks); + if (lh2->is_var() && rh2->is_var()) { + // Both sides start with vars: bidirectional dependency + if (!dep_edges.contains(lh2->id())) + dep_edges.insert(lh2->id(), rh2->id()); + if (!dep_edges.contains(rh2->id())) + dep_edges.insert(rh2->id(), lh2->id()); + return; + } + for (unsigned idx = 0; idx < other_toks.size(); ++idx) { + if (other_toks[idx]->is_var()) { + if (!dep_edges.contains(head_var->id())) + dep_edges.insert(head_var->id(), other_toks[idx]->id()); + return; + } + } + }; + + if (lh2->is_var() && !rh2->is_var()) + record_dep(lh2, other_eq.m_rhs); + else if (rh2->is_var() && !lh2->is_var()) + record_dep(rh2, other_eq.m_lhs); + else if (lh2->is_var() && rh2->is_var()) + record_dep(lh2, other_eq.m_rhs); + } + + // DFS from next_var to see if we can reach var_node + uint_set visited; + svector worklist; + worklist.push_back(next_var->id()); + bool cycle_found = false; + while (!worklist.empty() && !cycle_found) { + unsigned cur = worklist.back(); + worklist.pop_back(); + if (cur == var_node->id()) { + cycle_found = true; + break; + } + if (visited.contains(cur)) + continue; + visited.insert(cur); + unsigned dep_id; + if (dep_edges.find(cur, dep_id)) + worklist.push_back(dep_id); + } + if (cycle_found) + continue; + } + + { + // Create a single deterministic child with the substitution as edge label + euf::snode* prefix_sn = char_toks[0]; + for (unsigned j = 1; j < i; ++j) + prefix_sn = sg.mk_concat(prefix_sn, char_toks[j]); + euf::snode* replacement = sg.mk_concat(prefix_sn, var_node); + nielsen_subst s(var_node, replacement, eq.m_dep); + nielsen_node* child = g.mk_child(this); + nielsen_edge* e = g.mk_edge(this, child, true); + e->add_subst(s); + child->apply_subst(sg, s); + set_extended(true); + return simplify_result::proceed; + } + skip_eq:; + } + if (is_satisfied()) return simplify_result::satisfied; @@ -1024,45 +1867,96 @@ namespace seq { // nielsen_graph: search // ----------------------------------------------------------------------- + void nielsen_graph::apply_parikh_to_node(nielsen_node& node) { + if (node.m_parikh_applied) return; + node.m_parikh_applied = true; + + // Generate modular length constraints (len(str) = min_len + stride·k, etc.) + // and append them to the node's integer constraint list. + m_parikh->apply_to_node(node); + + // Lightweight feasibility pre-check: does the Parikh modular constraint + // contradict the variable's current integer bounds? If so, mark this + // node as a Parikh-image conflict immediately (avoids a solver call). + if (!node.is_currently_conflict() && m_parikh->check_parikh_conflict(node)) { + node.set_general_conflict(true); + node.set_reason(backtrack_reason::parikh_image); + } + } + + void nielsen_graph::assert_root_constraints_to_solver() { + if (m_root_constraints_asserted) return; + m_root_constraints_asserted = true; + // Constraint.Shared: assert all root-level length/Parikh constraints + // to m_solver at the base level (no push/pop). These include: + // - len(lhs) = len(rhs) for each non-trivial string equality + // - len(str) >= min_len and len(str) <= max_len for each regex membership + // - len(x) >= 0 for each variable appearing in the root constraints + // Making these visible to the solver before the DFS allows arithmetic + // pruning at every node, not just the root. + vector constraints; + generate_length_constraints(constraints); + for (auto const& lc : constraints) + m_solver.assert_expr(lc.m_expr); + } + nielsen_graph::search_result nielsen_graph::solve() { if (!m_root) return search_result::sat; - ++m_stats.m_num_solve_calls; - m_sat_node = nullptr; - m_sat_path.reset(); + try { + ++m_stats.m_num_solve_calls; + m_sat_node = nullptr; + m_sat_path.reset(); - // Iterative deepening: increment by 1 on each failure. - // m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it. - m_depth_bound = 3; - while (true) { - if (m_cancel_fn && m_cancel_fn()) { + // Constraint.Shared: assert root-level length/Parikh constraints to the + // solver at the base level, so they are visible during all feasibility checks. + assert_root_constraints_to_solver(); + + // Iterative deepening: increment by 1 on each failure. + // m_max_search_depth == 0 means unlimited; otherwise stop when bound exceeds it. + m_depth_bound = 10; + while (true) { + if (m_cancel_fn && m_cancel_fn()) { #ifdef Z3DEBUG - // Examining the Nielsen graph is probably the best way of debugging - std::string dot = to_dot(); + // Examining the Nielsen graph is probably the best way of debugging + std::string dot = to_dot(); #endif - break; + break; + } + if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) + break; + inc_run_idx(); + svector cur_path; + search_result r = search_dfs(m_root, 0, cur_path); + IF_VERBOSE(1, verbose_stream() + << " depth_bound=" << m_depth_bound << " dfs_nodes=" << m_stats.m_num_dfs_nodes + << " max_depth=" << m_stats.m_max_depth << " extensions=" << m_stats.m_num_extensions + << " arith_prune=" << m_stats.m_num_arith_infeasible << " result=" + << (r == search_result::sat ? "SAT" + : r == search_result::unsat ? "UNSAT" + : "UNKNOWN") + << "\n";); + if (r == search_result::sat) { + ++m_stats.m_num_sat; + return r; + } + if (r == search_result::unsat) { + ++m_stats.m_num_unsat; + return r; + } + // depth limit hit – double the bound and retry + m_depth_bound *= 2; } - if (m_max_search_depth > 0 && m_depth_bound > m_max_search_depth) - break; - inc_run_idx(); - svector cur_path; - search_result r = search_dfs(m_root, 0, cur_path); - if (r == search_result::sat) { - ++m_stats.m_num_sat; - return r; - } - if (r == search_result::unsat) { - ++m_stats.m_num_unsat; - return r; - } - // depth limit hit – increment bound by 1 and retry - if (m_max_search_depth > 0 && m_depth_bound >= m_max_search_depth) - break; - ++m_depth_bound; + ++m_stats.m_num_unknown; + return search_result::unknown; + } + catch(const std::exception& ex) { +#ifdef Z3DEBUG + std::string dot = to_dot(); +#endif + throw; } - ++m_stats.m_num_unknown; - return search_result::unknown; } nielsen_graph::search_result nielsen_graph::search_dfs(nielsen_node* node, unsigned depth, svector& cur_path) { @@ -1088,26 +1982,48 @@ namespace seq { node->set_eval_idx(m_run_idx); // simplify constraints (idempotent after first call) - simplify_result sr = node->simplify_and_init(*this); + simplify_result sr = node->simplify_and_init(*this, cur_path); if (sr == simplify_result::conflict) { ++m_stats.m_num_simplify_conflict; node->set_general_conflict(true); return search_result::unsat; } + + // Apply Parikh image filter: generate modular length constraints and + // perform a lightweight feasibility pre-check. The filter is guarded + // internally (m_parikh_applied) so it only runs once per node. + // Note: Parikh filtering is skipped for satisfied nodes (returned above); + // a fully satisfied node has no remaining memberships to filter. + apply_parikh_to_node(*node); + if (node->is_currently_conflict()) { + ++m_stats.m_num_simplify_conflict; + return search_result::unsat; + } + + // Assert any new int_constraints added during simplify_and_init for this + // node into the current solver scope. Constraints inherited from the parent + // (indices 0..m_parent_ic_count-1) are already present at the enclosing + // scope level; only the newly-added tail needs to be asserted here. + // Also generate per-node |LHS| = |RHS| length constraints for descendant + // equations (root constraints are already at the base level). + generate_node_length_constraints(node); + assert_node_new_int_constraints(node); + + // integer feasibility check: the solver now holds all path constraints + // incrementally; just query the solver directly + if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) { + node->set_reason(backtrack_reason::arithmetic); + ++m_stats.m_num_arith_infeasible; + return search_result::unsat; + } + if (sr == simplify_result::satisfied || node->is_satisfied()) { m_sat_node = node; m_sat_path = cur_path; return search_result::sat; } - // integer feasibility check: collect side constraints along the path - // and verify they are jointly satisfiable using the LP solver - if (!cur_path.empty() && !check_int_feasibility(node, cur_path)) { - node->set_reason(backtrack_reason::arithmetic); - return search_result::unsat; - } - // depth bound check if (depth >= m_depth_bound) return search_result::unknown; @@ -1132,9 +2048,35 @@ namespace seq { // explore children bool any_unknown = false; - for (nielsen_edge* e : node->outgoing()) { + for (nielsen_edge *e : node->outgoing()) { cur_path.push_back(e); - search_result r = search_dfs(e->tgt(), depth + 1, cur_path); + // Push a solver scope for this edge and assert its side integer + // constraints. The child's own new int_constraints will be asserted + // inside the recursive call (above). On return, pop the scope so + // that backtracking removes those assertions. + m_solver.push(); + + // Lazily compute substitution length constraints (|x| = |u|) on first + // traversal. This must happen before asserting side_int and before + // bumping mod counts, so that LHS uses the parent's counts and RHS + // uses the temporarily-bumped counts. + if (!e->len_constraints_computed()) { + add_subst_length_constraints(e); + e->set_len_constraints_computed(true); + } + + for (auto const &ic : e->side_int()) + m_solver.assert_expr(int_constraint_to_expr(ic)); + + // Bump modification counts for the child's context. + inc_edge_mod_counts(e); + + search_result r = search_dfs(e->tgt(), e->is_progress() ? depth : depth + 1, cur_path); + + // Restore modification counts on backtrack. + dec_edge_mod_counts(e); + + m_solver.pop(1); if (r == search_result::sat) return search_result::sat; cur_path.pop_back(); @@ -1213,7 +2155,7 @@ namespace seq { euf::snode* lhead = lhs_toks[0]; euf::snode* rhead = rhs_toks[0]; - // char·A = y·B → branch 1: y→ε, branch 2: y→char·fresh + // char·A = y·B → branch 1: y→ε, branch 2: y→char·y if (lhead->is_char() && rhead->is_var()) { // branch 1: y → ε (progress) { @@ -1223,12 +2165,11 @@ namespace seq { e->add_subst(s); child->apply_subst(m_sg, s); } - // branch 2: y → char·fresh (progress) + // branch 2: y → char·y (no progress) { - euf::snode* fresh = mk_fresh_var(rhead->get_sort()); - euf::snode* replacement = m_sg.mk_concat(lhead, fresh); + euf::snode* replacement = m_sg.mk_concat(lhead, rhead); nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); + nielsen_edge* e = mk_edge(node, child, false); nielsen_subst s(rhead, replacement, eq.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); @@ -1236,7 +2177,7 @@ namespace seq { return true; } - // x·A = char·B → branch 1: x→ε, branch 2: x→char·fresh + // x·A = char·B → branch 1: x→ε, branch 2: x→char·x if (rhead->is_char() && lhead->is_var()) { // branch 1: x → ε (progress) { @@ -1246,12 +2187,11 @@ namespace seq { e->add_subst(s); child->apply_subst(m_sg, s); } - // branch 2: x → char·fresh (progress) + // branch 2: x → char·x (no progress) { - euf::snode* fresh = mk_fresh_var(lhead->get_sort()); - euf::snode* replacement = m_sg.mk_concat(rhead, fresh); + euf::snode* replacement = m_sg.mk_concat(rhead, lhead); nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); + nielsen_edge* e = mk_edge(node, child, false); nielsen_subst s(lhead, replacement, eq.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); @@ -1292,22 +2232,20 @@ namespace seq { e->add_subst(s); child->apply_subst(m_sg, s); } - // child 2: x → y·x' (progress) + // child 2: x → y·x (no progress) { - euf::snode* fresh = mk_fresh_var(lhead->get_sort()); - euf::snode* replacement = m_sg.mk_concat(rhead, fresh); + euf::snode* replacement = m_sg.mk_concat(rhead, lhead); nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); + nielsen_edge* e = mk_edge(node, child, false); nielsen_subst s(lhead, replacement, eq.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); } - // child 3: y → x·y' (progress) + // child 3: y → x·y (progress) { - euf::snode* fresh = mk_fresh_var(rhead->get_sort()); - euf::snode* replacement = m_sg.mk_concat(lhead, fresh); + euf::snode* replacement = m_sg.mk_concat(lhead, rhead); nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); + nielsen_edge* e = mk_edge(node, child, false); nielsen_subst s(rhead, replacement, eq.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); @@ -1699,16 +2637,15 @@ namespace seq { bool created = false; // for each character c with non-fail derivative: - // child: x → c · fresh_var + // child: x → c · x for (euf::snode* ch : chars) { euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, ch); if (!deriv || deriv->is_fail()) continue; - euf::snode* fresh = mk_fresh_var(first->get_sort()); - euf::snode* replacement = m_sg.mk_concat(ch, fresh); + euf::snode* replacement = m_sg.mk_concat(ch, first); nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); + nielsen_edge* e = mk_edge(node, child, false); nielsen_subst s(first, replacement, mem.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); @@ -1746,6 +2683,12 @@ namespace seq { if (apply_num_cmp(node)) return ++m_stats.m_mod_num_cmp, true; + // Priority 3b: SplitPowerElim - CommPower-based branching when + // one side has a power and the other side has same-base occurrences + // but ordering is unknown. + if (apply_split_power_elim(node)) + return ++m_stats.m_mod_split_power_elim, true; + // Priority 4: ConstNumUnwinding - power vs constant: n=0 or peel if (apply_const_num_unwinding(node)) return ++m_stats.m_mod_const_num_unwinding, true; @@ -1814,7 +2757,7 @@ namespace seq { // Returns true if found, sets power, other_head, eq_out. // ----------------------------------------------------------------------- - bool nielsen_graph::find_power_vs_const(nielsen_node* node, + bool nielsen_graph::find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const { @@ -1823,11 +2766,22 @@ namespace seq { if (!eq.m_lhs || !eq.m_rhs) continue; euf::snode* lhead = eq.m_lhs->first(); euf::snode* rhead = eq.m_rhs->first(); - if (lhead && lhead->is_power() && rhead && rhead->is_char()) { - power = lhead; other_head = rhead; eq_out = &eq; return true; + // Match power vs any non-variable, non-empty token (char, unit, + // power with different base, etc.). + // Same-base power vs power is handled by NumCmp (priority 3). + // Power vs variable is handled by PowerSplit (priority 11). + // Power vs empty is handled by PowerEpsilon (priority 2). + if (lhead && lhead->is_power() && rhead && !rhead->is_var() && !rhead->is_empty()) { + power = lhead; + other_head = rhead; + eq_out = &eq; + return true; } - if (rhead && rhead->is_power() && lhead && lhead->is_char()) { - power = rhead; other_head = lhead; eq_out = &eq; return true; + if (rhead && rhead->is_power() && lhead && !lhead->is_var() && !lhead->is_empty()) { + power = rhead; + other_head = lhead; + eq_out = &eq; + return true; } } return false; @@ -1858,26 +2812,41 @@ namespace seq { // ----------------------------------------------------------------------- // Modifier: apply_power_epsilon - // For a power token u^n in an equation, branch: + // Fires only when one side of an equation is empty and the other side + // starts with a power token u^n. In that case, branch: // (1) base u → ε (base is empty, so u^n = ε) // (2) u^n → ε (the power is zero, replace power with empty) - // mirrors ZIPT's PowerEpsilonModifier + // mirrors ZIPT's PowerEpsilonModifier (which requires LHS.IsEmpty()) // ----------------------------------------------------------------------- bool nielsen_graph::apply_power_epsilon(nielsen_node* node) { - euf::snode *power = find_power_token(node); + // Match only when one equation side is empty and the other starts + // with a power. This mirrors ZIPT where PowerEpsilonModifier is + // constructed only inside the "if (LHS.IsEmpty())" branch of + // ExtendDir. When both sides are non-empty and a power faces a + // constant, ConstNumUnwinding (priority 4) handles it with both + // n=0 and n≥1 branches. + euf::snode* power = nullptr; + dep_tracker dep; + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) continue; + if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.m_lhs->is_empty() && eq.m_rhs->first() && eq.m_rhs->first()->is_power()) { + power = eq.m_rhs->first(); + dep = eq.m_dep; + break; + } + if (eq.m_rhs->is_empty() && eq.m_lhs->first() && eq.m_lhs->first()->is_power()) { + power = eq.m_lhs->first(); + dep = eq.m_dep; + break; + } + } if (!power) return false; SASSERT(power->is_power() && power->num_args() >= 1); euf::snode *base = power->arg(0); - dep_tracker dep; - for (str_eq const &eq : node->str_eqs()) { - if (!eq.is_trivial()) { - dep = eq.m_dep; - break; - } - } nielsen_node* child; nielsen_edge* e; @@ -1916,51 +2885,123 @@ namespace seq { // Look for two power tokens with the same base on opposite sides for (str_eq const& eq : node->str_eqs()) { - if (eq.is_trivial()) continue; - if (!eq.m_lhs || !eq.m_rhs) continue; + if (eq.is_trivial()) + continue; + if (!eq.m_lhs || !eq.m_rhs) + continue; euf::snode* lhead = eq.m_lhs->first(); euf::snode* rhead = eq.m_rhs->first(); if (!lhead || !rhead) continue; - if (!lhead->is_power() || !rhead->is_power()) continue; - if (lhead->num_args() < 1 || rhead->num_args() < 1) continue; + if (!lhead->is_power() || !rhead->is_power()) + continue; + if (lhead->num_args() < 1 || rhead->num_args() < 1) + continue; // same base: compare the two powers - if (lhead->arg(0)->id() != rhead->arg(0)->id()) continue; + if (lhead->arg(0)->id() != rhead->arg(0)->id()) + continue; - // Get exponent expressions (m and n from u^m and u^n) + // Skip if the exponents differ by a constant — simplify_and_init's + // power prefix elimination already handles that case. expr* exp_m = get_power_exponent(lhead); expr* exp_n = get_power_exponent(rhead); + if (!exp_m || !exp_n) + continue; + rational diff; + SASSERT(!get_const_power_diff(exp_n, exp_m, arith, diff)); // handled by simplification - // Branch 1: m <= n → cancel u^m from both sides - // Side constraint: m <= n - // After cancellation: ε·A = u^(n-m)·B + // Only add ordering constraints — do NOT use global substitution. + // The child's simplify_and_init (pass 3d/3e) will see same-base + // leading powers and cancel them using the LP-entailed ordering. + + // Branch 1 (explored first): n < m (i.e., m ≥ n + 1) + // After constraint, simplify_and_init sees m > n and cancels u^n, + // leaving u^(m-n) on LHS. { nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(lhead, m_sg.mk_empty(), eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - if (exp_m && exp_n) - e->add_side_int(mk_int_constraint(exp_m, exp_n, int_constraint_kind::le, eq.m_dep)); + expr_ref n_plus_1(arith.mk_add(exp_n, arith.mk_int(1)), m); + e->add_side_int(mk_int_constraint( + exp_m, n_plus_1, + int_constraint_kind::ge, eq.m_dep)); } - // Branch 2: n < m → cancel u^n from both sides - // Side constraint: n < m, i.e., n + 1 <= m, i.e., m >= n + 1 - // After cancellation: u^(m-n)·A = ε·B + // Branch 2 (explored second): m ≤ n (i.e., n ≥ m) { nielsen_node* child = mk_child(node); nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(rhead, m_sg.mk_empty(), eq.m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - if (exp_m && exp_n) { - expr_ref n_plus_1(arith.mk_add(exp_n, arith.mk_int(1)), m); - e->add_side_int(mk_int_constraint(exp_m, n_plus_1, int_constraint_kind::ge, eq.m_dep)); - } + e->add_side_int(mk_int_constraint( + exp_n, exp_m, + int_constraint_kind::ge, eq.m_dep)); } return true; } return false; } + // ----------------------------------------------------------------------- + // Modifier: apply_split_power_elim + // When one side starts with a power w^p, call CommPower on the other + // side to count base-pattern occurrences c. If c > 0 and the ordering + // between p and c cannot be determined, create two branches: + // Branch 1: p < c (add constraint c ≥ p + 1) + // Branch 2: c ≤ p (add constraint p ≥ c) + // After branching, simplify_and_init's CommPower pass (3c) can resolve + // the ordering deterministically and cancel the matched portion. + // mirrors ZIPT's SplitPowerElim + NumCmpModifier + // ----------------------------------------------------------------------- + + bool nielsen_graph::apply_split_power_elim(nielsen_node* node) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) continue; + if (!eq.m_lhs || !eq.m_rhs) continue; + + for (int dir = 0; dir < 2; dir++) { + euf::snode* pow_side = (dir == 0) ? eq.m_lhs : eq.m_rhs; + euf::snode* other_side = (dir == 0) ? eq.m_rhs : eq.m_lhs; + if (!pow_side || !other_side) continue; + + euf::snode* first_tok = pow_side->first(); + if (!first_tok || !first_tok->is_power()) continue; + euf::snode* base_sn = first_tok->arg(0); + expr* pow_exp = get_power_exp_expr(first_tok); + if (!base_sn || !pow_exp) continue; + + auto [count, consumed] = comm_power(base_sn, other_side, m); + if (!count.get() || consumed == 0) continue; + + expr_ref norm_count = normalize_arith(m, count); + + // Skip if ordering is already deterministic — simplify_and_init + // pass 3c should have handled it. + rational diff; + if (get_const_power_diff(norm_count, pow_exp, arith, diff)) + continue; + + // Branch 1: pow_exp < count (i.e., count ≥ pow_exp + 1) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + expr_ref pow_plus1(arith.mk_add(pow_exp, arith.mk_int(1)), m); + e->add_side_int(mk_int_constraint( + norm_count, pow_plus1, + int_constraint_kind::ge, eq.m_dep)); + } + // Branch 2: count ≤ pow_exp (i.e., pow_exp ≥ count) + { + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + e->add_side_int(mk_int_constraint( + pow_exp, norm_count, + int_constraint_kind::ge, eq.m_dep)); + } + return true; + } + } + return false; + } + // ----------------------------------------------------------------------- // Modifier: apply_const_num_unwinding // For a power token u^n facing a constant (char) head, @@ -1970,47 +3011,54 @@ namespace seq { // ----------------------------------------------------------------------- bool nielsen_graph::apply_const_num_unwinding(nielsen_node* node) { - ast_manager& m = m_sg.get_manager(); + ast_manager &m = m_sg.get_manager(); arith_util arith(m); - euf::snode* power = nullptr; - euf::snode* other_head = nullptr; - str_eq const* eq = nullptr; - if (!find_power_vs_const(node, power, other_head, eq)) + euf::snode *power = nullptr; + euf::snode *other_head = nullptr; + str_eq const *eq = nullptr; + if (!find_power_vs_non_var(node, power, other_head, eq)) return false; SASSERT(power->is_power() && power->num_args() >= 1); - euf::snode* base = power->arg(0); - expr* exp_n = get_power_exponent(power); - expr* zero = arith.mk_int(0); - expr* one = arith.mk_int(1); + euf::snode *base = power->arg(0); + expr *exp_n = get_power_exponent(power); + expr *zero = arith.mk_int(0); + expr *one = arith.mk_int(1); - // Branch 1: n = 0 → replace power with ε (progress) + // Branch 1 (explored first): n = 0 → replace power with ε (progress) // Side constraint: n = 0 - { - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(power, m_sg.mk_empty(), eq->m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - if (exp_n) - e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep)); - } + nielsen_node *child = mk_child(node); + nielsen_edge *e = mk_edge(node, child, true); + nielsen_subst s1(power, m_sg.mk_empty(), eq->m_dep); + e->add_subst(s1); + child->apply_subst(m_sg, s1); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::eq, eq->m_dep)); - // Branch 2: n >= 1 → peel one u: replace u^n with u · u^(n-1) + // Branch 2 (explored second): n >= 1 → peel one u: replace u^n with u · u^(n-1) // Side constraint: n >= 1 - // Use fresh power variable for u^(n-1) - { - euf::snode* fresh_power = mk_fresh_var(base->get_sort()); - euf::snode* replacement = m_sg.mk_concat(base, fresh_power); - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, false); - nielsen_subst s(power, replacement, eq->m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - if (exp_n) - e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep)); - } + // Create a proper nested power base^(n-1) instead of a fresh string variable. + // This preserves power structure so that simplify_and_init can merge and + // cancel adjacent same-base powers (mirroring ZIPT's SimplifyPowerUnwind). + // Explored first because the n≥1 branch is typically more productive + // for SAT instances (preserves power structure). + seq_util &seq = m_sg.get_seq_util(); + expr *power_e = power->get_expr(); + SASSERT(power_e); + expr *base_expr = to_app(power_e)->get_arg(0); + expr_ref n_minus_1 = normalize_arith(m, arith.mk_sub(exp_n, one)); + expr_ref nested_pow(seq.str.mk_power(base_expr, n_minus_1), m); + euf::snode *nested_power_snode = m_sg.mk(nested_pow); + + euf::snode *replacement = m_sg.mk_concat(base, nested_power_snode); + child = mk_child(node); + e = mk_edge(node, child, false); + nielsen_subst s2(power, replacement, eq->m_dep); + e->add_subst(s2); + child->apply_subst(m_sg, s2); + if (exp_n) + e->add_side_int(mk_int_constraint(exp_n, one, int_constraint_kind::ge, eq->m_dep)); return true; } @@ -2139,10 +3187,47 @@ namespace seq { bool nielsen_graph::fire_gpower_intro( nielsen_node* node, str_eq const& eq, - euf::snode* var, euf::snode_vector const& ground_prefix) { + euf::snode* var, euf::snode_vector const& ground_prefix_orig) { ast_manager& m = m_sg.get_manager(); arith_util arith(m); seq_util& seq = m_sg.get_seq_util(); + + // Compress repeated patterns in the ground prefix (mirrors ZIPT's LcpCompressionFull). + // E.g., [a,b,a,b] has minimal period 2 → use [a,b] as the power base. + // This ensures we use the minimal repeating unit: x = (ab)^n · suffix + // instead of x = (abab)^n · suffix. + euf::snode_vector ground_prefix; + unsigned n = ground_prefix_orig.size(); + unsigned period = n; + for (unsigned p = 1; p <= n / 2; ++p) { + if (n % p != 0) continue; + bool match = true; + for (unsigned i = p; i < n && match; ++i) + match = (ground_prefix_orig[i]->id() == ground_prefix_orig[i % p]->id()); + if (match) { period = p; break; } + } + for (unsigned i = 0; i < period; ++i) + ground_prefix.push_back(ground_prefix_orig[i]); + + // If the compressed prefix is a single power snode, unwrap it to use + // its base tokens, avoiding nested powers. + // E.g., [(ab)^3] → [a, b] so we get (ab)^n instead of ((ab)^3)^n. + // (mirrors ZIPT: if b.Length == 1 && b is PowerToken pt => b = pt.Base) + if (ground_prefix.size() == 1 && ground_prefix[0]->is_power()) { + expr* base_e = get_power_base_expr(ground_prefix[0]); + if (base_e) { + euf::snode* base_sn = m_sg.mk(base_e); + if (base_sn) { + euf::snode_vector base_toks; + base_sn->collect_tokens(base_toks); + if (!base_toks.empty()) { + ground_prefix.reset(); + ground_prefix.append(base_toks); + } + } + } + } + unsigned base_len = ground_prefix.size(); // Build base string expression from ground prefix tokens. @@ -2165,17 +3250,46 @@ namespace seq { expr* zero = arith.mk_int(0); - // Generate one child per proper prefix of the base, mirroring ZIPT's - // GetDecompose. For base [t0, t1, ..., t_{k-1}], the proper prefixes - // are: ε, t0, t0·t1, ..., t0·t1·...·t_{k-2}. - // Child i substitutes var → base^n · prefix_i with n >= 0. - for (unsigned pfx_len = 0; pfx_len < base_len; ++pfx_len) { - euf::snode* replacement = power_snode; - if (pfx_len > 0) { - euf::snode* prefix_snode = ground_prefix[0]; - for (unsigned j = 1; j < pfx_len; ++j) - prefix_snode = m_sg.mk_concat(prefix_snode, ground_prefix[j]); - replacement = m_sg.mk_concat(power_snode, prefix_snode); + // Generate children mirroring ZIPT's GetDecompose: + // P(t0 · t1 · ... · t_{k-1}) = P(t0) | t0·P(t1) | ... | t0·...·t_{k-2}·P(t_{k-1}) + // For char tokens P(c) = {ε}, for power tokens P(u^m) = {u^m', 0 ≤ m' ≤ m}. + // Child at position i substitutes var → base^n · t0·...·t_{i-1} · P(t_i). + for (unsigned i = 0; i < base_len; ++i) { + euf::snode* tok = ground_prefix[i]; + + // Skip char position when preceding token is a power: + // The power case at i-1 with 0 ≤ m' ≤ exp already covers m' = exp, + // which produces the same result. Using the original exponent here + // creates a rigid coupling that causes cycling. + if (!tok->is_power() && i > 0 && ground_prefix[i - 1]->is_power()) + continue; + + // Build full-token prefix: ground_prefix[0..i-1] + euf::snode* prefix_sn = nullptr; + for (unsigned j = 0; j < i; ++j) + prefix_sn = (j == 0) ? ground_prefix[0] : m_sg.mk_concat(prefix_sn, ground_prefix[j]); + + euf::snode* replacement; + expr_ref fresh_m(m); + + if (tok->is_power()) { + // Token is a power u^exp: use fresh m' with 0 ≤ m' ≤ exp + expr* inner_exp = get_power_exponent(tok); + expr* inner_base = get_power_base_expr(tok); + if (inner_exp && inner_base) { + fresh_m = mk_fresh_int_var(); + expr_ref partial_pow(seq.str.mk_power(inner_base, fresh_m), m); + euf::snode* partial_sn = m_sg.mk(partial_pow); + euf::snode* suffix_sn = prefix_sn ? m_sg.mk_concat(prefix_sn, partial_sn) : partial_sn; + replacement = m_sg.mk_concat(power_snode, suffix_sn); + } else { + // Fallback: use full token (shouldn't normally happen) + euf::snode* suffix_sn = prefix_sn ? m_sg.mk_concat(prefix_sn, tok) : tok; + replacement = m_sg.mk_concat(power_snode, suffix_sn); + } + } else { + // Token is a char: P(char) = ε, suffix = just the prefix + replacement = prefix_sn ? m_sg.mk_concat(power_snode, prefix_sn) : power_snode; } nielsen_node* child = mk_child(node); @@ -2186,6 +3300,15 @@ namespace seq { // Side constraint: n >= 0 e->add_side_int(mk_int_constraint(fresh_n, zero, int_constraint_kind::ge, eq.m_dep)); + + // Side constraints for fresh partial exponent + if (fresh_m.get()) { + expr* inner_exp = get_power_exponent(tok); + // m' >= 0 + e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq.m_dep)); + // m' <= inner_exp + e->add_side_int(mk_int_constraint(inner_exp, fresh_m, int_constraint_kind::ge, eq.m_dep)); + } } return true; @@ -2224,7 +3347,7 @@ namespace seq { created = true; } - // Branch 2+: for each minterm m_i, x → ?c · x' + // Branch 2+: for each minterm m_i, x → ?c · x // where ?c is a symbolic char constrained by the minterm for (euf::snode* mt : minterms) { if (mt->is_fail()) continue; @@ -2234,16 +3357,22 @@ namespace seq { euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, mt); if (deriv && deriv->is_fail()) continue; - euf::snode* fresh_var = mk_fresh_var(first->get_sort()); euf::snode* fresh_char = mk_fresh_char_var(); - euf::snode* replacement = m_sg.mk_concat(fresh_char, fresh_var); + euf::snode* replacement = m_sg.mk_concat(fresh_char, first); nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); + nielsen_edge* e = mk_edge(node, child, false); nielsen_subst s(first, replacement, mem.m_dep); e->add_subst(s); child->apply_subst(m_sg, s); - // TODO: derive char_set from minterm and add as range constraint - // child->add_char_range(fresh_char, minterm_to_char_set(mt)); + // Constrain fresh_char to the character class of this minterm. + // This is the key Parikh pruning step: when x → ?c · x' is + // generated from minterm m_i, ?c must belong to the character + // class described by m_i so that str ∈ derivative(R, m_i). + if (mt->get_expr()) { + char_set cs = m_parikh->minterm_to_char_set(mt->get_expr()); + if (!cs.is_empty()) + child->add_char_range(fresh_char, cs); + } created = true; } @@ -2278,29 +3407,82 @@ namespace seq { expr* exp_n = get_power_exponent(power); expr* zero = arith.mk_int(0); - // Branch 1: x = base^m · prefix where 0 <= m < n - // Side constraints: m >= 0, m < n (i.e., n >= m + 1) + // Branch 1: enumerate all decompositions of the base. + // x = base^m · prefix_i(base) where 0 <= m < n + // Uses the same GetDecompose pattern as fire_gpower_intro. { + euf::snode_vector base_toks; + base->collect_tokens(base_toks); + unsigned base_len = base_toks.size(); + expr* base_expr = get_power_base_expr(power); + if (!base_expr || base_len == 0) + return false; + expr_ref fresh_m = mk_fresh_int_var(); - euf::snode* fresh_power = mk_fresh_var(base->get_sort()); // represents base^m - euf::snode* fresh_suffix = mk_fresh_var(base->get_sort()); // represents prefix(base) - euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix); - nielsen_node* child = mk_child(node); - nielsen_edge* e = mk_edge(node, child, true); - nielsen_subst s(var_head, replacement, eq->m_dep); - e->add_subst(s); - child->apply_subst(m_sg, s); - // m >= 0 - e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq->m_dep)); - // m < n ⟺ n >= m + 1 - if (exp_n) { - expr_ref m_plus_1(arith.mk_add(fresh_m, arith.mk_int(1)), m); - e->add_side_int(mk_int_constraint(exp_n, m_plus_1, int_constraint_kind::ge, eq->m_dep)); + expr_ref power_m_expr(seq.str.mk_power(base_expr, fresh_m), m); + euf::snode* power_m_sn = m_sg.mk(power_m_expr); + if (!power_m_sn) + return false; + + for (unsigned i = 0; i < base_len; ++i) { + euf::snode* tok = base_toks[i]; + + // Skip char position when preceding token is a power: + // the power case at i-1 with 0 <= m' <= exp already covers m' = exp. + if (!tok->is_power() && i > 0 && base_toks[i - 1]->is_power()) + continue; + + // Build full-token prefix: base_toks[0..i-1] + euf::snode* prefix_sn = nullptr; + for (unsigned j = 0; j < i; ++j) + prefix_sn = (j == 0) ? base_toks[0] : m_sg.mk_concat(prefix_sn, base_toks[j]); + + euf::snode* replacement; + expr_ref fresh_inner_m(m); + + if (tok->is_power()) { + // Token is a power u^exp: decompose with fresh m', 0 <= m' <= exp + expr* inner_exp = get_power_exponent(tok); + expr* inner_base_e = get_power_base_expr(tok); + if (inner_exp && inner_base_e) { + fresh_inner_m = mk_fresh_int_var(); + expr_ref partial_pow(seq.str.mk_power(inner_base_e, fresh_inner_m), m); + euf::snode* partial_sn = m_sg.mk(partial_pow); + euf::snode* suffix_sn = prefix_sn ? m_sg.mk_concat(prefix_sn, partial_sn) : partial_sn; + replacement = m_sg.mk_concat(power_m_sn, suffix_sn); + } else { + euf::snode* suffix_sn = prefix_sn ? m_sg.mk_concat(prefix_sn, tok) : tok; + replacement = m_sg.mk_concat(power_m_sn, suffix_sn); + } + } else { + // P(char) = ε, suffix is just the prefix + replacement = prefix_sn ? m_sg.mk_concat(power_m_sn, prefix_sn) : power_m_sn; + } + + nielsen_node* child = mk_child(node); + nielsen_edge* e = mk_edge(node, child, true); + nielsen_subst s(var_head, replacement, eq->m_dep); + e->add_subst(s); + child->apply_subst(m_sg, s); + + // m >= 0 + e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq->m_dep)); + // m < n ⟺ n >= m + 1 + if (exp_n) { + expr_ref m_plus_1(arith.mk_add(fresh_m, arith.mk_int(1)), m); + e->add_side_int(mk_int_constraint(exp_n, m_plus_1, int_constraint_kind::ge, eq->m_dep)); + } + + // Inner power constraints: 0 <= m' <= inner_exp + if (fresh_inner_m.get()) { + expr* inner_exp = get_power_exponent(tok); + e->add_side_int(mk_int_constraint(fresh_inner_m, zero, int_constraint_kind::ge, eq->m_dep)); + e->add_side_int(mk_int_constraint(inner_exp, fresh_inner_m, int_constraint_kind::ge, eq->m_dep)); + } } } // Branch 2: x = u^n · x' (variable extends past full power, non-progress) - // Side constraint: n >= 0 { euf::snode* fresh_tail = mk_fresh_var(base->get_sort()); // Peel one base unit (approximation of extending past the power) @@ -2310,8 +3492,6 @@ namespace seq { nielsen_subst s(var_head, replacement, eq->m_dep); e->add_subst(s); child->apply_subst(m_sg, s); - if (exp_n) - e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::ge, eq->m_dep)); } return true; @@ -2417,7 +3597,18 @@ namespace seq { return expr_ref(arith.mk_add(left, right), m); } - // for variables and other terms, use symbolic (str.len expr) + // For variables: consult modification counter. + // mod_count > 0 means the variable has been reused by a non-eliminating + // substitution; use a fresh integer variable to avoid the circular + // |x| = 1 + |x| problem. + if (n->is_var()) { + unsigned mc = 0; + m_mod_cnt.find(n->id(), mc); + if (mc > 0) + return get_or_create_len_var(n, mc); + } + + // for variables at mod_count 0 and other terms, use symbolic (str.len expr) return expr_ref(seq.str.mk_length(n->get_expr()), m); } @@ -2548,41 +3739,238 @@ namespace seq { return expr_ref(m.mk_true(), m); } - void nielsen_graph::collect_path_int_constraints(nielsen_node* node, - svector const& cur_path, - vector& out) { - // collect from root node - if (m_root) { - for (auto const& ic : m_root->int_constraints()) - out.push_back(ic); + // ----------------------------------------------------------------------- + // Modification counter: substitution length tracking + // mirrors ZIPT's LocalInfo.CurrentModificationCnt + NielsenEdge.IncModCount/DecModCount + // + NielsenNode constructor length assertion logic + // ----------------------------------------------------------------------- + + expr_ref nielsen_graph::get_or_create_len_var(euf::snode* var, unsigned mod_count) { + ast_manager& m = m_sg.get_manager(); + SASSERT(var && var->is_var()); + SASSERT(mod_count > 0); + + auto key = std::make_pair(var->id(), mod_count); + auto it = m_len_var_cache.find(key); + if (it != m_len_var_cache.end()) + return expr_ref(it->second, m); + + // Create a fresh integer variable: len__ + arith_util arith(m); + std::string name = "len!" + std::to_string(var->id()) + "!" + std::to_string(mod_count); + expr_ref fresh(m.mk_fresh_const(name.c_str(), arith.mk_int()), m); + m_len_vars.push_back(fresh); + m_len_var_cache.insert({key, fresh.get()}); + return fresh; + } + + void nielsen_graph::add_subst_length_constraints(nielsen_edge* e) { + auto const& substs = e->subst(); + + // Quick check: any non-eliminating substitutions? + bool has_non_elim = false; + for (auto const& s : substs) + if (!s.is_eliminating()) { has_non_elim = true; break; } + if (!has_non_elim) return; + + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + + // Step 1: Compute LHS (|x|) for each non-eliminating substitution + // using current m_mod_cnt (before bumping). + // Also assert |x|_k >= 0 (mirrors ZIPT's NielsenNode constructor line 172). + svector> lhs_exprs; + for (unsigned i = 0; i < substs.size(); ++i) { + auto const& s = substs[i]; + if (s.is_eliminating()) continue; + SASSERT(s.m_var && s.m_var->is_var()); + expr_ref lhs = compute_length_expr(s.m_var); + lhs_exprs.push_back({i, lhs.get()}); + // Assert LHS >= 0 + e->add_side_int(int_constraint(lhs, arith.mk_int(0), + int_constraint_kind::ge, s.m_dep, m)); } - // collect from edges along the path and their target nodes - for (nielsen_edge* e : cur_path) { - for (auto const& ic : e->side_int()) - out.push_back(ic); - if (e->tgt()) { - for (auto const& ic : e->tgt()->int_constraints()) - out.push_back(ic); + // Step 2: Bump mod counts for all non-eliminating variables at once. + for (auto const& s : substs) { + if (s.is_eliminating()) continue; + unsigned id = s.m_var->id(); + unsigned prev = 0; + m_mod_cnt.find(id, prev); + m_mod_cnt.insert(id, prev + 1); + } + + // Step 3: Compute RHS (|u|) with bumped mod counts and add |x| = |u|. + for (auto const& p : lhs_exprs) { + unsigned idx = p.first; + expr* lhs_expr = p.second; + auto const& s = substs[idx]; + expr_ref rhs = compute_length_expr(s.m_replacement); + e->add_side_int(int_constraint(lhs_expr, rhs, int_constraint_kind::eq, + s.m_dep, m)); + + // Assert non-negativity for any fresh length variables in the RHS + // (variables at mod_count > 0 that are newly created). + euf::snode_vector tokens; + s.m_replacement->collect_tokens(tokens); + for (euf::snode* tok : tokens) { + if (tok->is_var()) { + unsigned mc = 0; + m_mod_cnt.find(tok->id(), mc); + if (mc > 0) { + expr_ref len_var = get_or_create_len_var(tok, mc); + e->add_side_int(int_constraint(len_var, arith.mk_int(0), + int_constraint_kind::ge, s.m_dep, m)); + } + } + } + } + + // Step 4: Restore mod counts (temporary bump for computing RHS only). + for (auto const& s : substs) { + if (s.is_eliminating()) continue; + unsigned id = s.m_var->id(); + unsigned prev = 0; + m_mod_cnt.find(id, prev); + SASSERT(prev >= 1); + if (prev <= 1) + m_mod_cnt.remove(id); + else + m_mod_cnt.insert(id, prev - 1); + } + } + + void nielsen_graph::inc_edge_mod_counts(nielsen_edge* e) { + for (auto const& s : e->subst()) { + if (s.is_eliminating()) continue; + unsigned id = s.m_var->id(); + unsigned prev = 0; + m_mod_cnt.find(id, prev); + m_mod_cnt.insert(id, prev + 1); + } + } + + void nielsen_graph::dec_edge_mod_counts(nielsen_edge* e) { + for (auto const& s : e->subst()) { + if (s.is_eliminating()) continue; + unsigned id = s.m_var->id(); + unsigned prev = 0; + m_mod_cnt.find(id, prev); + SASSERT(prev >= 1); + if (prev <= 1) + m_mod_cnt.remove(id); + else + m_mod_cnt.insert(id, prev - 1); + } + } + + void nielsen_graph::assert_node_new_int_constraints(nielsen_node* node) { + // Assert only the int_constraints that are new to this node (beyond those + // inherited from its parent via clone_from). The parent's constraints are + // already present in the enclosing solver scope; asserting them again would + // be redundant (though harmless). This is called by search_dfs right after + // simplify_and_init, which is where new constraints are produced. + for (unsigned i = node->m_parent_ic_count; i < node->int_constraints().size(); ++i) + m_solver.assert_expr(int_constraint_to_expr(node->int_constraints()[i])); + } + + void nielsen_graph::generate_node_length_constraints(nielsen_node* node) { + if (node->m_node_len_constraints_generated) + return; + node->m_node_len_constraints_generated = true; + + // Skip the root node — its length constraints are already asserted + // at the base solver level by assert_root_constraints_to_solver(). + if (node == m_root) + return; + + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); + uint_set seen_vars; + + for (str_eq const& eq : node->str_eqs()) { + if (eq.is_trivial()) + continue; + + expr_ref len_lhs = compute_length_expr(eq.m_lhs); + expr_ref len_rhs = compute_length_expr(eq.m_rhs); + node->add_int_constraint(int_constraint(len_lhs, len_rhs, + int_constraint_kind::eq, eq.m_dep, m)); + + // non-negativity for each variable (mod-count-aware) + euf::snode_vector tokens; + eq.m_lhs->collect_tokens(tokens); + eq.m_rhs->collect_tokens(tokens); + for (euf::snode* tok : tokens) { + if (tok->is_var() && !seen_vars.contains(tok->id())) { + seen_vars.insert(tok->id()); + expr_ref len_var = compute_length_expr(tok); + node->add_int_constraint(int_constraint(len_var, arith.mk_int(0), + int_constraint_kind::ge, eq.m_dep, m)); + } + } + } + + // Parikh interval bounds for regex memberships at this node + seq_util& seq = m_sg.get_seq_util(); + for (str_mem const& mem : node->str_mems()) { + expr* re_expr = mem.m_regex->get_expr(); + if (!re_expr || !seq.is_re(re_expr)) + continue; + + unsigned min_len = 0, max_len = UINT_MAX; + compute_regex_length_interval(mem.m_regex, min_len, max_len); + + expr_ref len_str = compute_length_expr(mem.m_str); + + if (min_len > 0) { + node->add_int_constraint(int_constraint(len_str, arith.mk_int(min_len), + int_constraint_kind::ge, mem.m_dep, m)); + } + if (max_len < UINT_MAX) { + node->add_int_constraint(int_constraint(len_str, arith.mk_int(max_len), + int_constraint_kind::le, mem.m_dep, m)); + } + + // non-negativity for string-side variables + euf::snode_vector tokens; + mem.m_str->collect_tokens(tokens); + for (euf::snode* tok : tokens) { + if (tok->is_var() && !seen_vars.contains(tok->id())) { + seen_vars.insert(tok->id()); + expr_ref len_var = compute_length_expr(tok); + node->add_int_constraint(int_constraint(len_var, arith.mk_int(0), + int_constraint_kind::ge, mem.m_dep, m)); + } } } } bool nielsen_graph::check_int_feasibility(nielsen_node* node, svector const& cur_path) { - vector constraints; - collect_path_int_constraints(node, cur_path, constraints); + // In incremental mode the solver already holds all path constraints + // (root length constraints at the base level, edge side_int and node + // int_constraints pushed/popped as the DFS descends and backtracks). + // A plain check() is therefore sufficient. + return m_solver.check() != l_false; + } - if (constraints.empty()) - return true; // no integer constraints, trivially feasible + bool nielsen_graph::check_lp_le(expr* lhs, expr* rhs, + nielsen_node* node, + svector const& cur_path) { + ast_manager& m = m_sg.get_manager(); + arith_util arith(m); - SASSERT(m_solver); - m_solver->push(); - for (auto const& ic : constraints) - m_solver->assert_expr(int_constraint_to_expr(ic)); + // The solver already holds all path constraints incrementally. + // Temporarily add NOT(lhs <= rhs), i.e. lhs >= rhs + 1. + // If that is unsat, then lhs <= rhs is entailed. + expr_ref one(arith.mk_int(1), m); + expr_ref rhs_plus_one(arith.mk_add(rhs, one), m); - lbool result = m_solver->check(); - m_solver->pop(1); - return result != l_false; + m_solver.push(); + m_solver.assert_expr(arith.mk_ge(lhs, rhs_plus_one)); + lbool result = m_solver.check(); + m_solver.pop(1); + return result == l_false; } int_constraint nielsen_graph::mk_int_constraint(expr* lhs, expr* rhs, @@ -2605,5 +3993,41 @@ namespace seq { return expr_ref(m.mk_fresh_const(name.c_str(), arith.mk_int()), m); } + bool nielsen_graph::solve_sat_path_ints(model_ref& mdl) { + mdl = nullptr; + if (m_sat_path.empty() && (!m_sat_node || m_sat_node->int_constraints().empty())) + return false; + + // Re-assert the sat-path constraints into m_solver (which holds only root-level + // constraints at this point) and extract a model via the injected simple_solver. + // The sat path was found by DFS which verified feasibility at every step via + // check_int_feasibility, so all constraints along this path are jointly satisfiable + // and do not require incremental skipping. + IF_VERBOSE(1, verbose_stream() << "solve_sat_path_ints: sat_path length=" << m_sat_path.size() << "\n";); + m_solver.push(); + for (nielsen_edge* e : m_sat_path) + for (auto const& ic : e->side_int()) + m_solver.assert_expr(int_constraint_to_expr(ic)); + if (m_sat_node) + for (auto const& ic : m_sat_node->int_constraints()) + m_solver.assert_expr(int_constraint_to_expr(ic)); + lbool result = m_solver.check(); + IF_VERBOSE(1, verbose_stream() << "solve_sat_path_ints result: " << result << "\n";); + if (result == l_true) { + m_solver.get_model(mdl); + IF_VERBOSE(1, if (mdl) { + ast_manager& m = m_sg.get_manager(); + verbose_stream() << " int_model:\n"; + for (unsigned i = 0; i < mdl->get_num_constants(); ++i) { + func_decl* fd = mdl->get_constant(i); + expr* val = mdl->get_const_interp(fd); + if (val) verbose_stream() << " " << fd->get_name() << " = " << mk_bounded_pp(val, m, 3) << "\n"; + } + }); + } + m_solver.pop(1); + return mdl.get() != nullptr; + } + } diff --git a/src/smt/seq/seq_nielsen.h b/src/smt/seq/seq_nielsen.h index 8af96d962..ada7ee5b5 100644 --- a/src/smt/seq/seq_nielsen.h +++ b/src/smt/seq/seq_nielsen.h @@ -164,9 +164,10 @@ Abstract: has no ConstraintsIntEq or ConstraintsIntLe in nielsen_node. - IntBounds / VarBoundWatcher: per-variable integer interval bounds and the watcher mechanism that reruns bound propagation when a string variable is - substituted are not ported. + substituted — PORTED as nielsen_node::{add_lower_int_bound, + add_upper_int_bound, watch_var_bounds, init_var_bounds_from_mems}. - AddLowerIntBound() / AddHigherIntBound(): incremental interval tightening - with restart signaling is not ported. + — PORTED as the above add_lower/upper_int_bound methods. Character-level handling: - CharSubst: character-level variable substitution (symbolic char -> concrete @@ -214,7 +215,10 @@ Abstract: - GetSignature(): the constraint-pair signature used for subsumption candidate matching is not ported. - Constraint.Shared: the flag indicating whether a constraint should be - forwarded to the outer solver is not ported. + forwarded to the outer solver — PORTED as + nielsen_graph::assert_root_constraints_to_solver(), called at the start + of solve() to make all root-level length/Parikh constraints immediately + visible to m_solver. - Interpretation: the model-extraction class mapping string and integer variables to concrete values is not ported. ----------------------------------------------------------------------- @@ -237,6 +241,8 @@ Author: #include "ast/seq_decl_plugin.h" #include "ast/euf/euf_sgraph.h" #include +#include +#include "model/model.h" namespace seq { @@ -244,14 +250,15 @@ namespace seq { class nielsen_node; class nielsen_edge; class nielsen_graph; + class seq_parikh; // Parikh image filter (see seq_parikh.h) /** * Abstract interface for an incremental solver used by nielsen_graph * to check integer/arithmetic feasibility of path constraints. * * Users of nielsen_graph can wrap smt::kernel or any other solver - * to serve as the arithmetic back-end. The default back-end is an - * lp_simple_solver built on top of lp::lar_solver. + * to serve as the arithmetic back-end. When no solver is provided, + * integer feasibility checks are skipped (optimistically assumed feasible). */ class simple_solver { public: @@ -260,6 +267,7 @@ namespace seq { virtual void assert_expr(expr* e) = 0; virtual void push() = 0; virtual void pop(unsigned num_scopes) = 0; + virtual void get_model(model_ref& mdl) { mdl = nullptr; } }; // simplification result for constraint processing @@ -368,6 +376,7 @@ namespace seq { }; // string variable substitution: var -> replacement + // (can be used as well to substitute arbitrary nodes - like powers) // mirrors ZIPT's Subst struct nielsen_subst { euf::snode* m_var; @@ -448,6 +457,7 @@ namespace seq { ptr_vector m_side_str_mem; // side constraints: regex memberships vector m_side_int; // side constraints: integer equalities/inequalities bool m_is_progress; // does this edge represent progress? + bool m_len_constraints_computed = false; // lazily computed substitution length constraints public: nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress); @@ -472,6 +482,9 @@ namespace seq { bool is_progress() const { return m_is_progress; } + bool len_constraints_computed() const { return m_len_constraints_computed; } + void set_len_constraints_computed(bool v) { m_len_constraints_computed = v; } + bool operator==(nielsen_edge const& other) const { return m_src == other.m_src && m_tgt == other.m_tgt; } @@ -490,6 +503,12 @@ namespace seq { vector m_str_mem; // regex memberships vector m_int_constraints; // integer equalities/inequalities (mirrors ZIPT's IntEq/IntLe) + // per-variable integer bounds for len(var). Mirrors ZIPT's IntBounds. + // key: snode id of the string variable. + // default lb = 0 (unrestricted); default ub = UINT_MAX (unrestricted). + u_map m_var_lb; // lower bound: lb <= len(var) + u_map m_var_ub; // upper bound: len(var) <= ub + // character constraints (mirrors ZIPT's DisEqualities and CharRanges) // key: snode id of the s_unit symbolic character u_map> m_char_diseqs; // ?c != {?d, ?e, ...} @@ -505,10 +524,20 @@ namespace seq { bool m_is_extended = false; backtrack_reason m_reason = backtrack_reason::unevaluated; bool m_is_progress = false; + bool m_node_len_constraints_generated = false; // true after generate_node_length_constraints runs // evaluation index for run tracking unsigned m_eval_idx = 0; + // Parikh filter: set to true once apply_parikh_to_node has been applied + // to this node. Prevents duplicate constraint generation across DFS runs. + bool m_parikh_applied = false; + // number of int_constraints inherited from the parent node at clone time. + // int_constraints[0..m_parent_ic_count) are already asserted at the + // parent's solver scope; only [m_parent_ic_count..end) need to be + // asserted when this node's solver scope is entered. + unsigned m_parent_ic_count = 0; + public: nielsen_node(nielsen_graph* graph, unsigned id); @@ -528,6 +557,28 @@ namespace seq { vector const& int_constraints() const { return m_int_constraints; } vector& int_constraints() { return m_int_constraints; } + // IntBounds: tighten the lower bound for len(var). + // Returns true if the bound was tightened (lb > current lower bound). + // When tightened without conflict, adds an int_constraint len(var) >= lb. + // When lb > current upper bound, sets arithmetic conflict (no constraint added) + // and still returns true (the bound value changed). Check is_general_conflict() + // separately to distinguish tightening-with-conflict from normal tightening. + // Mirrors ZIPT's AddLowerIntBound(). + bool add_lower_int_bound(euf::snode* var, unsigned lb, dep_tracker const& dep); + + // IntBounds: tighten the upper bound for len(var). + // Returns true if the bound was tightened (ub < current upper bound). + // When tightened without conflict, adds an int_constraint len(var) <= ub. + // When current lower bound > ub, sets arithmetic conflict (no constraint added) + // and still returns true (the bound value changed). Check is_general_conflict() + // separately to distinguish tightening-with-conflict from normal tightening. + // Mirrors ZIPT's AddHigherIntBound(). + bool add_upper_int_bound(euf::snode* var, unsigned ub, dep_tracker const& dep); + + // Query current bounds for a variable (default: 0 / UINT_MAX if not set). + unsigned var_lb(euf::snode* var) const; + unsigned var_ub(euf::snode* var) const; + // character constraint access (mirrors ZIPT's DisEqualities / CharRanges) u_map> const& char_diseqs() const { return m_char_diseqs; } u_map const& char_ranges() const { return m_char_ranges; } @@ -581,8 +632,10 @@ namespace seq { void apply_subst(euf::sgraph& sg, nielsen_subst const& s); // simplify all constraints at this node and initialize status. + // cur_path provides the path from root to this node so that the + // LP solver can be queried for deterministic power cancellation. // Returns proceed, conflict, satisfied, or restart. - simplify_result simplify_and_init(nielsen_graph& g); + simplify_result simplify_and_init(nielsen_graph& g, svector const& cur_path = svector()); // true if all str_eqs are trivial and there are no str_mems bool is_satisfied() const; @@ -603,6 +656,19 @@ namespace seq { // sets changed=true, and returns false. bool handle_empty_side(euf::sgraph& sg, euf::snode* non_empty_side, dep_tracker const& dep, bool& changed); + + // VarBoundWatcher: after applying substitution s, propagate the bounds + // of s.m_var to variables appearing in s.m_replacement. + // When var has bounds [lo, hi], derives bounds for variables in replacement + // using the known constant-length contribution of non-variable tokens. + // Mirrors ZIPT's VarBoundWatcher re-check mechanism. + void watch_var_bounds(nielsen_subst const& s); + + // Initialize per-variable Parikh bounds from this node's regex memberships. + // For each str_mem constraint (str ∈ regex) where regex has length bounds + // [min_len, max_len], adds lower/upper bound constraints for len(str). + // Called from simplify_and_init to populate IntBounds at node creation. + void init_var_bounds_from_mems(); }; // search statistics collected during Nielsen graph solving @@ -615,11 +681,13 @@ namespace seq { unsigned m_num_simplify_conflict = 0; unsigned m_num_extensions = 0; unsigned m_num_fresh_vars = 0; + unsigned m_num_arith_infeasible = 0; unsigned m_max_depth = 0; // modifier application counts unsigned m_mod_det = 0; unsigned m_mod_power_epsilon = 0; unsigned m_mod_num_cmp = 0; + unsigned m_mod_split_power_elim = 0; unsigned m_mod_const_num_unwinding = 0; unsigned m_mod_eq_split = 0; unsigned m_mod_star_intr = 0; @@ -636,6 +704,7 @@ namespace seq { // the overall Nielsen transformation graph // mirrors ZIPT's NielsenGraph class nielsen_graph { + friend class nielsen_node; euf::sgraph& m_sg; region m_region; ptr_vector m_nodes; @@ -657,18 +726,42 @@ namespace seq { // ----------------------------------------------- // Integer subsolver (abstract interface) - // When m_solver is null, check_int_feasibility uses an - // internal lp_simple_solver (created on demand). + // When m_solver is null, check_int_feasibility skips arithmetic checking + // and optimistically assumes feasibility. // ----------------------------------------------- - simple_solver* m_solver = nullptr; - scoped_ptr m_owned_solver; // non-null when we own the solver + simple_solver& m_solver; + + // Constraint.Shared: guards re-assertion of root-level constraints. + // Set to true after assert_root_constraints_to_solver() is first called. + bool m_root_constraints_asserted = false; + + // Parikh image filter: generates modular length constraints from regex + // memberships. Allocated in the constructor; owned by this graph. + seq_parikh* m_parikh = nullptr; + // ----------------------------------------------- + // Modification counter for substitution length tracking. + // mirrors ZIPT's LocalInfo.CurrentModificationCnt + // ----------------------------------------------- + + // Maps snode id of string variable → current modification (reuse) count + // along the DFS path. When a non-eliminating substitution x/u is applied + // (x appears in u), x's count is bumped. This produces distinct length + // variables for x before and after substitution, avoiding the unsatisfiable + // |x| = 1 + |x| that results from reusing the same length symbol. + u_map m_mod_cnt; + + // Cache: (var snode id, modification count) → fresh integer variable. + // Variables at mod_count 0 use str.len(var_expr) (standard form). + // Variables at mod_count > 0 get a fresh Z3 integer constant. + std::map, expr*> m_len_var_cache; + + // Pins the fresh length variable expressions so they aren't garbage collected. + expr_ref_vector m_len_vars; public: - // Construct without a custom solver; an lp_simple_solver is used internally. - nielsen_graph(euf::sgraph& sg); // Construct with a caller-supplied solver. Ownership is NOT transferred; // the caller is responsible for keeping the solver alive. - nielsen_graph(euf::sgraph& sg, simple_solver* solver); + nielsen_graph(euf::sgraph& sg, simple_solver& solver); ~nielsen_graph(); euf::sgraph& sg() { return m_sg; } @@ -757,9 +850,36 @@ namespace seq { // Also generates len(x) >= 0 for each variable appearing in the equations. void generate_length_constraints(vector& constraints); + // build an arithmetic expression representing the length of an snode tree. + // concatenations are expanded to sums, chars to 1, empty to 0, + // variables to (str.len var_expr). + expr_ref compute_length_expr(euf::snode* n); + + // compute Parikh length interval [min_len, max_len] for a regex snode. + // uses seq_util::rex min_length/max_length on the underlying expression. + // max_len == UINT_MAX means unbounded. + void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len); + + // solve all integer constraints along the sat_path and return + // a model mapping integer variables to concrete values. + // Must be called after solve() returns sat. + // Returns true if a satisfying model was found. + // Caller takes ownership of the returned model pointer. + bool solve_sat_path_ints(model_ref& mdl); + private: search_result search_dfs(nielsen_node* node, unsigned depth, svector& cur_path); + // Apply the Parikh image filter to a node: generate modular length + // constraints from regex memberships and append them to the node's + // int_constraints. Also performs a lightweight feasibility pre-check; + // if a Parikh conflict is detected the node's conflict flag is set with + // backtrack_reason::parikh_image. + // + // Guarded by node.m_parikh_applied so that constraints are generated + // only once per node across DFS iterations. + void apply_parikh_to_node(nielsen_node& node); + // create a fresh variable with a unique name euf::snode* mk_fresh_var(sort* s); @@ -815,6 +935,15 @@ namespace seq { // mirrors ZIPT's NumCmpModifier bool apply_num_cmp(nielsen_node* node); + // CommPower-based power elimination split: when one side starts with + // a power w^p and CommPower finds c base-pattern occurrences on the + // other side but the ordering between p and c is unknown, branch: + // (1) p < c (2) c ≤ p + // After branching, simplify_and_init's CommPower pass resolves the + // cancellation deterministically. + // mirrors ZIPT's SplitPowerElim + NumCmpModifier + bool apply_split_power_elim(nielsen_node* node); + // constant numeric unwinding: for a power token u^n vs a constant // (non-variable), branch: (1) n = 0 (u^n = ε), (2) n >= 1 (peel one u). // mirrors ZIPT's ConstNumUnwindingModifier @@ -833,7 +962,7 @@ namespace seq { // helper for apply_gpower_intr: fires the substitution bool fire_gpower_intro(nielsen_node* node, str_eq const& eq, - euf::snode* var, euf::snode_vector const& ground_prefix); + euf::snode* var, euf::snode_vector const& ground_prefix_orig); // regex variable split: for str_mem x·s ∈ R where x is a variable, // split using minterms: x → ε, or x → c·x' for each minterm c. @@ -858,35 +987,50 @@ namespace seq { euf::snode* find_power_token(nielsen_node* node) const; // find a power token facing a constant (char) head - bool find_power_vs_const(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const; + bool find_power_vs_non_var(nielsen_node* node, euf::snode*& power, euf::snode*& other_head, str_eq const*& eq_out) const; // find a power token facing a variable head bool find_power_vs_var(nielsen_node* node, euf::snode*& power, euf::snode*& var_head, str_eq const*& eq_out) const; - // build an arithmetic expression representing the length of an snode tree. - // concatenations are expanded to sums, chars to 1, empty to 0, - // variables to (str.len var_expr). - expr_ref compute_length_expr(euf::snode* n); - - // compute Parikh length interval [min_len, max_len] for a regex snode. - // uses seq_util::rex min_length/max_length on the underlying expression. - // max_len == UINT_MAX means unbounded. - void compute_regex_length_interval(euf::snode* regex, unsigned& min_len, unsigned& max_len); - // ----------------------------------------------- // Integer feasibility subsolver methods // ----------------------------------------------- - // collect int_constraints along the path from root to the given node, - // including constraints from edges and nodes. - void collect_path_int_constraints(nielsen_node* node, - svector const& cur_path, - vector& out); + // Constraint.Shared: assert all root-level length/Parikh constraints to + // m_solver at the base level (outside push/pop). Called once at the start + // of solve(). Makes derived constraints immediately visible to m_solver + // for arithmetic pruning at every DFS node, not just the root. + // Mirrors ZIPT's Constraint.Shared forwarding mechanism. + void assert_root_constraints_to_solver(); + + // Assert the int_constraints of `node` that are new relative to its + // parent (indices [m_parent_ic_count..end)) into the current solver scope. + // Called by search_dfs after simplify_and_init so that the newly derived + // bounds become visible to subsequent check() and check_lp_le() calls. + void assert_node_new_int_constraints(nielsen_node* node); + + // Generate |LHS| = |RHS| length constraints for a non-root node's own + // string equalities and add them as int_constraints on the node. + // Called once per node (guarded by m_node_len_constraints_generated). + // Uses compute_length_expr (mod-count-aware) so that variables with + // non-zero modification counts get fresh length variables. + // Mirrors ZIPT's Constraint.Shared forwarding for per-node equations. + void generate_node_length_constraints(nielsen_node* node); // check integer feasibility of the constraints along the current path. - // returns true if feasible, false if infeasible. + // returns true if feasible (including unknown), false only if l_false. + // Precondition: all path constraints have been incrementally asserted to + // m_solver by search_dfs via push/pop, so a plain check() suffices. + // l_undef (resource limit / timeout) is treated as feasible so that the + // search continues rather than reporting a false unsatisfiability. bool check_int_feasibility(nielsen_node* node, svector const& cur_path); + // check whether lhs <= rhs is implied by the path constraints. + // mirrors ZIPT's NielsenNode.IsLe(): temporarily asserts NOT(lhs <= rhs) + // and returns true iff the result is unsatisfiable (i.e., lhs <= rhs is + // entailed). Path constraints are already in the solver incrementally. + bool check_lp_le(expr* lhs, expr* rhs, nielsen_node* node, svector const& cur_path); + // create an integer constraint: lhs rhs int_constraint mk_int_constraint(expr* lhs, expr* rhs, int_constraint_kind kind, dep_tracker const& dep); @@ -898,6 +1042,30 @@ namespace seq { // convert an int_constraint to an expr* assertion expr_ref int_constraint_to_expr(int_constraint const& ic); + + // ----------------------------------------------- + // Modification counter methods for substitution length tracking. + // mirrors ZIPT's NielsenEdge.IncModCount / DecModCount and + // NielsenNode constructor length assertion logic. + // ----------------------------------------------- + + // Get or create a fresh integer variable for len(var) at the given + // modification count. Returns str.len(var_expr) when mod_count == 0. + expr_ref get_or_create_len_var(euf::snode* var, unsigned mod_count); + + // Compute and add |x| = |u| length constraints to an edge for all + // its non-eliminating substitutions. Uses current m_mod_cnt. + // Temporarily bumps m_mod_cnt for RHS computation, then restores. + // Called lazily on first edge traversal in search_dfs. + void add_subst_length_constraints(nielsen_edge* e); + + // Bump modification counts for an edge's non-eliminating substitutions. + // Called when entering an edge during DFS. + void inc_edge_mod_counts(nielsen_edge* e); + + // Restore modification counts for an edge's non-eliminating substitutions. + // Called when backtracking from an edge during DFS. + void dec_edge_mod_counts(nielsen_edge* e); }; } diff --git a/src/smt/seq/seq_parikh.cpp b/src/smt/seq/seq_parikh.cpp new file mode 100644 index 000000000..3f6deba40 --- /dev/null +++ b/src/smt/seq/seq_parikh.cpp @@ -0,0 +1,387 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_parikh.cpp + +Abstract: + + Parikh image filter implementation for the ZIPT-based Nielsen string + solver. See seq_parikh.h for the full design description. + + The key operation is compute_length_stride(re), which performs a + structural traversal of the regex to find the period k such that all + string lengths in L(re) are congruent to min_length(re) modulo k. + The stride is used to generate modular length constraints that help + the integer subsolver prune infeasible Nielsen graph nodes. + +Author: + + Clemens Eisenhofer 2026-03-10 + Nikolaj Bjorner (nbjorner) 2026-03-10 + +--*/ + +#include "smt/seq/seq_parikh.h" +#include "util/mpz.h" +#include + +namespace seq { + + seq_parikh::seq_parikh(euf::sgraph& sg) + : m(sg.get_manager()), seq(m), a(m), m_fresh_cnt(0) {} + + expr_ref seq_parikh::mk_fresh_int_var() { + std::string name = "pk!" + std::to_string(m_fresh_cnt++); + return expr_ref(m.mk_fresh_const(name.c_str(), a.mk_int()), m); + } + + // ----------------------------------------------------------------------- + // Stride computation + // ----------------------------------------------------------------------- + + // compute_length_stride: structural traversal of regex expression. + // + // Return value semantics: + // 0 — fixed length (or empty language): no modular constraint needed + // beyond the min == max bounds. + // 1 — all integer lengths ≥ min_len are achievable: no useful modular + // constraint. + // k > 1 — all lengths in L(re) satisfy len ≡ min_len (mod k): + // modular constraint len(str) = min_len + k·j is useful. + unsigned seq_parikh::compute_length_stride(expr* re) { + if (!re) return 1; + + expr* r1 = nullptr, *r2 = nullptr, *s = nullptr; + unsigned lo = 0, hi = 0; + + // Empty language: no strings exist; stride is irrelevant. + if (seq.re.is_empty(re)) + return 0; + + // Epsilon regex {""}: single fixed length 0. + if (seq.re.is_epsilon(re)) + return 0; + + // to_re(concrete_string): fixed-length, no modular constraint needed. + if (seq.re.is_to_re(re, s)) { + // min_length == max_length, covered by bounds. + return 0; + } + + // Single character: range, full_char — fixed length 1. + if (seq.re.is_range(re) || seq.re.is_full_char(re)) + return 0; + + // full_seq (.* / Σ*): every length ≥ 0 is possible. + if (seq.re.is_full_seq(re)) + return 1; + + // r* — Kleene star. + // L(r*) = {ε} ∪ L(r) ∪ L(r)·L(r) ∪ ... + // If all lengths in L(r) are congruent to c modulo s (c = min_len, s = stride), + // then L(r*) includes lengths {0, c, c+s, 2c, 2c+s, 2c+2s, ...} and + // the overall GCD is gcd(c, s). This is strictly more accurate than + // the previous gcd(min, max) approximation, which can be unsound when + // the body contains lengths whose GCD is smaller than gcd(min, max). + if (seq.re.is_star(re, r1)) { + unsigned mn = seq.re.min_length(r1); + unsigned inner = compute_length_stride(r1); + // stride(r*) = gcd(min_length(r), stride(r)) + // when inner=0 (fixed-length body), gcd(mn, 0) = mn → stride = mn + return u_gcd(mn, inner); + } + + // r+ — one or more: same stride analysis as r*. + if (seq.re.is_plus(re, r1)) { + unsigned mn = seq.re.min_length(r1); + unsigned inner = compute_length_stride(r1); + return u_gcd(mn, inner); + } + + // r? — zero or one: lengths = {0} ∪ lengths(r) + // stride = GCD(mn_r, stride(r)) unless stride(r) is 0 (fixed length). + if (seq.re.is_opt(re, r1)) { + unsigned mn = seq.re.min_length(r1); + unsigned inner = compute_length_stride(r1); + // L(r?) includes length 0 and all lengths of L(r). + // GCD(stride(r), min_len(r)) is a valid stride because: + // - the gap from 0 to min_len(r) is min_len(r) itself, and + // - subsequent lengths grow in steps governed by stride(r). + // A result > 1 gives a useful modular constraint; result == 1 + // means every non-negative integer is achievable (no constraint). + if (inner == 0) + return u_gcd(mn, 0); // gcd(mn, 0) = mn; useful when mn > 1 + return u_gcd(inner, mn); + } + + // concat(r1, r2): lengths add → stride = GCD(stride(r1), stride(r2)). + if (seq.re.is_concat(re, r1, r2)) { + unsigned s1 = compute_length_stride(r1); + unsigned s2 = compute_length_stride(r2); + return u_gcd(s1, s2); + } + + // union(r1, r2): lengths from either branch → need GCD of both + // strides and the difference between their minimum lengths. + if (seq.re.is_union(re, r1, r2)) { + unsigned s1 = compute_length_stride(r1); + unsigned s2 = compute_length_stride(r2); + unsigned m1 = seq.re.min_length(r1); + unsigned m2 = seq.re.min_length(r2); + unsigned d = (m1 >= m2) ? (m1 - m2) : (m2 - m1); + // Replace 0-strides with d for GCD computation: + // a fixed-length branch only introduces constraint via its offset. + unsigned g = u_gcd(s1 == 0 ? d : s1, s2 == 0 ? d : s2); + g = u_gcd(g, d); + return g; + } + + // loop(r, lo, hi): the length of any word is a sum of lo..hi copies of + // lengths from L(r). Since all lengths in L(r) are ≡ min_len(r) mod + // stride(r), the overall stride is gcd(min_len(r), stride(r)). + if (seq.re.is_loop(re, r1, lo, hi)) { + unsigned mn = seq.re.min_length(r1); + unsigned inner = compute_length_stride(r1); + return u_gcd(mn, inner); + } + if (seq.re.is_loop(re, r1, lo)) { + unsigned mn = seq.re.min_length(r1); + unsigned inner = compute_length_stride(r1); + return u_gcd(mn, inner); + } + + // intersection(r1, r2): lengths must be in both languages. + // A conservative safe choice: GCD(stride(r1), stride(r2)) is a valid + // stride for the intersection (every length in the intersection is + // also in r1 and in r2). + if (seq.re.is_intersection(re, r1, r2)) { + unsigned s1 = compute_length_stride(r1); + unsigned s2 = compute_length_stride(r2); + return u_gcd(s1, s2); + } + + // For complement, diff, reverse, derivative, of_pred, and anything + // else we cannot analyse statically: be conservative and return 1 + // (no useful modular constraint rather than an unsound one). + return 1; + } + + // ----------------------------------------------------------------------- + // Constraint generation + // ----------------------------------------------------------------------- + + void seq_parikh::generate_parikh_constraints(str_mem const& mem, + vector& out) { + if (!mem.m_regex || !mem.m_str) + return; + + expr* re_expr = mem.m_regex->get_expr(); + if (!re_expr || !seq.is_re(re_expr)) + return; + + // Length bounds from the regex. + unsigned min_len = seq.re.min_length(re_expr); + unsigned max_len = seq.re.max_length(re_expr); + + // If min_len >= max_len the bounds already pin the length exactly + // (or the language is empty — empty language is detected by simplify_and_init + // via Brzozowski derivative / is_empty checks, not here). + // We only generate modular constraints when the length is variable. + if (min_len >= max_len) + return; + + unsigned stride = compute_length_stride(re_expr); + + // stride == 1: every integer length is possible — no useful constraint. + // stride == 0: fixed length or empty — handled by bounds. + if (stride <= 1) + return; + + // Build len(str) as an arithmetic expression. + expr_ref len_str(seq.str.mk_length(mem.m_str->get_expr()), m); + + // Introduce fresh integer variable k ≥ 0. + expr_ref k_var = mk_fresh_int_var(); + + // Constraint 1: len(str) = min_len + stride · k + expr_ref min_expr(a.mk_int(min_len), m); + expr_ref stride_expr(a.mk_int(stride), m); + expr_ref stride_k(a.mk_mul(stride_expr, k_var), m); + expr_ref rhs(a.mk_add(min_expr, stride_k), m); + out.push_back(int_constraint(len_str, rhs, + int_constraint_kind::eq, mem.m_dep, m)); + + // Constraint 2: k ≥ 0 + expr_ref zero(a.mk_int(0), m); + out.push_back(int_constraint(k_var, zero, + int_constraint_kind::ge, mem.m_dep, m)); + + // Constraint 3 (optional): k ≤ max_k when max_len is bounded. + // max_k = floor((max_len - min_len) / stride) + // This gives the solver an explicit upper bound on k. + // The subtraction is safe because min_len < max_len is guaranteed + // by the early return above. + if (max_len != UINT_MAX) { + SASSERT(max_len > min_len); + unsigned range = max_len - min_len; + unsigned max_k = range / stride; + expr_ref max_k_expr(a.mk_int(max_k), m); + out.push_back(int_constraint(k_var, max_k_expr, + int_constraint_kind::le, mem.m_dep, m)); + } + } + + void seq_parikh::apply_to_node(nielsen_node& node) { + vector constraints; + for (str_mem const& mem : node.str_mems()) + generate_parikh_constraints(mem, constraints); + for (auto& ic : constraints) + node.add_int_constraint(ic); + } + + // ----------------------------------------------------------------------- + // Quick Parikh feasibility check (no solver call) + // ----------------------------------------------------------------------- + + // Returns true if a Parikh conflict is detected: there exists a membership + // str ∈ re for a single-variable str where the modular length constraint + // len(str) = min_len + stride * k (k ≥ 0) + // is inconsistent with the variable's current integer bounds [lb, ub]. + // + // This check is lightweight — it uses only modular arithmetic on the already- + // known regex min/max lengths and the per-variable bounds stored in the node. + bool seq_parikh::check_parikh_conflict(nielsen_node& node) { + for (str_mem const& mem : node.str_mems()) { + if (!mem.m_str || !mem.m_regex || !mem.m_str->is_var()) + continue; + + expr* re_expr = mem.m_regex->get_expr(); + if (!re_expr || !seq.is_re(re_expr)) + continue; + + unsigned min_len = seq.re.min_length(re_expr); + unsigned max_len = seq.re.max_length(re_expr); + if (min_len >= max_len) continue; // fixed or empty — no stride constraint + + unsigned stride = compute_length_stride(re_expr); + if (stride <= 1) continue; // no useful modular constraint + // stride > 1 guaranteed from here onward. + SASSERT(stride > 1); + + unsigned lb = node.var_lb(mem.m_str); + unsigned ub = node.var_ub(mem.m_str); + + // Check: ∃k ≥ 0 such that lb ≤ min_len + stride * k ≤ ub ? + // + // First find the smallest k satisfying the lower bound: + // k_min = 0 if min_len ≥ lb + // k_min = ⌈(lb - min_len) / stride⌉ otherwise + // + // Then verify min_len + stride * k_min ≤ ub. + unsigned k_min = 0; + if (lb > min_len) { + unsigned gap = lb - min_len; + // Ceiling division: k_min = ceil(gap / stride). + // Guard: (gap + stride - 1) may overflow if gap is close to UINT_MAX. + // In that case k_min would be huge, and min_len + stride*k_min would + // also overflow ub → treat as a conflict immediately. + if (gap > UINT_MAX - (stride - 1)) { + return true; // ceiling division would overflow → k_min too large + } + k_min = (gap + stride - 1) / stride; + } + + // Overflow guard: stride * k_min may overflow unsigned. + unsigned len_at_k_min; + if (k_min > (UINT_MAX - min_len) / stride) { + // Overflow: min_len + stride * k_min > UINT_MAX ≥ ub → conflict. + return true; + } + len_at_k_min = min_len + stride * k_min; + + if (ub != UINT_MAX && len_at_k_min > ub) + return true; // no valid k exists → Parikh conflict + } + return false; + } + + // ----------------------------------------------------------------------- + // minterm → char_set conversion + // ----------------------------------------------------------------------- + + // Convert a regex minterm expression to a char_set. + // + // Minterms are Boolean combinations of character-class predicates + // (re.range, re.full_char, complement, intersection) produced by + // sgraph::compute_minterms(). This function converts them to a + // concrete char_set so that fresh character variables introduced by + // apply_regex_var_split can be constrained with add_char_range(). + // + // The implementation is recursive and handles: + // re.full_char → [0, max_char] (full alphabet) + // re.range(lo, hi) → [lo, hi+1) (hi inclusive in Z3) + // re.complement(r) → alphabet \ minterm_to_char_set(r) + // re.inter(r1, r2) → intersection of both sides + // re.diff(r1, r2) → r1 ∩ complement(r2) + // re.to_re(str.unit(c)) → singleton {c} + // re.empty → empty set + // anything else → [0, max_char] (conservative) + char_set seq_parikh::minterm_to_char_set(expr* re_expr) { + unsigned max_c = seq.max_char(); + + if (!re_expr) + return char_set::full(max_c); + + // full_char: the whole alphabet [0, max_char] + if (seq.re.is_full_char(re_expr)) + return char_set::full(max_c); + + // range [lo, hi] (hi inclusive in Z3's regex representation) + unsigned lo = 0, hi = 0; + if (seq.re.is_range(re_expr, lo, hi)) { + // lo > hi is a degenerate range; should not arise from well-formed minterms + SASSERT(lo <= hi); + if (lo > hi) return char_set(); + // char_range uses exclusive upper bound; Z3 hi is inclusive + return char_set(char_range(lo, hi + 1)); + } + + // complement: alphabet minus the inner set + expr* inner = nullptr; + if (seq.re.is_complement(re_expr, inner)) + return minterm_to_char_set(inner).complement(max_c); + + // intersection: characters present in both sets + expr* r1 = nullptr, *r2 = nullptr; + if (seq.re.is_intersection(re_expr, r1, r2)) + return minterm_to_char_set(r1).intersect_with(minterm_to_char_set(r2)); + + // difference: r1 minus r2 = r1 ∩ complement(r2) + if (seq.re.is_diff(re_expr, r1, r2)) + return minterm_to_char_set(r1).intersect_with( + minterm_to_char_set(r2).complement(max_c)); + + // to_re(str.unit(c)): singleton character set + expr* str_arg = nullptr; + expr* ch_expr = nullptr; + unsigned char_val = 0; + if (seq.re.is_to_re(re_expr, str_arg) && + seq.str.is_unit(str_arg, ch_expr) && + seq.is_const_char(ch_expr, char_val)) { + char_set cs; + cs.add(char_val); + return cs; + } + + // empty regex: no characters can appear + if (seq.re.is_empty(re_expr)) + return char_set(); + + // Conservative fallback: return the full alphabet so that + // no unsound constraints are added for unrecognised expressions. + return char_set::full(max_c); + } + +} // namespace seq diff --git a/src/smt/seq/seq_parikh.h b/src/smt/seq/seq_parikh.h new file mode 100644 index 000000000..8e8a4e178 --- /dev/null +++ b/src/smt/seq/seq_parikh.h @@ -0,0 +1,150 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_parikh.h + +Abstract: + + Parikh image filter for the ZIPT-based Nielsen string solver. + + Implements Parikh-based arithmetic constraint generation for + nielsen_node instances. For a regex membership constraint str ∈ r, + the Parikh image of r constrains the multiset of characters in str. + This module computes the "length stride" (period) of the length + language of r and generates modular arithmetic constraints of the form + + len(str) = min_len + stride · k (k ≥ 0, k fresh integer) + + which tighten the arithmetic subproblem beyond the simple min/max + length bounds already produced by nielsen_node::init_var_bounds_from_mems(). + + For example: + • str ∈ (ab)* → min_len = 0, stride = 2 → len(str) = 2·k + • str ∈ a(bc)* → min_len = 1, stride = 2 → len(str) = 1 + 2·k + • str ∈ ab|abc → stride = 1 (no useful modular constraint) + + The generated int_constraints are added to the node's integer constraint + set and discharged by the integer subsolver (see seq_nielsen.h, + simple_solver). + + Implements the Parikh filter described in ZIPT + (https://github.com/CEisenhofer/ZIPT/tree/parikh/ZIPT/Constraints) + replacing ZIPT's PDD-based Parikh subsolver with Z3's linear arithmetic. + +Author: + + Clemens Eisenhofer 2026-03-10 + Nikolaj Bjorner (nbjorner) 2026-03-10 + +--*/ +#pragma once + +#include "ast/arith_decl_plugin.h" +#include "ast/seq_decl_plugin.h" +#include "smt/seq/seq_nielsen.h" + +namespace seq { + + /** + * Parikh image filter: generates modular length constraints from + * regex membership constraints in a nielsen_node. + * + * Usage: + * seq_parikh parikh(sg); + * parikh.apply_to_node(node); // adds constraints to node + * + * Or per-membership: + * vector out; + * parikh.generate_parikh_constraints(mem, out); + */ + class seq_parikh { + ast_manager& m; + seq_util seq; + arith_util a; + unsigned m_fresh_cnt; // counter for fresh variable names + + // Compute the stride (period) of the length language of a regex. + // + // The stride k satisfies: all lengths in L(re) are congruent to + // min_length(re) modulo k. A stride of 1 means every integer + // length is possible (no useful modular constraint). A stride of + // 0 is a sentinel meaning the language is empty or has a single + // fixed length (already captured by bounds). + // + // Examples: + // stride(to_re("ab")) = 0 (fixed length 2) + // stride((ab)*) = 2 (lengths 0, 2, 4, ...) + // stride((abc)*) = 3 (lengths 0, 3, 6, ...) + // stride(a*b*) = 1 (all lengths possible) + // stride((ab)*(cd)*) = 2 (lengths 0, 2, 4, ...) + // stride((ab)*|(abc)*) = 1 (lengths 0, 2, 3, 4, ...) + unsigned compute_length_stride(expr* re); + + // Create a fresh integer variable (name "pk!N") for use as the + // Parikh multiplier variable k in len(str) = min_len + stride·k. + expr_ref mk_fresh_int_var(); + + public: + explicit seq_parikh(euf::sgraph& sg); + + // Generate Parikh modular length constraints for one membership. + // + // When stride > 1 and min_len < max_len (bounds don't pin length exactly, + // and the language is non-empty): + // adds: len(str) = min_len + stride · k (equality) + // k ≥ 0 (non-negativity) + // k ≤ (max_len - min_len) / stride (upper bound, when max_len bounded) + // These tighten the integer constraint set for the subsolver. + // Dependencies are copied from mem.m_dep. + // Does nothing when min_len ≥ max_len (empty or fixed-length language). + void generate_parikh_constraints(str_mem const& mem, + vector& out); + + // Apply Parikh constraints to all memberships at a node. + // Calls generate_parikh_constraints for each str_mem in the node + // and appends the resulting int_constraints to node.int_constraints(). + void apply_to_node(nielsen_node& node); + + // Quick Parikh feasibility check (no solver call). + // + // For each single-variable membership str ∈ re, checks whether the + // modular constraint len(str) = min_len + stride · k (k ≥ 0) + // has any solution given the current per-variable bounds stored in + // node.var_lb(str) and node.var_ub(str). + // + // Returns true when a conflict is detected (no valid k exists for + // some membership). The caller should then mark the node with + // backtrack_reason::parikh_image. + // + // This is a lightweight pre-check that avoids calling the integer + // subsolver. It is sound (never returns true for a satisfiable node) + // but incomplete (may miss conflicts that require the full solver). + bool check_parikh_conflict(nielsen_node& node); + + // Compute the length stride of a regex expression. + // Exposed for testing and external callers. + unsigned get_length_stride(expr* re) { return compute_length_stride(re); } + + // Convert a regex minterm expression to a char_set. + // + // A minterm is a Boolean combination of character-class predicates + // (re.range, re.full_char, complement, intersection) that names a + // single, indivisible character equivalence class. Minterms are + // produced by sgraph::compute_minterms and used in + // apply_regex_var_split to constrain fresh character variables. + // + // Supported expressions: + // re.full_char → full set [0, max_char] + // re.range(lo, hi) → char_set [lo, hi] (inclusive on both ends) + // re.complement(r) → complement of minterm_to_char_set(r) + // re.inter(r1, r2) → intersection of both sets + // re.diff(r1, r2) → r1 minus r2 (= r1 ∩ complement(r2)) + // re.to_re(unit(c)) → singleton {c} + // re.empty → empty set + // anything else → full set (conservative, sound over-approximation) + char_set minterm_to_char_set(expr* minterm_re); + }; + +} // namespace seq diff --git a/src/smt/theory_nseq.cpp b/src/smt/theory_nseq.cpp index 46673e91f..6ed3038f1 100644 --- a/src/smt/theory_nseq.cpp +++ b/src/smt/theory_nseq.cpp @@ -33,7 +33,8 @@ namespace smt { m_arith_value(ctx.get_manager()), m_egraph(ctx.get_manager()), m_sgraph(ctx.get_manager(), m_egraph), - m_nielsen(m_sgraph), + m_context_solver(ctx.get_manager()), + m_nielsen(m_sgraph, m_context_solver), m_state(m_sgraph), m_regex(m_sgraph), m_model(*this, ctx.get_manager(), m_seq, m_rewriter, m_sgraph, m_regex) @@ -420,8 +421,8 @@ namespace smt { // here the actual Nielsen solving happens auto result = m_nielsen.solve(); std::cout << "Result: " << (result == seq::nielsen_graph::search_result::sat ? "SAT" : result == seq::nielsen_graph::search_result::unsat ? "UNSAT" : "UNKNOWN") << "\n"; - m_nielsen.to_dot(std::cout); - std::cout << std::endl; + // m_nielsen.to_dot(std::cout); + // std::cout << std::endl; if (result == seq::nielsen_graph::search_result::sat) { IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve SAT, sat_node=" diff --git a/src/smt/theory_nseq.h b/src/smt/theory_nseq.h index 6e7be11b9..24f5ad566 100644 --- a/src/smt/theory_nseq.h +++ b/src/smt/theory_nseq.h @@ -28,6 +28,7 @@ Author: #include "smt/nseq_state.h" #include "smt/nseq_regex.h" #include "smt/nseq_model.h" +#include "smt/nseq_context_solver.h" namespace smt { @@ -38,6 +39,9 @@ namespace smt { arith_value m_arith_value; euf::egraph m_egraph; // private egraph (not shared with smt context) euf::sgraph m_sgraph; // private sgraph + // m_context_solver must be declared before m_nielsen: its address is passed + // to the m_nielsen constructor and must remain stable for the object's lifetime. + context_solver m_context_solver; seq::nielsen_graph m_nielsen; nseq_state m_state; nseq_regex m_regex; // regex membership pre-processing diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index acc598edf..1620bd790 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -132,6 +132,7 @@ add_executable(test-z3 sls_test.cpp sls_seq_plugin.cpp seq_nielsen.cpp + seq_parikh.cpp nseq_basic.cpp nseq_regex.cpp nseq_zipt.cpp diff --git a/src/test/main.cpp b/src/test/main.cpp index 9fc8bf1ec..0f378df67 100644 --- a/src/test/main.cpp +++ b/src/test/main.cpp @@ -287,6 +287,7 @@ int main(int argc, char ** argv) { TST(scoped_vector); TST(sls_seq_plugin); TST(seq_nielsen); + TST(seq_parikh); TST(nseq_basic); TST(nseq_regex); TST(nseq_zipt); diff --git a/src/test/nseq_basic.cpp b/src/test/nseq_basic.cpp index 7472755fb..ff9318e84 100644 --- a/src/test/nseq_basic.cpp +++ b/src/test/nseq_basic.cpp @@ -21,6 +21,15 @@ Abstract: #include "smt/theory_nseq.h" #include +// Trivial solver that always returns sat and ignores all assertions. +class nseq_basic_dummy_solver : public seq::simple_solver { +public: + void push() override {} + void pop(unsigned) override {} + void assert_expr(expr*) override {} + lbool check() override { return l_true; } +}; + // Test 1: instantiation of nielsen_graph compiles and doesn't crash static void test_nseq_instantiation() { std::cout << "test_nseq_instantiation\n"; @@ -28,7 +37,8 @@ static void test_nseq_instantiation() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + nseq_basic_dummy_solver solver; + seq::nielsen_graph ng(sg, solver); SASSERT(ng.root() == nullptr); SASSERT(ng.num_nodes() == 0); std::cout << " ok\n"; @@ -83,7 +93,8 @@ static void test_nseq_simplification() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + nseq_basic_dummy_solver solver; + seq::nielsen_graph ng(sg, solver); // Add a trivial equality: empty = empty euf::snode* empty1 = sg.mk_empty(); @@ -104,7 +115,8 @@ static void test_nseq_node_satisfied() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + nseq_basic_dummy_solver solver; + seq::nielsen_graph ng(sg, solver); seq::nielsen_node* node = ng.mk_node(); // empty node has no constraints => satisfied @@ -130,7 +142,8 @@ static void test_nseq_symbol_clash() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + nseq_basic_dummy_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('a'); euf::snode* b = sg.mk_char('b'); @@ -155,7 +168,8 @@ static void test_nseq_var_eq_self() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + nseq_basic_dummy_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); ng.add_str_eq(x, x); @@ -172,7 +186,8 @@ static void test_nseq_prefix_clash() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + nseq_basic_dummy_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a = sg.mk_char('a'); @@ -193,7 +208,8 @@ static void test_nseq_const_nielsen_solvable() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + nseq_basic_dummy_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -215,7 +231,8 @@ static void test_nseq_length_mismatch() { reg_decl_plugins(m); euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + nseq_basic_dummy_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('a'); euf::snode* b = sg.mk_char('b'); diff --git a/src/test/nseq_zipt.cpp b/src/test/nseq_zipt.cpp index ba2c1d2dd..930749de3 100644 --- a/src/test/nseq_zipt.cpp +++ b/src/test/nseq_zipt.cpp @@ -29,6 +29,27 @@ Abstract: #include #include +// Trivial solver that always returns sat and ignores all assertions. +class nseq_zipt_dummy_solver : public seq::simple_solver { +public: + void push() override {} + void pop(unsigned) override {} + void assert_expr(expr*) override {} + lbool check() override { return l_true; } +}; + +// ----------------------------------------------------------------------- +// Trivial simple_solver stub: optimistically assumes integer constraints +// are always feasible (returns l_true without actually checking). +// ----------------------------------------------------------------------- +class zipt_dummy_simple_solver : public seq::simple_solver { +public: + void push() override {} + void pop(unsigned) override {} + void assert_expr(expr*) override {} + lbool check() override { return l_true; } +}; + // ----------------------------------------------------------------------- // Helper: build a string snode from a notation string. // Uppercase = fresh variable, lowercase/digit = concrete character. @@ -159,6 +180,7 @@ struct nseq_fixture { ast_manager m; euf::egraph eg; euf::sgraph sg; + zipt_dummy_simple_solver dummy_solver; seq::nielsen_graph ng; seq_util su; str_builder sb; @@ -168,7 +190,7 @@ struct nseq_fixture { static ast_manager& init(ast_manager& m) { reg_decl_plugins(m); return m; } nseq_fixture() - : eg(init(m)), sg(m, eg), ng(sg), su(m), sb(sg), rb(m, su, sg) + : eg(init(m)), sg(m, eg), ng(sg, dummy_solver), su(m), sb(sg), rb(m, su, sg) {} euf::snode* S(const char* s) { return sb.parse(s); } @@ -576,75 +598,83 @@ static void test_zipt_parikh() { static void test_tricky_str_equations() { std::cout << "test_tricky_str_equations\n"; - // --- SAT: commutativity / rotation / symmetry --- + // --------------------------------------------------------------- + // SAT — Conjugacy equations: Xw₂ = w₁X + // SAT iff w₂ is a rotation of w₁. In that case w₁ = qp, w₂ = pq + // and X = (qp)^n · q is a family of solutions (n ≥ 0). + // All of these are UNSAT for X = ε (the ground parts don't match), + // so the solver must find a non-trivial witness. + // --------------------------------------------------------------- - // XY = YX (classic commutativity; witness: X="ab", Y="abab") - VERIFY(eq_sat("XY", "YX")); + // "ba" is a rotation of "ab" (p="b", q="a"). X = "a". + VERIFY(eq_sat("Xba", "abX")); - // Xab = abX (X commutes with the word "ab"; witness: X="ab") - VERIFY(eq_sat("Xab", "abX")); + // "cb" is a rotation of "bc" (p="c", q="b"). X = "b". + VERIFY(eq_sat("Xcb", "bcX")); - // XaY = YaX (swap-symmetric; witness: X=Y=any, e.g. X=Y="b") - VERIFY(eq_sat("XaY", "YaX")); + // "bca" is a rotation of "abc" (p="bc", q="a"). X = "a". + VERIFY(eq_sat("Xbca", "abcX")); - // XYX = YXY (Markov-type; witness: X=Y) - VERIFY(eq_sat("XYX", "YXY")); + // "cab" is a rotation of "abc" (p="c", q="ab"). X = "ab". + VERIFY(eq_sat("Xcab", "abcX")); - // XYZ = ZYX (reverse-palindrome; witness: X="a",Y="b",Z="a") - VERIFY(eq_sat("XYZ", "ZYX")); + // --------------------------------------------------------------- + // SAT — Power decomposition (ε NOT a solution) + // --------------------------------------------------------------- - // XabY = YabX (rotation-like; witness: X="",Y="ab") - VERIFY(eq_sat("XabY", "YabX")); + // XX = aa ⇒ X = "a". (ε gives "" ≠ "aa") + VERIFY(eq_sat("XX", "aa")); - // aXYa = aYXa (cancel outer 'a'; reduces to XY=YX; witness: X=Y="") - VERIFY(eq_sat("aXYa", "aYXa")); + // XbX = aba ⇒ 2|X| + 1 = 3 forces |X| = 1; X = "a". + // (ε gives "b" ≠ "aba") + VERIFY(eq_sat("XbX", "aba")); - // XaXb = YaYb (both halves share variable; witness: X=Y) - VERIFY(eq_sat("XaXb", "YaYb")); + // --------------------------------------------------------------- + // SAT — Multi-variable (ε NOT a solution) + // --------------------------------------------------------------- - // abXba = Xabba (witness: X="" gives "abba"="abba") - VERIFY(eq_sat("abXba", "Xabba")); + // X = "b", Y = "a" gives "baab" = "baab". (ε gives "ab" ≠ "ba") + VERIFY(eq_sat("XaYb", "bYaX")); - // --- UNSAT: first-character mismatch --- + // X = "b", Y = "a" gives "abba" = "abba". (ε gives "ab" ≠ "ba") + VERIFY(eq_sat("abXY", "YXba")); - // abXba = baXab (LHS starts 'a', RHS starts 'b') - VERIFY(eq_unsat("abXba", "baXab")); + // --------------------------------------------------------------- + // UNSAT — Non-conjugate rotation + // Xw₂ = w₁X where w₂ is NOT a rotation of w₁. + // Heads are variable vs char, so never a trivial first-char clash. + // GPowerIntr introduces periodicity, then the period boundaries + // give a character mismatch. + // --------------------------------------------------------------- - // XabX = XbaX (cancel X prefix/suffix → "ab"="ba"; 'a'≠'b') - VERIFY(eq_unsat("XabX", "XbaX")); + // "cba" is the reverse of "abc", NOT a rotation. + // Rotations of "abc" are: abc, bca, cab. + VERIFY(eq_unsat("Xcba", "abcX")); - // --- UNSAT: mismatch exposed after cancellation --- + // "acb" is a transposition of "abc", NOT a rotation. + VERIFY(eq_unsat("Xacb", "abcX")); - // XaYb = XbYa (cancel X prefix → aYb=bYa; first char 'a'≠'b') - VERIFY(eq_unsat("XaYb", "XbYa")); + // --------------------------------------------------------------- + // UNSAT — Induction via GPowerIntr + // One side starts with a variable that also appears on the other + // side behind a ground prefix → self-cycle. GPowerIntr forces + // periodicity; all period branches yield character mismatches. + // None of these has a trivial first-char or last-char clash. + // --------------------------------------------------------------- - // XaYbX = XbYaX (cancel X prefix+suffix → aYb=bYa; first char 'a'≠'b') - VERIFY(eq_unsat("XaYbX", "XbYaX")); + // Xa = bX: LHS head = X. Scan RHS: [b], X → self-cycle, base "b". + // X = b^n ⇒ b^n·a = b^{n+1} ⇒ last chars a ≠ b ⊥. + VERIFY(eq_unsat("Xa", "bX")); - // XaXbX = XbXaX (cancel X prefix+suffix → aXb=bXa; first char 'a'≠'b') - VERIFY(eq_unsat("XaXbX", "XbXaX")); + // acX = Xbc: RHS head = X. Scan LHS: [a,c], X → self-cycle, base "ac". + // X = (ac)^n ⇒ ac = bc ⊥ (a ≠ b). + // X = (ac)^n·a ⇒ aca = abc ⊥ (c ≠ b). + VERIFY(eq_unsat("acX", "Xbc")); - // --- UNSAT: induction --- - - // aXb = Xba (forces X=a^n; final step requires a=b) - VERIFY(eq_unsat("aXb", "Xba")); - - // XaY = YbX (a≠b; recursive unrolling forces a=b) - VERIFY(eq_unsat("XaY", "YbX")); - - // --- UNSAT: length parity --- - - // XaX = YY (|XaX|=2|X|+1 is odd; |YY|=2|Y| is even) - VERIFY(eq_unsat("XaX", "YY")); - - // XaaX = YbY (|XaaX|=2|X|+2 is even; |YbY|=2|Y|+1 is odd) - VERIFY(eq_unsat("XaaX", "YbY")); - - // --- UNSAT: midpoint argument --- - - // XaX = YbY (equal length forces |X|=|Y|; midpoint position |X| - // holds 'a' in LHS and 'b' in RHS, but 'a'≠'b') - VERIFY(eq_unsat("XaX", "YbY")); + // bcX = Xab: RHS head = X. Scan LHS: [b,c], X → self-cycle, base "bc". + // X = (bc)^n ⇒ bc = ab ⊥ (b ≠ a). + // X = (bc)^n·b ⇒ bcb = bab ⊥ (c ≠ a). + VERIFY(eq_unsat("bcX", "Xab")); std::cout << " ok\n"; } diff --git a/src/test/seq_nielsen.cpp b/src/test/seq_nielsen.cpp index 92d5bb72e..c0d58fdd2 100644 --- a/src/test/seq_nielsen.cpp +++ b/src/test/seq_nielsen.cpp @@ -22,6 +22,18 @@ Abstract: #include "ast/ast_pp.h" #include +class dummy_simple_solver : public seq::simple_solver { +public: + dummy_simple_solver() : seq::simple_solver() {} + void push() override {} + void pop(unsigned n) override {} + void assert_expr(expr *e) override {} + lbool check() override { + return l_true; + } + +}; + // test dep_tracker (uint_set) basic operations static void test_dep_tracker() { std::cout << "test_dep_tracker\n"; @@ -173,7 +185,8 @@ static void test_nielsen_node() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -215,7 +228,8 @@ static void test_nielsen_edge() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -250,7 +264,8 @@ static void test_nielsen_graph_populate() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -289,7 +304,8 @@ static void test_nielsen_subst_apply() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -323,7 +339,8 @@ static void test_nielsen_graph_reset() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -346,7 +363,8 @@ static void test_nielsen_expansion() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -395,7 +413,8 @@ static void test_run_idx() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); SASSERT(ng.run_idx() == 0); ng.inc_run_idx(); @@ -415,7 +434,8 @@ static void test_multiple_memberships() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); @@ -451,7 +471,8 @@ static void test_backedge() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -474,7 +495,8 @@ static void test_eq_split_basic() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -504,7 +526,8 @@ static void test_eq_split_solve_sat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -526,7 +549,7 @@ static void test_eq_split_solve_unsat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -548,7 +571,8 @@ static void test_eq_split_same_var_det() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a = sg.mk_char('A'); @@ -570,7 +594,7 @@ static void test_eq_split_commutation_sat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -592,7 +616,8 @@ static void test_const_nielsen_char_var() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* y = sg.mk_var(symbol("y")); @@ -615,7 +640,8 @@ static void test_const_nielsen_var_char() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* b = sg.mk_char('B'); @@ -640,7 +666,8 @@ static void test_const_nielsen_solve_sat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -661,7 +688,8 @@ static void test_const_nielsen_solve_unsat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -683,7 +711,8 @@ static void test_const_nielsen_priority_over_eq_split() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -710,7 +739,8 @@ static void test_const_nielsen_not_applicable_both_vars() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -736,7 +766,8 @@ static void test_const_nielsen_multi_char_solve() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -764,7 +795,8 @@ static void test_regex_char_split_basic() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); @@ -799,7 +831,8 @@ static void test_regex_char_split_solve_sat() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); @@ -823,7 +856,8 @@ static void test_regex_char_split_solve_multi_char() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); @@ -850,7 +884,8 @@ static void test_regex_char_split_union() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); @@ -878,7 +913,8 @@ static void test_regex_char_split_star_sat() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); @@ -903,7 +939,8 @@ static void test_regex_char_split_concat_str() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -932,7 +969,8 @@ static void test_regex_char_split_with_eq() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -959,7 +997,8 @@ static void test_regex_char_split_ground_skip() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); @@ -987,7 +1026,8 @@ static void test_var_nielsen_basic() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1010,7 +1050,8 @@ static void test_var_nielsen_same_var_det() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a = sg.mk_char('A'); @@ -1032,7 +1073,8 @@ static void test_var_nielsen_not_applicable_char() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* y = sg.mk_var(symbol("y")); @@ -1054,7 +1096,8 @@ static void test_var_nielsen_solve_sat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1076,7 +1119,8 @@ static void test_var_nielsen_solve_unsat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1098,7 +1142,8 @@ static void test_var_nielsen_commutation_sat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1120,7 +1165,7 @@ static void test_var_nielsen_priority() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1145,7 +1190,7 @@ static void test_generate_extensions_det_priority() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1169,7 +1214,7 @@ static void test_generate_extensions_no_applicable() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -1193,7 +1238,7 @@ static void test_generate_extensions_regex_only() { euf::sgraph sg(m, eg); seq_util seq(m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); // Build regex to_re("A") @@ -1223,7 +1268,7 @@ static void test_generate_extensions_mixed_det_first() { euf::sgraph sg(m, eg); seq_util seq(m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1257,7 +1302,7 @@ static void test_solve_empty_graph() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); SASSERT(!ng.root()); auto result = ng.solve(); SASSERT(result == seq::nielsen_graph::search_result::sat); @@ -1271,7 +1316,7 @@ static void test_solve_trivially_satisfied() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); ng.add_str_eq(x, x); auto result = ng.solve(); @@ -1286,7 +1331,7 @@ static void test_solve_node_status_unsat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); // A = B is an immediate conflict @@ -1308,7 +1353,7 @@ static void test_solve_conflict_deps() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); // Add two constraints: A = B (unsat) and a dummy x = x @@ -1372,7 +1417,7 @@ static void test_explain_conflict_single_eq() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); // eq[0]: A = B (conflict) @@ -1397,7 +1442,7 @@ static void test_explain_conflict_multi_eq() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); euf::snode* x = sg.mk_var(symbol("x")); @@ -1428,7 +1473,7 @@ static void test_solve_node_extended_flag() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* xy = sg.mk_concat(x, y); @@ -1455,7 +1500,7 @@ static void test_solve_mixed_eq_mem_sat() { euf::sgraph sg(m, eg); seq_util seq(m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* a = sg.mk_char('A'); @@ -1484,7 +1529,7 @@ static void test_solve_children_failed_reason() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* a = sg.mk_char('A'); @@ -1506,7 +1551,7 @@ static void test_solve_eval_idx_tracking() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a = sg.mk_char('A'); // x = A·x would be infinite without depth bound, but @@ -1538,7 +1583,8 @@ static void test_simplify_prefix_cancel() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); euf::snode* x = sg.mk_var(symbol("x")); @@ -1568,7 +1614,7 @@ static void test_simplify_symbol_clash() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); euf::snode* x = sg.mk_var(symbol("x")); @@ -1596,7 +1642,7 @@ static void test_simplify_empty_propagation() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* e = sg.mk_empty(); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1619,7 +1665,7 @@ static void test_simplify_empty_vs_char() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* e = sg.mk_empty(); euf::snode* a = sg.mk_char('A'); @@ -1641,7 +1687,7 @@ static void test_simplify_multi_pass_clash() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); euf::snode* c = sg.mk_char('C'); @@ -1667,7 +1713,7 @@ static void test_simplify_trivial_removal() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* e = sg.mk_empty(); @@ -1690,7 +1736,7 @@ static void test_simplify_all_trivial_satisfied() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* e = sg.mk_empty(); @@ -1712,7 +1758,7 @@ static void test_simplify_regex_infeasible() { euf::sgraph sg(m, eg); seq_util seq(m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* e = sg.mk_empty(); expr_ref ch_a(seq.str.mk_char('A'), m); @@ -1739,7 +1785,7 @@ static void test_simplify_nullable_removal() { euf::sgraph sg(m, eg); seq_util seq(m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* e = sg.mk_empty(); expr_ref ch_a(seq.str.mk_char('A'), m); @@ -1767,7 +1813,7 @@ static void test_simplify_brzozowski_sat() { euf::sgraph sg(m, eg); seq_util seq(m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* e = sg.mk_empty(); @@ -1793,7 +1839,7 @@ static void test_simplify_multiple_eqs() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1831,7 +1877,7 @@ static void test_det_cancel_child_eq() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -1853,7 +1899,7 @@ static void test_const_nielsen_child_substitutions() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); euf::snode* x = sg.mk_var(symbol("x")); @@ -1889,7 +1935,7 @@ static void test_var_nielsen_substitution_types() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -1919,7 +1965,7 @@ static void test_explain_conflict_mem_only() { euf::sgraph sg(m, eg); seq_util seq(m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* e = sg.mk_empty(); expr_ref ch_a(seq.str.mk_char('A'), m); @@ -1949,7 +1995,7 @@ static void test_explain_conflict_mixed_eq_mem() { euf::sgraph sg(m, eg); seq_util seq(m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); euf::snode* e = sg.mk_empty(); @@ -1989,7 +2035,7 @@ static void test_subsumption_pruning_unsat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -2011,7 +2057,7 @@ static void test_subsumption_reason_set() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); euf::snode* a = sg.mk_char('A'); @@ -2061,7 +2107,7 @@ static void test_length_constraints_basic() { euf::snode* lhs = sg.mk_concat(x, y); euf::snode* rhs = sg.mk_concat(a, b); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_eq(lhs, rhs); vector constraints; @@ -2096,7 +2142,7 @@ static void test_length_constraints_trivial_skip() { euf::snode* x = sg.mk_var(symbol("x")); // trivial equation: x = x (same snode) - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_eq(x, x); vector constraints; @@ -2115,7 +2161,7 @@ static void test_length_constraints_empty() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); vector constraints; ng.generate_length_constraints(constraints); @@ -2144,7 +2190,7 @@ static void test_length_constraints_concat_chain() { euf::snode* lhs = sg.mk_concat(sg.mk_concat(x, y), z); euf::snode* rhs = sg.mk_concat(sg.mk_concat(a, b), c); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_eq(lhs, rhs); vector constraints; @@ -2172,7 +2218,7 @@ static void test_length_constraints_multi_eq() { euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_eq(x, a); // x = A ng.add_str_eq(y, b); // y = B @@ -2205,7 +2251,7 @@ static void test_length_constraints_shared_var() { euf::snode* lhs = sg.mk_concat(x, a); euf::snode* rhs = sg.mk_concat(a, x); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_eq(lhs, rhs); vector constraints; @@ -2230,7 +2276,7 @@ static void test_length_constraints_deps() { euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a = sg.mk_char('A'); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_eq(x, a); // eq index 0 vector constraints; @@ -2260,7 +2306,7 @@ static void test_length_constraints_empty_side() { euf::snode* e = sg.mk_empty(); // x = ε - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_eq(x, e); vector constraints; @@ -2294,7 +2340,7 @@ static void test_length_kind_tagging() { euf::snode* a = sg.mk_char('A'); // equation: x = a (one eq + one nonneg) - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_eq(x, a); // membership: y in to_re("AB") (bounds + nonneg) @@ -2352,7 +2398,7 @@ static void test_power_epsilon_no_power() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a = sg.mk_char('A'); @@ -2376,7 +2422,7 @@ static void test_num_cmp_no_power() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -2401,7 +2447,7 @@ static void test_star_intr_no_backedge() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); expr_ref ch_a(seq.str.mk_char('A'), m); @@ -2434,7 +2480,7 @@ static void test_star_intr_with_backedge() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); expr_ref ch_a(seq.str.mk_char('A'), m); @@ -2473,7 +2519,7 @@ static void test_gpower_intr_self_cycle() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a1 = sg.mk_char('A'); @@ -2501,7 +2547,7 @@ static void test_gpower_intr_no_cycle() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -2531,7 +2577,7 @@ static void test_regex_var_split_basic() { seq_util seq(m); sort_ref str_sort(seq.str.mk_string_sort(), m); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); @@ -2566,7 +2612,7 @@ static void test_power_split_no_power() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -2592,7 +2638,7 @@ static void test_var_num_unwinding_no_power() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -2615,7 +2661,7 @@ static void test_const_num_unwinding_no_power() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* b = sg.mk_char('B'); @@ -2642,7 +2688,7 @@ static void test_priority_chain_order() { { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -2659,7 +2705,7 @@ static void test_priority_chain_order() { { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* y = sg.mk_var(symbol("y")); @@ -2675,7 +2721,7 @@ static void test_priority_chain_order() { { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* a = sg.mk_char('A'); euf::snode* y = sg.mk_var(symbol("y")); @@ -2696,7 +2742,7 @@ static void test_gpower_intr_solve_sat() { euf::egraph eg(m); euf::sgraph sg(m, eg); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); euf::snode* x = sg.mk_var(symbol("x")); euf::snode* a1 = sg.mk_char('A'); @@ -2734,7 +2780,7 @@ static void test_parikh_exact_length() { expr_ref to_re_ab(seq.re.mk_to_re(ab), m); euf::snode* regex = sg.mk(to_re_ab); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_mem(x, regex); vector constraints; @@ -2781,7 +2827,7 @@ static void test_parikh_star_unbounded() { expr_ref re_star(seq.re.mk_star(to_re_a), m); euf::snode* regex = sg.mk(re_star); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_mem(x, regex); vector constraints; @@ -2836,7 +2882,7 @@ static void test_parikh_union_interval() { expr_ref re_union(seq.re.mk_union(to_re_ab, to_re_cde), m); euf::snode* regex = sg.mk(re_union); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_mem(x, regex); vector constraints; @@ -2875,7 +2921,7 @@ static void test_parikh_loop_bounded() { expr_ref re_loop(seq.re.mk_loop(to_re_a, 3, 5), m); euf::snode* regex = sg.mk(re_loop); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_mem(x, regex); vector constraints; @@ -2911,7 +2957,7 @@ static void test_parikh_empty_regex() { expr_ref re_empty(seq.re.mk_empty(seq.re.mk_re(str_sort)), m); euf::snode* regex = sg.mk(re_empty); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_mem(x, regex); vector constraints; @@ -2950,7 +2996,7 @@ static void test_parikh_full_char() { expr_ref re_range(seq.re.mk_range(unit_a, unit_z), m); euf::snode* regex = sg.mk(re_range); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_mem(x, regex); vector constraints; @@ -2986,7 +3032,7 @@ static void test_parikh_mixed_eq_mem() { euf::snode* a = sg.mk_char('A'); // equation: x = A - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_eq(x, a); // membership: y in to_re("BC") @@ -3034,7 +3080,7 @@ static void test_parikh_full_seq_no_bounds() { expr_ref re_all(seq.re.mk_full_seq(str_sort), m); euf::snode* regex = sg.mk(re_all); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_mem(x, regex); vector constraints; @@ -3071,7 +3117,7 @@ static void test_parikh_dep_tracking() { expr_ref to_re_a(seq.re.mk_to_re(unit_a), m); euf::snode* regex = sg.mk(to_re_a); - seq::nielsen_graph ng(sg); + dummy_simple_solver solver; seq::nielsen_graph ng(sg, solver); ng.add_str_mem(x, regex); vector constraints; @@ -3087,6 +3133,390 @@ static void test_parikh_dep_tracking() { std::cout << " all constraints have non-empty deps\n"; } +// ----------------------------------------------------------------------- +// IntBounds / VarBoundWatcher tests (Task: IntBounds/Constraint.Shared) +// ----------------------------------------------------------------------- + +// tracking solver: records assert_expr calls for inspection +class tracking_solver : public seq::simple_solver { +public: + vector asserted; + ast_manager& m; + unsigned push_count = 0; + unsigned pop_count = 0; + lbool check_result = l_true; + + tracking_solver(ast_manager& m) : m(m) {} + void push() override { ++push_count; } + void pop(unsigned n) override { pop_count += n; } + void assert_expr(expr* e) override { asserted.push_back(expr_ref(e, m)); } + lbool check() override { return check_result; } + void reset_tracking() { + asserted.reset(); + push_count = 0; + pop_count = 0; + } +}; + +// test add_lower_int_bound: basic tightening adds int_constraint +static void test_add_lower_int_bound_basic() { + std::cout << "test_add_lower_int_bound_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + euf::snode* x = sg.mk_var(symbol("x")); + + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); + ng.add_str_eq(x, x); // create root node + + seq::nielsen_node* node = ng.root(); + seq::dep_tracker dep; + + // initially no bounds + SASSERT(node->var_lb(x) == 0); + SASSERT(node->var_ub(x) == UINT_MAX); + SASSERT(node->int_constraints().empty()); + + // add lower bound lb=3: should tighten and add constraint + bool tightened = node->add_lower_int_bound(x, 3, dep); + SASSERT(tightened); + SASSERT(node->var_lb(x) == 3); + SASSERT(node->int_constraints().size() == 1); + SASSERT(node->int_constraints()[0].m_kind == seq::int_constraint_kind::ge); + + // add weaker lb=2: no tightening + tightened = node->add_lower_int_bound(x, 2, dep); + SASSERT(!tightened); + SASSERT(node->var_lb(x) == 3); + SASSERT(node->int_constraints().size() == 1); + + // add tighter lb=5: should tighten and add another constraint + tightened = node->add_lower_int_bound(x, 5, dep); + SASSERT(tightened); + SASSERT(node->var_lb(x) == 5); + SASSERT(node->int_constraints().size() == 2); + + std::cout << " ok\n"; +} + +// test add_upper_int_bound: basic tightening adds int_constraint +static void test_add_upper_int_bound_basic() { + std::cout << "test_add_upper_int_bound_basic\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); + ng.add_str_eq(x, x); + + seq::nielsen_node* node = ng.root(); + seq::dep_tracker dep; + + SASSERT(node->var_ub(x) == UINT_MAX); + + // add upper bound ub=10: tightens + bool tightened = node->add_upper_int_bound(x, 10, dep); + SASSERT(tightened); + SASSERT(node->var_ub(x) == 10); + SASSERT(node->int_constraints().size() == 1); + SASSERT(node->int_constraints()[0].m_kind == seq::int_constraint_kind::le); + + // add weaker ub=20: no tightening + tightened = node->add_upper_int_bound(x, 20, dep); + SASSERT(!tightened); + SASSERT(node->var_ub(x) == 10); + SASSERT(node->int_constraints().size() == 1); + + // add tighter ub=5: tightens + tightened = node->add_upper_int_bound(x, 5, dep); + SASSERT(tightened); + SASSERT(node->var_ub(x) == 5); + SASSERT(node->int_constraints().size() == 2); + + std::cout << " ok\n"; +} + +// test add_lower_int_bound: conflict when lb > ub +static void test_add_bound_lb_gt_ub_conflict() { + std::cout << "test_add_bound_lb_gt_ub_conflict\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); + ng.add_str_eq(x, x); + + seq::nielsen_node* node = ng.root(); + seq::dep_tracker dep; + + // set ub=3 first + node->add_upper_int_bound(x, 3, dep); + SASSERT(!node->is_general_conflict()); + + // now set lb=5 > ub=3: should trigger conflict + node->add_lower_int_bound(x, 5, dep); + SASSERT(node->is_general_conflict()); + SASSERT(node->reason() == seq::backtrack_reason::arithmetic); + + std::cout << " ok\n"; +} + +// test clone_from: child inherits parent bounds +static void test_bounds_cloned() { + std::cout << "test_bounds_cloned\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); + ng.add_str_eq(x, y); + + seq::nielsen_node* parent = ng.root(); + seq::dep_tracker dep; + + // set bounds on parent + parent->add_lower_int_bound(x, 2, dep); + parent->add_upper_int_bound(x, 7, dep); + parent->add_lower_int_bound(y, 1, dep); + + // clone to child + seq::nielsen_node* child = ng.mk_child(parent); + + // child should have same bounds + SASSERT(child->var_lb(x) == 2); + SASSERT(child->var_ub(x) == 7); + SASSERT(child->var_lb(y) == 1); + SASSERT(child->var_ub(y) == UINT_MAX); + + // child's int_constraints should also be cloned (3 constraints: lb_x, ub_x, lb_y) + SASSERT(child->int_constraints().size() == parent->int_constraints().size()); + + std::cout << " ok\n"; +} + +// test VarBoundWatcher: substitution x→a·y propagates bounds from x to y +static void test_var_bound_watcher_single_var() { + std::cout << "test_var_bound_watcher_single_var\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* a = sg.mk_char('a'); + + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); + ng.add_str_eq(x, y); + + seq::nielsen_node* node = ng.root(); + seq::dep_tracker dep; + + // set bounds: 3 <= len(x) <= 7 + node->add_lower_int_bound(x, 3, dep); + node->add_upper_int_bound(x, 7, dep); + node->int_constraints().reset(); // clear for clean count + + // apply substitution x → a·y + euf::snode* ay = sg.mk_concat(a, y); + seq::nielsen_subst s(x, ay, dep); + node->apply_subst(sg, s); + + // VarBoundWatcher should propagate: 3 <= 1+len(y) <= 7 + // => len(y) >= 2, len(y) <= 6 + SASSERT(node->var_lb(y) == 2); + SASSERT(node->var_ub(y) == 6); + + std::cout << " ok\n"; +} + +// test VarBoundWatcher: substitution with all-concrete replacement detects conflict +static void test_var_bound_watcher_conflict() { + std::cout << "test_var_bound_watcher_conflict\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('a'); + euf::snode* b = sg.mk_char('b'); + + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); + ng.add_str_eq(x, a); + + seq::nielsen_node* node = ng.root(); + seq::dep_tracker dep; + + // set bounds: 3 <= len(x) (so x must have at least 3 chars) + node->add_lower_int_bound(x, 3, dep); + node->int_constraints().reset(); + + // apply substitution x → a·b (const_len=2 < lb=3) + euf::snode* ab = sg.mk_concat(a, b); + seq::nielsen_subst s(x, ab, dep); + node->apply_subst(sg, s); + + // should detect conflict: len(x) >= 3 but replacement has len=2 + SASSERT(node->is_general_conflict()); + SASSERT(node->reason() == seq::backtrack_reason::arithmetic); + + std::cout << " ok\n"; +} + +// test init_var_bounds_from_mems: simplify_and_init adds Parikh bounds +static void test_simplify_adds_parikh_bounds() { + std::cout << "test_simplify_adds_parikh_bounds\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + euf::snode* x = sg.mk_var(symbol("x")); + + // create regex: to_re("AB") — exactly 2 chars + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref re_ab(seq.re.mk_concat(seq.re.mk_to_re(unit_a), seq.re.mk_to_re(unit_b)), m); + euf::snode* regex = sg.mk(re_ab); + + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); + ng.add_str_mem(x, regex); + + seq::nielsen_node* node = ng.root(); + + // simplify_and_init should call init_var_bounds_from_mems + seq::simplify_result sr = node->simplify_and_init(ng); + (void)sr; + + // x ∈ to_re("AB") has min_len=2, max_len=2 + // so lb=2, ub=2 should be set on x + SASSERT(node->var_lb(x) == 2); + SASSERT(node->var_ub(x) == 2); + + std::cout << " ok\n"; +} + +// test assert_root_constraints_to_solver: root constraints are forwarded +static void test_assert_root_constraints_to_solver() { + std::cout << "test_assert_root_constraints_to_solver\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* a = sg.mk_char('a'); + euf::snode* b = sg.mk_char('b'); + euf::snode* ab = sg.mk_concat(a, b); + + tracking_solver ts(m); + seq::nielsen_graph ng(sg, ts); + // equation: x = a·b → generates len(x) = 2 and len(x) >= 0 + ng.add_str_eq(x, ab); + + // solve() calls assert_root_constraints_to_solver() internally + ts.reset_tracking(); + ng.solve(); + + // should have asserted at least: len(x) = 2, len(x) >= 0 + SASSERT(ts.asserted.size() >= 2); + std::cout << " asserted " << ts.asserted.size() << " root constraints to solver\n"; + for (auto& e : ts.asserted) + std::cout << " " << mk_pp(e, m) << "\n"; + + std::cout << " ok\n"; +} + +// test assert_root_constraints_to_solver: called only once even across iterations +static void test_assert_root_constraints_once() { + std::cout << "test_assert_root_constraints_once\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + + tracking_solver ts(m); + seq::nielsen_graph ng(sg, ts); + ng.add_str_eq(x, y); + + // solve is called (iterative deepening runs multiple iterations) + ng.solve(); + unsigned count_first = ts.asserted.size(); + + // after reset, assert count should be 0 then non-zero again + // (reset clears m_root_constraints_asserted) + // We can't call solve() again on the same graph without reset, but + // we can verify the count is stable between iterations by checking + // that the same constraints weren't added multiple times. + // The simplest check: count > 0 (constraints were asserted) + SASSERT(count_first > 0); // x=y produces at least len(x)=len(y) and non-neg constraints + std::cout << " asserted " << count_first << " constraints total during solve\n"; + std::cout << " ok\n"; +} + +// test VarBoundWatcher with multiple variables in replacement +static void test_var_bound_watcher_multi_var() { + std::cout << "test_var_bound_watcher_multi_var\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + + euf::snode* x = sg.mk_var(symbol("x")); + euf::snode* y = sg.mk_var(symbol("y")); + euf::snode* z = sg.mk_var(symbol("z")); + + dummy_simple_solver solver; + seq::nielsen_graph ng(sg, solver); + ng.add_str_eq(x, y); + + seq::nielsen_node* node = ng.root(); + seq::dep_tracker dep; + + // set upper bound: len(x) <= 5 + node->add_upper_int_bound(x, 5, dep); + node->int_constraints().reset(); + + // apply substitution x → y·z (two vars, no constants) + euf::snode* yz = sg.mk_concat(y, z); + seq::nielsen_subst s(x, yz, dep); + node->apply_subst(sg, s); + + // len(y·z) <= 5 → each of y, z gets ub=5 + SASSERT(node->var_ub(y) == 5); + SASSERT(node->var_ub(z) == 5); + + std::cout << " ok\n"; +} + void tst_seq_nielsen() { test_dep_tracker(); test_str_eq(); @@ -3194,4 +3624,15 @@ void tst_seq_nielsen() { test_parikh_mixed_eq_mem(); test_parikh_full_seq_no_bounds(); test_parikh_dep_tracking(); + // IntBounds / VarBoundWatcher / Constraint.Shared tests + test_add_lower_int_bound_basic(); + test_add_upper_int_bound_basic(); + test_add_bound_lb_gt_ub_conflict(); + test_bounds_cloned(); + test_var_bound_watcher_single_var(); + test_var_bound_watcher_conflict(); + test_simplify_adds_parikh_bounds(); + test_assert_root_constraints_to_solver(); + test_assert_root_constraints_once(); + test_var_bound_watcher_multi_var(); } diff --git a/src/test/seq_parikh.cpp b/src/test/seq_parikh.cpp new file mode 100644 index 000000000..83bbc3da4 --- /dev/null +++ b/src/test/seq_parikh.cpp @@ -0,0 +1,883 @@ +/*++ +Copyright (c) 2026 Microsoft Corporation + +Module Name: + + seq_parikh.cpp + +Abstract: + + Unit tests for seq_parikh (Parikh image filter for the ZIPT Nielsen solver). + + Tests cover: + - compute_length_stride / get_length_stride for all regex forms + - generate_parikh_constraints: constraint shape, count, and dependency + - apply_to_node: integration with nielsen_node + - check_parikh_conflict: lightweight feasibility pre-check + - minterm_to_char_set: regex-minterm to char_set conversion + +Author: + + Clemens Eisenhofer 2026-03-11 + Nikolaj Bjorner (nbjorner) 2026-03-11 + +--*/ + +#include "util/util.h" +#include "util/zstring.h" +#include "ast/euf/euf_egraph.h" +#include "ast/euf/euf_sgraph.h" +#include "smt/seq/seq_nielsen.h" +#include "smt/seq/seq_parikh.h" +#include "ast/arith_decl_plugin.h" +#include "ast/reg_decl_plugins.h" +#include "ast/ast_pp.h" +#include + +// --------------------------------------------------------------------------- +// Minimal solver stub (no-op) +// --------------------------------------------------------------------------- +class parikh_test_solver : public seq::simple_solver { +public: + void push() override {} + void pop(unsigned) override {} + void assert_expr(expr*) override {} + lbool check() override { return l_true; } +}; + +// --------------------------------------------------------------------------- +// Helpers to build common regex expressions +// --------------------------------------------------------------------------- + +// build to_re("AB") — a fixed two-character string regex +static expr_ref mk_to_re_ab(ast_manager& m, seq_util& seq) { + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref ab(seq.str.mk_concat(unit_a, unit_b), m); + return expr_ref(seq.re.mk_to_re(ab), m); +} + +// build (ab)* — star of the two-character sequence +static expr_ref mk_ab_star(ast_manager& m, seq_util& seq) { + return expr_ref(seq.re.mk_star(mk_to_re_ab(m, seq)), m); +} + +// build (abc)* — star of a three-character sequence +static expr_ref mk_abc_star(ast_manager& m, seq_util& seq) { + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref ch_b(seq.str.mk_char('B'), m); + expr_ref ch_c(seq.str.mk_char('C'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref unit_b(seq.str.mk_unit(ch_b), m); + expr_ref unit_c(seq.str.mk_unit(ch_c), m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + expr_ref abc(seq.str.mk_concat(unit_a, seq.str.mk_concat(unit_b, unit_c)), m); + return expr_ref(seq.re.mk_star(seq.re.mk_to_re(abc)), m); +} + +// build to_re("A") — single-character regex +static expr_ref mk_to_re_a(ast_manager& m, seq_util& seq) { + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + return expr_ref(seq.re.mk_to_re(unit_a), m); +} + +// --------------------------------------------------------------------------- +// Stride tests: compute_length_stride / get_length_stride +// --------------------------------------------------------------------------- + +// stride(to_re("AB")) == 0 (fixed length) +static void test_stride_fixed_length() { + std::cout << "test_stride_fixed_length\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + expr_ref re = mk_to_re_ab(m, seq); + SASSERT(parikh.get_length_stride(re) == 0); +} + +// stride((ab)*) == 2 +static void test_stride_star_fixed_body() { + std::cout << "test_stride_star_fixed_body\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + expr_ref re = mk_ab_star(m, seq); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride((ab)*) = " << stride << "\n"; + SASSERT(stride == 2); +} + +// stride((abc)*) == 3 +static void test_stride_star_three_char() { + std::cout << "test_stride_star_three_char\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + expr_ref re = mk_abc_star(m, seq); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride((abc)*) = " << stride << "\n"; + SASSERT(stride == 3); +} + +// stride((ab)+) == 2 +static void test_stride_plus() { + std::cout << "test_stride_plus\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + expr_ref re_body = mk_to_re_ab(m, seq); + expr_ref re(seq.re.mk_plus(re_body), m); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride((ab)+) = " << stride << "\n"; + SASSERT(stride == 2); +} + +// stride(a* b*) == 1 — union of independent stars → every length possible +static void test_stride_concat_stars() { + std::cout << "test_stride_concat_stars\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + expr_ref a_star(seq.re.mk_star(mk_to_re_a(m, seq)), m); + expr_ref b_star(seq.re.mk_star(mk_to_re_a(m, seq)), m); + expr_ref re(seq.re.mk_concat(a_star, b_star), m); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride(a* b*) = " << stride << "\n"; + // both stars have stride 1 (single-char body → gcd(1,0)=1) → gcd(1,1)=1 + SASSERT(stride == 1); +} + +// stride((ab)* | (abc)*) == gcd(2,3) = 1 +static void test_stride_union_no_common_period() { + std::cout << "test_stride_union_no_common_period\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + expr_ref ab_star = mk_ab_star(m, seq); + expr_ref abc_star = mk_abc_star(m, seq); + expr_ref re(seq.re.mk_union(ab_star, abc_star), m); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride((ab)*|(abc)*) = " << stride << "\n"; + // lengths: {0,2,4,...} union {0,3,6,...} → GCD(2,3)=1 + SASSERT(stride == 1); +} + +// stride((ab)*|(de)*) == gcd(2,2) = 2 +static void test_stride_union_same_period() { + std::cout << "test_stride_union_same_period\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + expr_ref ab_star = mk_ab_star(m, seq); + // de_star: (de)* — same stride 2 + expr_ref ch_d(seq.str.mk_char('D'), m); + expr_ref ch_e(seq.str.mk_char('E'), m); + expr_ref unit_d(seq.str.mk_unit(ch_d), m); + expr_ref unit_e(seq.str.mk_unit(ch_e), m); + expr_ref de(seq.str.mk_concat(unit_d, unit_e), m); + expr_ref de_star(seq.re.mk_star(seq.re.mk_to_re(de)), m); + expr_ref re(seq.re.mk_union(ab_star, de_star), m); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride((ab)*|(de)*) = " << stride << "\n"; + SASSERT(stride == 2); +} + +// stride(loop((ab), 1, 3)) == 2 — loop with fixed-length body +static void test_stride_loop() { + std::cout << "test_stride_loop\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + expr_ref ab = mk_to_re_ab(m, seq); + expr_ref re(seq.re.mk_loop(ab, 1, 3), m); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride(loop(ab,1,3)) = " << stride << "\n"; + SASSERT(stride == 2); +} + +// stride(re.full_seq) == 1 (every length possible) +static void test_stride_full_seq() { + std::cout << "test_stride_full_seq\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref re(seq.re.mk_full_seq(str_sort), m); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride(.*) = " << stride << "\n"; + SASSERT(stride == 1); +} + +// stride(re.empty) == 0 +static void test_stride_empty_regex() { + std::cout << "test_stride_empty_regex\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref re(seq.re.mk_empty(str_sort), m); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride(empty) = " << stride << "\n"; + SASSERT(stride == 0); +} + +// stride(re.epsilon) == 0 +static void test_stride_epsilon() { + std::cout << "test_stride_epsilon\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + // epsilon is to_re("") — empty string + sort_ref str_s(seq.str.mk_string_sort(), m); + expr_ref empty_str(seq.str.mk_empty(str_s), m); + expr_ref re(seq.re.mk_to_re(empty_str), m); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride(epsilon) = " << stride << "\n"; + SASSERT(stride == 0); +} + +// stride((ab)?) == 2 (gcd(2, 0) = 2 via opt handling; min_len(ab)=2) +static void test_stride_opt() { + std::cout << "test_stride_opt\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + expr_ref ab = mk_to_re_ab(m, seq); + expr_ref re(seq.re.mk_opt(ab), m); + unsigned stride = parikh.get_length_stride(re); + std::cout << " stride((ab)?) = " << stride << "\n"; + SASSERT(stride == 2); +} + +// --------------------------------------------------------------------------- +// generate_parikh_constraints tests +// --------------------------------------------------------------------------- + +// (ab)* → len(x) = 0 + 2·k, k ≥ 0 (stride 2, min_len 0) +static void test_generate_constraints_ab_star() { + std::cout << "test_generate_constraints_ab_star\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + arith_util arith(m); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re = mk_ab_star(m, seq); + euf::snode* regex = sg.mk(re); + seq::dep_tracker dep; dep.insert(0); + seq::str_mem mem(x, regex, nullptr, 0, dep); + + vector out; + parikh.generate_parikh_constraints(mem, out); + + // expect at least: len(x)=0+2k and k>=0 + // (no upper bound because max_length is UINT_MAX for unbounded star) + std::cout << " generated " << out.size() << " constraints\n"; + SASSERT(out.size() >= 2); + + // check that one constraint is an equality (len(x) = 0 + 2·k) + bool has_eq = false; + for (auto const& ic : out) + if (ic.m_kind == seq::int_constraint_kind::eq) has_eq = true; + SASSERT(has_eq); + + // check that one constraint is k >= 0 + bool has_ge = false; + for (auto const& ic : out) + if (ic.m_kind == seq::int_constraint_kind::ge) has_ge = true; + SASSERT(has_ge); + + // should NOT have an upper bound (star is unbounded) + bool has_le = false; + for (auto const& ic : out) + if (ic.m_kind == seq::int_constraint_kind::le) has_le = true; + SASSERT(!has_le); +} + +// loop((ab), 1, 3): bounded → k ≤ floor((6-2)/2) = 2 +static void test_generate_constraints_bounded_loop() { + std::cout << "test_generate_constraints_bounded_loop\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + // loop("ab", 1, 3): min_len=2, max_len=6, stride=2 + expr_ref ab = mk_to_re_ab(m, seq); + expr_ref re(seq.re.mk_loop(ab, 1, 3), m); + euf::snode* regex = sg.mk(re); + seq::dep_tracker dep; dep.insert(0); + seq::str_mem mem(x, regex, nullptr, 0, dep); + + vector out; + parikh.generate_parikh_constraints(mem, out); + + // expect: eq + ge + le = 3 constraints + std::cout << " generated " << out.size() << " constraints\n"; + SASSERT(out.size() == 3); + + bool has_eq = false, has_ge = false, has_le = false; + for (auto const& ic : out) { + if (ic.m_kind == seq::int_constraint_kind::eq) has_eq = true; + if (ic.m_kind == seq::int_constraint_kind::ge) has_ge = true; + if (ic.m_kind == seq::int_constraint_kind::le) has_le = true; + } + SASSERT(has_eq); + SASSERT(has_ge); + SASSERT(has_le); +} + +// stride == 1 → no constraints generated +static void test_generate_constraints_stride_one() { + std::cout << "test_generate_constraints_stride_one\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + euf::snode* x = sg.mk_var(symbol("x")); + // full_seq: stride=1 → no modular constraint + expr_ref re(seq.re.mk_full_seq(str_sort), m); + euf::snode* regex = sg.mk(re); + seq::dep_tracker dep; dep.insert(0); + seq::str_mem mem(x, regex, nullptr, 0, dep); + + vector out; + parikh.generate_parikh_constraints(mem, out); + std::cout << " generated " << out.size() << " constraints (expect 0)\n"; + SASSERT(out.empty()); +} + +// fixed-length regex (min == max) → no constraints generated +static void test_generate_constraints_fixed_length() { + std::cout << "test_generate_constraints_fixed_length\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re = mk_to_re_ab(m, seq); // fixed len 2 + euf::snode* regex = sg.mk(re); + seq::dep_tracker dep; dep.insert(0); + seq::str_mem mem(x, regex, nullptr, 0, dep); + + vector out; + parikh.generate_parikh_constraints(mem, out); + std::cout << " generated " << out.size() << " constraints (expect 0)\n"; + SASSERT(out.empty()); +} + +// dependency is propagated to all generated constraints +static void test_generate_constraints_dep_propagated() { + std::cout << "test_generate_constraints_dep_propagated\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re = mk_ab_star(m, seq); + euf::snode* regex = sg.mk(re); + seq::dep_tracker dep; dep.insert(7); + seq::str_mem mem(x, regex, nullptr, 0, dep); + + vector out; + parikh.generate_parikh_constraints(mem, out); + + // all generated constraints must carry bit 7 in their dependency + for (auto const& ic : out) { + SASSERT(!ic.m_dep.empty()); + seq::dep_tracker d7; d7.insert(7); + SASSERT(d7.subset_of(ic.m_dep)); + } + std::cout << " all constraints carry dep bit 7\n"; +} + +// --------------------------------------------------------------------------- +// apply_to_node tests +// --------------------------------------------------------------------------- + +// applying to a node with one membership adds constraints to node +static void test_apply_to_node_adds_constraints() { + std::cout << "test_apply_to_node_adds_constraints\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + parikh_test_solver solver; + seq::nielsen_graph ng(sg, solver); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re = mk_ab_star(m, seq); // stride 2 → generates constraints + euf::snode* regex = sg.mk(re); + ng.add_str_mem(x, regex); + + // root node should have no int_constraints initially + SASSERT(ng.root() != nullptr); + unsigned before = ng.root()->int_constraints().size(); + + parikh.apply_to_node(*ng.root()); + + unsigned after = ng.root()->int_constraints().size(); + std::cout << " before=" << before << " after=" << after << "\n"; + SASSERT(after > before); +} + +// applying twice is idempotent (m_parikh_applied would prevent double-add +// via nielsen_graph::apply_parikh_to_node, but seq_parikh::apply_to_node +// itself does not guard — so calling apply_to_node directly adds again; +// this test verifies the direct call does add, not the idempotency guard) +static void test_apply_to_node_stride_one_no_constraints() { + std::cout << "test_apply_to_node_stride_one_no_constraints\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + parikh_test_solver solver; + seq::nielsen_graph ng(sg, solver); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re(seq.re.mk_full_seq(str_sort), m); // stride 1 → no constraints + euf::snode* regex = sg.mk(re); + ng.add_str_mem(x, regex); + + unsigned before = ng.root()->int_constraints().size(); + parikh.apply_to_node(*ng.root()); + unsigned after = ng.root()->int_constraints().size(); + std::cout << " before=" << before << " after=" << after << " (expect no change)\n"; + SASSERT(after == before); +} + +// --------------------------------------------------------------------------- +// check_parikh_conflict tests +// --------------------------------------------------------------------------- + +// no conflict when var_lb=0, var_ub=UINT_MAX (unconstrained) +static void test_check_conflict_unconstrained_no_conflict() { + std::cout << "test_check_conflict_unconstrained_no_conflict\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + parikh_test_solver solver; + seq::nielsen_graph ng(sg, solver); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re = mk_ab_star(m, seq); // stride 2, min_len 0 + euf::snode* regex = sg.mk(re); + ng.add_str_mem(x, regex); + + // no bounds set → default lb=0, ub=UINT_MAX → no conflict + bool conflict = parikh.check_parikh_conflict(*ng.root()); + std::cout << " conflict = " << conflict << " (expect 0)\n"; + SASSERT(!conflict); +} + +// conflict: lb=3, ub=5, stride=2, min_len=0 +// valid lengths: 0,2,4,6,... ∩ [3,5] = {4} → no conflict +static void test_check_conflict_valid_k_exists() { + std::cout << "test_check_conflict_valid_k_exists\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + parikh_test_solver solver; + seq::nielsen_graph ng(sg, solver); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re = mk_ab_star(m, seq); // stride 2, min_len 0; lengths 0,2,4,... + euf::snode* regex = sg.mk(re); + ng.add_str_mem(x, regex); + + // lb=3, ub=5: length 4 is achievable (k=2) → no conflict + seq::dep_tracker dep; dep.insert(0); + ng.root()->add_lower_int_bound(x, 3, dep); + ng.root()->add_upper_int_bound(x, 5, dep); + + bool conflict = parikh.check_parikh_conflict(*ng.root()); + std::cout << " conflict = " << conflict << " (expect 0)\n"; + SASSERT(!conflict); +} + +// conflict: lb=3, ub=3, stride=2, min_len=0 +// valid lengths: {0,2,4,...} ∩ [3,3] = {} → conflict +static void test_check_conflict_no_valid_k() { + std::cout << "test_check_conflict_no_valid_k\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + parikh_test_solver solver; + seq::nielsen_graph ng(sg, solver); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re = mk_ab_star(m, seq); // stride 2, min_len 0; lengths {0,2,4,...} + euf::snode* regex = sg.mk(re); + ng.add_str_mem(x, regex); + + // lb=3, ub=3: only odd length 3 — never a multiple of 2 → conflict + seq::dep_tracker dep; dep.insert(0); + ng.root()->add_lower_int_bound(x, 3, dep); + ng.root()->add_upper_int_bound(x, 3, dep); + + bool conflict = parikh.check_parikh_conflict(*ng.root()); + std::cout << " conflict = " << conflict << " (expect 1)\n"; + SASSERT(conflict); +} + +// conflict: lb=5, ub=5, stride=3, min_len=0 +// valid lengths of (abc)*: {0,3,6,...} ∩ {5} = {} → conflict +static void test_check_conflict_abc_star() { + std::cout << "test_check_conflict_abc_star\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + parikh_test_solver solver; + seq::nielsen_graph ng(sg, solver); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re = mk_abc_star(m, seq); // stride 3, min_len 0; lengths {0,3,6,...} + euf::snode* regex = sg.mk(re); + ng.add_str_mem(x, regex); + + // lb=5, ub=5 → no valid k (5 is not a multiple of 3) → conflict + seq::dep_tracker dep; dep.insert(0); + ng.root()->add_lower_int_bound(x, 5, dep); + ng.root()->add_upper_int_bound(x, 5, dep); + + bool conflict = parikh.check_parikh_conflict(*ng.root()); + std::cout << " conflict = " << conflict << " (expect 1)\n"; + SASSERT(conflict); +} + +// no conflict for stride==1 regex even with narrow bounds +static void test_check_conflict_stride_one_never_conflicts() { + std::cout << "test_check_conflict_stride_one_never_conflicts\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + sort_ref str_sort(seq.str.mk_string_sort(), m); + parikh_test_solver solver; + seq::nielsen_graph ng(sg, solver); + seq::seq_parikh parikh(sg); + + euf::snode* x = sg.mk_var(symbol("x")); + expr_ref re(seq.re.mk_full_seq(str_sort), m); // stride 1 → no constraint + euf::snode* regex = sg.mk(re); + ng.add_str_mem(x, regex); + + seq::dep_tracker dep; dep.insert(0); + ng.root()->add_lower_int_bound(x, 7, dep); + ng.root()->add_upper_int_bound(x, 7, dep); + + bool conflict = parikh.check_parikh_conflict(*ng.root()); + std::cout << " conflict = " << conflict << " (expect 0: stride=1 skipped)\n"; + SASSERT(!conflict); +} + +// --------------------------------------------------------------------------- +// minterm_to_char_set tests +// --------------------------------------------------------------------------- + +// re.full_char → full alphabet [0, max_char] +static void test_minterm_full_char() { + std::cout << "test_minterm_full_char\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref re(seq.re.mk_full_char(str_sort), m); + char_set cs = parikh.minterm_to_char_set(re); + std::cout << " full_char char_count = " << cs.char_count() << "\n"; + SASSERT(cs.is_full(seq.max_char())); +} + +// re.empty → empty char_set +static void test_minterm_empty_regex() { + std::cout << "test_minterm_empty_regex\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref re(seq.re.mk_empty(str_sort), m); + char_set cs = parikh.minterm_to_char_set(re); + std::cout << " empty regex → char_set empty: " << cs.is_empty() << "\n"; + SASSERT(cs.is_empty()); +} + +// re.range('A','Z') → 26 characters +static void test_minterm_range() { + std::cout << "test_minterm_range\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + + // Z3 re.range takes string arguments "A" and "Z" + expr_ref lo_str(seq.str.mk_string(zstring("A")), m); + expr_ref hi_str(seq.str.mk_string(zstring("Z")), m); + expr_ref re(seq.re.mk_range(lo_str, hi_str), m); + char_set cs = parikh.minterm_to_char_set(re); + std::cout << " range(A,Z) char_count = " << cs.char_count() << "\n"; + SASSERT(cs.char_count() == 26); + SASSERT(cs.contains('A')); + SASSERT(cs.contains('Z')); + SASSERT(!cs.contains('a')); +} + +// complement of re.range('A','Z') should not contain A-Z +static void test_minterm_complement() { + std::cout << "test_minterm_complement\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + sort_ref str_sort(seq.str.mk_string_sort(), m); + + expr_ref lo_str(seq.str.mk_string(zstring("A")), m); + expr_ref hi_str(seq.str.mk_string(zstring("Z")), m); + expr_ref range(seq.re.mk_range(lo_str, hi_str), m); + expr_ref re(seq.re.mk_complement(range), m); + char_set cs = parikh.minterm_to_char_set(re); + + // complement of [A-Z] should not contain any letter in A-Z + for (unsigned c = 'A'; c <= 'Z'; ++c) + SASSERT(!cs.contains(c)); + // but should contain e.g. '0' + SASSERT(cs.contains('0')); + std::cout << " complement ok: A-Z excluded, '0' included\n"; +} + +// intersection of range('A','Z') and range('M','Z') == range('M','Z') +static void test_minterm_intersection() { + std::cout << "test_minterm_intersection\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + + expr_ref lo_az(seq.str.mk_string(zstring("A")), m); + expr_ref hi_az(seq.str.mk_string(zstring("Z")), m); + expr_ref lo_mz(seq.str.mk_string(zstring("M")), m); + + expr_ref range_az(seq.re.mk_range(lo_az, hi_az), m); + expr_ref range_mz(seq.re.mk_range(lo_mz, hi_az), m); + expr_ref re(seq.re.mk_inter(range_az, range_mz), m); + char_set cs = parikh.minterm_to_char_set(re); + + // intersection [A-Z] ∩ [M-Z] = [M-Z]: 14 characters + std::cout << " intersection [A-Z]∩[M-Z] char_count = " << cs.char_count() << "\n"; + SASSERT(cs.char_count() == 14); // M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z + SASSERT(!cs.contains('A')); + SASSERT(cs.contains('M')); + SASSERT(cs.contains('Z')); +} + +// diff(range('A','Z'), range('A','M')) == range('N','Z') +static void test_minterm_diff() { + std::cout << "test_minterm_diff\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + + expr_ref lo_az(seq.str.mk_string(zstring("A")), m); + expr_ref hi_az(seq.str.mk_string(zstring("Z")), m); + expr_ref lo_am(seq.str.mk_string(zstring("A")), m); + expr_ref hi_am(seq.str.mk_string(zstring("M")), m); + + expr_ref range_az(seq.re.mk_range(lo_az, hi_az), m); + expr_ref range_am(seq.re.mk_range(lo_am, hi_am), m); + expr_ref re(seq.re.mk_diff(range_az, range_am), m); + char_set cs = parikh.minterm_to_char_set(re); + + // diff [A-Z] \ [A-M] = [N-Z]: 13 characters + std::cout << " diff [A-Z]\\[A-M] char_count = " << cs.char_count() << "\n"; + SASSERT(cs.char_count() == 13); // N..Z + SASSERT(!cs.contains('A')); + SASSERT(!cs.contains('M')); + SASSERT(cs.contains('N')); + SASSERT(cs.contains('Z')); +} + +// to_re(unit('A')) → singleton {'A'} +static void test_minterm_singleton() { + std::cout << "test_minterm_singleton\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq_util seq(m); + seq::seq_parikh parikh(sg); + + expr_ref ch_a(seq.str.mk_char('A'), m); + expr_ref unit_a(seq.str.mk_unit(ch_a), m); + expr_ref re(seq.re.mk_to_re(unit_a), m); + char_set cs = parikh.minterm_to_char_set(re); + + std::cout << " singleton char_count = " << cs.char_count() << "\n"; + SASSERT(cs.char_count() == 1); + SASSERT(cs.contains('A')); + SASSERT(!cs.contains('B')); +} + +// nullptr → full set (conservative fallback) +static void test_minterm_nullptr_is_full() { + std::cout << "test_minterm_nullptr_is_full\n"; + ast_manager m; + reg_decl_plugins(m); + euf::egraph eg(m); + euf::sgraph sg(m, eg); + seq::seq_parikh parikh(sg); + seq_util seq(m); + + char_set cs = parikh.minterm_to_char_set(nullptr); + SASSERT(cs.is_full(seq.max_char())); + std::cout << " nullptr → full set ok\n"; +} + +// --------------------------------------------------------------------------- +// Entry point +// --------------------------------------------------------------------------- + +void tst_seq_parikh() { + // stride tests + test_stride_fixed_length(); + test_stride_star_fixed_body(); + test_stride_star_three_char(); + test_stride_plus(); + test_stride_concat_stars(); + test_stride_union_no_common_period(); + test_stride_union_same_period(); + test_stride_loop(); + test_stride_full_seq(); + test_stride_empty_regex(); + test_stride_epsilon(); + test_stride_opt(); + + // generate_parikh_constraints tests + test_generate_constraints_ab_star(); + test_generate_constraints_bounded_loop(); + test_generate_constraints_stride_one(); + test_generate_constraints_fixed_length(); + test_generate_constraints_dep_propagated(); + + // apply_to_node tests + test_apply_to_node_adds_constraints(); + test_apply_to_node_stride_one_no_constraints(); + + // check_parikh_conflict tests + test_check_conflict_unconstrained_no_conflict(); + test_check_conflict_valid_k_exists(); + test_check_conflict_no_valid_k(); + test_check_conflict_abc_star(); + test_check_conflict_stride_one_never_conflicts(); + + // minterm_to_char_set tests + test_minterm_full_char(); + test_minterm_empty_regex(); + test_minterm_range(); + test_minterm_complement(); + test_minterm_intersection(); + test_minterm_diff(); + test_minterm_singleton(); + test_minterm_nullptr_is_full(); +}