mirror of
https://github.com/Z3Prover/z3
synced 2026-03-23 04:49:11 +00:00
Merge branch 'c3' into copilot/add-parikh-filter-implementation-again
This commit is contained in:
commit
5d0f5dc4e5
6 changed files with 810 additions and 91 deletions
|
|
@ -14,13 +14,14 @@ and reports:
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
TIMEOUT = 5 # seconds
|
DEFAULT_TIMEOUT = 5 # seconds
|
||||||
COMMON_ARGS = ["model_validate=true"]
|
COMMON_ARGS = ["model_validate=true"]
|
||||||
|
|
||||||
SOLVERS = {
|
SOLVERS = {
|
||||||
|
|
@ -29,27 +30,72 @@ SOLVERS = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def run_z3(z3_bin: str, smt_file: Path, solver_arg: str) -> tuple[str, float]:
|
_STATUS_RE = re.compile(r'\(\s*set-info\s+:status\s+(sat|unsat|unknown)\s*\)')
|
||||||
|
|
||||||
|
|
||||||
|
def read_smtlib_status(smt_file: Path) -> str:
|
||||||
|
"""Read the expected status from the SMT-LIB (set-info :status ...) directive.
|
||||||
|
Returns 'sat', 'unsat', or 'unknown'.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
text = smt_file.read_text(encoding="utf-8", errors="replace")
|
||||||
|
m = _STATUS_RE.search(text)
|
||||||
|
if m:
|
||||||
|
return m.group(1)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def determine_status(res_nseq: str, res_seq: str, smtlib_status: str) -> str:
|
||||||
|
"""Determine the ground-truth status of a problem.
|
||||||
|
Priority: if both solvers agree on sat/unsat, use that; otherwise if one
|
||||||
|
solver gives sat/unsat, use that; otherwise fall back to the SMT-LIB
|
||||||
|
annotation; otherwise 'unknown'.
|
||||||
|
"""
|
||||||
|
definite = {"sat", "unsat"}
|
||||||
|
if res_nseq in definite and res_nseq == res_seq:
|
||||||
|
return res_nseq
|
||||||
|
if res_nseq in definite and res_seq not in definite:
|
||||||
|
return res_nseq
|
||||||
|
if res_seq in definite and res_nseq not in definite:
|
||||||
|
return res_seq
|
||||||
|
# Disagreement (sat vs unsat) — fall back to SMTLIB annotation
|
||||||
|
if res_nseq in definite and res_seq in definite and res_nseq != res_seq:
|
||||||
|
if smtlib_status in definite:
|
||||||
|
return smtlib_status
|
||||||
|
return "unknown"
|
||||||
|
# Neither solver gave a definite answer
|
||||||
|
if smtlib_status in definite:
|
||||||
|
return smtlib_status
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
def run_z3(z3_bin: str, smt_file: Path, solver_arg: str, timeout_s: int = DEFAULT_TIMEOUT) -> tuple[str, float]:
|
||||||
"""Run z3 on a file with the given solver argument.
|
"""Run z3 on a file with the given solver argument.
|
||||||
Returns (result, elapsed) where result is 'sat', 'unsat', 'unknown', or 'timeout'/'error'.
|
Returns (result, elapsed) where result is 'sat', 'unsat', 'unknown', or 'timeout'/'error'.
|
||||||
"""
|
"""
|
||||||
cmd = [z3_bin, solver_arg] + COMMON_ARGS + [str(smt_file)]
|
timeout_ms = timeout_s * 1000
|
||||||
|
cmd = [z3_bin, f"-t:{timeout_ms}", solver_arg] + COMMON_ARGS + [str(smt_file)]
|
||||||
start = time.monotonic()
|
start = time.monotonic()
|
||||||
try:
|
try:
|
||||||
proc = subprocess.run(
|
proc = subprocess.run(
|
||||||
cmd,
|
cmd,
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
timeout=TIMEOUT,
|
timeout=timeout_s + 5, # subprocess grace period beyond Z3's own timeout
|
||||||
)
|
)
|
||||||
elapsed = time.monotonic() - start
|
elapsed = time.monotonic() - start
|
||||||
output = proc.stdout.strip()
|
output = proc.stdout.strip()
|
||||||
# Extract first meaningful line (sat/unsat/unknown)
|
# Extract first meaningful line (sat/unsat/unknown)
|
||||||
for line in output.splitlines():
|
for line in output.splitlines():
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line in ("sat", "unsat", "unknown"):
|
if line in ("sat", "unsat"):
|
||||||
return line, elapsed
|
return line, elapsed
|
||||||
return "unknown", elapsed
|
if line == "unknown":
|
||||||
|
# Z3 returns "unknown" when it hits -t: limit — treat as timeout
|
||||||
|
return "timeout", elapsed
|
||||||
|
return "timeout", elapsed
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
elapsed = time.monotonic() - start
|
elapsed = time.monotonic() - start
|
||||||
return "timeout", elapsed
|
return "timeout", elapsed
|
||||||
|
|
@ -77,17 +123,21 @@ def classify(res_nseq: str, res_seq: str) -> str:
|
||||||
return "diverge"
|
return "diverge"
|
||||||
|
|
||||||
|
|
||||||
def process_file(z3_bin: str, smt_file: Path) -> dict:
|
def process_file(z3_bin: str, smt_file: Path, timeout_s: int = DEFAULT_TIMEOUT) -> dict:
|
||||||
res_nseq, t_nseq = run_z3(z3_bin, smt_file, SOLVERS["nseq"])
|
res_nseq, t_nseq = run_z3(z3_bin, smt_file, SOLVERS["nseq"], timeout_s)
|
||||||
res_seq, t_seq = run_z3(z3_bin, smt_file, SOLVERS["seq"])
|
res_seq, t_seq = run_z3(z3_bin, smt_file, SOLVERS["seq"], timeout_s)
|
||||||
cat = classify(res_nseq, res_seq)
|
cat = classify(res_nseq, res_seq)
|
||||||
|
smtlib_status = read_smtlib_status(smt_file)
|
||||||
|
status = determine_status(res_nseq, res_seq, smtlib_status)
|
||||||
return {
|
return {
|
||||||
"file": smt_file,
|
"file": smt_file,
|
||||||
"nseq": res_nseq,
|
"nseq": res_nseq,
|
||||||
"seq": res_seq,
|
"seq": res_seq,
|
||||||
"t_nseq": t_nseq,
|
"t_nseq": t_nseq,
|
||||||
"t_seq": t_seq,
|
"t_seq": t_seq,
|
||||||
"category": cat,
|
"category": cat,
|
||||||
|
"smtlib_status": smtlib_status,
|
||||||
|
"status": status,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -97,10 +147,13 @@ def main():
|
||||||
parser.add_argument("--z3", required=True, metavar="PATH", help="Path to z3 binary")
|
parser.add_argument("--z3", required=True, metavar="PATH", help="Path to z3 binary")
|
||||||
parser.add_argument("--ext", default=".smt2", help="File extension to search for (default: .smt2)")
|
parser.add_argument("--ext", default=".smt2", help="File extension to search for (default: .smt2)")
|
||||||
parser.add_argument("--jobs", type=int, default=4, help="Parallel workers (default: 4)")
|
parser.add_argument("--jobs", type=int, default=4, help="Parallel workers (default: 4)")
|
||||||
|
parser.add_argument("--timeout", type=int, default=DEFAULT_TIMEOUT, metavar="SEC",
|
||||||
|
help=f"Per-solver timeout in seconds (default: {DEFAULT_TIMEOUT})")
|
||||||
parser.add_argument("--csv", metavar="FILE", help="Also write results to a CSV file")
|
parser.add_argument("--csv", metavar="FILE", help="Also write results to a CSV file")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
z3_bin = args.z3
|
z3_bin = args.z3
|
||||||
|
timeout_s = args.timeout
|
||||||
|
|
||||||
root = Path(args.path)
|
root = Path(args.path)
|
||||||
if not root.exists():
|
if not root.exists():
|
||||||
|
|
@ -112,11 +165,11 @@ def main():
|
||||||
print(f"No {args.ext} files found under {root}", file=sys.stderr)
|
print(f"No {args.ext} files found under {root}", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
print(f"Found {len(files)} files. Running with {args.jobs} parallel workers …\n")
|
print(f"Found {len(files)} files. Running with {args.jobs} parallel workers, timeout={timeout_s}s …\n")
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
with ThreadPoolExecutor(max_workers=args.jobs) as pool:
|
with ThreadPoolExecutor(max_workers=args.jobs) as pool:
|
||||||
futures = {pool.submit(process_file, z3_bin, f): f for f in files}
|
futures = {pool.submit(process_file, z3_bin, f, timeout_s): f for f in files}
|
||||||
done = 0
|
done = 0
|
||||||
for fut in as_completed(futures):
|
for fut in as_completed(futures):
|
||||||
done += 1
|
done += 1
|
||||||
|
|
@ -138,24 +191,45 @@ def main():
|
||||||
categories.setdefault(r["category"], []).append(r)
|
categories.setdefault(r["category"], []).append(r)
|
||||||
|
|
||||||
print("\n" + "="*70)
|
print("\n" + "="*70)
|
||||||
print("SUMMARY")
|
print("TOTALS")
|
||||||
print("="*70)
|
|
||||||
|
|
||||||
for cat, items in categories.items():
|
for cat, items in categories.items():
|
||||||
if not items:
|
print(f" {cat:40s}: {len(items)}")
|
||||||
continue
|
print(f"{'='*70}")
|
||||||
|
|
||||||
|
# ── Per-solver timeout & divergence file lists ─────────────────────────
|
||||||
|
nseq_timeouts = [r for r in results if r["nseq"] == "timeout"]
|
||||||
|
seq_timeouts = [r for r in results if r["seq"] == "timeout"]
|
||||||
|
both_to = categories["both_timeout"]
|
||||||
|
diverged = categories["diverge"]
|
||||||
|
|
||||||
|
def _print_file_list(label: str, items: list[dict]):
|
||||||
print(f"\n{'─'*70}")
|
print(f"\n{'─'*70}")
|
||||||
print(f" {cat.upper().replace('_', ' ')} ({len(items)} files)")
|
print(f" {label} ({len(items)} files)")
|
||||||
print(f"{'─'*70}")
|
print(f"{'─'*70}")
|
||||||
for r in sorted(items, key=lambda x: x["file"]):
|
for r in sorted(items, key=lambda x: x["file"]):
|
||||||
print(f" {r['file']}")
|
print(f" {r['file']}")
|
||||||
if cat not in ("both_timeout", "both_agree"):
|
|
||||||
print(f" nseq={r['nseq']:8s} ({r['t_nseq']:.1f}s) seq={r['seq']:8s} ({r['t_seq']:.1f}s)")
|
|
||||||
|
|
||||||
print(f"\n{'='*70}")
|
if nseq_timeouts:
|
||||||
print(f"TOTALS")
|
_print_file_list("NSEQ TIMES OUT", nseq_timeouts)
|
||||||
for cat, items in categories.items():
|
if seq_timeouts:
|
||||||
print(f" {cat:40s}: {len(items)}")
|
_print_file_list("SEQ TIMES OUT", seq_timeouts)
|
||||||
|
if both_to:
|
||||||
|
_print_file_list("BOTH TIME OUT", both_to)
|
||||||
|
if diverged:
|
||||||
|
_print_file_list("DIVERGE (sat vs unsat)", diverged)
|
||||||
|
|
||||||
|
print()
|
||||||
|
|
||||||
|
# ── Problem status statistics ────────────────────────────────────────────
|
||||||
|
status_counts = {"sat": 0, "unsat": 0, "unknown": 0}
|
||||||
|
for r in results:
|
||||||
|
status_counts[r["status"]] = status_counts.get(r["status"], 0) + 1
|
||||||
|
|
||||||
|
print(f"\nPROBLEM STATUS (total {len(results)} files)")
|
||||||
|
print(f"{'─'*40}")
|
||||||
|
print(f" {'sat':12s}: {status_counts['sat']:5d} ({100*status_counts['sat']/len(results):.1f}%)")
|
||||||
|
print(f" {'unsat':12s}: {status_counts['unsat']:5d} ({100*status_counts['unsat']/len(results):.1f}%)")
|
||||||
|
print(f" {'unknown':12s}: {status_counts['unknown']:5d} ({100*status_counts['unknown']/len(results):.1f}%)")
|
||||||
print(f"{'='*70}\n")
|
print(f"{'='*70}\n")
|
||||||
|
|
||||||
# ── Optional CSV output ───────────────────────────────────────────────────
|
# ── Optional CSV output ───────────────────────────────────────────────────
|
||||||
|
|
@ -163,7 +237,7 @@ def main():
|
||||||
import csv
|
import csv
|
||||||
csv_path = Path(args.csv)
|
csv_path = Path(args.csv)
|
||||||
with csv_path.open("w", newline="", encoding="utf-8") as f:
|
with csv_path.open("w", newline="", encoding="utf-8") as f:
|
||||||
writer = csv.DictWriter(f, fieldnames=["file", "nseq", "seq", "t_nseq", "t_seq", "category"])
|
writer = csv.DictWriter(f, fieldnames=["file", "nseq", "seq", "t_nseq", "t_seq", "category", "smtlib_status", "status"])
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
for r in sorted(results, key=lambda x: x["file"]):
|
for r in sorted(results, key=lambda x: x["file"]):
|
||||||
writer.writerow({**r, "file": str(r["file"])})
|
writer.writerow({**r, "file": str(r["file"])})
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,7 @@ namespace smt {
|
||||||
}
|
}
|
||||||
|
|
||||||
void assert_expr(expr* e) override {
|
void assert_expr(expr* e) override {
|
||||||
|
// std::cout << "Asserting: " << mk_pp(e, m_kernel.m()) << std::endl;
|
||||||
m_kernel.assert_expr(e);
|
m_kernel.assert_expr(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -233,8 +233,34 @@ namespace smt {
|
||||||
if (exp_val.is_one())
|
if (exp_val.is_one())
|
||||||
return base_val;
|
return base_val;
|
||||||
|
|
||||||
// For small exponents, concatenate directly
|
// For small exponents, concatenate directly; for large ones,
|
||||||
|
// build a concrete string constant to avoid enormous AST chains
|
||||||
|
// that cause cleanup_expr to diverge.
|
||||||
unsigned n_val = exp_val.get_unsigned();
|
unsigned n_val = exp_val.get_unsigned();
|
||||||
|
constexpr unsigned POWER_EXPAND_LIMIT = 1000;
|
||||||
|
if (n_val > POWER_EXPAND_LIMIT) {
|
||||||
|
// Try to extract a concrete character from the base (seq.unit(c))
|
||||||
|
// and build a string literal directly (O(1) AST node).
|
||||||
|
unsigned ch = 0;
|
||||||
|
expr* unit_arg = nullptr;
|
||||||
|
if (m_seq.str.is_unit(base_val, unit_arg) && m_seq.is_const_char(unit_arg, ch)) {
|
||||||
|
svector<unsigned> buf(n_val, ch);
|
||||||
|
zstring result(buf.size(), buf.data());
|
||||||
|
return expr_ref(m_seq.str.mk_string(result), m);
|
||||||
|
}
|
||||||
|
// Also handle if base is already a string constant
|
||||||
|
zstring base_str;
|
||||||
|
if (m_seq.str.is_string(base_val, base_str) && base_str.length() > 0) {
|
||||||
|
svector<unsigned> buf;
|
||||||
|
for (unsigned i = 0; i < n_val; ++i)
|
||||||
|
for (unsigned j = 0; j < base_str.length(); ++j)
|
||||||
|
buf.push_back(base_str[j]);
|
||||||
|
zstring result(buf.size(), buf.data());
|
||||||
|
return expr_ref(m_seq.str.mk_string(result), m);
|
||||||
|
}
|
||||||
|
// Fallback: cap exponent to avoid divergence
|
||||||
|
n_val = POWER_EXPAND_LIMIT;
|
||||||
|
}
|
||||||
expr_ref acc(base_val);
|
expr_ref acc(base_val);
|
||||||
for (unsigned i = 1; i < n_val; ++i)
|
for (unsigned i = 1; i < n_val; ++i)
|
||||||
acc = m_seq.str.mk_concat(acc, base_val);
|
acc = m_seq.str.mk_concat(acc, base_val);
|
||||||
|
|
|
||||||
|
|
@ -445,6 +445,7 @@ namespace seq {
|
||||||
m_sg(sg),
|
m_sg(sg),
|
||||||
m_solver(solver),
|
m_solver(solver),
|
||||||
m_parikh(alloc(seq_parikh, sg)) {
|
m_parikh(alloc(seq_parikh, sg)) {
|
||||||
|
m_len_vars(sg.get_manager()) {
|
||||||
}
|
}
|
||||||
|
|
||||||
nielsen_graph::~nielsen_graph() {
|
nielsen_graph::~nielsen_graph() {
|
||||||
|
|
@ -522,6 +523,9 @@ namespace seq {
|
||||||
m_num_input_eqs = 0;
|
m_num_input_eqs = 0;
|
||||||
m_num_input_mems = 0;
|
m_num_input_mems = 0;
|
||||||
m_root_constraints_asserted = false;
|
m_root_constraints_asserted = false;
|
||||||
|
m_mod_cnt.reset();
|
||||||
|
m_len_var_cache.clear();
|
||||||
|
m_len_vars.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ostream& nielsen_graph::display(std::ostream& out) const {
|
std::ostream& nielsen_graph::display(std::ostream& out) const {
|
||||||
|
|
@ -873,7 +877,7 @@ namespace seq {
|
||||||
|
|
||||||
// --- nodes ---
|
// --- nodes ---
|
||||||
for (nielsen_node const* n : m_nodes) {
|
for (nielsen_node const* n : m_nodes) {
|
||||||
out << "\t" << n->id() << " [label=<"
|
out << " " << n->id() << " [label=<"
|
||||||
<< n->id() << ": ";
|
<< n->id() << ": ";
|
||||||
n->display_html(out, m);
|
n->display_html(out, m);
|
||||||
// append conflict reason if this is a direct conflict
|
// append conflict reason if this is a direct conflict
|
||||||
|
|
@ -897,7 +901,7 @@ namespace seq {
|
||||||
// --- edges ---
|
// --- edges ---
|
||||||
for (nielsen_node const* n : m_nodes) {
|
for (nielsen_node const* n : m_nodes) {
|
||||||
for (nielsen_edge const* e : n->outgoing()) {
|
for (nielsen_edge const* e : n->outgoing()) {
|
||||||
out << "\t" << n->id() << " -> " << e->tgt()->id() << " [label=<";
|
out << " " << n->id() << " -> " << e->tgt()->id() << " [label=<";
|
||||||
|
|
||||||
// edge label: substitutions joined by <br/>
|
// edge label: substitutions joined by <br/>
|
||||||
bool first = true;
|
bool first = true;
|
||||||
|
|
@ -958,7 +962,7 @@ namespace seq {
|
||||||
|
|
||||||
// backedge as dotted arrow
|
// backedge as dotted arrow
|
||||||
if (n->backedge())
|
if (n->backedge())
|
||||||
out << "\t" << n->id() << " -> " << n->backedge()->id()
|
out << " " << n->id() << " -> " << n->backedge()->id()
|
||||||
<< " [style=dotted];\n";
|
<< " [style=dotted];\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1296,6 +1300,9 @@ namespace seq {
|
||||||
}
|
}
|
||||||
|
|
||||||
simplify_result nielsen_node::simplify_and_init(nielsen_graph& g, svector<nielsen_edge*> const& cur_path) {
|
simplify_result nielsen_node::simplify_and_init(nielsen_graph& g, svector<nielsen_edge*> const& cur_path) {
|
||||||
|
if (m_is_extended)
|
||||||
|
return simplify_result::proceed;
|
||||||
|
|
||||||
euf::sgraph& sg = g.sg();
|
euf::sgraph& sg = g.sg();
|
||||||
ast_manager& m = sg.get_manager();
|
ast_manager& m = sg.get_manager();
|
||||||
arith_util arith(m);
|
arith_util arith(m);
|
||||||
|
|
@ -1382,6 +1389,44 @@ namespace seq {
|
||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pass 2b: power-character prefix inconsistency
|
||||||
|
// (mirrors ZIPT's SimplifyDir power unit case + IsPrefixConsistent)
|
||||||
|
// When one side starts with a power u^n whose base starts with
|
||||||
|
// char a, and the other side starts with a different char b,
|
||||||
|
// the power exponent must be 0.
|
||||||
|
// Creates a single deterministic child with the substitution and
|
||||||
|
// constraint as edge labels so they appear in the graph.
|
||||||
|
// Guard: skip if we already created a child (re-entry via iterative deepening).
|
||||||
|
if (!eq.is_trivial() && eq.m_lhs && eq.m_rhs) {
|
||||||
|
euf::snode* lh = eq.m_lhs->first();
|
||||||
|
euf::snode* rh = eq.m_rhs->first();
|
||||||
|
for (int dir = 0; dir < 2; dir++) {
|
||||||
|
euf::snode* pow_head = (dir == 0) ? lh : rh;
|
||||||
|
euf::snode* other_head = (dir == 0) ? rh : lh;
|
||||||
|
if (!pow_head || !pow_head->is_power() || !other_head || !other_head->is_char())
|
||||||
|
continue;
|
||||||
|
euf::snode* base_sn = pow_head->arg(0);
|
||||||
|
if (!base_sn) continue;
|
||||||
|
euf::snode* base_first = base_sn->first();
|
||||||
|
if (!base_first || !base_first->is_char()) continue;
|
||||||
|
if (base_first->id() == other_head->id()) continue;
|
||||||
|
// base starts with different char → create child with exp=0 + power→ε
|
||||||
|
nielsen_node* child = g.mk_child(this);
|
||||||
|
nielsen_edge* e = g.mk_edge(this, child, true);
|
||||||
|
nielsen_subst s(pow_head, sg.mk_empty(), eq.m_dep);
|
||||||
|
e->add_subst(s);
|
||||||
|
child->apply_subst(sg, s);
|
||||||
|
expr* pow_exp = get_power_exp_expr(pow_head);
|
||||||
|
if (pow_exp) {
|
||||||
|
expr* zero = arith.mk_numeral(rational(0), true);
|
||||||
|
e->add_side_int(g.mk_int_constraint(
|
||||||
|
pow_exp, zero, int_constraint_kind::eq, eq.m_dep));
|
||||||
|
}
|
||||||
|
set_extended(true);
|
||||||
|
return simplify_result::proceed;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// pass 3: power simplification (mirrors ZIPT's LcpCompression +
|
// pass 3: power simplification (mirrors ZIPT's LcpCompression +
|
||||||
|
|
@ -1458,7 +1503,8 @@ namespace seq {
|
||||||
// exponent powers (e.g. base^1 → base created by 3c) before
|
// exponent powers (e.g. base^1 → base created by 3c) before
|
||||||
// 3e attempts LP-based elimination which would introduce a
|
// 3e attempts LP-based elimination which would introduce a
|
||||||
// needless fresh variable.
|
// needless fresh variable.
|
||||||
if (changed) continue;
|
if (changed)
|
||||||
|
continue;
|
||||||
|
|
||||||
// 3d: power prefix elimination — when both sides start with a
|
// 3d: power prefix elimination — when both sides start with a
|
||||||
// power of the same base, cancel the common power prefix.
|
// power of the same base, cancel the common power prefix.
|
||||||
|
|
@ -1579,6 +1625,211 @@ namespace seq {
|
||||||
// remaining regex memberships and add to m_int_constraints.
|
// remaining regex memberships and add to m_int_constraints.
|
||||||
init_var_bounds_from_mems();
|
init_var_bounds_from_mems();
|
||||||
|
|
||||||
|
// pass 5: variable-character look-ahead substitution
|
||||||
|
// (mirrors ZIPT's StrEq.SimplifyFinal)
|
||||||
|
// When one side starts with a variable x and the other with chars,
|
||||||
|
// look ahead to find how many leading chars can be deterministically
|
||||||
|
// assigned to x without splitting, by checking prefix consistency.
|
||||||
|
// Guard: skip if we already created a child (re-entry via iterative deepening).
|
||||||
|
for (str_eq& eq : m_str_eq) {
|
||||||
|
if (eq.is_trivial() || !eq.m_lhs || !eq.m_rhs)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Orient: var_side starts with a variable, char_side starts with a char
|
||||||
|
euf::snode* var_side = nullptr;
|
||||||
|
euf::snode* char_side = nullptr;
|
||||||
|
euf::snode* lhead = eq.m_lhs->first();
|
||||||
|
euf::snode* rhead = eq.m_rhs->first();
|
||||||
|
if (!lhead || !rhead) continue;
|
||||||
|
|
||||||
|
if (lhead->is_var() && rhead->is_char()) {
|
||||||
|
var_side = eq.m_lhs;
|
||||||
|
char_side = eq.m_rhs;
|
||||||
|
} else if (rhead->is_var() && lhead->is_char()) {
|
||||||
|
var_side = eq.m_rhs;
|
||||||
|
char_side = eq.m_lhs;
|
||||||
|
} else {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
euf::snode_vector var_toks, char_toks;
|
||||||
|
var_side->collect_tokens(var_toks);
|
||||||
|
char_side->collect_tokens(char_toks);
|
||||||
|
if (var_toks.size() <= 1 || char_toks.empty())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
euf::snode* var_node = var_toks[0];
|
||||||
|
SASSERT(var_node->is_var());
|
||||||
|
|
||||||
|
// For increasing prefix lengths i (chars from char_side),
|
||||||
|
// check if x → char_toks[0..i-1] · x would be consistent by
|
||||||
|
// comparing tokens after x on the var_side against the shifted
|
||||||
|
// char_side tokens.
|
||||||
|
// Mirrors ZIPT's SimplifyFinal loop: when prefix i is proven
|
||||||
|
// inconsistent (char clash), we continue to i+1. When we reach
|
||||||
|
// a prefix that is consistent or indeterminate, we stop.
|
||||||
|
// The final i is the substitution length: x → char_toks[0..i-1] · x.
|
||||||
|
// If ALL prefixes are inconsistent, i equals the full leading-char
|
||||||
|
// count and we still substitute (x must be at least that long).
|
||||||
|
unsigned i = 0;
|
||||||
|
for (; i < char_toks.size() && char_toks[i]->is_char(); ++i) {
|
||||||
|
unsigned j1 = 1; // index into var_toks (after the variable)
|
||||||
|
unsigned j2 = i; // index into char_toks (after copied prefix)
|
||||||
|
bool failed = false;
|
||||||
|
|
||||||
|
while (j1 < var_toks.size() && j2 < char_toks.size()) {
|
||||||
|
euf::snode* st1 = var_toks[j1];
|
||||||
|
euf::snode* st2 = char_toks[j2];
|
||||||
|
|
||||||
|
if (!st2->is_char())
|
||||||
|
break; // can't compare against non-char — indeterminate
|
||||||
|
|
||||||
|
if (st1->is_char()) {
|
||||||
|
if (st1->id() == st2->id()) {
|
||||||
|
j1++;
|
||||||
|
j2++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
failed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (st1->id() != var_node->id())
|
||||||
|
break; // different variable/power — indeterminate
|
||||||
|
|
||||||
|
// st1 is the same variable x again — compare against
|
||||||
|
// the chars we would copy (char_toks[0..i-1])
|
||||||
|
bool inner_indet = false;
|
||||||
|
for (unsigned l = 0; j2 < char_toks.size() && l < i; ++l) {
|
||||||
|
st2 = char_toks[j2];
|
||||||
|
if (!st2->is_char()) {
|
||||||
|
inner_indet = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (st2->id() == char_toks[l]->id()) {
|
||||||
|
j2++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
failed = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (inner_indet || failed) break;
|
||||||
|
j1++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (failed)
|
||||||
|
continue; // prefix i is inconsistent — try longer
|
||||||
|
break; // prefix i is consistent/indeterminate — stop
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// Divergence guard (mirrors ZIPT's HasDepCycle + power skip):
|
||||||
|
// Check whether the next named variable or power token on the
|
||||||
|
// char_side (past the char prefix) would create a dependency
|
||||||
|
// cycle or involve a power (which would cause infinite unwinding).
|
||||||
|
|
||||||
|
// Step 1: find the first variable or power past the char prefix
|
||||||
|
euf::snode* next_var = nullptr;
|
||||||
|
for (unsigned k = i; k < char_toks.size(); ++k) {
|
||||||
|
euf::snode* t = char_toks[k];
|
||||||
|
if (t->is_power()) {
|
||||||
|
// Power token → skip this equation (would cause divergence)
|
||||||
|
next_var = nullptr;
|
||||||
|
goto skip_eq;
|
||||||
|
}
|
||||||
|
if (t->is_var()) {
|
||||||
|
next_var = t;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: if there is a variable, check for Nielsen dependency cycle
|
||||||
|
if (next_var) {
|
||||||
|
// Build Nielsen dependency graph: for each equation, if one side
|
||||||
|
// starts with variable x, then x depends on the first variable
|
||||||
|
// on the other side. (Mirrors ZIPT's GetNielsenDep.)
|
||||||
|
// Then check if there's a path from next_var back to var_node.
|
||||||
|
// Use a u_map<unsigned> to represent edges: var_id → first_dep_var_id.
|
||||||
|
u_map<unsigned> dep_edges; // var snode id → first dependent var snode id
|
||||||
|
for (str_eq const& other_eq : m_str_eq) {
|
||||||
|
if (other_eq.is_trivial()) continue;
|
||||||
|
if (!other_eq.m_lhs || !other_eq.m_rhs) continue;
|
||||||
|
|
||||||
|
euf::snode* lh2 = other_eq.m_lhs->first();
|
||||||
|
euf::snode* rh2 = other_eq.m_rhs->first();
|
||||||
|
if (!lh2 || !rh2) continue;
|
||||||
|
|
||||||
|
// Orient: head_var leads one side, scan other side for first var
|
||||||
|
auto record_dep = [&](euf::snode* head_var, euf::snode* other_side) {
|
||||||
|
euf::snode_vector other_toks;
|
||||||
|
other_side->collect_tokens(other_toks);
|
||||||
|
if (lh2->is_var() && rh2->is_var()) {
|
||||||
|
// Both sides start with vars: bidirectional dependency
|
||||||
|
if (!dep_edges.contains(lh2->id()))
|
||||||
|
dep_edges.insert(lh2->id(), rh2->id());
|
||||||
|
if (!dep_edges.contains(rh2->id()))
|
||||||
|
dep_edges.insert(rh2->id(), lh2->id());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (unsigned idx = 0; idx < other_toks.size(); ++idx) {
|
||||||
|
if (other_toks[idx]->is_var()) {
|
||||||
|
if (!dep_edges.contains(head_var->id()))
|
||||||
|
dep_edges.insert(head_var->id(), other_toks[idx]->id());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if (lh2->is_var() && !rh2->is_var())
|
||||||
|
record_dep(lh2, other_eq.m_rhs);
|
||||||
|
else if (rh2->is_var() && !lh2->is_var())
|
||||||
|
record_dep(rh2, other_eq.m_lhs);
|
||||||
|
else if (lh2->is_var() && rh2->is_var())
|
||||||
|
record_dep(lh2, other_eq.m_rhs);
|
||||||
|
}
|
||||||
|
|
||||||
|
// DFS from next_var to see if we can reach var_node
|
||||||
|
uint_set visited;
|
||||||
|
svector<unsigned> worklist;
|
||||||
|
worklist.push_back(next_var->id());
|
||||||
|
bool cycle_found = false;
|
||||||
|
while (!worklist.empty() && !cycle_found) {
|
||||||
|
unsigned cur = worklist.back();
|
||||||
|
worklist.pop_back();
|
||||||
|
if (cur == var_node->id()) {
|
||||||
|
cycle_found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (visited.contains(cur))
|
||||||
|
continue;
|
||||||
|
visited.insert(cur);
|
||||||
|
unsigned dep_id;
|
||||||
|
if (dep_edges.find(cur, dep_id))
|
||||||
|
worklist.push_back(dep_id);
|
||||||
|
}
|
||||||
|
if (cycle_found)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// Create a single deterministic child with the substitution as edge label
|
||||||
|
euf::snode* prefix_sn = char_toks[0];
|
||||||
|
for (unsigned j = 1; j < i; ++j)
|
||||||
|
prefix_sn = sg.mk_concat(prefix_sn, char_toks[j]);
|
||||||
|
euf::snode* replacement = sg.mk_concat(prefix_sn, var_node);
|
||||||
|
nielsen_subst s(var_node, replacement, eq.m_dep);
|
||||||
|
nielsen_node* child = g.mk_child(this);
|
||||||
|
nielsen_edge* e = g.mk_edge(this, child, true);
|
||||||
|
e->add_subst(s);
|
||||||
|
child->apply_subst(sg, s);
|
||||||
|
set_extended(true);
|
||||||
|
return simplify_result::proceed;
|
||||||
|
}
|
||||||
|
skip_eq:;
|
||||||
|
}
|
||||||
|
|
||||||
if (is_satisfied())
|
if (is_satisfied())
|
||||||
return simplify_result::satisfied;
|
return simplify_result::satisfied;
|
||||||
|
|
||||||
|
|
@ -1700,7 +1951,7 @@ namespace seq {
|
||||||
++m_stats.m_num_unknown;
|
++m_stats.m_num_unknown;
|
||||||
return search_result::unknown;
|
return search_result::unknown;
|
||||||
}
|
}
|
||||||
catch (...) {
|
catch(const std::exception& ex) {
|
||||||
#ifdef Z3DEBUG
|
#ifdef Z3DEBUG
|
||||||
std::string dot = to_dot();
|
std::string dot = to_dot();
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -1754,6 +2005,9 @@ namespace seq {
|
||||||
// node into the current solver scope. Constraints inherited from the parent
|
// node into the current solver scope. Constraints inherited from the parent
|
||||||
// (indices 0..m_parent_ic_count-1) are already present at the enclosing
|
// (indices 0..m_parent_ic_count-1) are already present at the enclosing
|
||||||
// scope level; only the newly-added tail needs to be asserted here.
|
// scope level; only the newly-added tail needs to be asserted here.
|
||||||
|
// Also generate per-node |LHS| = |RHS| length constraints for descendant
|
||||||
|
// equations (root constraints are already at the base level).
|
||||||
|
generate_node_length_constraints(node);
|
||||||
assert_node_new_int_constraints(node);
|
assert_node_new_int_constraints(node);
|
||||||
|
|
||||||
// integer feasibility check: the solver now holds all path constraints
|
// integer feasibility check: the solver now holds all path constraints
|
||||||
|
|
@ -1794,16 +2048,34 @@ namespace seq {
|
||||||
|
|
||||||
// explore children
|
// explore children
|
||||||
bool any_unknown = false;
|
bool any_unknown = false;
|
||||||
for (nielsen_edge* e : node->outgoing()) {
|
for (nielsen_edge *e : node->outgoing()) {
|
||||||
cur_path.push_back(e);
|
cur_path.push_back(e);
|
||||||
// Push a solver scope for this edge and assert its side integer
|
// Push a solver scope for this edge and assert its side integer
|
||||||
// constraints. The child's own new int_constraints will be asserted
|
// constraints. The child's own new int_constraints will be asserted
|
||||||
// inside the recursive call (above). On return, pop the scope so
|
// inside the recursive call (above). On return, pop the scope so
|
||||||
// that backtracking removes those assertions.
|
// that backtracking removes those assertions.
|
||||||
m_solver.push();
|
m_solver.push();
|
||||||
for (auto const& ic : e->side_int())
|
|
||||||
|
// Lazily compute substitution length constraints (|x| = |u|) on first
|
||||||
|
// traversal. This must happen before asserting side_int and before
|
||||||
|
// bumping mod counts, so that LHS uses the parent's counts and RHS
|
||||||
|
// uses the temporarily-bumped counts.
|
||||||
|
if (!e->len_constraints_computed()) {
|
||||||
|
add_subst_length_constraints(e);
|
||||||
|
e->set_len_constraints_computed(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto const &ic : e->side_int())
|
||||||
m_solver.assert_expr(int_constraint_to_expr(ic));
|
m_solver.assert_expr(int_constraint_to_expr(ic));
|
||||||
search_result r = search_dfs(e->tgt(), depth + 1, cur_path);
|
|
||||||
|
// Bump modification counts for the child's context.
|
||||||
|
inc_edge_mod_counts(e);
|
||||||
|
|
||||||
|
search_result r = search_dfs(e->tgt(), e->is_progress() ? depth : depth + 1, cur_path);
|
||||||
|
|
||||||
|
// Restore modification counts on backtrack.
|
||||||
|
dec_edge_mod_counts(e);
|
||||||
|
|
||||||
m_solver.pop(1);
|
m_solver.pop(1);
|
||||||
if (r == search_result::sat)
|
if (r == search_result::sat)
|
||||||
return search_result::sat;
|
return search_result::sat;
|
||||||
|
|
@ -1883,7 +2155,7 @@ namespace seq {
|
||||||
euf::snode* lhead = lhs_toks[0];
|
euf::snode* lhead = lhs_toks[0];
|
||||||
euf::snode* rhead = rhs_toks[0];
|
euf::snode* rhead = rhs_toks[0];
|
||||||
|
|
||||||
// char·A = y·B → branch 1: y→ε, branch 2: y→char·fresh
|
// char·A = y·B → branch 1: y→ε, branch 2: y→char·y
|
||||||
if (lhead->is_char() && rhead->is_var()) {
|
if (lhead->is_char() && rhead->is_var()) {
|
||||||
// branch 1: y → ε (progress)
|
// branch 1: y → ε (progress)
|
||||||
{
|
{
|
||||||
|
|
@ -1893,12 +2165,11 @@ namespace seq {
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
}
|
}
|
||||||
// branch 2: y → char·fresh (progress)
|
// branch 2: y → char·y (no progress)
|
||||||
{
|
{
|
||||||
euf::snode* fresh = mk_fresh_var();
|
euf::snode* replacement = m_sg.mk_concat(lhead, rhead);
|
||||||
euf::snode* replacement = m_sg.mk_concat(lhead, fresh);
|
|
||||||
nielsen_node* child = mk_child(node);
|
nielsen_node* child = mk_child(node);
|
||||||
nielsen_edge* e = mk_edge(node, child, true);
|
nielsen_edge* e = mk_edge(node, child, false);
|
||||||
nielsen_subst s(rhead, replacement, eq.m_dep);
|
nielsen_subst s(rhead, replacement, eq.m_dep);
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
|
|
@ -1906,7 +2177,7 @@ namespace seq {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// x·A = char·B → branch 1: x→ε, branch 2: x→char·fresh
|
// x·A = char·B → branch 1: x→ε, branch 2: x→char·x
|
||||||
if (rhead->is_char() && lhead->is_var()) {
|
if (rhead->is_char() && lhead->is_var()) {
|
||||||
// branch 1: x → ε (progress)
|
// branch 1: x → ε (progress)
|
||||||
{
|
{
|
||||||
|
|
@ -1916,12 +2187,11 @@ namespace seq {
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
}
|
}
|
||||||
// branch 2: x → char·fresh (progress)
|
// branch 2: x → char·x (no progress)
|
||||||
{
|
{
|
||||||
euf::snode* fresh = mk_fresh_var();
|
euf::snode* replacement = m_sg.mk_concat(rhead, lhead);
|
||||||
euf::snode* replacement = m_sg.mk_concat(rhead, fresh);
|
|
||||||
nielsen_node* child = mk_child(node);
|
nielsen_node* child = mk_child(node);
|
||||||
nielsen_edge* e = mk_edge(node, child, true);
|
nielsen_edge* e = mk_edge(node, child, false);
|
||||||
nielsen_subst s(lhead, replacement, eq.m_dep);
|
nielsen_subst s(lhead, replacement, eq.m_dep);
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
|
|
@ -1962,22 +2232,20 @@ namespace seq {
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
}
|
}
|
||||||
// child 2: x → y·x' (progress)
|
// child 2: x → y·x (no progress)
|
||||||
{
|
{
|
||||||
euf::snode* fresh = mk_fresh_var();
|
euf::snode* replacement = m_sg.mk_concat(rhead, lhead);
|
||||||
euf::snode* replacement = m_sg.mk_concat(rhead, fresh);
|
|
||||||
nielsen_node* child = mk_child(node);
|
nielsen_node* child = mk_child(node);
|
||||||
nielsen_edge* e = mk_edge(node, child, true);
|
nielsen_edge* e = mk_edge(node, child, false);
|
||||||
nielsen_subst s(lhead, replacement, eq.m_dep);
|
nielsen_subst s(lhead, replacement, eq.m_dep);
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
}
|
}
|
||||||
// child 3: y → x·y' (progress)
|
// child 3: y → x·y (progress)
|
||||||
{
|
{
|
||||||
euf::snode* fresh = mk_fresh_var();
|
euf::snode* replacement = m_sg.mk_concat(lhead, rhead);
|
||||||
euf::snode* replacement = m_sg.mk_concat(lhead, fresh);
|
|
||||||
nielsen_node* child = mk_child(node);
|
nielsen_node* child = mk_child(node);
|
||||||
nielsen_edge* e = mk_edge(node, child, true);
|
nielsen_edge* e = mk_edge(node, child, false);
|
||||||
nielsen_subst s(rhead, replacement, eq.m_dep);
|
nielsen_subst s(rhead, replacement, eq.m_dep);
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
|
|
@ -2369,16 +2637,15 @@ namespace seq {
|
||||||
bool created = false;
|
bool created = false;
|
||||||
|
|
||||||
// for each character c with non-fail derivative:
|
// for each character c with non-fail derivative:
|
||||||
// child: x → c · fresh_var
|
// child: x → c · x
|
||||||
for (euf::snode* ch : chars) {
|
for (euf::snode* ch : chars) {
|
||||||
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, ch);
|
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, ch);
|
||||||
if (!deriv || deriv->is_fail())
|
if (!deriv || deriv->is_fail())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
euf::snode* fresh = mk_fresh_var();
|
euf::snode* replacement = m_sg.mk_concat(ch, first);
|
||||||
euf::snode* replacement = m_sg.mk_concat(ch, fresh);
|
|
||||||
nielsen_node* child = mk_child(node);
|
nielsen_node* child = mk_child(node);
|
||||||
nielsen_edge* e = mk_edge(node, child, true);
|
nielsen_edge* e = mk_edge(node, child, false);
|
||||||
nielsen_subst s(first, replacement, mem.m_dep);
|
nielsen_subst s(first, replacement, mem.m_dep);
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
|
|
@ -2920,10 +3187,47 @@ namespace seq {
|
||||||
|
|
||||||
bool nielsen_graph::fire_gpower_intro(
|
bool nielsen_graph::fire_gpower_intro(
|
||||||
nielsen_node* node, str_eq const& eq,
|
nielsen_node* node, str_eq const& eq,
|
||||||
euf::snode* var, euf::snode_vector const& ground_prefix) {
|
euf::snode* var, euf::snode_vector const& ground_prefix_orig) {
|
||||||
ast_manager& m = m_sg.get_manager();
|
ast_manager& m = m_sg.get_manager();
|
||||||
arith_util arith(m);
|
arith_util arith(m);
|
||||||
seq_util& seq = m_sg.get_seq_util();
|
seq_util& seq = m_sg.get_seq_util();
|
||||||
|
|
||||||
|
// Compress repeated patterns in the ground prefix (mirrors ZIPT's LcpCompressionFull).
|
||||||
|
// E.g., [a,b,a,b] has minimal period 2 → use [a,b] as the power base.
|
||||||
|
// This ensures we use the minimal repeating unit: x = (ab)^n · suffix
|
||||||
|
// instead of x = (abab)^n · suffix.
|
||||||
|
euf::snode_vector ground_prefix;
|
||||||
|
unsigned n = ground_prefix_orig.size();
|
||||||
|
unsigned period = n;
|
||||||
|
for (unsigned p = 1; p <= n / 2; ++p) {
|
||||||
|
if (n % p != 0) continue;
|
||||||
|
bool match = true;
|
||||||
|
for (unsigned i = p; i < n && match; ++i)
|
||||||
|
match = (ground_prefix_orig[i]->id() == ground_prefix_orig[i % p]->id());
|
||||||
|
if (match) { period = p; break; }
|
||||||
|
}
|
||||||
|
for (unsigned i = 0; i < period; ++i)
|
||||||
|
ground_prefix.push_back(ground_prefix_orig[i]);
|
||||||
|
|
||||||
|
// If the compressed prefix is a single power snode, unwrap it to use
|
||||||
|
// its base tokens, avoiding nested powers.
|
||||||
|
// E.g., [(ab)^3] → [a, b] so we get (ab)^n instead of ((ab)^3)^n.
|
||||||
|
// (mirrors ZIPT: if b.Length == 1 && b is PowerToken pt => b = pt.Base)
|
||||||
|
if (ground_prefix.size() == 1 && ground_prefix[0]->is_power()) {
|
||||||
|
expr* base_e = get_power_base_expr(ground_prefix[0]);
|
||||||
|
if (base_e) {
|
||||||
|
euf::snode* base_sn = m_sg.mk(base_e);
|
||||||
|
if (base_sn) {
|
||||||
|
euf::snode_vector base_toks;
|
||||||
|
base_sn->collect_tokens(base_toks);
|
||||||
|
if (!base_toks.empty()) {
|
||||||
|
ground_prefix.reset();
|
||||||
|
ground_prefix.append(base_toks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unsigned base_len = ground_prefix.size();
|
unsigned base_len = ground_prefix.size();
|
||||||
|
|
||||||
// Build base string expression from ground prefix tokens.
|
// Build base string expression from ground prefix tokens.
|
||||||
|
|
@ -3043,7 +3347,7 @@ namespace seq {
|
||||||
created = true;
|
created = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Branch 2+: for each minterm m_i, x → ?c · x'
|
// Branch 2+: for each minterm m_i, x → ?c · x
|
||||||
// where ?c is a symbolic char constrained by the minterm
|
// where ?c is a symbolic char constrained by the minterm
|
||||||
for (euf::snode* mt : minterms) {
|
for (euf::snode* mt : minterms) {
|
||||||
if (mt->is_fail()) continue;
|
if (mt->is_fail()) continue;
|
||||||
|
|
@ -3053,11 +3357,10 @@ namespace seq {
|
||||||
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, mt);
|
euf::snode* deriv = m_sg.brzozowski_deriv(mem.m_regex, mt);
|
||||||
if (deriv && deriv->is_fail()) continue;
|
if (deriv && deriv->is_fail()) continue;
|
||||||
|
|
||||||
euf::snode* fresh_var = mk_fresh_var();
|
|
||||||
euf::snode* fresh_char = mk_fresh_char_var();
|
euf::snode* fresh_char = mk_fresh_char_var();
|
||||||
euf::snode* replacement = m_sg.mk_concat(fresh_char, fresh_var);
|
euf::snode* replacement = m_sg.mk_concat(fresh_char, first);
|
||||||
nielsen_node* child = mk_child(node);
|
nielsen_node* child = mk_child(node);
|
||||||
nielsen_edge* e = mk_edge(node, child, true);
|
nielsen_edge* e = mk_edge(node, child, false);
|
||||||
nielsen_subst s(first, replacement, mem.m_dep);
|
nielsen_subst s(first, replacement, mem.m_dep);
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
|
|
@ -3104,40 +3407,90 @@ namespace seq {
|
||||||
expr* exp_n = get_power_exponent(power);
|
expr* exp_n = get_power_exponent(power);
|
||||||
expr* zero = arith.mk_int(0);
|
expr* zero = arith.mk_int(0);
|
||||||
|
|
||||||
// Branch 1: x = base^m · prefix where 0 <= m < n
|
// Branch 1: enumerate all decompositions of the base.
|
||||||
// Side constraints: m >= 0, m < n (i.e., n >= m + 1)
|
// x = base^m · prefix_i(base) where 0 <= m < n
|
||||||
|
// Uses the same GetDecompose pattern as fire_gpower_intro.
|
||||||
{
|
{
|
||||||
|
euf::snode_vector base_toks;
|
||||||
|
base->collect_tokens(base_toks);
|
||||||
|
unsigned base_len = base_toks.size();
|
||||||
|
expr* base_expr = get_power_base_expr(power);
|
||||||
|
if (!base_expr || base_len == 0)
|
||||||
|
return false;
|
||||||
|
|
||||||
expr_ref fresh_m = mk_fresh_int_var();
|
expr_ref fresh_m = mk_fresh_int_var();
|
||||||
euf::snode* fresh_power = mk_fresh_var(); // represents base^m
|
expr_ref power_m_expr(seq.str.mk_power(base_expr, fresh_m), m);
|
||||||
euf::snode* fresh_suffix = mk_fresh_var(); // represents prefix(base)
|
euf::snode* power_m_sn = m_sg.mk(power_m_expr);
|
||||||
euf::snode* replacement = m_sg.mk_concat(fresh_power, fresh_suffix);
|
if (!power_m_sn)
|
||||||
nielsen_node* child = mk_child(node);
|
return false;
|
||||||
nielsen_edge* e = mk_edge(node, child, true);
|
|
||||||
nielsen_subst s(var_head, replacement, eq->m_dep);
|
for (unsigned i = 0; i < base_len; ++i) {
|
||||||
e->add_subst(s);
|
euf::snode* tok = base_toks[i];
|
||||||
child->apply_subst(m_sg, s);
|
|
||||||
// m >= 0
|
// Skip char position when preceding token is a power:
|
||||||
e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq->m_dep));
|
// the power case at i-1 with 0 <= m' <= exp already covers m' = exp.
|
||||||
// m < n ⟺ n >= m + 1
|
if (!tok->is_power() && i > 0 && base_toks[i - 1]->is_power())
|
||||||
if (exp_n) {
|
continue;
|
||||||
expr_ref m_plus_1(arith.mk_add(fresh_m, arith.mk_int(1)), m);
|
|
||||||
e->add_side_int(mk_int_constraint(exp_n, m_plus_1, int_constraint_kind::ge, eq->m_dep));
|
// Build full-token prefix: base_toks[0..i-1]
|
||||||
|
euf::snode* prefix_sn = nullptr;
|
||||||
|
for (unsigned j = 0; j < i; ++j)
|
||||||
|
prefix_sn = (j == 0) ? base_toks[0] : m_sg.mk_concat(prefix_sn, base_toks[j]);
|
||||||
|
|
||||||
|
euf::snode* replacement;
|
||||||
|
expr_ref fresh_inner_m(m);
|
||||||
|
|
||||||
|
if (tok->is_power()) {
|
||||||
|
// Token is a power u^exp: decompose with fresh m', 0 <= m' <= exp
|
||||||
|
expr* inner_exp = get_power_exponent(tok);
|
||||||
|
expr* inner_base_e = get_power_base_expr(tok);
|
||||||
|
if (inner_exp && inner_base_e) {
|
||||||
|
fresh_inner_m = mk_fresh_int_var();
|
||||||
|
expr_ref partial_pow(seq.str.mk_power(inner_base_e, fresh_inner_m), m);
|
||||||
|
euf::snode* partial_sn = m_sg.mk(partial_pow);
|
||||||
|
euf::snode* suffix_sn = prefix_sn ? m_sg.mk_concat(prefix_sn, partial_sn) : partial_sn;
|
||||||
|
replacement = m_sg.mk_concat(power_m_sn, suffix_sn);
|
||||||
|
} else {
|
||||||
|
euf::snode* suffix_sn = prefix_sn ? m_sg.mk_concat(prefix_sn, tok) : tok;
|
||||||
|
replacement = m_sg.mk_concat(power_m_sn, suffix_sn);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// P(char) = ε, suffix is just the prefix
|
||||||
|
replacement = prefix_sn ? m_sg.mk_concat(power_m_sn, prefix_sn) : power_m_sn;
|
||||||
|
}
|
||||||
|
|
||||||
|
nielsen_node* child = mk_child(node);
|
||||||
|
nielsen_edge* e = mk_edge(node, child, true);
|
||||||
|
nielsen_subst s(var_head, replacement, eq->m_dep);
|
||||||
|
e->add_subst(s);
|
||||||
|
child->apply_subst(m_sg, s);
|
||||||
|
|
||||||
|
// m >= 0
|
||||||
|
e->add_side_int(mk_int_constraint(fresh_m, zero, int_constraint_kind::ge, eq->m_dep));
|
||||||
|
// m < n ⟺ n >= m + 1
|
||||||
|
if (exp_n) {
|
||||||
|
expr_ref m_plus_1(arith.mk_add(fresh_m, arith.mk_int(1)), m);
|
||||||
|
e->add_side_int(mk_int_constraint(exp_n, m_plus_1, int_constraint_kind::ge, eq->m_dep));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Inner power constraints: 0 <= m' <= inner_exp
|
||||||
|
if (fresh_inner_m.get()) {
|
||||||
|
expr* inner_exp = get_power_exponent(tok);
|
||||||
|
e->add_side_int(mk_int_constraint(fresh_inner_m, zero, int_constraint_kind::ge, eq->m_dep));
|
||||||
|
e->add_side_int(mk_int_constraint(inner_exp, fresh_inner_m, int_constraint_kind::ge, eq->m_dep));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Branch 2: x = u^n · x' (variable extends past full power, non-progress)
|
// Branch 2: x = u^n · x' (variable extends past full power, non-progress)
|
||||||
// Side constraint: n >= 0
|
|
||||||
{
|
{
|
||||||
euf::snode* fresh_tail = mk_fresh_var();
|
euf::snode* fresh_tail = mk_fresh_var();
|
||||||
// Peel one base unit (approximation of extending past the power)
|
euf::snode* replacement = m_sg.mk_concat(power, fresh_tail);
|
||||||
euf::snode* replacement = m_sg.mk_concat(base, fresh_tail);
|
|
||||||
nielsen_node* child = mk_child(node);
|
nielsen_node* child = mk_child(node);
|
||||||
nielsen_edge* e = mk_edge(node, child, false);
|
nielsen_edge* e = mk_edge(node, child, false);
|
||||||
nielsen_subst s(var_head, replacement, eq->m_dep);
|
nielsen_subst s(var_head, replacement, eq->m_dep);
|
||||||
e->add_subst(s);
|
e->add_subst(s);
|
||||||
child->apply_subst(m_sg, s);
|
child->apply_subst(m_sg, s);
|
||||||
if (exp_n)
|
|
||||||
e->add_side_int(mk_int_constraint(exp_n, zero, int_constraint_kind::ge, eq->m_dep));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -3243,7 +3596,18 @@ namespace seq {
|
||||||
return expr_ref(arith.mk_add(left, right), m);
|
return expr_ref(arith.mk_add(left, right), m);
|
||||||
}
|
}
|
||||||
|
|
||||||
// for variables and other terms, use symbolic (str.len expr)
|
// For variables: consult modification counter.
|
||||||
|
// mod_count > 0 means the variable has been reused by a non-eliminating
|
||||||
|
// substitution; use a fresh integer variable to avoid the circular
|
||||||
|
// |x| = 1 + |x| problem.
|
||||||
|
if (n->is_var()) {
|
||||||
|
unsigned mc = 0;
|
||||||
|
m_mod_cnt.find(n->id(), mc);
|
||||||
|
if (mc > 0)
|
||||||
|
return get_or_create_len_var(n, mc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// for variables at mod_count 0 and other terms, use symbolic (str.len expr)
|
||||||
return expr_ref(seq.str.mk_length(n->get_expr()), m);
|
return expr_ref(seq.str.mk_length(n->get_expr()), m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3374,6 +3738,131 @@ namespace seq {
|
||||||
return expr_ref(m.mk_true(), m);
|
return expr_ref(m.mk_true(), m);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
// Modification counter: substitution length tracking
|
||||||
|
// mirrors ZIPT's LocalInfo.CurrentModificationCnt + NielsenEdge.IncModCount/DecModCount
|
||||||
|
// + NielsenNode constructor length assertion logic
|
||||||
|
// -----------------------------------------------------------------------
|
||||||
|
|
||||||
|
expr_ref nielsen_graph::get_or_create_len_var(euf::snode* var, unsigned mod_count) {
|
||||||
|
ast_manager& m = m_sg.get_manager();
|
||||||
|
SASSERT(var && var->is_var());
|
||||||
|
SASSERT(mod_count > 0);
|
||||||
|
|
||||||
|
auto key = std::make_pair(var->id(), mod_count);
|
||||||
|
auto it = m_len_var_cache.find(key);
|
||||||
|
if (it != m_len_var_cache.end())
|
||||||
|
return expr_ref(it->second, m);
|
||||||
|
|
||||||
|
// Create a fresh integer variable: len_<varname>_<mod_count>
|
||||||
|
arith_util arith(m);
|
||||||
|
std::string name = "len!" + std::to_string(var->id()) + "!" + std::to_string(mod_count);
|
||||||
|
expr_ref fresh(m.mk_fresh_const(name.c_str(), arith.mk_int()), m);
|
||||||
|
m_len_vars.push_back(fresh);
|
||||||
|
m_len_var_cache.insert({key, fresh.get()});
|
||||||
|
return fresh;
|
||||||
|
}
|
||||||
|
|
||||||
|
void nielsen_graph::add_subst_length_constraints(nielsen_edge* e) {
|
||||||
|
auto const& substs = e->subst();
|
||||||
|
|
||||||
|
// Quick check: any non-eliminating substitutions?
|
||||||
|
bool has_non_elim = false;
|
||||||
|
for (auto const& s : substs)
|
||||||
|
if (!s.is_eliminating()) { has_non_elim = true; break; }
|
||||||
|
if (!has_non_elim) return;
|
||||||
|
|
||||||
|
ast_manager& m = m_sg.get_manager();
|
||||||
|
arith_util arith(m);
|
||||||
|
|
||||||
|
// Step 1: Compute LHS (|x|) for each non-eliminating substitution
|
||||||
|
// using current m_mod_cnt (before bumping).
|
||||||
|
// Also assert |x|_k >= 0 (mirrors ZIPT's NielsenNode constructor line 172).
|
||||||
|
svector<std::pair<unsigned, expr*>> lhs_exprs;
|
||||||
|
for (unsigned i = 0; i < substs.size(); ++i) {
|
||||||
|
auto const& s = substs[i];
|
||||||
|
if (s.is_eliminating()) continue;
|
||||||
|
SASSERT(s.m_var && s.m_var->is_var());
|
||||||
|
expr_ref lhs = compute_length_expr(s.m_var);
|
||||||
|
lhs_exprs.push_back({i, lhs.get()});
|
||||||
|
// Assert LHS >= 0
|
||||||
|
e->add_side_int(int_constraint(lhs, arith.mk_int(0),
|
||||||
|
int_constraint_kind::ge, s.m_dep, m));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: Bump mod counts for all non-eliminating variables at once.
|
||||||
|
for (auto const& s : substs) {
|
||||||
|
if (s.is_eliminating()) continue;
|
||||||
|
unsigned id = s.m_var->id();
|
||||||
|
unsigned prev = 0;
|
||||||
|
m_mod_cnt.find(id, prev);
|
||||||
|
m_mod_cnt.insert(id, prev + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: Compute RHS (|u|) with bumped mod counts and add |x| = |u|.
|
||||||
|
for (auto const& p : lhs_exprs) {
|
||||||
|
unsigned idx = p.first;
|
||||||
|
expr* lhs_expr = p.second;
|
||||||
|
auto const& s = substs[idx];
|
||||||
|
expr_ref rhs = compute_length_expr(s.m_replacement);
|
||||||
|
e->add_side_int(int_constraint(lhs_expr, rhs, int_constraint_kind::eq,
|
||||||
|
s.m_dep, m));
|
||||||
|
|
||||||
|
// Assert non-negativity for any fresh length variables in the RHS
|
||||||
|
// (variables at mod_count > 0 that are newly created).
|
||||||
|
euf::snode_vector tokens;
|
||||||
|
s.m_replacement->collect_tokens(tokens);
|
||||||
|
for (euf::snode* tok : tokens) {
|
||||||
|
if (tok->is_var()) {
|
||||||
|
unsigned mc = 0;
|
||||||
|
m_mod_cnt.find(tok->id(), mc);
|
||||||
|
if (mc > 0) {
|
||||||
|
expr_ref len_var = get_or_create_len_var(tok, mc);
|
||||||
|
e->add_side_int(int_constraint(len_var, arith.mk_int(0),
|
||||||
|
int_constraint_kind::ge, s.m_dep, m));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 4: Restore mod counts (temporary bump for computing RHS only).
|
||||||
|
for (auto const& s : substs) {
|
||||||
|
if (s.is_eliminating()) continue;
|
||||||
|
unsigned id = s.m_var->id();
|
||||||
|
unsigned prev = 0;
|
||||||
|
m_mod_cnt.find(id, prev);
|
||||||
|
SASSERT(prev >= 1);
|
||||||
|
if (prev <= 1)
|
||||||
|
m_mod_cnt.remove(id);
|
||||||
|
else
|
||||||
|
m_mod_cnt.insert(id, prev - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void nielsen_graph::inc_edge_mod_counts(nielsen_edge* e) {
|
||||||
|
for (auto const& s : e->subst()) {
|
||||||
|
if (s.is_eliminating()) continue;
|
||||||
|
unsigned id = s.m_var->id();
|
||||||
|
unsigned prev = 0;
|
||||||
|
m_mod_cnt.find(id, prev);
|
||||||
|
m_mod_cnt.insert(id, prev + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void nielsen_graph::dec_edge_mod_counts(nielsen_edge* e) {
|
||||||
|
for (auto const& s : e->subst()) {
|
||||||
|
if (s.is_eliminating()) continue;
|
||||||
|
unsigned id = s.m_var->id();
|
||||||
|
unsigned prev = 0;
|
||||||
|
m_mod_cnt.find(id, prev);
|
||||||
|
SASSERT(prev >= 1);
|
||||||
|
if (prev <= 1)
|
||||||
|
m_mod_cnt.remove(id);
|
||||||
|
else
|
||||||
|
m_mod_cnt.insert(id, prev - 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void nielsen_graph::assert_node_new_int_constraints(nielsen_node* node) {
|
void nielsen_graph::assert_node_new_int_constraints(nielsen_node* node) {
|
||||||
// Assert only the int_constraints that are new to this node (beyond those
|
// Assert only the int_constraints that are new to this node (beyond those
|
||||||
// inherited from its parent via clone_from). The parent's constraints are
|
// inherited from its parent via clone_from). The parent's constraints are
|
||||||
|
|
@ -3384,6 +3873,78 @@ namespace seq {
|
||||||
m_solver.assert_expr(int_constraint_to_expr(node->int_constraints()[i]));
|
m_solver.assert_expr(int_constraint_to_expr(node->int_constraints()[i]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nielsen_graph::generate_node_length_constraints(nielsen_node* node) {
|
||||||
|
if (node->m_node_len_constraints_generated)
|
||||||
|
return;
|
||||||
|
node->m_node_len_constraints_generated = true;
|
||||||
|
|
||||||
|
// Skip the root node — its length constraints are already asserted
|
||||||
|
// at the base solver level by assert_root_constraints_to_solver().
|
||||||
|
if (node == m_root)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ast_manager& m = m_sg.get_manager();
|
||||||
|
arith_util arith(m);
|
||||||
|
uint_set seen_vars;
|
||||||
|
|
||||||
|
for (str_eq const& eq : node->str_eqs()) {
|
||||||
|
if (eq.is_trivial())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
expr_ref len_lhs = compute_length_expr(eq.m_lhs);
|
||||||
|
expr_ref len_rhs = compute_length_expr(eq.m_rhs);
|
||||||
|
node->add_int_constraint(int_constraint(len_lhs, len_rhs,
|
||||||
|
int_constraint_kind::eq, eq.m_dep, m));
|
||||||
|
|
||||||
|
// non-negativity for each variable (mod-count-aware)
|
||||||
|
euf::snode_vector tokens;
|
||||||
|
eq.m_lhs->collect_tokens(tokens);
|
||||||
|
eq.m_rhs->collect_tokens(tokens);
|
||||||
|
for (euf::snode* tok : tokens) {
|
||||||
|
if (tok->is_var() && !seen_vars.contains(tok->id())) {
|
||||||
|
seen_vars.insert(tok->id());
|
||||||
|
expr_ref len_var = compute_length_expr(tok);
|
||||||
|
node->add_int_constraint(int_constraint(len_var, arith.mk_int(0),
|
||||||
|
int_constraint_kind::ge, eq.m_dep, m));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Parikh interval bounds for regex memberships at this node
|
||||||
|
seq_util& seq = m_sg.get_seq_util();
|
||||||
|
for (str_mem const& mem : node->str_mems()) {
|
||||||
|
expr* re_expr = mem.m_regex->get_expr();
|
||||||
|
if (!re_expr || !seq.is_re(re_expr))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
unsigned min_len = 0, max_len = UINT_MAX;
|
||||||
|
compute_regex_length_interval(mem.m_regex, min_len, max_len);
|
||||||
|
|
||||||
|
expr_ref len_str = compute_length_expr(mem.m_str);
|
||||||
|
|
||||||
|
if (min_len > 0) {
|
||||||
|
node->add_int_constraint(int_constraint(len_str, arith.mk_int(min_len),
|
||||||
|
int_constraint_kind::ge, mem.m_dep, m));
|
||||||
|
}
|
||||||
|
if (max_len < UINT_MAX) {
|
||||||
|
node->add_int_constraint(int_constraint(len_str, arith.mk_int(max_len),
|
||||||
|
int_constraint_kind::le, mem.m_dep, m));
|
||||||
|
}
|
||||||
|
|
||||||
|
// non-negativity for string-side variables
|
||||||
|
euf::snode_vector tokens;
|
||||||
|
mem.m_str->collect_tokens(tokens);
|
||||||
|
for (euf::snode* tok : tokens) {
|
||||||
|
if (tok->is_var() && !seen_vars.contains(tok->id())) {
|
||||||
|
seen_vars.insert(tok->id());
|
||||||
|
expr_ref len_var = compute_length_expr(tok);
|
||||||
|
node->add_int_constraint(int_constraint(len_var, arith.mk_int(0),
|
||||||
|
int_constraint_kind::ge, mem.m_dep, m));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool nielsen_graph::check_int_feasibility(nielsen_node* node, svector<nielsen_edge*> const& cur_path) {
|
bool nielsen_graph::check_int_feasibility(nielsen_node* node, svector<nielsen_edge*> const& cur_path) {
|
||||||
// In incremental mode the solver already holds all path constraints
|
// In incremental mode the solver already holds all path constraints
|
||||||
// (root length constraints at the base level, edge side_int and node
|
// (root length constraints at the base level, edge side_int and node
|
||||||
|
|
|
||||||
|
|
@ -241,6 +241,7 @@ Author:
|
||||||
#include "ast/seq_decl_plugin.h"
|
#include "ast/seq_decl_plugin.h"
|
||||||
#include "ast/euf/euf_sgraph.h"
|
#include "ast/euf/euf_sgraph.h"
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <map>
|
||||||
#include "model/model.h"
|
#include "model/model.h"
|
||||||
|
|
||||||
namespace seq {
|
namespace seq {
|
||||||
|
|
@ -456,6 +457,7 @@ namespace seq {
|
||||||
ptr_vector<str_mem> m_side_str_mem; // side constraints: regex memberships
|
ptr_vector<str_mem> m_side_str_mem; // side constraints: regex memberships
|
||||||
vector<int_constraint> m_side_int; // side constraints: integer equalities/inequalities
|
vector<int_constraint> m_side_int; // side constraints: integer equalities/inequalities
|
||||||
bool m_is_progress; // does this edge represent progress?
|
bool m_is_progress; // does this edge represent progress?
|
||||||
|
bool m_len_constraints_computed = false; // lazily computed substitution length constraints
|
||||||
public:
|
public:
|
||||||
nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress);
|
nielsen_edge(nielsen_node* src, nielsen_node* tgt, bool is_progress);
|
||||||
|
|
||||||
|
|
@ -480,6 +482,9 @@ namespace seq {
|
||||||
|
|
||||||
bool is_progress() const { return m_is_progress; }
|
bool is_progress() const { return m_is_progress; }
|
||||||
|
|
||||||
|
bool len_constraints_computed() const { return m_len_constraints_computed; }
|
||||||
|
void set_len_constraints_computed(bool v) { m_len_constraints_computed = v; }
|
||||||
|
|
||||||
bool operator==(nielsen_edge const& other) const {
|
bool operator==(nielsen_edge const& other) const {
|
||||||
return m_src == other.m_src && m_tgt == other.m_tgt;
|
return m_src == other.m_src && m_tgt == other.m_tgt;
|
||||||
}
|
}
|
||||||
|
|
@ -519,6 +524,7 @@ namespace seq {
|
||||||
bool m_is_extended = false;
|
bool m_is_extended = false;
|
||||||
backtrack_reason m_reason = backtrack_reason::unevaluated;
|
backtrack_reason m_reason = backtrack_reason::unevaluated;
|
||||||
bool m_is_progress = false;
|
bool m_is_progress = false;
|
||||||
|
bool m_node_len_constraints_generated = false; // true after generate_node_length_constraints runs
|
||||||
|
|
||||||
// evaluation index for run tracking
|
// evaluation index for run tracking
|
||||||
unsigned m_eval_idx = 0;
|
unsigned m_eval_idx = 0;
|
||||||
|
|
@ -732,6 +738,25 @@ namespace seq {
|
||||||
// Parikh image filter: generates modular length constraints from regex
|
// Parikh image filter: generates modular length constraints from regex
|
||||||
// memberships. Allocated in the constructor; owned by this graph.
|
// memberships. Allocated in the constructor; owned by this graph.
|
||||||
seq_parikh* m_parikh = nullptr;
|
seq_parikh* m_parikh = nullptr;
|
||||||
|
// -----------------------------------------------
|
||||||
|
// Modification counter for substitution length tracking.
|
||||||
|
// mirrors ZIPT's LocalInfo.CurrentModificationCnt
|
||||||
|
// -----------------------------------------------
|
||||||
|
|
||||||
|
// Maps snode id of string variable → current modification (reuse) count
|
||||||
|
// along the DFS path. When a non-eliminating substitution x/u is applied
|
||||||
|
// (x appears in u), x's count is bumped. This produces distinct length
|
||||||
|
// variables for x before and after substitution, avoiding the unsatisfiable
|
||||||
|
// |x| = 1 + |x| that results from reusing the same length symbol.
|
||||||
|
u_map<unsigned> m_mod_cnt;
|
||||||
|
|
||||||
|
// Cache: (var snode id, modification count) → fresh integer variable.
|
||||||
|
// Variables at mod_count 0 use str.len(var_expr) (standard form).
|
||||||
|
// Variables at mod_count > 0 get a fresh Z3 integer constant.
|
||||||
|
std::map<std::pair<unsigned, unsigned>, expr*> m_len_var_cache;
|
||||||
|
|
||||||
|
// Pins the fresh length variable expressions so they aren't garbage collected.
|
||||||
|
expr_ref_vector m_len_vars;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Construct with a caller-supplied solver. Ownership is NOT transferred;
|
// Construct with a caller-supplied solver. Ownership is NOT transferred;
|
||||||
|
|
@ -937,7 +962,7 @@ namespace seq {
|
||||||
|
|
||||||
// helper for apply_gpower_intr: fires the substitution
|
// helper for apply_gpower_intr: fires the substitution
|
||||||
bool fire_gpower_intro(nielsen_node* node, str_eq const& eq,
|
bool fire_gpower_intro(nielsen_node* node, str_eq const& eq,
|
||||||
euf::snode* var, euf::snode_vector const& ground_prefix);
|
euf::snode* var, euf::snode_vector const& ground_prefix_orig);
|
||||||
|
|
||||||
// regex variable split: for str_mem x·s ∈ R where x is a variable,
|
// regex variable split: for str_mem x·s ∈ R where x is a variable,
|
||||||
// split using minterms: x → ε, or x → c·x' for each minterm c.
|
// split using minterms: x → ε, or x → c·x' for each minterm c.
|
||||||
|
|
@ -984,6 +1009,14 @@ namespace seq {
|
||||||
// bounds become visible to subsequent check() and check_lp_le() calls.
|
// bounds become visible to subsequent check() and check_lp_le() calls.
|
||||||
void assert_node_new_int_constraints(nielsen_node* node);
|
void assert_node_new_int_constraints(nielsen_node* node);
|
||||||
|
|
||||||
|
// Generate |LHS| = |RHS| length constraints for a non-root node's own
|
||||||
|
// string equalities and add them as int_constraints on the node.
|
||||||
|
// Called once per node (guarded by m_node_len_constraints_generated).
|
||||||
|
// Uses compute_length_expr (mod-count-aware) so that variables with
|
||||||
|
// non-zero modification counts get fresh length variables.
|
||||||
|
// Mirrors ZIPT's Constraint.Shared forwarding for per-node equations.
|
||||||
|
void generate_node_length_constraints(nielsen_node* node);
|
||||||
|
|
||||||
// check integer feasibility of the constraints along the current path.
|
// check integer feasibility of the constraints along the current path.
|
||||||
// returns true if feasible (including unknown), false only if l_false.
|
// returns true if feasible (including unknown), false only if l_false.
|
||||||
// Precondition: all path constraints have been incrementally asserted to
|
// Precondition: all path constraints have been incrementally asserted to
|
||||||
|
|
@ -1009,6 +1042,30 @@ namespace seq {
|
||||||
|
|
||||||
// convert an int_constraint to an expr* assertion
|
// convert an int_constraint to an expr* assertion
|
||||||
expr_ref int_constraint_to_expr(int_constraint const& ic);
|
expr_ref int_constraint_to_expr(int_constraint const& ic);
|
||||||
|
|
||||||
|
// -----------------------------------------------
|
||||||
|
// Modification counter methods for substitution length tracking.
|
||||||
|
// mirrors ZIPT's NielsenEdge.IncModCount / DecModCount and
|
||||||
|
// NielsenNode constructor length assertion logic.
|
||||||
|
// -----------------------------------------------
|
||||||
|
|
||||||
|
// Get or create a fresh integer variable for len(var) at the given
|
||||||
|
// modification count. Returns str.len(var_expr) when mod_count == 0.
|
||||||
|
expr_ref get_or_create_len_var(euf::snode* var, unsigned mod_count);
|
||||||
|
|
||||||
|
// Compute and add |x| = |u| length constraints to an edge for all
|
||||||
|
// its non-eliminating substitutions. Uses current m_mod_cnt.
|
||||||
|
// Temporarily bumps m_mod_cnt for RHS computation, then restores.
|
||||||
|
// Called lazily on first edge traversal in search_dfs.
|
||||||
|
void add_subst_length_constraints(nielsen_edge* e);
|
||||||
|
|
||||||
|
// Bump modification counts for an edge's non-eliminating substitutions.
|
||||||
|
// Called when entering an edge during DFS.
|
||||||
|
void inc_edge_mod_counts(nielsen_edge* e);
|
||||||
|
|
||||||
|
// Restore modification counts for an edge's non-eliminating substitutions.
|
||||||
|
// Called when backtracking from an edge during DFS.
|
||||||
|
void dec_edge_mod_counts(nielsen_edge* e);
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -421,8 +421,8 @@ namespace smt {
|
||||||
// here the actual Nielsen solving happens
|
// here the actual Nielsen solving happens
|
||||||
auto result = m_nielsen.solve();
|
auto result = m_nielsen.solve();
|
||||||
std::cout << "Result: " << (result == seq::nielsen_graph::search_result::sat ? "SAT" : result == seq::nielsen_graph::search_result::unsat ? "UNSAT" : "UNKNOWN") << "\n";
|
std::cout << "Result: " << (result == seq::nielsen_graph::search_result::sat ? "SAT" : result == seq::nielsen_graph::search_result::unsat ? "UNSAT" : "UNKNOWN") << "\n";
|
||||||
m_nielsen.to_dot(std::cout);
|
// m_nielsen.to_dot(std::cout);
|
||||||
std::cout << std::endl;
|
// std::cout << std::endl;
|
||||||
|
|
||||||
if (result == seq::nielsen_graph::search_result::sat) {
|
if (result == seq::nielsen_graph::search_result::sat) {
|
||||||
IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve SAT, sat_node="
|
IF_VERBOSE(1, verbose_stream() << "nseq final_check: solve SAT, sat_node="
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue