diff --git a/.github/skills/deeptest/SKILL.md b/.github/skills/deeptest/SKILL.md deleted file mode 100644 index 2e2cd747e..000000000 --- a/.github/skills/deeptest/SKILL.md +++ /dev/null @@ -1,113 +0,0 @@ ---- -name: deeptest -description: Generate stress tests and differential tests for Z3 theories. Creates random or structured SMT-LIB2 formulas, runs them through Z3, and checks for crashes, assertion failures, or result inconsistencies. Inspired by fuzzing and metamorphic testing approaches applied to SMT solvers. ---- - -Given a strategy and count, generate SMT-LIB2 formulas targeting Z3 internals and report anomalies. Strategies range from pure random generation to structured metamorphic and cross-theory combinations. Every formula and finding is logged to z3agent.db. - -# Step 1: Choose a strategy and run - -Action: - Select a generation strategy and invoke the script with the desired - count and seed. - -Expectation: - The script generates SMT-LIB2 formulas according to the chosen - strategy, runs each through Z3, and records results to z3agent.db. - -Result: - On completion: a summary is printed with formula count, anomaly count, - and elapsed time. Proceed to Step 2. - On early exit: verify the Z3 binary is accessible and review timeout - settings. - -```bash -python3 scripts/deeptest.py --strategy random --count 100 --seed 42 -python3 scripts/deeptest.py --strategy metamorphic --seed-file base.smt2 --count 50 -python3 scripts/deeptest.py --strategy cross-theory --theories "LIA,BV" --count 80 -python3 scripts/deeptest.py --strategy incremental --count 60 --debug -``` - -Available strategies: - -- `random`: generate formulas with random declarations (Int, Bool, BitVec), random arithmetic and boolean assertions, and check-sat. -- `metamorphic`: start from a base formula (generated or loaded from file), apply equisatisfiable transformations (tautology insertion, double negation, assertion duplication), and verify the result stays consistent. -- `cross-theory`: combine multiple theories (LIA, Bool, BV) in a single formula with bridging constraints to stress theory combination. -- `incremental`: generate push/pop sequences with per-frame assertions to stress incremental solving. - -# Step 2: Interpret the output - -Action: - Review the summary printed after the run completes. - -Expectation: - The summary shows strategy, seed, formula count, anomaly count, and - elapsed time. - -Result: - On zero anomalies: Z3 handled all generated formulas without issue. - On nonzero anomalies: crashes, assertion failures, solver errors, or - result disagreements were detected. Proceed to Step 3 for details. - -Example summary: - -``` -strategy: random -seed: 42 -formulas: 100 -anomalies: 2 -elapsed: 4500ms -``` - -# Step 3: Inspect findings - -Action: - Query z3agent.db for detailed finding records from the run. - -Expectation: - Each finding includes category, severity, message, formula index, exit - code, and a stderr excerpt. - -Result: - Use findings to identify reproducible failure patterns and prioritize - fixes by severity. If a finding appears nondeterministic, proceed to - Step 4 with the same seed to confirm. - -```bash -python3 ../../shared/z3db.py query "SELECT category, severity, message FROM findings WHERE run_id IN (SELECT run_id FROM runs WHERE skill='deeptest') ORDER BY finding_id DESC LIMIT 20" -``` - -# Step 4: Reproduce - -Action: - Re-run the script with the same seed to reproduce the exact sequence - of generated formulas. - -Expectation: - Identical formulas are generated, producing the same anomalies if the - underlying bug persists. - -Result: - On same anomalies: bug confirmed and suitable for a regression test. - On zero anomalies: the issue may be nondeterministic or already fixed; - investigate further before closing. - -```bash -python3 scripts/deeptest.py --strategy random --count 100 --seed 42 -``` - -The seed is printed in every run summary and logged in the run record. - -# Parameters - -| Parameter | Type | Required | Default | Description | -|-----------|------|----------|---------|-------------| -| strategy | string | no | random | test generation strategy: random, metamorphic, cross-theory, incremental | -| count | int | no | 50 | number of formulas to generate | -| seed | int | no | clock | random seed for reproducibility | -| seed-file | path | no | | base .smt2 file for metamorphic strategy | -| theories | string | no | LIA,BV | comma-separated theories for cross-theory strategy | -| timeout | int | no | 10 | per-formula Z3 timeout in seconds | -| z3 | path | no | auto | path to z3 binary | -| debug | flag | no | off | verbose tracing | -| db | path | no | .z3-agent/z3agent.db | logging database | diff --git a/.github/skills/deeptest/scripts/deeptest.py b/.github/skills/deeptest/scripts/deeptest.py deleted file mode 100644 index 5d513a6bd..000000000 --- a/.github/skills/deeptest/scripts/deeptest.py +++ /dev/null @@ -1,393 +0,0 @@ -#!/usr/bin/env python3 -""" -deeptest.py: generate and run stress tests for Z3. - -Usage: - python deeptest.py --strategy random --count 100 - python deeptest.py --strategy metamorphic --seed-file base.smt2 - python deeptest.py --strategy cross-theory --theories "LIA,BV" --debug -""" - -import argparse -import logging -import random -import sys -import time -from pathlib import Path - -sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "shared")) -from z3db import Z3DB, run_z3, setup_logging - -log = logging.getLogger("deeptest") - -# Sort and operator tables - -THEORY_SORTS = { - "LIA": "Int", - "Bool": "Bool", - "BV": "(_ BitVec 32)", -} - -INT_ARITH = ["+", "-", "*"] -INT_CMP = [">", "<", ">=", "<=", "="] -BV_ARITH = ["bvadd", "bvsub", "bvand", "bvor", "bvxor"] -BV_CMP = ["bvslt", "bvsgt", "bvsle", "bvsge", "="] - -# Assertion generators (one per sort) - - -def _int_assertion(rng, vs): - if len(vs) < 2: - return f"(assert ({rng.choice(INT_CMP)} {vs[0]} {rng.randint(-10, 10)}))" - a, b = rng.sample(vs, 2) - return f"(assert ({rng.choice(INT_CMP)} ({rng.choice(INT_ARITH)} {a} {b}) {rng.randint(-10, 10)}))" - - -def _bool_assertion(rng, vs): - if len(vs) == 1: - return f"(assert {vs[0]})" if rng.random() < 0.5 else f"(assert (not {vs[0]}))" - a, b = rng.sample(vs, 2) - return f"(assert ({rng.choice(['and', 'or', '=>'])} {a} {b}))" - - -def _bv_assertion(rng, vs): - lit = f"(_ bv{rng.randint(0, 255)} 32)" - if len(vs) < 2: - return f"(assert ({rng.choice(BV_CMP)} {vs[0]} {lit}))" - a, b = rng.sample(vs, 2) - return f"(assert ({rng.choice(BV_CMP)} ({rng.choice(BV_ARITH)} {a} {b}) {lit}))" - - -SORT_ASSERTION = { - "Int": _int_assertion, - "Bool": _bool_assertion, - "(_ BitVec 32)": _bv_assertion, -} - - -def _random_assertion(rng, vars_by_sort): - """Pick a populated sort and emit one random assertion.""" - available = [s for s in vars_by_sort if vars_by_sort[s]] - if not available: - return None - sort = rng.choice(available) - return SORT_ASSERTION[sort](rng, vars_by_sort[sort]) - -# Formula generators - - -def gen_random_formula(rng, num_vars=5, num_assertions=5): - """Random declarations, random assertions, single check-sat.""" - lines = [] - vars_by_sort = {} - sorts = list(THEORY_SORTS.values()) - - for i in range(num_vars): - sort = rng.choice(sorts) - name = f"v{i}" - lines.append(f"(declare-const {name} {sort})") - vars_by_sort.setdefault(sort, []).append(name) - - for _ in range(num_assertions): - a = _random_assertion(rng, vars_by_sort) - if a: - lines.append(a) - - lines.append("(check-sat)") - return "\n".join(lines) - - -def gen_metamorphic_variant(rng, base_formula): - """Apply an equisatisfiable transformation to a formula. - - Transformations: - tautology : insert (assert true) before check-sat - double_neg : wrap one assertion body in double negation - duplicate : repeat an existing assertion - """ - lines = base_formula.strip().split("\n") - transform = rng.choice(["tautology", "double_neg", "duplicate"]) - assertion_idxs = [i for i, l in enumerate(lines) - if l.strip().startswith("(assert")] - - if transform == "tautology": - pos = next((i for i, l in enumerate(lines) if "check-sat" in l), - len(lines)) - lines.insert(pos, "(assert true)") - - elif transform == "double_neg" and assertion_idxs: - idx = rng.choice(assertion_idxs) - body = lines[idx].strip() - inner = body[len("(assert "):-1] - lines[idx] = f"(assert (not (not {inner})))" - - elif transform == "duplicate" and assertion_idxs: - idx = rng.choice(assertion_idxs) - lines.insert(idx + 1, lines[idx]) - - return "\n".join(lines) - - -def gen_cross_theory_formula(rng, theories, num_vars=4, num_assertions=6): - """Combine variables from multiple theories with bridging constraints.""" - lines = [] - vars_by_sort = {} - sorts = [THEORY_SORTS[t] for t in theories if t in THEORY_SORTS] - if not sorts: - sorts = list(THEORY_SORTS.values()) - - for i in range(num_vars): - sort = sorts[i % len(sorts)] - name = f"v{i}" - lines.append(f"(declare-const {name} {sort})") - vars_by_sort.setdefault(sort, []).append(name) - - for _ in range(num_assertions): - a = _random_assertion(rng, vars_by_sort) - if a: - lines.append(a) - - # Bridge Int and Bool when both present - int_vs = vars_by_sort.get("Int", []) - bool_vs = vars_by_sort.get("Bool", []) - if int_vs and bool_vs: - iv = rng.choice(int_vs) - bv = rng.choice(bool_vs) - lines.append(f"(assert (= {bv} (> {iv} 0)))") - - lines.append("(check-sat)") - return "\n".join(lines) - - -def gen_incremental_formula(rng, num_frames=3, num_vars=4, - asserts_per_frame=3): - """Push/pop sequence: all variables declared globally, assertions scoped.""" - lines = [] - vars_by_sort = {} - sorts = list(THEORY_SORTS.values()) - - for i in range(num_vars): - sort = rng.choice(sorts) - name = f"v{i}" - lines.append(f"(declare-const {name} {sort})") - vars_by_sort.setdefault(sort, []).append(name) - - for _ in range(num_frames): - lines.append("(push 1)") - for _ in range(asserts_per_frame): - a = _random_assertion(rng, vars_by_sort) - if a: - lines.append(a) - lines.append("(check-sat)") - lines.append("(pop 1)") - - lines.append("(check-sat)") - return "\n".join(lines) - -# Anomaly detection - - -def classify_result(result): - """Return an anomaly category string or None if the result looks normal.""" - if result["exit_code"] != 0 and result["result"] != "timeout": - return "crash" - if "assertion" in result["stderr"].lower(): - return "assertion_failure" - if result["result"] == "error": - return "error" - return None - -# Strategy runners - - -def run_random(args, rng, db, run_id): - anomalies = 0 - for i in range(args.count): - formula = gen_random_formula(rng, rng.randint(2, 8), - rng.randint(1, 10)) - log.debug("formula %d:\n%s", i, formula) - result = run_z3(formula, z3_bin=args.z3, timeout=args.timeout, - debug=args.debug) - db.log_formula(run_id, formula, result["result"]) - - cat = classify_result(result) - if cat: - anomalies += 1 - db.log_finding( - run_id, cat, - f"random formula #{i}: {cat} (exit={result['exit_code']})", - severity="high" if cat == "crash" else "medium", - details={"formula_index": i, - "exit_code": result["exit_code"], - "stderr": result["stderr"][:500]}) - log.warning("anomaly in formula %d: %s", i, cat) - return anomalies - - -def run_metamorphic(args, rng, db, run_id): - if args.seed_file: - base = Path(args.seed_file).read_text() - else: - base = gen_random_formula(rng, num_vars=4, num_assertions=3) - - base_out = run_z3(base, z3_bin=args.z3, timeout=args.timeout, - debug=args.debug) - base_status = base_out["result"] - db.log_formula(run_id, base, base_status) - log.info("base formula result: %s", base_status) - - if base_status not in ("sat", "unsat"): - db.log_finding(run_id, "skip", - f"base formula not definite: {base_status}", - severity="info") - return 0 - - anomalies = 0 - for i in range(args.count): - variant = gen_metamorphic_variant(rng, base) - log.debug("variant %d:\n%s", i, variant) - result = run_z3(variant, z3_bin=args.z3, timeout=args.timeout, - debug=args.debug) - db.log_formula(run_id, variant, result["result"]) - - cat = classify_result(result) - if cat: - anomalies += 1 - db.log_finding( - run_id, cat, - f"metamorphic variant #{i}: {cat}", - severity="high", - details={"variant_index": i, - "stderr": result["stderr"][:500]}) - log.warning("anomaly in variant %d: %s", i, cat) - continue - - if result["result"] in ("sat", "unsat") \ - and result["result"] != base_status: - anomalies += 1 - db.log_finding( - run_id, "disagreement", - f"variant #{i}: expected {base_status}, " - f"got {result['result']}", - severity="critical", - details={"variant_index": i, - "expected": base_status, - "actual": result["result"]}) - log.warning("disagreement in variant %d: expected %s, got %s", - i, base_status, result["result"]) - return anomalies - - -def run_cross_theory(args, rng, db, run_id): - theories = [t.strip() for t in args.theories.split(",")] - anomalies = 0 - for i in range(args.count): - formula = gen_cross_theory_formula(rng, theories, - rng.randint(3, 8), - rng.randint(2, 10)) - log.debug("cross-theory formula %d:\n%s", i, formula) - result = run_z3(formula, z3_bin=args.z3, timeout=args.timeout, - debug=args.debug) - db.log_formula(run_id, formula, result["result"]) - - cat = classify_result(result) - if cat: - anomalies += 1 - db.log_finding( - run_id, cat, - f"cross-theory #{i} ({','.join(theories)}): {cat}", - severity="high" if cat == "crash" else "medium", - details={"formula_index": i, "theories": theories, - "exit_code": result["exit_code"], - "stderr": result["stderr"][:500]}) - log.warning("anomaly in cross-theory formula %d: %s", i, cat) - return anomalies - - -def run_incremental(args, rng, db, run_id): - anomalies = 0 - for i in range(args.count): - num_frames = rng.randint(2, 6) - formula = gen_incremental_formula(rng, num_frames) - log.debug("incremental formula %d:\n%s", i, formula) - result = run_z3(formula, z3_bin=args.z3, timeout=args.timeout, - debug=args.debug) - db.log_formula(run_id, formula, result["result"]) - - cat = classify_result(result) - if cat: - anomalies += 1 - db.log_finding( - run_id, cat, - f"incremental #{i} ({num_frames} frames): {cat}", - severity="high" if cat == "crash" else "medium", - details={"formula_index": i, "num_frames": num_frames, - "exit_code": result["exit_code"], - "stderr": result["stderr"][:500]}) - log.warning("anomaly in incremental formula %d: %s", i, cat) - return anomalies - - -STRATEGIES = { - "random": run_random, - "metamorphic": run_metamorphic, - "cross-theory": run_cross_theory, - "incremental": run_incremental, -} - -# Entry point - - -def main(): - parser = argparse.ArgumentParser( - prog="deeptest", - description="Generate and run stress tests for Z3.", - ) - parser.add_argument("--strategy", choices=list(STRATEGIES), - default="random", - help="test generation strategy") - parser.add_argument("--count", type=int, default=50, - help="number of formulas to generate") - parser.add_argument("--seed", type=int, default=None, - help="random seed for reproducibility") - parser.add_argument("--seed-file", default=None, - help="base .smt2 file for metamorphic strategy") - parser.add_argument("--theories", default="LIA,BV", - help="comma-separated theories for cross-theory") - parser.add_argument("--timeout", type=int, default=10, - help="per-formula Z3 timeout in seconds") - parser.add_argument("--z3", default=None, help="path to z3 binary") - parser.add_argument("--db", default=None, help="path to z3agent.db") - parser.add_argument("--debug", action="store_true") - args = parser.parse_args() - - setup_logging(args.debug) - - seed = args.seed if args.seed is not None else int(time.time()) - rng = random.Random(seed) - log.info("seed: %d", seed) - - db = Z3DB(args.db) - run_id = db.start_run( - "deeptest", - f"strategy={args.strategy} count={args.count} seed={seed}") - - t0 = time.monotonic() - anomalies = STRATEGIES[args.strategy](args, rng, db, run_id) - elapsed_ms = int((time.monotonic() - t0) * 1000) - - status = "success" if anomalies == 0 else "findings" - db.finish_run(run_id, status, elapsed_ms) - - print(f"strategy: {args.strategy}") - print(f"seed: {seed}") - print(f"formulas: {args.count}") - print(f"anomalies: {anomalies}") - print(f"elapsed: {elapsed_ms}ms") - - db.close() - sys.exit(1 if anomalies > 0 else 0) - - -if __name__ == "__main__": - main()