Add Copilot skill architecture with 10 skills, 2 agents, and shared infra

Introduce .github/skills/ with solve, prove, optimize, simplify, encode, explain, benchmark, memory-safety, static-analysis, and deeptest skills. Each skill follows a SKILL.md + scripts/ pattern with Python scripts backed by a shared SQLite logging library (z3db.py). Two orchestrator agents (z3-solver, z3-verifier) route requests to the appropriate skills.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-06-10 02:47:15 +00:00 · 2026-03-11 17:41:29 +00:00 · 2026-03-11 17:41:29 +00:00 · d349b93d1d
commit d349b93d1d
parent 1cba7cb5ee
25 changed files with 2784 additions and 0 deletions
--- a/.github/skills/deeptest/scripts/deeptest.py
+++ b/.github/skills/deeptest/scripts/deeptest.py
@ -0,0 +1,393 @@
+#!/usr/bin/env python3
+"""
+deeptest.py: generate and run stress tests for Z3.
+
+Usage:
+    python deeptest.py --strategy random --count 100
+    python deeptest.py --strategy metamorphic --seed-file base.smt2
+    python deeptest.py --strategy cross-theory --theories "LIA,BV" --debug
+"""
+
+import argparse
+import logging
+import random
+import sys
+import time
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "shared"))
+from z3db import Z3DB, run_z3, setup_logging
+
+log = logging.getLogger("deeptest")
+
+# Lookup tables: theory name -> SMT-LIB sort, and the operator pools that the
+# assertion generators draw from.
+
+THEORY_SORTS = {
+    "LIA": "Int",
+    "Bool": "Bool",
+    "BV": "(_ BitVec 32)",
+}
+
+# Operators used for Int terms and comparisons.
+INT_ARITH = ["+", "-", "*"]
+INT_CMP = [">", "<", ">=", "<=", "="]
+# Operators used for 32-bit bit-vector terms and comparisons.
+BV_ARITH = ["bvadd", "bvsub", "bvand", "bvor", "bvxor"]
+BV_CMP = ["bvslt", "bvsgt", "bvsle", "bvsge", "="]
+
+# Assertion generators (one per sort)
+
+
+def _int_assertion(rng, vs):
+    """Emit one random ``(assert ...)`` over the Int variables in *vs*.
+
+    With fewer than two variables, the first one is compared against a
+    literal in [-10, 10]. Otherwise an arithmetic term over two distinct
+    variables is compared against such a literal.
+    """
+    if len(vs) >= 2:
+        lhs, rhs = rng.sample(vs, 2)
+        # Draw order (comparison, arithmetic op, literal) is fixed so that a
+        # given seed keeps producing the same assertion.
+        cmp_op = rng.choice(INT_CMP)
+        arith_op = rng.choice(INT_ARITH)
+        bound = rng.randint(-10, 10)
+        return f"(assert ({cmp_op} ({arith_op} {lhs} {rhs}) {bound}))"
+
+    cmp_op = rng.choice(INT_CMP)
+    only = vs[0]
+    bound = rng.randint(-10, 10)
+    return f"(assert ({cmp_op} {only} {bound}))"
+
+
+def _bool_assertion(rng, vs):
+    """Emit one random ``(assert ...)`` over the Bool variables in *vs*.
+
+    A single variable is asserted either plainly or negated (even odds).
+    Otherwise two distinct variables are joined with ``and``, ``or`` or ``=>``.
+    """
+    if len(vs) != 1:
+        left, right = rng.sample(vs, 2)
+        connective = rng.choice(['and', 'or', '=>'])
+        return f"(assert ({connective} {left} {right}))"
+
+    if rng.random() < 0.5:
+        return f"(assert {vs[0]})"
+    return f"(assert (not {vs[0]}))"
+
+
+def _bv_assertion(rng, vs):
+    """Emit one random ``(assert ...)`` over the 32-bit bit-vector variables.
+
+    The right-hand side is always a literal in [0, 255]. With fewer than two
+    variables the first one is compared directly; otherwise a bit-vector term
+    over two distinct variables is compared.
+    """
+    # The literal is drawn first, before any operator or variable choice.
+    literal = f"(_ bv{rng.randint(0, 255)} 32)"
+    if len(vs) >= 2:
+        lhs, rhs = rng.sample(vs, 2)
+        cmp_op = rng.choice(BV_CMP)
+        arith_op = rng.choice(BV_ARITH)
+        return f"(assert ({cmp_op} ({arith_op} {lhs} {rhs}) {literal}))"
+
+    cmp_op = rng.choice(BV_CMP)
+    return f"(assert ({cmp_op} {vs[0]} {literal}))"
+
+
+# Dispatch table: SMT-LIB sort string -> generator for assertions over
+# variables of that sort. Keys are taken from THEORY_SORTS so the sort
+# spellings live in one place.
+SORT_ASSERTION = {
+    THEORY_SORTS["LIA"]: _int_assertion,
+    THEORY_SORTS["Bool"]: _bool_assertion,
+    THEORY_SORTS["BV"]: _bv_assertion,
+}
+
+
+def _random_assertion(rng, vars_by_sort):
+    """Return one random assertion over a sort that has variables.
+
+    *vars_by_sort* maps a sort string to the list of variable names declared
+    with that sort. Returns None when no sort has any variable.
+    """
+    populated = [sort for sort, names in vars_by_sort.items() if names]
+    if populated:
+        picked = rng.choice(populated)
+        make_assertion = SORT_ASSERTION[picked]
+        return make_assertion(rng, vars_by_sort[picked])
+    return None
+
+# Formula generators
+
+
+def gen_random_formula(rng, num_vars=5, num_assertions=5):
+    """Build an SMT-LIB script of random declarations and assertions.
+
+    Declares *num_vars* constants ``v0..`` with randomly chosen sorts, adds up
+    to *num_assertions* random assertions over them, and ends with a single
+    ``(check-sat)``. Returns the script as newline-joined text.
+    """
+    sort_pool = list(THEORY_SORTS.values())
+    by_sort = {}
+    script = []
+
+    for idx in range(num_vars):
+        chosen = rng.choice(sort_pool)
+        var = f"v{idx}"
+        by_sort.setdefault(chosen, []).append(var)
+        script.append(f"(declare-const {var} {chosen})")
+
+    # _random_assertion yields None when nothing is declared; skip those.
+    candidates = (_random_assertion(rng, by_sort)
+                  for _ in range(num_assertions))
+    script.extend(stmt for stmt in candidates if stmt)
+
+    script.append("(check-sat)")
+    return "\n".join(script)
+
+
+def _is_single_line_assert(line):
+    """Return True when *line* looks like one complete ``(assert ...)`` command.
+
+    Parentheses are counted textually, so a line with parentheses inside a
+    string literal or quoted symbol may be misjudged.
+    """
+    text = line.strip()
+    if not (text.startswith("(assert ") and text.endswith(")")):
+        return False
+    depth = 0
+    last = len(text) - 1
+    for pos, ch in enumerate(text):
+        if ch == "(":
+            depth += 1
+        elif ch == ")":
+            depth -= 1
+            # The command's own paren must close on the final character;
+            # closing earlier means trailing content (comment, second command).
+            if depth == 0 and pos != last:
+                return False
+    return depth == 0
+
+
+def gen_metamorphic_variant(rng, base_formula):
+    """Apply an equisatisfiable transformation to a formula.
+
+    Transformations:
+      tautology   : insert (assert true) before the first check-sat
+      double_neg  : wrap one assertion body in double negation
+      duplicate   : repeat an existing assertion
+
+    Only assertions written as one complete command on a single line are
+    candidates for double_neg and duplicate. Multi-line assertions, lines
+    with trailing content and commands such as ``(assert-soft ...)`` are left
+    untouched, because rewriting them line-wise would produce malformed
+    SMT-LIB. If the chosen transformation has no candidate, the formula is
+    returned unchanged (apart from stripping surrounding whitespace).
+
+    Args:
+        rng: random source providing ``choice``.
+        base_formula: SMT-LIB script text.
+
+    Returns:
+        The transformed script as newline-joined text.
+    """
+    lines = base_formula.strip().split("\n")
+    # The transform is drawn before scanning so the draw order per seed is
+    # stable regardless of the formula's contents.
+    transform = rng.choice(["tautology", "double_neg", "duplicate"])
+    assertion_idxs = [i for i, line in enumerate(lines)
+                      if _is_single_line_assert(line)]
+
+    if transform == "tautology":
+        # Fall back to appending when the script has no check-sat line.
+        pos = next((i for i, line in enumerate(lines) if "check-sat" in line),
+                   len(lines))
+        lines.insert(pos, "(assert true)")
+    elif assertion_idxs:
+        idx = rng.choice(assertion_idxs)
+        if transform == "double_neg":
+            body = lines[idx].strip()
+            # Drop the leading "(assert " and the final ")".
+            inner = body[len("(assert "):-1]
+            lines[idx] = f"(assert (not (not {inner})))"
+        else:  # duplicate
+            lines.insert(idx + 1, lines[idx])
+
+    return "\n".join(lines)
+
+
+def gen_cross_theory_formula(rng, theories, num_vars=4, num_assertions=6):
+    """Build a script whose variables are spread across several theories.
+
+    *theories* holds keys of THEORY_SORTS; unrecognised names are ignored,
+    and if none is recognised every known sort is used. Variables are
+    assigned sorts round-robin. When both Int and Bool variables exist, one
+    extra constraint ties a Bool variable to ``(> int_var 0)``. The script
+    ends with a single ``(check-sat)``.
+    """
+    pool = [THEORY_SORTS[name] for name in theories if name in THEORY_SORTS]
+    pool = pool or list(THEORY_SORTS.values())
+
+    by_sort = {}
+    script = []
+    for idx in range(num_vars):
+        # Cycle through the pool so each requested sort gets variables.
+        sort = pool[idx % len(pool)]
+        var = f"v{idx}"
+        by_sort.setdefault(sort, []).append(var)
+        script.append(f"(declare-const {var} {sort})")
+
+    candidates = (_random_assertion(rng, by_sort)
+                  for _ in range(num_assertions))
+    script.extend(stmt for stmt in candidates if stmt)
+
+    # Bridge Int and Bool when both are present.
+    ints = by_sort.get("Int", [])
+    bools = by_sort.get("Bool", [])
+    if ints and bools:
+        int_var = rng.choice(ints)
+        flag = rng.choice(bools)
+        script.append(f"(assert (= {flag} (> {int_var} 0)))")
+
+    script.append("(check-sat)")
+    return "\n".join(script)
+
+
+def gen_incremental_formula(rng, num_frames=3, num_vars=4,
+                            asserts_per_frame=3):
+    """Build a push/pop script with globally declared variables.
+
+    All *num_vars* constants are declared up front with random sorts. Each of
+    the *num_frames* frames is ``(push 1)``, up to *asserts_per_frame* random
+    assertions, ``(check-sat)``, ``(pop 1)``. A final ``(check-sat)`` follows
+    after every frame has been popped.
+    """
+    sort_pool = list(THEORY_SORTS.values())
+    by_sort = {}
+    script = []
+
+    for idx in range(num_vars):
+        chosen = rng.choice(sort_pool)
+        var = f"v{idx}"
+        by_sort.setdefault(chosen, []).append(var)
+        script.append(f"(declare-const {var} {chosen})")
+
+    for _ in range(num_frames):
+        script.append("(push 1)")
+        scoped = (_random_assertion(rng, by_sort)
+                  for _ in range(asserts_per_frame))
+        script.extend(stmt for stmt in scoped if stmt)
+        script += ["(check-sat)", "(pop 1)"]
+
+    script.append("(check-sat)")
+    return "\n".join(script)
+
+# Anomaly detection
+
+
+def classify_result(result):
+    """Map a run result dict to an anomaly category, or None if it is normal.
+
+    Reads the ``exit_code``, ``result`` and ``stderr`` keys. Checks run in
+    order, so a non-zero exit that is not a timeout is reported as "crash"
+    even when stderr also mentions an assertion.
+    """
+    crashed = result["exit_code"] != 0 and result["result"] != "timeout"
+    if crashed:
+        return "crash"
+
+    stderr_text = result["stderr"].lower()
+    if "assertion" in stderr_text:
+        return "assertion_failure"
+
+    return "error" if result["result"] == "error" else None
+
+# Strategy runners
+
+
+def run_random(args, rng, db, run_id):
+    """Run ``args.count`` random formulas through Z3 and record anomalies.
+
+    Every formula and its result are logged with ``db.log_formula``. Each
+    result flagged by ``classify_result`` is stored as a finding, with stderr
+    truncated to 500 characters. Returns the number of anomalous results.
+    """
+    found = 0
+    for idx in range(args.count):
+        # Sizes are drawn in a fixed order (variables, then assertions) so a
+        # given seed reproduces the same formula sequence.
+        n_vars = rng.randint(2, 8)
+        n_asserts = rng.randint(1, 10)
+        formula = gen_random_formula(rng, n_vars, n_asserts)
+        log.debug("formula %d:\n%s", idx, formula)
+
+        outcome = run_z3(formula, z3_bin=args.z3, timeout=args.timeout,
+                         debug=args.debug)
+        db.log_formula(run_id, formula, outcome["result"])
+
+        category = classify_result(outcome)
+        if not category:
+            continue
+
+        found += 1
+        message = (f"random formula #{idx}: {category} "
+                   f"(exit={outcome['exit_code']})")
+        severity = "high" if category == "crash" else "medium"
+        details = {
+            "formula_index": idx,
+            "exit_code": outcome["exit_code"],
+            "stderr": outcome["stderr"][:500],
+        }
+        db.log_finding(run_id, category, message,
+                       severity=severity, details=details)
+        log.warning("anomaly in formula %d: %s", idx, category)
+    return found
+
+
+def run_metamorphic(args, rng, db, run_id):
+    """Check that equisatisfiable variants of a base formula agree with it.
+
+    The base formula is read from ``args.seed_file`` when given, otherwise
+    generated randomly. If Z3 does not answer sat or unsat for the base, a
+    "skip" finding is logged and 0 is returned. Otherwise ``args.count``
+    variants are run; each crash/error category and each sat/unsat answer
+    that differs from the base counts as an anomaly. Returns the anomaly count.
+    """
+    definite = ("sat", "unsat")
+
+    base = (Path(args.seed_file).read_text() if args.seed_file
+            else gen_random_formula(rng, num_vars=4, num_assertions=3))
+
+    base_out = run_z3(base, z3_bin=args.z3, timeout=args.timeout,
+                      debug=args.debug)
+    base_status = base_out["result"]
+    db.log_formula(run_id, base, base_status)
+    log.info("base formula result: %s", base_status)
+
+    if base_status not in definite:
+        # Without a definite verdict there is nothing to compare against.
+        db.log_finding(run_id, "skip",
+                       f"base formula not definite: {base_status}",
+                       severity="info")
+        return 0
+
+    found = 0
+    for idx in range(args.count):
+        variant = gen_metamorphic_variant(rng, base)
+        log.debug("variant %d:\n%s", idx, variant)
+        outcome = run_z3(variant, z3_bin=args.z3, timeout=args.timeout,
+                         debug=args.debug)
+        verdict = outcome["result"]
+        db.log_formula(run_id, variant, verdict)
+
+        category = classify_result(outcome)
+        if category:
+            found += 1
+            db.log_finding(
+                run_id, category,
+                f"metamorphic variant #{idx}: {category}",
+                severity="high",
+                details={"variant_index": idx,
+                         "stderr": outcome["stderr"][:500]})
+            log.warning("anomaly in variant %d: %s", idx, category)
+            continue
+
+        # Only a definite answer that contradicts the base is a disagreement.
+        if verdict not in definite or verdict == base_status:
+            continue
+
+        found += 1
+        db.log_finding(
+            run_id, "disagreement",
+            f"variant #{idx}: expected {base_status}, "
+            f"got {verdict}",
+            severity="critical",
+            details={"variant_index": idx,
+                     "expected": base_status,
+                     "actual": verdict})
+        log.warning("disagreement in variant %d: expected %s, got %s",
+                    idx, base_status, verdict)
+    return found
+
+
+def run_cross_theory(args, rng, db, run_id):
+    """Run ``args.count`` cross-theory formulas through Z3, recording anomalies.
+
+    ``args.theories`` is a comma-separated list of theory names; surrounding
+    whitespace on each name is stripped. Every formula and result is logged,
+    and each result flagged by ``classify_result`` becomes a finding.
+    Returns the number of anomalous results.
+    """
+    theories = [name.strip() for name in args.theories.split(",")]
+    theory_label = ','.join(theories)
+
+    found = 0
+    for idx in range(args.count):
+        # Variable count is drawn before assertion count.
+        n_vars = rng.randint(3, 8)
+        n_asserts = rng.randint(2, 10)
+        formula = gen_cross_theory_formula(rng, theories, n_vars, n_asserts)
+        log.debug("cross-theory formula %d:\n%s", idx, formula)
+
+        outcome = run_z3(formula, z3_bin=args.z3, timeout=args.timeout,
+                         debug=args.debug)
+        db.log_formula(run_id, formula, outcome["result"])
+
+        category = classify_result(outcome)
+        if not category:
+            continue
+
+        found += 1
+        severity = "high" if category == "crash" else "medium"
+        details = {
+            "formula_index": idx,
+            "theories": theories,
+            "exit_code": outcome["exit_code"],
+            "stderr": outcome["stderr"][:500],
+        }
+        db.log_finding(
+            run_id, category,
+            f"cross-theory #{idx} ({theory_label}): {category}",
+            severity=severity, details=details)
+        log.warning("anomaly in cross-theory formula %d: %s", idx, category)
+    return found
+
+
+def run_incremental(args, rng, db, run_id):
+    """Run ``args.count`` push/pop formulas through Z3 and record anomalies.
+
+    Each formula uses a random number of frames between 2 and 6. Every
+    formula and result is logged, and each result flagged by
+    ``classify_result`` becomes a finding. Returns the number of anomalies.
+    """
+    found = 0
+    for idx in range(args.count):
+        frames = rng.randint(2, 6)
+        formula = gen_incremental_formula(rng, frames)
+        log.debug("incremental formula %d:\n%s", idx, formula)
+
+        outcome = run_z3(formula, z3_bin=args.z3, timeout=args.timeout,
+                         debug=args.debug)
+        db.log_formula(run_id, formula, outcome["result"])
+
+        category = classify_result(outcome)
+        if not category:
+            continue
+
+        found += 1
+        severity = "high" if category == "crash" else "medium"
+        details = {
+            "formula_index": idx,
+            "num_frames": frames,
+            "exit_code": outcome["exit_code"],
+            "stderr": outcome["stderr"][:500],
+        }
+        db.log_finding(
+            run_id, category,
+            f"incremental #{idx} ({frames} frames): {category}",
+            severity=severity, details=details)
+        log.warning("anomaly in incremental formula %d: %s", idx, category)
+    return found
+
+
+# Maps each --strategy choice to its runner. Every runner takes
+# (args, rng, db, run_id) and returns the number of anomalies it found.
+# Insertion order is the order shown in the CLI choices.
+STRATEGIES = {
+    "random": run_random,
+    "metamorphic": run_metamorphic,
+    "cross-theory": run_cross_theory,
+    "incremental": run_incremental,
+}
+
+# Entry point
+
+
+def main():
+    """Parse the command line, run the chosen strategy and report a summary.
+
+    Opens the findings database, records a run, dispatches to the runner
+    selected by ``--strategy`` and marks the run "success" or "findings".
+    The database handle is closed even when the strategy raises (for example
+    on an unreadable ``--seed-file``). Exits with status 1 when any anomaly
+    was found, 0 otherwise.
+    """
+    parser = argparse.ArgumentParser(
+        prog="deeptest",
+        description="Generate and run stress tests for Z3.",
+    )
+    parser.add_argument("--strategy", choices=list(STRATEGIES),
+                        default="random",
+                        help="test generation strategy")
+    parser.add_argument("--count", type=int, default=50,
+                        help="number of formulas to generate")
+    parser.add_argument("--seed", type=int, default=None,
+                        help="random seed for reproducibility")
+    parser.add_argument("--seed-file", default=None,
+                        help="base .smt2 file for metamorphic strategy")
+    parser.add_argument("--theories", default="LIA,BV",
+                        help="comma-separated theories for cross-theory")
+    parser.add_argument("--timeout", type=int, default=10,
+                        help="per-formula Z3 timeout in seconds")
+    parser.add_argument("--z3", default=None, help="path to z3 binary")
+    parser.add_argument("--db", default=None, help="path to z3agent.db")
+    parser.add_argument("--debug", action="store_true")
+    args = parser.parse_args()
+
+    setup_logging(args.debug)
+
+    # Without --seed, use the wall-clock second; it is logged and printed so
+    # the run can be reproduced later.
+    seed = args.seed if args.seed is not None else int(time.time())
+    rng = random.Random(seed)
+    log.info("seed: %d", seed)
+
+    db = Z3DB(args.db)
+    try:
+        run_id = db.start_run(
+            "deeptest",
+            f"strategy={args.strategy} count={args.count} seed={seed}")
+
+        t0 = time.monotonic()
+        anomalies = STRATEGIES[args.strategy](args, rng, db, run_id)
+        elapsed_ms = int((time.monotonic() - t0) * 1000)
+
+        status = "success" if anomalies == 0 else "findings"
+        db.finish_run(run_id, status, elapsed_ms)
+    finally:
+        # Release the database handle on the failure path too.
+        db.close()
+
+    print(f"strategy:  {args.strategy}")
+    print(f"seed:      {seed}")
+    print(f"formulas:  {args.count}")
+    print(f"anomalies: {anomalies}")
+    print(f"elapsed:   {elapsed_ms}ms")
+
+    sys.exit(1 if anomalies > 0 else 0)
+
+
+if __name__ == "__main__":
+    main()