mirror of
https://github.com/Z3Prover/z3
synced 2026-04-27 06:13:35 +00:00
Add Copilot skill architecture with 10 skills, 2 agents, and shared infra
Introduce .github/skills/ with solve, prove, optimize, simplify, encode, explain, benchmark, memory-safety, static-analysis, and deeptest skills. Each skill follows a SKILL.md + scripts/ pattern with Python scripts backed by a shared SQLite logging library (z3db.py). Two orchestrator agents (z3-solver, z3-verifier) route requests to the appropriate skills. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
1cba7cb5ee
commit
d349b93d1d
25 changed files with 2784 additions and 0 deletions
48
.github/skills/benchmark/SKILL.md
vendored
Normal file
48
.github/skills/benchmark/SKILL.md
vendored
Normal file
|
|
@@ -0,0 +1,48 @@
|
|||
---
|
||||
name: benchmark
|
||||
description: Measure Z3 performance on a formula or file. Collects wall-clock time, theory solver statistics, memory usage, and conflict counts. Results are logged to z3agent.db for longitudinal tracking.
|
||||
---
|
||||
|
||||
Given an SMT-LIB2 formula or file, run Z3 with statistics enabled and report performance characteristics. This is useful for identifying performance regressions, comparing tactic strategies, and profiling theory solver workload distribution.
|
||||
|
||||
# Step 1: Run Z3 with statistics
|
||||
|
||||
```bash
|
||||
python3 scripts/benchmark.py --file problem.smt2
|
||||
python3 scripts/benchmark.py --file problem.smt2 --runs 5
|
||||
python3 scripts/benchmark.py --formula "(declare-const x Int)..." --debug
|
||||
```
|
||||
|
||||
The script invokes `z3 -st` and parses the `:key value` statistics block.
|
||||
|
||||
# Step 2: Interpret the output
|
||||
|
||||
The output includes:
|
||||
|
||||
- wall-clock time (ms)
|
||||
- result (sat/unsat/unknown/timeout)
|
||||
- memory usage (MB)
|
||||
- conflicts, decisions, propagations
|
||||
- per-theory breakdown (arithmetic, bv, array, etc.)
|
||||
|
||||
With `--runs N`, the script runs Z3 N times and reports min/median/max timing.
|
||||
|
||||
# Step 3: Compare over time
|
||||
|
||||
Past benchmark runs are logged to `z3agent.db`. Query them:
|
||||
```bash
|
||||
python3 ../../shared/z3db.py runs --skill benchmark --last 20
|
||||
python3 ../../shared/z3db.py query "SELECT smtlib2, result, stats FROM formulas WHERE run_id IN (SELECT run_id FROM runs WHERE skill='benchmark') ORDER BY run_id DESC LIMIT 5"
|
||||
```
|
||||
|
||||
# Parameters
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| formula | string | no | | SMT-LIB2 formula |
|
||||
| file | path | no | | path to .smt2 file |
|
||||
| runs | int | no | 1 | number of repeated runs for timing |
|
||||
| timeout | int | no | 60 | seconds per run |
|
||||
| z3 | path | no | auto | path to z3 binary |
|
||||
| debug | flag | no | off | verbose tracing |
|
||||
| db | path | no | .z3-agent/z3agent.db | logging database |
|
||||
74
.github/skills/benchmark/scripts/benchmark.py
vendored
Normal file
74
.github/skills/benchmark/scripts/benchmark.py
vendored
Normal file
|
|
@@ -0,0 +1,74 @@
|
|||
#!/usr/bin/env python3
"""
benchmark.py: measure Z3 performance with statistics.

Usage:
    python benchmark.py --file problem.smt2
    python benchmark.py --file problem.smt2 --runs 5
"""

import argparse
import statistics
import sys
from pathlib import Path

# Make the shared helper library importable regardless of the caller's CWD.
# From scripts/ this resolves three levels up: scripts/ -> benchmark/ ->
# skills/, i.e. .github/skills/shared.
# NOTE(review): SKILL.md queries `../../shared/z3db.py` from the skill
# directory, which is .github/shared instead — confirm which directory
# actually contains z3db.py; one of the two paths looks wrong.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "shared"))
# z3db provides the SQLite run log (Z3DB), the z3 subprocess wrapper (run_z3),
# the `:key value` statistics parser (parse_stats), and logging setup.
from z3db import Z3DB, run_z3, parse_stats, setup_logging
|
||||
|
||||
|
||||
def main():
    """Benchmark Z3 on an SMT-LIB2 formula.

    Runs z3 with `-st` once (or --runs N times), logs each run and its parsed
    statistics to the z3agent database, prints a single-run report or a
    min/median/max timing summary, and exits 0 iff the last z3 invocation
    exited 0.
    """
    parser = argparse.ArgumentParser(prog="benchmark")
    parser.add_argument("--formula", help="SMT-LIB2 formula text")
    parser.add_argument("--file", help="path to a .smt2 file")
    parser.add_argument("--runs", type=int, default=1,
                        help="number of repeated runs for timing")
    parser.add_argument("--timeout", type=int, default=60,
                        help="seconds per run")
    parser.add_argument("--z3", default=None, help="path to the z3 binary")
    parser.add_argument("--db", default=None, help="logging database path")
    parser.add_argument("--debug", action="store_true", help="verbose tracing")
    args = parser.parse_args()

    setup_logging(args.debug)

    if args.file:
        formula = Path(args.file).read_text()
    elif args.formula:
        formula = args.formula
    else:
        parser.error("provide --formula or --file")
        return  # unreachable: parser.error() raises SystemExit; kept for clarity

    # Guard against --runs 0 (or negative): the loop below would never bind
    # `result`/`stats` and the summary code would crash with NameError.
    if args.runs < 1:
        parser.error("--runs must be >= 1")

    db = Z3DB(args.db)
    timings = []
    try:
        for _ in range(args.runs):
            run_id = db.start_run("benchmark", formula)
            # -st makes z3 print the `:key value` statistics block on stdout.
            result = run_z3(formula, z3_bin=args.z3, timeout=args.timeout,
                            args=["-st"], debug=args.debug)

            stats = parse_stats(result["stdout"])
            db.log_formula(run_id, formula, result["result"], stats=stats)
            db.finish_run(run_id, result["result"], result["duration_ms"],
                          result["exit_code"])
            timings.append(result["duration_ms"])

        if args.runs == 1:
            print(f"result: {result['result']}")
            print(f"time: {result['duration_ms']}ms")
            if stats:
                print("statistics:")
                for k, v in sorted(stats.items()):
                    print(f" :{k} {v}")
        else:
            # Multi-run summary; `result`/`stats` reflect the final run.
            print(f"runs: {args.runs}")
            print(f"min: {min(timings)}ms")
            print(f"median: {statistics.median(timings):.0f}ms")
            print(f"max: {max(timings)}ms")
            print(f"result: {result['result']}")
    finally:
        # Always release the database, even if run_z3/logging raises.
        db.close()
    sys.exit(0 if result["exit_code"] == 0 else 1)


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue