mirror of
https://github.com/Z3Prover/z3
synced 2026-07-04 14:26:10 +00:00
Add Copilot skill architecture with 10 skills, 2 agents, and shared infra
Introduce .github/skills/ with solve, prove, optimize, simplify, encode, explain, benchmark, memory-safety, static-analysis, and deeptest skills. Each skill follows a SKILL.md + scripts/ pattern with Python scripts backed by a shared SQLite logging library (z3db.py). Two orchestrator agents (z3-solver, z3-verifier) route requests to the appropriate skills. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
This commit is contained in:
parent
1cba7cb5ee
commit
d349b93d1d
25 changed files with 2784 additions and 0 deletions
53
.github/skills/memory-safety/SKILL.md
vendored
Normal file
53
.github/skills/memory-safety/SKILL.md
vendored
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
---
|
||||
name: memory-safety
|
||||
description: Run AddressSanitizer and UndefinedBehaviorSanitizer on the Z3 test suite to detect memory errors, undefined behavior, and leaks. Logs each finding to z3agent.db.
|
||||
---
|
||||
|
||||
Build Z3 with compiler-based sanitizer instrumentation, execute the test suite, and parse the runtime output for memory safety violations. Supported sanitizers are AddressSanitizer (heap and stack buffer overflows, use-after-free, double-free, memory leaks) and UndefinedBehaviorSanitizer (signed integer overflow, null pointer dereference, misaligned access, shift errors). Findings are deduplicated and stored in z3agent.db for triage and longitudinal tracking.
|
||||
|
||||
# Step 1: Configure and build
|
||||
|
||||
The script invokes cmake with the appropriate `-fsanitize` flags and builds the `test-z3` target. Each sanitizer uses a separate build directory to avoid flag conflicts. If a prior instrumented build exists with matching flags, only incremental compilation runs.
|
||||
|
||||
```bash
|
||||
python3 scripts/memory_safety.py --sanitizer asan
|
||||
python3 scripts/memory_safety.py --sanitizer ubsan
|
||||
python3 scripts/memory_safety.py --sanitizer both
|
||||
```
|
||||
|
||||
To reuse an existing build:
|
||||
```bash
|
||||
python3 scripts/memory_safety.py --sanitizer asan --skip-build --build-dir build/sanitizer-asan
|
||||
```
|
||||
|
||||
# Step 2: Run and collect
|
||||
|
||||
The test binary runs with `halt_on_error=0` so the sanitizer reports all violations rather than aborting on the first. The script parses `ERROR: AddressSanitizer`, `runtime error:`, and `ERROR: LeakSanitizer` patterns from the combined output, extracts source locations where available, and deduplicates by category, file, line, and message.
|
||||
|
||||
```bash
|
||||
python3 scripts/memory_safety.py --sanitizer asan --timeout 900 --debug
|
||||
```
|
||||
|
||||
# Step 3: Interpret results
|
||||
|
||||
- `clean`: no sanitizer violations detected.
|
||||
- `findings`: one or more violations found. Each is printed with severity, category, message, and source location.
|
||||
- `timeout`: the test suite did not complete within the deadline. Increase the timeout or investigate a possible infinite loop.
|
||||
- `error`: build or execution failed before sanitizer output could be collected.
|
||||
|
||||
Query past runs:
|
||||
```bash
|
||||
python3 ../../shared/z3db.py runs --skill memory-safety --last 10
|
||||
python3 ../../shared/z3db.py query "SELECT category, severity, file, line, message FROM findings WHERE run_id IN (SELECT run_id FROM runs WHERE skill='memory-safety') ORDER BY run_id DESC LIMIT 20"
|
||||
```
|
||||
|
||||
# Parameters
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| sanitizer | choice | no | asan | which sanitizer to enable: asan, ubsan, or both |
|
||||
| build-dir | path | no | build/sanitizer-{name} | path to the build directory |
|
||||
| timeout | int | no | 600 | seconds before killing the test run |
|
||||
| skip-build | flag | no | off | reuse an existing instrumented build |
|
||||
| debug | flag | no | off | verbose cmake, make, and test output |
|
||||
| db | path | no | .z3-agent/z3agent.db | path to the logging database |
|
||||
266
.github/skills/memory-safety/scripts/memory_safety.py
vendored
Normal file
266
.github/skills/memory-safety/scripts/memory_safety.py
vendored
Normal file
|
|
@ -0,0 +1,266 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
memory_safety.py: run sanitizer checks on Z3 test suite.
|
||||
|
||||
Usage:
|
||||
python memory_safety.py --sanitizer asan
|
||||
python memory_safety.py --sanitizer ubsan --debug
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "shared"))
|
||||
from z3db import Z3DB, setup_logging
|
||||
|
||||
logger = logging.getLogger("z3agent")

# Compiler and linker flags for each supported sanitizer.
# The first token of every value must stay the primary -fsanitize switch:
# build_is_configured() looks for exactly that token in CMakeCache.txt.
SANITIZER_FLAGS = {
    # -fsanitize-recover=address is what lets the halt_on_error=0 runtime
    # option (set through ASAN_OPTIONS when the tests run) keep the process
    # going after the first AddressSanitizer report.
    "asan": "-fsanitize=address -fsanitize-recover=address "
            "-fno-omit-frame-pointer",
    "ubsan": "-fsanitize=undefined -fno-omit-frame-pointer",
}

# Header line of an AddressSanitizer report; group 1 is the error kind.
ASAN_ERROR = re.compile(r"ERROR:\s*AddressSanitizer:\s*(\S+)")
# UndefinedBehaviorSanitizer diagnostic; group 1 is the message text.
UBSAN_ERROR = re.compile(r":\d+:\d+:\s*runtime error:\s*(.+)")
# Header line of a LeakSanitizer report.
LEAK_ERROR = re.compile(r"ERROR:\s*LeakSanitizer:")
# A C/C++ source path followed by a line number; groups are (path, line).
LOCATION = re.compile(r"(\S+\.(?:cpp|c|h|hpp)):(\d+)")
|
||||
|
||||
|
||||
def find_repo_root() -> Path:
    """Locate the Z3 checkout that contains the current working directory.

    Examines the working directory and then its ancestors, ten directories
    at most, and returns the first one that holds both a ``CMakeLists.txt``
    entry and a ``src`` directory. When none qualifies, an error is logged
    and the process exits with status 1.
    """
    here = Path.cwd()
    # The working directory itself counts towards the ten-directory budget.
    for candidate in (here, *here.parents)[:10]:
        has_cmake = (candidate / "CMakeLists.txt").exists()
        if has_cmake and (candidate / "src").is_dir():
            return candidate
    logger.error("could not locate Z3 repository root")
    sys.exit(1)
|
||||
|
||||
|
||||
def build_is_configured(build_dir: Path, sanitizer: str) -> bool:
    """Report whether *build_dir* holds a cmake cache set up for *sanitizer*.

    The directory counts as configured when its ``CMakeCache.txt`` exists
    and the cache text contains the first flag listed for the sanitizer in
    ``SANITIZER_FLAGS``.
    """
    cache_file = build_dir / "CMakeCache.txt"
    if cache_file.is_file():
        primary_flag = SANITIZER_FLAGS[sanitizer].split()[0]
        return primary_flag in cache_file.read_text()
    return False
|
||||
|
||||
|
||||
def configure(build_dir: Path, sanitizer: str, repo_root: Path) -> bool:
    """Run cmake with the requested sanitizer flags.

    Creates *build_dir* if needed and configures a Debug build of the tree
    at *repo_root* with ``Z3_BUILD_TEST=ON`` and the sanitizer's flags
    applied to the C, C++ and executable-linker flag variables.

    Args:
        build_dir: directory to configure; also cmake's working directory.
        sanitizer: key into ``SANITIZER_FLAGS``.
        repo_root: path to the source tree handed to cmake.

    Returns:
        True when cmake exits with status 0. False when cmake reports an
        error or cannot be started at all (for example, it is not
        installed).
    """
    flags = SANITIZER_FLAGS[sanitizer]
    build_dir.mkdir(parents=True, exist_ok=True)
    cmd = [
        "cmake", str(repo_root),
        f"-DCMAKE_C_FLAGS={flags}",
        f"-DCMAKE_CXX_FLAGS={flags}",
        f"-DCMAKE_EXE_LINKER_FLAGS={flags}",
        "-DCMAKE_BUILD_TYPE=Debug",
        "-DZ3_BUILD_TEST=ON",
    ]
    logger.info("configuring %s build in %s", sanitizer, build_dir)
    logger.debug("cmake command: %s", " ".join(cmd))
    try:
        proc = subprocess.run(cmd, cwd=build_dir, capture_output=True,
                              text=True)
    except OSError as exc:
        # Report a missing or non-executable cmake as an ordinary failure
        # so the caller can record it, instead of dying with a traceback.
        logger.error("could not run cmake: %s", exc)
        return False
    if proc.returncode != 0:
        logger.error("cmake failed:\n%s", proc.stderr or proc.stdout)
        return False
    return True
|
||||
|
||||
|
||||
def compile_tests(build_dir: Path) -> bool:
    """Compile the test-z3 target.

    The build goes through ``cmake --build`` so that it works with whatever
    generator the cache in *build_dir* was configured for, not only
    Makefiles.

    Args:
        build_dir: an already configured cmake build directory.

    Returns:
        True when the build exits with status 0. False when it fails or
        cmake cannot be started.
    """
    nproc = os.cpu_count() or 4
    cmd = ["cmake", "--build", ".", "--target", "test-z3",
           "--parallel", str(nproc)]
    logger.info("compiling test-z3 (%d parallel jobs)", nproc)
    try:
        proc = subprocess.run(cmd, cwd=build_dir, capture_output=True,
                              text=True)
    except OSError as exc:
        logger.error("could not run cmake: %s", exc)
        return False
    if proc.returncode != 0:
        # Some build tools print compiler diagnostics on stdout, so fall
        # back to it when stderr is empty. Only the tail is logged.
        tail = (proc.stderr or proc.stdout)[-2000:]
        logger.error("compilation failed:\n%s", tail)
        return False
    return True
|
||||
|
||||
|
||||
def _captured_text(data) -> str:
    """Normalise captured process output to ``str``.

    ``subprocess.TimeoutExpired`` can carry ``None`` or raw ``bytes`` in its
    ``stdout``/``stderr`` attributes even when text mode was requested.
    """
    if data is None:
        return ""
    if isinstance(data, bytes):
        return data.decode(errors="replace")
    return data


def run_tests(build_dir: Path, timeout: int) -> dict:
    """Execute test-z3 under sanitizer runtime and capture output.

    Args:
        build_dir: directory containing the ``test-z3`` binary; also used
            as the working directory of the test process.
        timeout: seconds to wait before the test process is killed.

    Returns:
        A dict with the keys ``stdout``, ``stderr``, ``exit_code``,
        ``duration_ms`` and ``timed_out``. ``exit_code`` is -1 when the
        binary is missing, cannot be started, or times out. On a timeout
        ``stderr`` holds whatever was captured so far followed by the
        marker word ``timeout``, which main() looks for.
    """
    # Make the path absolute: with cwd=build_dir a relative executable path
    # would be looked up relative to build_dir itself and not be found.
    test_bin = (build_dir / "test-z3").resolve()
    if not test_bin.is_file():
        logger.error("test-z3 not found at %s", test_bin)
        return {"stdout": "", "stderr": "binary not found", "exit_code": -1,
                "duration_ms": 0, "timed_out": False}

    env = os.environ.copy()
    env["ASAN_OPTIONS"] = "detect_leaks=1:halt_on_error=0:print_stacktrace=1"
    env["UBSAN_OPTIONS"] = "print_stacktrace=1:halt_on_error=0"

    cmd = [str(test_bin), "/a"]
    logger.info("running: %s", " ".join(cmd))
    start = time.monotonic()
    try:
        proc = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout,
            cwd=build_dir, env=env,
        )
    except subprocess.TimeoutExpired as exc:
        ms = int((time.monotonic() - start) * 1000)
        logger.warning("test-z3 timed out after %dms", ms)
        # Keep reports printed before the deadline so they can be parsed.
        partial_err = _captured_text(exc.stderr)
        return {
            "stdout": _captured_text(exc.stdout),
            "stderr": f"{partial_err}\ntimeout" if partial_err else "timeout",
            "exit_code": -1,
            "duration_ms": ms,
            "timed_out": True,
        }
    except OSError as exc:
        ms = int((time.monotonic() - start) * 1000)
        logger.error("could not execute %s: %s", test_bin, exc)
        return {"stdout": "", "stderr": f"execution failed: {exc}",
                "exit_code": -1, "duration_ms": ms, "timed_out": False}

    ms = int((time.monotonic() - start) * 1000)
    logger.debug("exit_code=%d duration=%dms", proc.returncode, ms)
    return {
        "stdout": proc.stdout,
        "stderr": proc.stderr,
        "exit_code": proc.returncode,
        "duration_ms": ms,
        "timed_out": False,
    }
|
||||
|
||||
|
||||
def parse_findings(output: str) -> list:
    """Extract sanitizer error reports from combined stdout and stderr.

    Every line is tested against the AddressSanitizer, LeakSanitizer and
    UndefinedBehaviorSanitizer patterns, in that order; the first pattern
    that matches decides the category of the finding.

    Args:
        output: raw text captured from the instrumented test binary.

    Returns:
        A list of dicts, one per matching line, with the keys ``category``,
        ``message``, ``severity``, ``file``, ``line`` and ``raw``. ``file``
        and ``line`` are ``None`` when no usable source location is found
        near the report.
    """
    findings = []
    # splitlines() also handles "\r\n", so no stray "\r" lands in messages.
    lines = output.splitlines()

    for i, line in enumerate(lines):
        entry = None

        m = ASAN_ERROR.search(line)
        if m:
            entry = {"category": "asan", "message": m.group(1),
                     "severity": "high"}
        elif LEAK_ERROR.search(line):
            entry = {"category": "leak",
                     "message": "detected memory leaks",
                     "severity": "high"}
        else:
            m = UBSAN_ERROR.search(line)
            if m:
                entry = {"category": "ubsan", "message": m.group(1),
                         "severity": "medium"}

        if entry is None:
            continue

        # Search the report line first (UBSan prints the location there),
        # then the four lines after it (stack frames), and only then the
        # two lines before it, nearest first. Starting with the preceding
        # lines would pin a report on the location of the one before it.
        following = lines[i + 1:i + 5]
        preceding = lines[max(0, i - 2):i][::-1]

        file_path, line_no = None, None
        for ctx in [line, *following, *preceding]:
            loc = LOCATION.search(ctx)
            # Paths under /usr/ are skipped so system frames are not blamed.
            if loc and "/usr/" not in loc.group(1):
                file_path = loc.group(1)
                line_no = int(loc.group(2))
                break

        entry["file"] = file_path
        entry["line"] = line_no
        entry["raw"] = line.strip()
        findings.append(entry)

    return findings
|
||||
|
||||
|
||||
def deduplicate(findings: list) -> list:
    """Drop repeated findings, keeping the first occurrence of each.

    Two findings are duplicates when they agree on category, file, line
    and message. The order of the surviving findings is preserved.
    """
    unique = {}
    for finding in findings:
        signature = (
            finding["category"],
            finding["file"],
            finding["line"],
            finding["message"],
        )
        # setdefault leaves an earlier finding with this signature in place.
        unique.setdefault(signature, finding)
    return list(unique.values())
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: build, run and record sanitizer checks.

    For each requested sanitizer a run is opened in the database, the
    instrumented build is configured and compiled (unless ``--skip-build``
    is given), the test binary is executed, and every deduplicated finding
    is logged before the run is closed with its status.

    Exit status:
        0: every run completed and nothing was found.
        1: at least one finding was reported.
        2: nothing was found, but at least one run could not be built,
           failed to execute, or timed out.
    """
    parser = argparse.ArgumentParser(prog="memory-safety")
    parser.add_argument("--sanitizer", choices=["asan", "ubsan", "both"],
                        default="asan",
                        help="sanitizer to enable (default: asan)")
    parser.add_argument("--build-dir", default=None,
                        help="path to build directory")
    parser.add_argument("--timeout", type=int, default=600,
                        help="seconds before killing test run")
    parser.add_argument("--skip-build", action="store_true",
                        help="reuse existing instrumented build")
    parser.add_argument("--db", default=None,
                        help="path to z3agent.db")
    parser.add_argument("--debug", action="store_true")
    args = parser.parse_args()

    setup_logging(args.debug)
    repo_root = find_repo_root()

    sanitizers = ["asan", "ubsan"] if args.sanitizer == "both" else [args.sanitizer]
    if args.build_dir and len(sanitizers) > 1:
        logger.warning(
            "--build-dir is shared by %s; each sanitizer normally gets "
            "its own build directory", " and ".join(sanitizers))

    all_findings = []
    incomplete = []  # sanitizers whose run did not finish normally

    db = Z3DB(args.db)
    try:
        for san in sanitizers:
            if args.build_dir:
                # Absolute path, so that running the binary with the build
                # directory as working directory still finds it.
                build_dir = Path(args.build_dir).resolve()
            else:
                build_dir = repo_root / "build" / f"sanitizer-{san}"

            run_id = db.start_run("memory-safety", f"sanitizer={san}")
            db.log(f"sanitizer: {san}, build: {build_dir}", run_id=run_id)

            if not args.skip_build:
                needs_configure = not build_is_configured(build_dir, san)
                if needs_configure and not configure(build_dir, san, repo_root):
                    db.finish_run(run_id, "error", 0, exit_code=1)
                    print(f"FAIL: cmake configuration failed for {san}")
                    incomplete.append(san)
                    continue
                if not compile_tests(build_dir):
                    db.finish_run(run_id, "error", 0, exit_code=1)
                    print(f"FAIL: compilation failed for {san}")
                    incomplete.append(san)
                    continue

            result = run_tests(build_dir, args.timeout)
            combined = result["stdout"] + "\n" + result["stderr"]
            findings = deduplicate(parse_findings(combined))

            for f in findings:
                db.log_finding(
                    run_id,
                    category=f["category"],
                    message=f["message"],
                    severity=f["severity"],
                    file=f["file"],
                    line=f["line"],
                    details={"raw": f["raw"]},
                )

            status = "clean" if not findings else "findings"
            if result["exit_code"] == -1:
                # Use the explicit flag when the result carries one, else
                # fall back to the marker word in stderr.
                timed_out = result.get("timed_out",
                                       "timeout" in result["stderr"])
                status = "timeout" if timed_out else "error"
                incomplete.append(san)

            db.finish_run(run_id, status, result["duration_ms"], result["exit_code"])
            all_findings.extend(findings)
            print(f"{san}: {len(findings)} finding(s), {result['duration_ms']}ms")
            if status in ("timeout", "error"):
                print(f"FAIL: {san} run ended with status {status}")
    finally:
        # Close the database even when an unexpected exception escapes.
        db.close()

    if all_findings:
        print(f"\nTotal: {len(all_findings)} finding(s)")
        for f in all_findings:
            loc = f"{f['file']}:{f['line']}" if f["file"] else "unknown location"
            print(f"  [{f['severity']}] {f['category']}: {f['message']} at {loc}")
        sys.exit(1)

    if incomplete:
        # A run that never finished must not be reported as success.
        print(f"\nNo sanitizer findings, but incomplete run(s): "
              f"{', '.join(incomplete)}")
        sys.exit(2)

    print("\nNo sanitizer findings.")
    sys.exit(0)


if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue