Add Copilot skill architecture with 10 skills, 2 agents, and shared infra

Introduce .github/skills/ with solve, prove, optimize, simplify, encode, explain, benchmark, memory-safety, static-analysis, and deeptest skills. Each skill follows a SKILL.md + scripts/ pattern with Python scripts backed by a shared SQLite logging library (z3db.py). Two orchestrator agents (z3-solver, z3-verifier) route requests to the appropriate skills. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
2026-07-04 14:26:10 +00:00 · 2026-03-11 17:41:29 +00:00 · 2026-03-11 17:41:29 +00:00 · d349b93d1d
commit d349b93d1d
parent 1cba7cb5ee
25 changed files with 2784 additions and 0 deletions
--- a/.github/skills/memory-safety/SKILL.md
+++ b/.github/skills/memory-safety/SKILL.md
@ -0,0 +1,53 @@
+---
+name: memory-safety
+description: Run AddressSanitizer and UndefinedBehaviorSanitizer on the Z3 test suite to detect memory errors, undefined behavior, and leaks. Logs each finding to z3agent.db.
+---
+
+Build Z3 with compiler-based sanitizer instrumentation, execute the test suite, and parse the runtime output for memory safety violations. Supported sanitizers are AddressSanitizer (heap and stack buffer overflows, use-after-free, double-free, memory leaks) and UndefinedBehaviorSanitizer (signed integer overflow, null pointer dereference, misaligned access, shift errors). Findings are deduplicated and stored in z3agent.db for triage and longitudinal tracking.
+
+# Step 1: Configure and build
+
+The script invokes cmake with the appropriate `-fsanitize` flags and builds the `test-z3` target. Each sanitizer uses a separate build directory to avoid flag conflicts. If a prior instrumented build exists with matching flags, only incremental compilation runs.
+
+```bash
+python3 scripts/memory_safety.py --sanitizer asan
+python3 scripts/memory_safety.py --sanitizer ubsan
+python3 scripts/memory_safety.py --sanitizer both
+```
+
+To reuse an existing build:
+```bash
+python3 scripts/memory_safety.py --sanitizer asan --skip-build --build-dir build/sanitizer-asan
+```
+
+# Step 2: Run and collect
+
+The test binary runs with `halt_on_error=0` so the sanitizer reports all violations rather than aborting on the first. The script parses `ERROR: AddressSanitizer`, `runtime error:`, and `ERROR: LeakSanitizer` patterns from the combined output, extracts source locations where available, and deduplicates by category, file, line, and message.
+
+```bash
+python3 scripts/memory_safety.py --sanitizer asan --timeout 900 --debug
+```
+
+# Step 3: Interpret results
+
+- `clean`: no sanitizer violations detected.
+- `findings`: one or more violations found. Each is printed with severity, category, message, and source location.
+- `timeout`: the test suite did not complete within the deadline. Increase the timeout or investigate a possible infinite loop.
+- `error`: build or execution failed before sanitizer output could be collected.
+
+Query past runs:
+```bash
+python3 ../../shared/z3db.py runs --skill memory-safety --last 10
+python3 ../../shared/z3db.py query "SELECT category, severity, file, line, message FROM findings WHERE run_id IN (SELECT run_id FROM runs WHERE skill='memory-safety') ORDER BY run_id DESC LIMIT 20"
+```
+
+# Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| sanitizer | choice | no | asan | which sanitizer to enable: asan, ubsan, or both |
+| build-dir | path | no | build/sanitizer-{name} | path to the build directory |
+| timeout | int | no | 600 | seconds before killing the test run |
+| skip-build | flag | no | off | reuse an existing instrumented build |
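+| debug | flag | no | off | enable debug-level logging (for example the cmake command line and the test exit code and duration); cmake and make output is captured and shown only on failure |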
+| db | path | no | .z3-agent/z3agent.db | path to the logging database |
--- a/.github/skills/memory-safety/scripts/memory_safety.py
+++ b/.github/skills/memory-safety/scripts/memory_safety.py
@ -0,0 +1,266 @@
+#!/usr/bin/env python3
+"""
+memory_safety.py: run sanitizer checks on Z3 test suite.
+
+Usage:
+    python memory_safety.py --sanitizer asan
+    python memory_safety.py --sanitizer ubsan --debug
+"""
+
+import argparse
+import logging
+import os
+import re
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent / "shared"))
+from z3db import Z3DB, setup_logging
+
+logger = logging.getLogger("z3agent")
+
+# Compiler and linker flags for each supported sanitizer.
+# The first token of every entry is the marker build_is_configured() searches
+# for in CMakeCache.txt, so the -fsanitize=<name> flag must stay first.
+# -fsanitize-recover=address is needed for ASAN_OPTIONS=halt_on_error=0 to
+# have any effect; without it ASan aborts the process at the first report.
+SANITIZER_FLAGS = {
+    "asan": (
+        "-fsanitize=address -fsanitize-recover=address "
+        "-fno-omit-frame-pointer"
+    ),
+    "ubsan": "-fsanitize=undefined -fno-omit-frame-pointer",
+}
+
+# "ERROR: AddressSanitizer: <kind>"; group 1 is the first whitespace-delimited
+# token after the prefix.
+ASAN_ERROR = re.compile(r"ERROR:\s*AddressSanitizer:\s*(\S+)")
+# ":<line>:<col>: runtime error: <message>"; group 1 is the message text.
+UBSAN_ERROR = re.compile(r":\d+:\d+:\s*runtime error:\s*(.+)")
+# Header line of a LeakSanitizer report; nothing is captured.
+LEAK_ERROR = re.compile(r"ERROR:\s*LeakSanitizer:")
+# "<path>.{cpp,c,h,hpp}:<line>"; group 1 is the path, group 2 the line number.
+LOCATION = re.compile(r"(\S+\.(?:cpp|c|h|hpp)):(\d+)")
+
+
+def find_repo_root() -> Path:
+    """Locate the Z3 checkout that contains the current working directory.
+
+    The working directory and then its ancestors are inspected, at most ten
+    directories in total. The first one that holds both a ``CMakeLists.txt``
+    entry and a ``src`` directory is returned. When none qualifies, an error
+    is logged and the process exits with status 1.
+    """
+    start = Path.cwd()
+    # Path.cwd() is absolute, so ``parents`` ends at the filesystem root.
+    for candidate in [start, *start.parents][:10]:
+        has_cmake = (candidate / "CMakeLists.txt").exists()
+        if has_cmake and (candidate / "src").is_dir():
+            return candidate
+    logger.error("could not locate Z3 repository root")
+    sys.exit(1)
+
+
+def build_is_configured(build_dir: Path, sanitizer: str) -> bool:
+    """Report whether ``build_dir`` was already configured for ``sanitizer``.
+
+    Returns False when ``CMakeCache.txt`` is missing. Otherwise returns True
+    exactly when the first flag listed for the sanitizer in SANITIZER_FLAGS
+    occurs somewhere in the cache file's text.
+    """
+    cache_file = build_dir / "CMakeCache.txt"
+    if cache_file.is_file():
+        marker, *_ = SANITIZER_FLAGS[sanitizer].split()
+        return marker in cache_file.read_text()
+    return False
+
+
+def configure(build_dir: Path, sanitizer: str, repo_root: Path) -> bool:
+    """Configure a Debug build with tests under the requested sanitizer.
+
+    Creates ``build_dir`` if needed and runs cmake from inside it against
+    ``repo_root``, passing the sanitizer flags as C, C++ and executable
+    linker flags. Returns True when cmake exits with status 0; otherwise
+    logs cmake's stderr and returns False.
+    """
+    flags = SANITIZER_FLAGS[sanitizer]
+    build_dir.mkdir(parents=True, exist_ok=True)
+
+    flag_vars = ("CMAKE_C_FLAGS", "CMAKE_CXX_FLAGS", "CMAKE_EXE_LINKER_FLAGS")
+    cmd = ["cmake", str(repo_root)]
+    cmd += [f"-D{var}={flags}" for var in flag_vars]
+    cmd += ["-DCMAKE_BUILD_TYPE=Debug", "-DZ3_BUILD_TEST=ON"]
+
+    logger.info("configuring %s build in %s", sanitizer, build_dir)
+    logger.debug("cmake command: %s", " ".join(cmd))
+    proc = subprocess.run(cmd, cwd=build_dir, capture_output=True, text=True)
+    succeeded = proc.returncode == 0
+    if not succeeded:
+        logger.error("cmake failed:\n%s", proc.stderr)
+    return succeeded
+
+
+def compile_tests(build_dir: Path) -> bool:
+    """Build the ``test-z3`` target with make inside ``build_dir``.
+
+    Uses one make job per CPU reported by ``os.cpu_count()``, or four when
+    the count is unavailable. Returns True on exit status 0; otherwise logs
+    the last 2000 characters of make's stderr and returns False.
+    """
+    jobs = os.cpu_count() or 4
+    logger.info("compiling test-z3 (%d parallel jobs)", jobs)
+    proc = subprocess.run(
+        ["make", f"-j{jobs}", "test-z3"],
+        cwd=build_dir, capture_output=True, text=True,
+    )
+    if proc.returncode == 0:
+        return True
+    logger.error("compilation failed:\n%s", proc.stderr[-2000:])
+    return False
+
+
+def _as_text(data) -> str:
+    """Normalize captured process output that may be None, bytes, or str."""
+    if data is None:
+        return ""
+    if isinstance(data, bytes):
+        return data.decode("utf-8", errors="replace")
+    return data
+
+
+def run_tests(build_dir: Path, timeout: int) -> dict:
+    """Execute test-z3 under sanitizer runtime and capture output.
+
+    Args:
+        build_dir: directory expected to contain the ``test-z3`` binary; it
+            is also used as the working directory of the test process.
+        timeout: seconds to wait before the test process is killed.
+
+    Returns:
+        A dict with keys ``stdout``, ``stderr``, ``exit_code`` and
+        ``duration_ms``. ``exit_code`` is -1 when the binary is missing
+        (``stderr`` is then ``"binary not found"``) or when the run timed
+        out. On timeout, ``stdout`` and ``stderr`` carry whatever output was
+        captured before the deadline, and ``stderr`` always ends with the
+        marker ``"timeout"``.
+    """
+    test_bin = build_dir / "test-z3"
+    if not test_bin.is_file():
+        logger.error("test-z3 not found at %s", test_bin)
+        return {"stdout": "", "stderr": "binary not found", "exit_code": -1,
+                "duration_ms": 0}
+
+    env = os.environ.copy()
+    # halt_on_error=0 asks the runtimes to keep going after a report so that
+    # a single run can surface more than one violation.
+    env["ASAN_OPTIONS"] = "detect_leaks=1:halt_on_error=0:print_stacktrace=1"
+    env["UBSAN_OPTIONS"] = "print_stacktrace=1:halt_on_error=0"
+
+    cmd = [str(test_bin), "/a"]
+    logger.info("running: %s", " ".join(cmd))
+    start = time.monotonic()
+    try:
+        proc = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=timeout,
+            cwd=build_dir, env=env,
+        )
+    except subprocess.TimeoutExpired as exc:
+        ms = int((time.monotonic() - start) * 1000)
+        logger.warning("test-z3 timed out after %dms", ms)
+        # Keep reports emitted before the deadline instead of dropping them.
+        # The exception may carry bytes even in text mode, or None when
+        # nothing was captured.
+        partial_err = _as_text(exc.stderr)
+        if partial_err:
+            partial_err += "\n"
+        return {"stdout": _as_text(exc.stdout),
+                "stderr": partial_err + "timeout",
+                "exit_code": -1,
+                "duration_ms": ms}
+
+    ms = int((time.monotonic() - start) * 1000)
+    logger.debug("exit_code=%d duration=%dms", proc.returncode, ms)
+    return {
+        "stdout": proc.stdout,
+        "stderr": proc.stderr,
+        "exit_code": proc.returncode,
+        "duration_ms": ms,
+    }
+
+
+def parse_findings(output: str) -> list:
+    """Extract sanitizer error reports from combined stdout and stderr.
+
+    Every line is tested against ASAN_ERROR, then LEAK_ERROR, then
+    UBSAN_ERROR; the first pattern that matches decides the category.
+
+    Args:
+        output: raw text produced by the instrumented test binary.
+
+    Returns:
+        A list of dicts, one per matching line, with keys ``category``
+        (``"asan"``, ``"leak"`` or ``"ubsan"``), ``message``, ``severity``,
+        ``file`` and ``line`` (both None when no location was found) and
+        ``raw`` (the stripped report line).
+    """
+    findings = []
+    # splitlines() also handles CRLF output, so no "\r" leaks into messages.
+    lines = output.splitlines()
+
+    for i, line in enumerate(lines):
+        asan = ASAN_ERROR.search(line)
+        if asan:
+            entry = {"category": "asan", "message": asan.group(1),
+                     "severity": "high"}
+        elif LEAK_ERROR.search(line):
+            entry = {"category": "leak",
+                     "message": "detected memory leaks",
+                     "severity": "high"}
+        else:
+            ubsan = UBSAN_ERROR.search(line)
+            if not ubsan:
+                continue
+            entry = {"category": "ubsan", "message": ubsan.group(1),
+                     "severity": "medium"}
+
+        # Search the report line first (UBSan prints file:line:col on it),
+        # then the four lines after it, and only then fall back to the two
+        # lines before it, nearest first. Scanning earlier lines first would
+        # credit this finding with the location of a preceding report.
+        following = lines[i:i + 5]
+        preceding = lines[max(0, i - 2):i][::-1]
+
+        file_path, line_no = None, None
+        for ctx in following + preceding:
+            loc = LOCATION.search(ctx)
+            # Paths under /usr/ are skipped as candidate locations.
+            if loc and "/usr/" not in loc.group(1):
+                file_path = loc.group(1)
+                line_no = int(loc.group(2))
+                break
+
+        entry["file"] = file_path
+        entry["line"] = line_no
+        entry["raw"] = line.strip()
+        findings.append(entry)
+
+    return findings
+
+
+def deduplicate(findings: list) -> list:
+    """Drop repeated findings, keeping the first occurrence of each.
+
+    Two findings are duplicates when their ``category``, ``file``, ``line``
+    and ``message`` values are all equal. The order of the surviving entries
+    follows their first appearance in ``findings``.
+    """
+    unique = {}
+    for finding in findings:
+        identity = (finding["category"], finding["file"], finding["line"],
+                    finding["message"])
+        # setdefault keeps the entry stored first for a given identity.
+        unique.setdefault(identity, finding)
+    return list(unique.values())
+
+
+def main():
+    """Command-line entry point: build, run and record sanitizer checks.
+
+    For each requested sanitizer the build directory is configured and
+    compiled (unless --skip-build is given), test-z3 is executed, and the
+    deduplicated findings are written to the database along with the run
+    status.
+
+    Exit status:
+        0: every run completed and nothing was reported.
+        1: at least one sanitizer finding was reported.
+        2: no findings, but at least one run failed to build, failed to
+           start, or timed out, so the result is not a clean bill of health.
+    """
+    parser = argparse.ArgumentParser(prog="memory-safety")
+    parser.add_argument("--sanitizer", choices=["asan", "ubsan", "both"],
+                        default="asan",
+                        help="sanitizer to enable (default: asan)")
+    parser.add_argument("--build-dir", default=None,
+                        help="path to build directory")
+    parser.add_argument("--timeout", type=int, default=600,
+                        help="seconds before killing test run")
+    parser.add_argument("--skip-build", action="store_true",
+                        help="reuse existing instrumented build")
+    parser.add_argument("--db", default=None,
+                        help="path to z3agent.db")
+    parser.add_argument("--debug", action="store_true")
+    args = parser.parse_args()
+
+    setup_logging(args.debug)
+    repo_root = find_repo_root()
+
+    sanitizers = ["asan", "ubsan"] if args.sanitizer == "both" else [args.sanitizer]
+    all_findings = []
+    # Sanitizers whose run could not be completed (build failure, missing
+    # binary, timeout). Tracked so such runs are not reported as clean.
+    incomplete = []
+
+    db = Z3DB(args.db)
+    try:
+        for san in sanitizers:
+            if args.build_dir:
+                build_dir = Path(args.build_dir)
+            else:
+                build_dir = repo_root / "build" / f"sanitizer-{san}"
+
+            run_id = db.start_run("memory-safety", f"sanitizer={san}")
+            db.log(f"sanitizer: {san}, build: {build_dir}", run_id=run_id)
+
+            if not args.skip_build:
+                needs_configure = not build_is_configured(build_dir, san)
+                if needs_configure and not configure(build_dir, san, repo_root):
+                    db.finish_run(run_id, "error", 0, exit_code=1)
+                    print(f"FAIL: cmake configuration failed for {san}")
+                    incomplete.append(san)
+                    continue
+                if not compile_tests(build_dir):
+                    db.finish_run(run_id, "error", 0, exit_code=1)
+                    print(f"FAIL: compilation failed for {san}")
+                    incomplete.append(san)
+                    continue
+
+            result = run_tests(build_dir, args.timeout)
+            combined = result["stdout"] + "\n" + result["stderr"]
+            findings = deduplicate(parse_findings(combined))
+
+            for f in findings:
+                db.log_finding(
+                    run_id,
+                    category=f["category"],
+                    message=f["message"],
+                    severity=f["severity"],
+                    file=f["file"],
+                    line=f["line"],
+                    details={"raw": f["raw"]},
+                )
+
+            status = "clean" if not findings else "findings"
+            # run_tests() reports -1 for both a timeout and a missing binary;
+            # the "timeout" marker in stderr tells the two apart.
+            if result["exit_code"] == -1:
+                status = "timeout" if "timeout" in result["stderr"] else "error"
+                incomplete.append(san)
+
+            db.finish_run(run_id, status, result["duration_ms"], result["exit_code"])
+            all_findings.extend(findings)
+            print(f"{san}: {len(findings)} finding(s), {result['duration_ms']}ms")
+    finally:
+        # Close the database even when a step above raises.
+        db.close()
+
+    if all_findings:
+        print(f"\nTotal: {len(all_findings)} finding(s)")
+        for f in all_findings:
+            loc = f"{f['file']}:{f['line']}" if f["file"] else "unknown location"
+            print(f"  [{f['severity']}] {f['category']}: {f['message']} at {loc}")
+        sys.exit(1)
+
+    if incomplete:
+        names = ", ".join(incomplete)
+        print(f"\nNo sanitizer findings, but these runs did not complete: {names}")
+        sys.exit(2)
+
+    print("\nNo sanitizer findings.")
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()