mirror of
https://github.com/Z3Prover/z3
synced 2026-03-18 02:53:46 +00:00
Unit cases
This commit is contained in:
parent
a567a7edfb
commit
1351efe9af
3 changed files with 344 additions and 34 deletions
|
|
@ -14,6 +14,7 @@ and reports:
|
|||
"""
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
|
@ -29,6 +30,47 @@ SOLVERS = {
|
|||
}
|
||||
|
||||
|
||||
# Matches an SMT-LIB ``(set-info :status <value>)`` directive and captures the value.
_STATUS_RE = re.compile(r'\(\s*set-info\s+:status\s+(sat|unsat|unknown)\s*\)')


def read_smtlib_status(smt_file: Path) -> str:
    """Extract the expected result declared by ``(set-info :status ...)``.

    The file is decoded as UTF-8 (undecodable bytes are replaced) and the
    first matching directive in the text is used.

    Returns:
        'sat', 'unsat' or 'unknown'. 'unknown' is also the result when the
        file contains no such directive or cannot be read (OSError).
    """
    try:
        contents = smt_file.read_text(encoding="utf-8", errors="replace")
    except OSError:
        # Best effort: an unreadable benchmark simply has no known status.
        return "unknown"

    found = _STATUS_RE.search(contents)
    return found.group(1) if found else "unknown"
|
||||
|
||||
|
||||
def determine_status(res_nseq: str, res_seq: str, smtlib_status: str) -> str:
    """Pick the most trustworthy status for a problem.

    Only 'sat' and 'unsat' count as conclusive answers. The rules, in order:

    1. Both solvers are conclusive and agree: that answer.
    2. Exactly one solver is conclusive: that solver's answer.
    3. Otherwise (the solvers contradict each other, or neither is
       conclusive): the SMT-LIB annotation if it is conclusive, else
       'unknown'.
    """
    conclusive = {"sat", "unsat"}
    nseq_decided = res_nseq in conclusive
    seq_decided = res_seq in conclusive

    if nseq_decided and seq_decided:
        if res_nseq == res_seq:
            return res_nseq
        # sat-vs-unsat contradiction: neither solver is trusted, fall through.
    elif nseq_decided:
        return res_nseq
    elif seq_decided:
        return res_seq

    # Contradiction or no conclusive answer: defer to the file's annotation.
    return smtlib_status if smtlib_status in conclusive else "unknown"
|
||||
|
||||
|
||||
def run_z3(z3_bin: str, smt_file: Path, solver_arg: str) -> tuple[str, float]:
|
||||
"""Run z3 on a file with the given solver argument.
|
||||
Returns (result, elapsed) where result is 'sat', 'unsat', 'unknown', or 'timeout'/'error'.
|
||||
|
|
@ -81,13 +123,17 @@ def process_file(z3_bin: str, smt_file: Path) -> dict:
|
|||
res_nseq, t_nseq = run_z3(z3_bin, smt_file, SOLVERS["nseq"])
|
||||
res_seq, t_seq = run_z3(z3_bin, smt_file, SOLVERS["seq"])
|
||||
cat = classify(res_nseq, res_seq)
|
||||
smtlib_status = read_smtlib_status(smt_file)
|
||||
status = determine_status(res_nseq, res_seq, smtlib_status)
|
||||
return {
|
||||
"file": smt_file,
|
||||
"nseq": res_nseq,
|
||||
"seq": res_seq,
|
||||
"t_nseq": t_nseq,
|
||||
"t_seq": t_seq,
|
||||
"category": cat,
|
||||
"file": smt_file,
|
||||
"nseq": res_nseq,
|
||||
"seq": res_seq,
|
||||
"t_nseq": t_nseq,
|
||||
"t_seq": t_seq,
|
||||
"category": cat,
|
||||
"smtlib_status": smtlib_status,
|
||||
"status": status,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -138,24 +184,45 @@ def main():
|
|||
categories.setdefault(r["category"], []).append(r)
|
||||
|
||||
print("\n" + "="*70)
|
||||
print("SUMMARY")
|
||||
print("="*70)
|
||||
|
||||
print("TOTALS")
|
||||
for cat, items in categories.items():
|
||||
if not items:
|
||||
continue
|
||||
print(f" {cat:40s}: {len(items)}")
|
||||
print(f"{'='*70}")
|
||||
|
||||
# ── Per-solver timeout & divergence file lists ─────────────────────────
|
||||
nseq_timeouts = [r for r in results if r["nseq"] == "timeout"]
|
||||
seq_timeouts = [r for r in results if r["seq"] == "timeout"]
|
||||
both_to = categories["both_timeout"]
|
||||
diverged = categories["diverge"]
|
||||
|
||||
def _print_file_list(label: str, items: list[dict]):
|
||||
print(f"\n{'─'*70}")
|
||||
print(f" {cat.upper().replace('_', ' ')} ({len(items)} files)")
|
||||
print(f" {label} ({len(items)} files)")
|
||||
print(f"{'─'*70}")
|
||||
for r in sorted(items, key=lambda x: x["file"]):
|
||||
print(f" {r['file']}")
|
||||
if cat not in ("both_timeout", "both_agree"):
|
||||
print(f" nseq={r['nseq']:8s} ({r['t_nseq']:.1f}s) seq={r['seq']:8s} ({r['t_seq']:.1f}s)")
|
||||
|
||||
print(f"\n{'='*70}")
|
||||
print(f"TOTALS")
|
||||
for cat, items in categories.items():
|
||||
print(f" {cat:40s}: {len(items)}")
|
||||
if nseq_timeouts:
|
||||
_print_file_list("NSEQ TIMES OUT", nseq_timeouts)
|
||||
if seq_timeouts:
|
||||
_print_file_list("SEQ TIMES OUT", seq_timeouts)
|
||||
if both_to:
|
||||
_print_file_list("BOTH TIME OUT", both_to)
|
||||
if diverged:
|
||||
_print_file_list("DIVERGE (sat vs unsat)", diverged)
|
||||
|
||||
print()
|
||||
|
||||
# ── Problem status statistics ────────────────────────────────────────────
|
||||
status_counts = {"sat": 0, "unsat": 0, "unknown": 0}
|
||||
for r in results:
|
||||
status_counts[r["status"]] = status_counts.get(r["status"], 0) + 1
|
||||
|
||||
print(f"\nPROBLEM STATUS (total {len(results)} files)")
|
||||
print(f"{'─'*40}")
|
||||
print(f" {'sat':12s}: {status_counts['sat']:5d} ({100*status_counts['sat']/len(results):.1f}%)")
|
||||
print(f" {'unsat':12s}: {status_counts['unsat']:5d} ({100*status_counts['unsat']/len(results):.1f}%)")
|
||||
print(f" {'unknown':12s}: {status_counts['unknown']:5d} ({100*status_counts['unknown']/len(results):.1f}%)")
|
||||
print(f"{'='*70}\n")
|
||||
|
||||
# ── Optional CSV output ───────────────────────────────────────────────────
|
||||
|
|
@ -163,7 +230,7 @@ def main():
|
|||
import csv
|
||||
csv_path = Path(args.csv)
|
||||
with csv_path.open("w", newline="", encoding="utf-8") as f:
|
||||
writer = csv.DictWriter(f, fieldnames=["file", "nseq", "seq", "t_nseq", "t_seq", "category"])
|
||||
writer = csv.DictWriter(f, fieldnames=["file", "nseq", "seq", "t_nseq", "t_seq", "category", "smtlib_status", "status"])
|
||||
writer.writeheader()
|
||||
for r in sorted(results, key=lambda x: x["file"]):
|
||||
writer.writerow({**r, "file": str(r["file"])})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue