3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-03-16 18:20:00 +00:00
z3/.github/scripts/parse_sanitizer_reports.py
Angelica Moreira db46d52056
fix memory-safety-report to download artifacts via MCP tools (#8979)
gh CLI is not available inside AWF so the agent could not download
artifacts. Switch to GitHub MCP actions toolset for artifact URLs
and add helper scripts for download and parsing.
2026-03-15 10:12:49 -07:00

201 lines
6 KiB
Python

#!/usr/bin/env python3
"""Parse ASan/UBSan artifacts from the memory-safety workflow.

Reads the report directory produced by fetch-artifacts.sh, extracts
findings from per-PID log files and stdout captures, and writes
structured JSON to /tmp/parsed-report.json.

Usage:
    parse_sanitizer_reports.py [report_dir]

report_dir defaults to /tmp/reports.
"""
import json
import os
import re
import sys
from pathlib import Path
# Input location: report dir from argv[1], defaulting to /tmp/reports;
# the parsed JSON is always written to /tmp/parsed-report.json.
REPORT_DIR = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("/tmp/reports")
OUT = Path("/tmp/parsed-report.json")
# Per-tool subdirectories inside the report dir (as laid out by fetch-artifacts.sh).
ASAN_DIR = REPORT_DIR / "asan-reports"
UBSAN_DIR = REPORT_DIR / "ubsan-reports"
# Patterns for real sanitizer findings (not Z3 internal errors).
# ASan/LSan report header: "==<pid>==ERROR: <tool>: <description>".
ASAN_ERROR = re.compile(
    r"==\d+==ERROR: (AddressSanitizer|LeakSanitizer): (.+)"
)
# Leak summary line, e.g. "SUMMARY: LeakSanitizer: 128 byte(s) leaked ...".
# NOTE(review): ASAN_SUMMARY is not referenced anywhere in this script — confirm
# whether it is dead code or reserved for a follow-up.
ASAN_SUMMARY = re.compile(
    r"SUMMARY: (AddressSanitizer|LeakSanitizer): (\d+) byte"
)
# UBSan diagnostic: "<file>:<line>:<col>: runtime error: <description>".
UBSAN_ERROR = re.compile(
    r"(.+:\d+:\d+): runtime error: (.+)"
)
# Stack frame: #N 0xADDR in func file:line
STACK_FRAME = re.compile(
    r"\s+#(\d+) 0x[0-9a-f]+ in (.+?) (.+)"
)
def read_text(path):
    """Return *path*'s text (decode errors replaced), or "" if it is not a file."""
    if not path.is_file():
        return ""
    return path.read_text(errors="replace")
def find_pid_files(directory, prefix):
    """Return sorted paths named "<prefix>.<something>" (asan.12345, ubsan.67890, ...)."""
    if not directory.is_dir():
        return []
    marker = prefix + "."
    # Names starting with "prefix." can never equal the bare prefix itself.
    return sorted(entry for entry in directory.iterdir()
                  if entry.name.startswith(marker))
def parse_asan_block(text):
    """Extract structured ASan/LSan error records from one log's text.

    Each record carries tool, error type, best-guess location (first stack
    frame containing a ':'), up to 5 stack frames, and the raw header line.
    """
    results = []
    record = None
    for raw_line in text.splitlines():
        header = ASAN_ERROR.match(raw_line)
        if header:
            # A new error header closes out the record in progress.
            if record:
                results.append(record)
            record = {
                "tool": header.group(1),
                "type": header.group(2).strip(),
                "location": "",
                "frames": [],
                "raw": raw_line,
            }
        elif record and len(record["frames"]) < 5:
            frame = STACK_FRAME.match(raw_line)
            if frame:
                func, where = frame.group(2), frame.group(3)
                record["frames"].append({"func": func, "location": where})
                # First frame that looks like file:line becomes the location.
                if not record["location"] and ":" in where:
                    record["location"] = where.strip()
    if record:
        results.append(record)
    return results
def parse_ubsan_lines(text):
    """Extract unique UBSan "runtime error" records from one log's text.

    Duplicate (location, message) pairs are reported only once.
    """
    records = []
    seen_keys = set()
    for raw_line in text.splitlines():
        hit = UBSAN_ERROR.search(raw_line)
        if not hit:
            continue
        location, message = hit.group(1), hit.group(2)
        key = (location, message)
        if key in seen_keys:
            continue
        seen_keys.add(key)
        records.append({
            "tool": "UBSan",
            "type": message.strip(),
            "location": location.strip(),
            "raw": raw_line.strip(),
        })
    return records
def scan_directory(directory, prefix, parse_pid_fn, log_pattern):
    """Scan one sanitizer report directory into a structured result dict.

    Parses per-PID capture files and *.log stdout captures with
    *parse_pid_fn*; log findings that duplicate a per-PID finding
    (same type and location) are dropped.
    """
    summary_text = read_text(directory / "summary.md")
    pid_files = find_pid_files(directory, prefix)
    pid_findings = []
    for pid_path in pid_files:
        pid_findings.extend(parse_pid_fn(pid_path.read_text(errors="replace")))
    log_hit_count = 0
    log_findings = []
    for log_path in sorted(directory.glob("*.log")):
        text = log_path.read_text(errors="replace")
        log_hit_count += len(log_pattern.findall(text))
        log_findings.extend(parse_pid_fn(text))
    # Deduplicate log findings against pid findings by (type, location).
    already_seen = {(f["type"], f["location"]) for f in pid_findings}
    combined = pid_findings + [
        f for f in log_findings
        if (f["type"], f["location"]) not in already_seen
    ]
    if directory.is_dir():
        files = sorted(entry.name for entry in directory.iterdir())
    else:
        files = []
    return {
        "summary": summary_text,
        "pid_file_count": len(pid_files),
        "log_hit_count": log_hit_count,
        "findings": combined,
        "finding_count": len(combined),
        "files": files,
    }
def load_suppressions():
    """Read suppressions from contrib/suppressions/sanitizers/.

    Returns a dict mapping tool name ("asan", "ubsan", "lsan") to the list
    of non-empty, non-comment lines of its suppression file; a missing file
    yields an empty list. Paths are relative to the current working
    directory (the repo root when run by the workflow).
    """
    base = Path("contrib/suppressions/sanitizers")
    result = {}
    for name in ("asan", "ubsan", "lsan"):
        path = base / f"{name}.txt"
        entries = []
        if path.is_file():
            # errors="replace" so a stray non-UTF-8 byte in a suppression
            # file cannot crash the whole report generation.
            for line in path.read_text(errors="replace").splitlines():
                line = line.strip()
                if line and not line.startswith("#"):
                    entries.append(line)
        result[name] = entries
    return result
def main():
    """Parse both report dirs, write JSON to OUT, print a human summary."""
    if not REPORT_DIR.is_dir():
        print(f"error: {REPORT_DIR} not found", file=sys.stderr)
        sys.exit(1)
    asan = scan_directory(ASAN_DIR, "asan", parse_asan_block, ASAN_ERROR)
    ubsan = scan_directory(UBSAN_DIR, "ubsan", parse_ubsan_lines, UBSAN_ERROR)
    suppressions = load_suppressions()
    report = {
        "asan": asan,
        "ubsan": ubsan,
        "suppressions": suppressions,
        "total_findings": asan["finding_count"] + ubsan["finding_count"],
    }
    OUT.write_text(json.dumps(report, indent=2))
    # Human-readable summary to stdout.
    total = report["total_findings"]
    for label, section in (("asan", asan), ("ubsan", ubsan)):
        print(
            f"{label}: {section['finding_count']} findings "
            f"({section['pid_file_count']} pid files, "
            f"{section['log_hit_count']} log hits)"
        )
    if total:
        print(f"result: {total} finding(s)")
    else:
        print("result: clean")
    # UBSan records also carry tool == "UBSan", so one loop covers both lists.
    for finding in asan["findings"] + ubsan["findings"]:
        print(f" [{finding['tool']}] {finding['type']} at {finding['location']}")
    if any(suppressions.values()):
        print("suppressions:")
        for tool, entries in suppressions.items():
            for entry in entries:
                print(f" {tool}: {entry}")
    print(f"\njson: {OUT}")
if __name__ == "__main__":
    main()