mirror of
https://github.com/Z3Prover/z3
synced 2026-05-16 15:15:35 +00:00
Move Z3 build to YAML preamble steps (runs before agent loads)
Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/bee30d14-e7e7-4b18-8794-7cf6927c60e5 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
parent
a2a3c520b3
commit
62c8273a87
2 changed files with 43 additions and 45 deletions
32
.github/workflows/tptp-benchmark.lock.yml
generated
vendored
32
.github/workflows/tptp-benchmark.lock.yml
generated
vendored
|
|
@ -1,4 +1,4 @@
|
|||
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"05a6ebcfc669740744c3887e595339f183e37872d3ebbaebe34513c2a8016af8","compiler_version":"v0.72.1","strict":true,"agent_id":"copilot"}
|
||||
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"f76e98e3954f6892f78728b22721e87d255ca21f122ab7076306b1ed8b5679b8","compiler_version":"v0.72.1","strict":true,"agent_id":"copilot"}
|
||||
# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9"},{"repo":"actions/setup-node","sha":"48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e","version":"v6.4.0"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"v0.72.1","version":"v0.72.1"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.41"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.41"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.41"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.3.6","digest":"sha256:2bb8eef86006a4c5963c55616a9c51c32f27bfdecb023b8aa6f91f6718d9171c","pinned_image":"ghcr.io/github/gh-aw-mcpg:v0.3.6@sha256:2bb8eef86006a4c5963c55616a9c51c32f27bfdecb023b8aa6f91f6718d9171c"},{"image":"ghcr.io/github/github-mcp-server:v1.0.3","digest":"sha256:2ac27ef03461ef2b877031b838a7d1fd7f12b12d4ace7796d8cad91446d55959","pinned_image":"ghcr.io/github/github-mcp-server:v1.0.3@sha256:2ac27ef03461ef2b877031b838a7d1fd7f12b12d4ace7796d8cad91446d55959"},{"image":"node:lts-alpine","digest":"sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f","pinned_image":"node:lts-alpine@sha256:d1b3b4da11eefd5941e7f0b9cf17783fc99d9c6fc34884a665f40a06dbdfc94f"}]}
|
||||
# ___ _ _
|
||||
# / _ \ | | (_)
|
||||
|
|
@ -183,20 +183,20 @@ jobs:
|
|||
run: |
|
||||
bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh"
|
||||
{
|
||||
cat << 'GH_AW_PROMPT_dbe2d26d7191f793_EOF'
|
||||
cat << 'GH_AW_PROMPT_0f8b33477e7f4791_EOF'
|
||||
<system>
|
||||
GH_AW_PROMPT_dbe2d26d7191f793_EOF
|
||||
GH_AW_PROMPT_0f8b33477e7f4791_EOF
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
|
||||
cat << 'GH_AW_PROMPT_dbe2d26d7191f793_EOF'
|
||||
cat << 'GH_AW_PROMPT_0f8b33477e7f4791_EOF'
|
||||
<safe-output-tools>
|
||||
Tools: create_discussion, missing_tool, missing_data, noop
|
||||
</safe-output-tools>
|
||||
GH_AW_PROMPT_dbe2d26d7191f793_EOF
|
||||
GH_AW_PROMPT_0f8b33477e7f4791_EOF
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/mcp_cli_tools_prompt.md"
|
||||
cat << 'GH_AW_PROMPT_dbe2d26d7191f793_EOF'
|
||||
cat << 'GH_AW_PROMPT_0f8b33477e7f4791_EOF'
|
||||
<github-context>
|
||||
The following GitHub context information is available for this workflow:
|
||||
{{#if __GH_AW_GITHUB_ACTOR__ }}
|
||||
|
|
@ -225,12 +225,12 @@ jobs:
|
|||
{{/if}}
|
||||
</github-context>
|
||||
|
||||
GH_AW_PROMPT_dbe2d26d7191f793_EOF
|
||||
GH_AW_PROMPT_0f8b33477e7f4791_EOF
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
|
||||
cat << 'GH_AW_PROMPT_dbe2d26d7191f793_EOF'
|
||||
cat << 'GH_AW_PROMPT_0f8b33477e7f4791_EOF'
|
||||
</system>
|
||||
{{#runtime-import .github/workflows/tptp-benchmark.md}}
|
||||
GH_AW_PROMPT_dbe2d26d7191f793_EOF
|
||||
GH_AW_PROMPT_0f8b33477e7f4791_EOF
|
||||
} > "$GH_AW_PROMPT"
|
||||
- name: Interpolate variables and render templates
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9.0.0
|
||||
|
|
@ -366,6 +366,12 @@ jobs:
|
|||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
persist-credentials: false
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
sudo apt-get update -y -q
|
||||
sudo apt-get install -y cmake ninja-build python3 wget curl bc
|
||||
- name: Build Z3
|
||||
run: "mkdir -p /tmp/z3-build\ncd /tmp/z3-build\ncmake \"$GITHUB_WORKSPACE\" \\\n -G Ninja \\\n -DCMAKE_BUILD_TYPE=Release \\\n -DZ3_BUILD_TEST_EXECUTABLES=OFF\nninja -j$(nproc) z3\n./z3 --version\n"
|
||||
|
||||
- name: Configure Git credentials
|
||||
env:
|
||||
|
|
@ -433,9 +439,9 @@ jobs:
|
|||
mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs"
|
||||
mkdir -p /tmp/gh-aw/safeoutputs
|
||||
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
|
||||
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_832ae11399c26134_EOF'
|
||||
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_6d18c7040963ed61_EOF'
|
||||
{"create_discussion":{"category":"agentic workflows","close_older_discussions":true,"expires":336,"fallback_to_issue":true,"max":1,"title_prefix":"[TPTP Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"},"report_incomplete":{}}
|
||||
GH_AW_SAFE_OUTPUTS_CONFIG_832ae11399c26134_EOF
|
||||
GH_AW_SAFE_OUTPUTS_CONFIG_6d18c7040963ed61_EOF
|
||||
- name: Generate Safe Outputs Tools
|
||||
env:
|
||||
GH_AW_TOOLS_META_JSON: |
|
||||
|
|
@ -628,7 +634,7 @@ jobs:
|
|||
|
||||
mkdir -p /home/runner/.copilot
|
||||
GH_AW_NODE=$(which node 2>/dev/null || command -v node 2>/dev/null || echo node)
|
||||
cat << GH_AW_MCP_CONFIG_0b5ba1ac1a376250_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs"
|
||||
cat << GH_AW_MCP_CONFIG_9ec215538233c65c_EOF | "$GH_AW_NODE" "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.cjs"
|
||||
{
|
||||
"mcpServers": {
|
||||
"github": {
|
||||
|
|
@ -669,7 +675,7 @@ jobs:
|
|||
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
|
||||
}
|
||||
}
|
||||
GH_AW_MCP_CONFIG_0b5ba1ac1a376250_EOF
|
||||
GH_AW_MCP_CONFIG_9ec215538233c65c_EOF
|
||||
- name: Mount MCP servers as CLIs
|
||||
id: mount-mcp-clis
|
||||
continue-on-error: true
|
||||
|
|
|
|||
56
.github/workflows/tptp-benchmark.md
vendored
56
.github/workflows/tptp-benchmark.md
vendored
|
|
@ -41,6 +41,22 @@ steps:
|
|||
with:
|
||||
persist-credentials: false
|
||||
|
||||
- name: Install build dependencies
|
||||
run: |
|
||||
sudo apt-get update -y -q
|
||||
sudo apt-get install -y cmake ninja-build python3 wget curl bc
|
||||
|
||||
- name: Build Z3
|
||||
run: |
|
||||
mkdir -p /tmp/z3-build
|
||||
cd /tmp/z3-build
|
||||
cmake "$GITHUB_WORKSPACE" \
|
||||
-G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DZ3_BUILD_TEST_EXECUTABLES=OFF
|
||||
ninja -j$(nproc) z3
|
||||
./z3 --version
|
||||
|
||||
---
|
||||
|
||||
# TPTP Front-End Benchmark
|
||||
|
|
@ -49,7 +65,7 @@ steps:
|
|||
|
||||
Your name is ${{ github.workflow }}. You are an expert testing engineer for the Z3 theorem prover. Your task is to:
|
||||
|
||||
1. Build Z3 from the current `master` branch
|
||||
1. Verify the Z3 binary built by the pre-flight step is available
|
||||
2. Download the TPTP benchmark library from tptp.org
|
||||
3. Select 500 random small-to-medium problems (with their axiom dependencies)
|
||||
4. Run each problem through Z3's TPTP front-end with a 5-second timeout
|
||||
|
|
@ -59,40 +75,18 @@ Your name is ${{ github.workflow }}. You are an expert testing engineer for the
|
|||
**Repository**: ${{ github.repository }}
|
||||
**Workspace**: ${{ github.workspace }}
|
||||
|
||||
## Phase 1: Build Z3
|
||||
## Phase 1: Verify Z3 Binary
|
||||
|
||||
Install build tools and build Z3 in Release mode.
|
||||
|
||||
```bash
|
||||
sudo apt-get update -y -q
|
||||
sudo apt-get install -y cmake ninja-build python3 wget curl bc 2>/dev/null || true
|
||||
```
|
||||
|
||||
Configure and build:
|
||||
|
||||
```bash
|
||||
mkdir -p /tmp/z3-build
|
||||
cd /tmp/z3-build
|
||||
cmake "$GITHUB_WORKSPACE" \
|
||||
-G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DZ3_BUILD_TEST_EXECUTABLES=OFF \
|
||||
2>&1 | tail -20
|
||||
|
||||
# Build with limited parallelism to avoid OOM alongside LLM inference.
|
||||
# Never run ninja in the background with &.
|
||||
ninja -j2 z3 2>&1 | tail -30
|
||||
```
|
||||
|
||||
Verify the build:
|
||||
Z3 was built by the workflow pre-flight step and is available at `/tmp/z3-build/z3`.
|
||||
Confirm the binary is present and functional:
|
||||
|
||||
```bash
|
||||
/tmp/z3-build/z3 --version
|
||||
```
|
||||
|
||||
If the build fails, call the `noop` safe-output with a message describing the error and stop.
|
||||
If the binary is missing or returns an error, call the `noop` safe-output with a message describing the problem and stop.
|
||||
|
||||
Once the binary is confirmed, call `noop` with `"Z3 built successfully. Downloading TPTP benchmark library — this may take a few minutes."` to keep the safe-output session alive.
|
||||
Once confirmed, call `noop` with `"Z3 binary verified. Downloading TPTP benchmark library — this may take a few minutes."` to keep the safe-output session alive.
|
||||
|
||||
## Phase 2: Download the TPTP Problem Library
|
||||
|
||||
|
|
@ -536,15 +530,13 @@ You **MUST** call either `create_discussion` or `noop` before the workflow ends:
|
|||
|
||||
- **Full success**: Call `create_discussion` with the complete report.
|
||||
- **Partial results** (some problems ran): Call `create_discussion` with whatever results are available and a note about incomplete execution.
|
||||
- **Build failure**: Call `noop` with a brief message describing the build error.
|
||||
- **Download failure**: Call `noop` with the download error details.
|
||||
- **No problems selected**: Call `noop` explaining why no problems were found.
|
||||
|
||||
Failing to produce any safe output triggers an automatic workflow-failure issue that clutters the repository.
|
||||
- **Binary missing**: If `/tmp/z3-build/z3` is unexpectedly absent, call `noop` with that detail and stop.
|
||||
|
||||
## Important Notes
|
||||
|
||||
- **Never run `ninja` in the background with `&`**: Concurrent C++ compilation and LLM inference exhausts available RAM and kills the agent process (exit 137). Always wait for build commands to finish before continuing.
|
||||
- **Build failure handling**: Z3 was built before the agent loaded. If the binary is missing or non-functional, call `noop` with the error and stop.
|
||||
- **TPTP environment variable**: Set `TPTP=/tmp/tptp` when invoking `z3 -tptp` so that `include()` directives in problem files resolve correctly against the downloaded Axioms directory.
|
||||
- **Timeout detection**: Use `timeout 8` as the outer OS-level guard (3 seconds beyond Z3's `-T:5`) to allow Z3 to exit cleanly before the shell kills it. If the exit code from `timeout` is 124, record the verdict as `Timeout`.
|
||||
- **Crash detection**: A crash is a non-zero exit code with no `% SZS status` line in the output and no timeout. Record it separately from `GaveUp`.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue