From daf2506b6002149d531cb6c9d8ba976a040cd647 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Thu, 16 Apr 2026 03:22:33 +0200 Subject: [PATCH] fix(ostrich-benchmark): add safeoutputs keepalive noop calls before long benchmark run (#9313) Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/7d129a85-614d-4927-a598-05ae902ab771 Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- .github/aw/actions-lock.json | 10 -- .github/workflows/ostrich-benchmark.lock.yml | 163 +++++++++++-------- .github/workflows/ostrich-benchmark.md | 4 + 3 files changed, 102 insertions(+), 75 deletions(-) diff --git a/.github/aw/actions-lock.json b/.github/aw/actions-lock.json index 3a7d4c130..5f39772d5 100644 --- a/.github/aw/actions-lock.json +++ b/.github/aw/actions-lock.json @@ -35,16 +35,6 @@ "version": "v7.0.0", "sha": "bbbca2ddaa5d8feaa63e36b76fdaad77386f024f" }, - "github/gh-aw-actions/setup-cli@v0.68.1": { - "repo": "github/gh-aw-actions/setup-cli", - "version": "v0.68.1", - "sha": "2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc" - }, - "github/gh-aw-actions/setup@v0.68.1": { - "repo": "github/gh-aw-actions/setup", - "version": "v0.68.1", - "sha": "2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc" - }, "github/gh-aw/actions/setup@v0.65.4": { "repo": "github/gh-aw/actions/setup", "version": "v0.65.4", diff --git a/.github/workflows/ostrich-benchmark.lock.yml b/.github/workflows/ostrich-benchmark.lock.yml index b802d05e3..d957ebc75 100644 --- a/.github/workflows/ostrich-benchmark.lock.yml +++ b/.github/workflows/ostrich-benchmark.lock.yml @@ -1,5 +1,5 @@ -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"0ec32242191968fac3261380e9254f6fffc790c0fd616efe779e78965a66a6da","compiler_version":"v0.68.1","strict":true,"agent_id":"copilot"} -# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc","version":"v0.68.1"}]} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"0ec32242191968fac3261380e9254f6fffc790c0fd616efe779e78965a66a6da","compiler_version":"v0.68.3","strict":true,"agent_id":"copilot"} +# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"v0.68.3","version":"v0.68.3"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.20"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.2.19"},{"image":"ghcr.io/github/github-mcp-server:v0.32.0"},{"image":"node:lts-alpine"}]} # ___ _ _ # / _ \ | | (_) # | |_| | __ _ ___ _ __ | |_ _ ___ @@ -14,7 +14,7 @@ # \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ # \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ # -# This file was automatically generated by gh-aw (v0.68.1). DO NOT EDIT. +# This file was automatically generated by gh-aw (v0.68.3). DO NOT EDIT. # # To update this file, edit the corresponding .md file and run: # gh aw compile @@ -34,9 +34,16 @@ # - actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 # - actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 # - actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 -# - actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 -# - actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 -# - github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 +# - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 +# - github/gh-aw-actions/setup@v0.68.3 +# +# Container images used: +# - ghcr.io/github/gh-aw-firewall/agent:0.25.20 +# - ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20 +# - ghcr.io/github/gh-aw-firewall/squid:0.25.20 +# - ghcr.io/github/gh-aw-mcpg:v0.2.19 +# - ghcr.io/github/github-mcp-server:v0.32.0 +# - node:lts-alpine name: "Ostrich Benchmark: Z3 c3 branch vs ZIPT" "on": @@ -74,7 +81,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 + uses: github/gh-aw-actions/setup@v0.68.3 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -86,18 +93,18 @@ jobs: GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }} GH_AW_INFO_VERSION: "1.0.21" GH_AW_INFO_AGENT_VERSION: "1.0.21" - GH_AW_INFO_CLI_VERSION: "v0.68.1" + GH_AW_INFO_CLI_VERSION: "v0.68.3" GH_AW_INFO_WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT" GH_AW_INFO_EXPERIMENTAL: "false" GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" GH_AW_INFO_STAGED: "false" GH_AW_INFO_ALLOWED_DOMAINS: '["defaults","api.nuget.org"]' GH_AW_INFO_FIREWALL_ENABLED: "true" - GH_AW_INFO_AWF_VERSION: "v0.25.18" + GH_AW_INFO_AWF_VERSION: "v0.25.20" GH_AW_INFO_AWMG_VERSION: "" GH_AW_INFO_FIREWALL_TYPE: "squid" GH_AW_COMPILED_STRICT: "true" - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -120,7 +127,7 @@ jobs: fetch-depth: 1 - name: Check workflow lock file id: check-lock-file - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_WORKFLOW_FILE: "ostrich-benchmark.lock.yml" GH_AW_CONTEXT_WORKFLOW_REF: "${{ github.workflow_ref }}" @@ -131,9 +138,9 @@ jobs: const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs'); await main(); - name: Check compile-agentic version - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: - GH_AW_COMPILED_VERSION: "v0.68.1" + GH_AW_COMPILED_VERSION: "v0.68.3" with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -203,7 +210,7 @@ jobs: GH_AW_PROMPT_b9636aa328031c49_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} @@ -215,7 +222,7 @@ jobs: const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs'); await main(); - name: Substitute placeholders - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_GITHUB_ACTOR: ${{ github.actor }} @@ -259,7 +266,7 @@ jobs: run: bash "${RUNNER_TEMP}/gh-aw/actions/print_prompt_summary.sh" - name: Upload activation artifact if: success() - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: activation path: | @@ -283,18 +290,21 @@ jobs: GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs GH_AW_WORKFLOW_ID_SANITIZED: ostrichbenchmark outputs: + agentic_engine_timeout: ${{ steps.detect-copilot-errors.outputs.agentic_engine_timeout || 'false' }} checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} effective_tokens: ${{ steps.parse-mcp-gateway.outputs.effective_tokens }} has_patch: ${{ steps.collect_output.outputs.has_patch }} - inference_access_error: ${{ steps.detect-inference-error.outputs.inference_access_error || 'false' }} + inference_access_error: ${{ steps.detect-copilot-errors.outputs.inference_access_error || 'false' }} + mcp_policy_error: ${{ steps.detect-copilot-errors.outputs.mcp_policy_error || 'false' }} model: ${{ needs.activation.outputs.model }} + model_not_supported_error: ${{ steps.detect-copilot-errors.outputs.model_not_supported_error || 'false' }} output: ${{ steps.collect_output.outputs.output }} output_types: ${{ steps.collect_output.outputs.output_types }} setup-trace-id: ${{ steps.setup.outputs.trace-id }} steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 + uses: github/gh-aw-actions/setup@v0.68.3 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -302,9 +312,11 @@ jobs: - name: Set runtime paths id: set-runtime-paths run: | - echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl" >> "$GITHUB_OUTPUT" - echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" >> "$GITHUB_OUTPUT" - echo "GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json" >> "$GITHUB_OUTPUT" + { + echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl" + echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" + echo "GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json" + } >> "$GITHUB_OUTPUT" - name: Create gh-aw temp directory run: bash "${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh" - name: Configure gh CLI for GitHub Enterprise @@ -335,7 +347,7 @@ jobs: id: checkout-pr if: | github.event.pull_request || github.event.issue.pull_request - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} with: @@ -350,7 +362,7 @@ jobs: env: GH_HOST: github.com - name: Install AWF binary - run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.18 + run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.20 - name: Determine automatic lockdown mode for GitHub MCP Server id: determine-automatic-lockdown uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 @@ -362,7 +374,7 @@ jobs: const determineAutomaticLockdown = require('${{ runner.temp }}/gh-aw/actions/determine_automatic_lockdown.cjs'); await determineAutomaticLockdown(github, context, core); - name: Download container images - run: bash "${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh" ghcr.io/github/gh-aw-firewall/agent:0.25.18 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.18 ghcr.io/github/gh-aw-firewall/squid:0.25.18 ghcr.io/github/gh-aw-mcpg:v0.2.17 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine + run: bash "${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh" ghcr.io/github/gh-aw-firewall/agent:0.25.20 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20 ghcr.io/github/gh-aw-firewall/squid:0.25.20 ghcr.io/github/gh-aw-mcpg:v0.2.19 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine - name: Write Safe Outputs Config run: | mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs" @@ -483,7 +495,7 @@ jobs: } } } - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -555,7 +567,7 @@ jobs: export DEBUG="*" export GH_AW_ENGINE="copilot" - export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17' + export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.19' mkdir -p /home/runner/.copilot cat << GH_AW_MCP_CONFIG_bae047c495059a7a_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh" @@ -617,8 +629,8 @@ jobs: touch /tmp/gh-aw/agent-step-summary.md (umask 177 && touch /tmp/gh-aw/agent-stdio.log) # shellcheck disable=SC1003 - sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.18 --skip-pull --enable-api-proxy \ - -- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log + sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.20 --skip-pull --enable-api-proxy \ + -- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log env: COPILOT_AGENT_RUNNER_TYPE: STANDALONE COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} @@ -627,7 +639,7 @@ jobs: GH_AW_PHASE: agent GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} - GH_AW_VERSION: v0.68.1 + GH_AW_VERSION: v0.68.3 GITHUB_API_URL: ${{ github.api_url }} GITHUB_AW: true GITHUB_HEAD_REF: ${{ github.head_ref }} @@ -641,11 +653,11 @@ jobs: GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com GIT_COMMITTER_NAME: github-actions[bot] XDG_CONFIG_HOME: /home/runner - - name: Detect inference access error - id: detect-inference-error + - name: Detect Copilot errors + id: detect-copilot-errors if: always() continue-on-error: true - run: bash "${RUNNER_TEMP}/gh-aw/actions/detect_inference_access_error.sh" + run: node "${RUNNER_TEMP}/gh-aw/actions/detect_copilot_errors.cjs" - name: Configure Git credentials env: REPO_NAME: ${{ github.repository }} @@ -674,7 +686,7 @@ jobs: bash "${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh" "$GATEWAY_PID" - name: Redact secrets in logs if: always() - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -700,7 +712,7 @@ jobs: - name: Ingest agent output id: collect_output if: always() - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" @@ -714,7 +726,7 @@ jobs: await main(); - name: Parse agent logs for step summary if: always() - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ with: @@ -726,7 +738,7 @@ jobs: - name: Parse MCP Gateway logs for step summary if: always() id: parse-mcp-gateway - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -751,7 +763,7 @@ jobs: - name: Parse token usage for step summary if: always() continue-on-error: true - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -767,7 +779,7 @@ jobs: - name: Upload agent artifacts if: always() continue-on-error: true - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: agent path: | @@ -783,14 +795,6 @@ jobs: /tmp/gh-aw/agent_output.json /tmp/gh-aw/aw-*.patch /tmp/gh-aw/aw-*.bundle - if-no-files-found: ignore - - name: Upload firewall audit logs - if: always() - continue-on-error: true - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 - with: - name: firewall-audit-logs - path: | /tmp/gh-aw/sandbox/firewall/logs/ /tmp/gh-aw/sandbox/firewall/audit/ if-no-files-found: ignore @@ -820,7 +824,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 + uses: github/gh-aw-actions/setup@v0.68.3 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -839,9 +843,9 @@ jobs: mkdir -p /tmp/gh-aw/ find "/tmp/gh-aw/" -type f -print echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" - - name: Process No-Op Messages + - name: Process no-op messages id: noop - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_NOOP_MAX: "1" @@ -856,9 +860,25 @@ jobs: setupGlobals(core, github, context, exec, io, getOctokit); const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs'); await main(); + - name: Log detection run + id: detection_runs + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 + env: + GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} + GH_AW_WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT" + GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} + GH_AW_DETECTION_CONCLUSION: ${{ needs.detection.outputs.detection_conclusion }} + GH_AW_DETECTION_REASON: ${{ needs.detection.outputs.detection_reason }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io, getOctokit); + const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_detection_runs.cjs'); + await main(); - name: Record missing tool id: missing_tool - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_MISSING_TOOL_CREATE_ISSUE: "true" @@ -873,7 +893,7 @@ jobs: await main(); - name: Record incomplete id: report_incomplete - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_REPORT_INCOMPLETE_CREATE_ISSUE: "true" @@ -888,7 +908,7 @@ jobs: - name: Handle agent failure id: handle_agent_failure if: always() - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT" @@ -899,6 +919,9 @@ jobs: GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }} GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }} + GH_AW_MCP_POLICY_ERROR: ${{ needs.agent.outputs.mcp_policy_error }} + GH_AW_AGENTIC_ENGINE_TIMEOUT: ${{ needs.agent.outputs.agentic_engine_timeout }} + GH_AW_MODEL_NOT_SUPPORTED_ERROR: ${{ needs.agent.outputs.model_not_supported_error }} GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }} GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }} GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} @@ -925,11 +948,12 @@ jobs: contents: read outputs: detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }} + detection_reason: ${{ steps.detection_conclusion.outputs.reason }} detection_success: ${{ steps.detection_conclusion.outputs.success }} steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 + uses: github/gh-aw-actions/setup@v0.68.3 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -954,8 +978,12 @@ jobs: with: persist-credentials: false # --- Threat Detection --- + - name: Clean stale firewall files from agent artifact + run: | + rm -rf /tmp/gh-aw/sandbox/firewall/logs + rm -rf /tmp/gh-aw/sandbox/firewall/audit - name: Download container images - run: bash "${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh" ghcr.io/github/gh-aw-firewall/agent:0.25.18 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.18 ghcr.io/github/gh-aw-firewall/squid:0.25.18 + run: bash "${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh" ghcr.io/github/gh-aw-firewall/agent:0.25.20 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20 ghcr.io/github/gh-aw-firewall/squid:0.25.20 - name: Check if detection needed id: detection_guard if: always() @@ -992,7 +1020,7 @@ jobs: ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true - name: Setup threat detection if: always() && steps.detection_guard.outputs.run_detection == 'true' - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT" WORKFLOW_DESCRIPTION: "Run Z3 string solver benchmarks (seq vs nseq) and ZIPT on all Ostrich benchmarks from tests/ostrich.zip on the c3 branch and post results as a GitHub discussion" @@ -1013,7 +1041,7 @@ jobs: env: GH_HOST: github.com - name: Install AWF binary - run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.18 + run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.20 - name: Execute GitHub Copilot CLI if: always() && steps.detection_guard.outputs.run_detection == 'true' id: detection_agentic_execution @@ -1024,15 +1052,15 @@ jobs: touch /tmp/gh-aw/agent-step-summary.md (umask 177 && touch /tmp/gh-aw/threat-detection/detection.log) # shellcheck disable=SC1003 - sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,telemetry.enterprise.githubcopilot.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.18 --skip-pull --enable-api-proxy \ - -- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-all-tools --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log + sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,telemetry.enterprise.githubcopilot.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.20 --skip-pull --enable-api-proxy \ + -- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log env: COPILOT_AGENT_RUNNER_TYPE: STANDALONE COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} GH_AW_PHASE: detection GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt - GH_AW_VERSION: v0.68.1 + GH_AW_VERSION: v0.68.3 GITHUB_API_URL: ${{ github.api_url }} GITHUB_AW: true GITHUB_HEAD_REF: ${{ github.head_ref }} @@ -1047,7 +1075,7 @@ jobs: XDG_CONFIG_HOME: /home/runner - name: Upload threat detection log if: always() && steps.detection_guard.outputs.run_detection == 'true' - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: detection path: /tmp/gh-aw/threat-detection/detection.log @@ -1055,9 +1083,10 @@ jobs: - name: Parse and conclude threat detection id: detection_conclusion if: always() - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }} + GH_AW_DETECTION_CONTINUE_ON_ERROR: "true" with: script: | const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); @@ -1079,6 +1108,8 @@ jobs: timeout-minutes: 15 env: GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/ostrich-benchmark" + GH_AW_DETECTION_CONCLUSION: ${{ needs.detection.outputs.detection_conclusion }} + GH_AW_DETECTION_REASON: ${{ needs.detection.outputs.detection_reason }} GH_AW_EFFECTIVE_TOKENS: ${{ needs.agent.outputs.effective_tokens }} GH_AW_ENGINE_ID: "copilot" GH_AW_ENGINE_MODEL: ${{ needs.agent.outputs.model }} @@ -1094,7 +1125,7 @@ jobs: steps: - name: Setup Scripts id: setup - uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 + uses: github/gh-aw-actions/setup@v0.68.3 with: destination: ${{ runner.temp }}/gh-aw/actions job-name: ${{ github.job }} @@ -1124,7 +1155,7 @@ jobs: echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV" - name: Process Safe Outputs id: process_safe_outputs - uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 + uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 env: GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" @@ -1140,9 +1171,11 @@ jobs: await main(); - name: Upload Safe Outputs Items if: always() - uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 + uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: safe-outputs-items - path: /tmp/gh-aw/safe-output-items.jsonl + path: | + /tmp/gh-aw/safe-output-items.jsonl + /tmp/gh-aw/temporary-id-map.json if-no-files-found: ignore diff --git a/.github/workflows/ostrich-benchmark.md b/.github/workflows/ostrich-benchmark.md index 22cbc4cff..ef298d065 100644 --- a/.github/workflows/ostrich-benchmark.md +++ b/.github/workflows/ostrich-benchmark.md @@ -88,6 +88,8 @@ echo "Found Microsoft.Z3.dll at: $Z3_DOTNET_DLL" If the build fails, report the error clearly and exit without proceeding. +Once the binary is confirmed working, call the `noop` safe-output tool with the message `"Z3 built successfully from the c3 branch. Starting ZIPT build and benchmark — results will be posted as a GitHub Discussion once complete."` This keepalive call refreshes the safe-output MCP session before the long build and benchmark phases begin, preventing a session timeout. + ## Phase 2a: Clone and Build ZIPT Clone the ZIPT solver from the `parikh` branch and compile it against the Z3 .NET bindings built in Phase 1. @@ -153,6 +155,8 @@ if [ "$TOTAL_FILES" -eq 0 ]; then fi ``` +Once the benchmark files are confirmed, call the `noop` safe-output tool with the message `"Benchmark files ready: Ostrich .smt2 files extracted. Starting benchmark run — this may take over an hour."` This second keepalive refreshes the safe-output MCP session immediately before the long per-file benchmark loop begins. + ## Phase 3: Run Benchmarks Run every file from `/tmp/all_ostrich_files.txt` with both Z3 string solvers and ZIPT. Use a **5-second timeout** per run.