3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-05-05 09:55:15 +00:00

fix(ostrich-benchmark): add safeoutputs keepalive noop calls before long benchmark run (#9313)

Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/7d129a85-614d-4927-a598-05ae902ab771

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
Copilot 2026-04-16 03:22:33 +02:00 committed by GitHub
parent 1d19d4a0dc
commit daf2506b60
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 102 additions and 75 deletions

View file

@ -35,16 +35,6 @@
"version": "v7.0.0", "version": "v7.0.0",
"sha": "bbbca2ddaa5d8feaa63e36b76fdaad77386f024f" "sha": "bbbca2ddaa5d8feaa63e36b76fdaad77386f024f"
}, },
"github/gh-aw-actions/setup-cli@v0.68.1": {
"repo": "github/gh-aw-actions/setup-cli",
"version": "v0.68.1",
"sha": "2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc"
},
"github/gh-aw-actions/setup@v0.68.1": {
"repo": "github/gh-aw-actions/setup",
"version": "v0.68.1",
"sha": "2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc"
},
"github/gh-aw/actions/setup@v0.65.4": { "github/gh-aw/actions/setup@v0.65.4": {
"repo": "github/gh-aw/actions/setup", "repo": "github/gh-aw/actions/setup",
"version": "v0.65.4", "version": "v0.65.4",

View file

@ -1,5 +1,5 @@
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"0ec32242191968fac3261380e9254f6fffc790c0fd616efe779e78965a66a6da","compiler_version":"v0.68.1","strict":true,"agent_id":"copilot"} # gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"0ec32242191968fac3261380e9254f6fffc790c0fd616efe779e78965a66a6da","compiler_version":"v0.68.3","strict":true,"agent_id":"copilot"}
# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc","version":"v0.68.1"}]} # gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"373c709c69115d41ff229c7e5df9f8788daa9553","version":"v9"},{"repo":"actions/upload-artifact","sha":"043fb46d1a93c77aae656e7c1c64a875d1fc6a0a","version":"v7.0.1"},{"repo":"github/gh-aw-actions/setup","sha":"v0.68.3","version":"v0.68.3"}],"containers":[{"image":"ghcr.io/github/gh-aw-firewall/agent:0.25.20"},{"image":"ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20"},{"image":"ghcr.io/github/gh-aw-firewall/squid:0.25.20"},{"image":"ghcr.io/github/gh-aw-mcpg:v0.2.19"},{"image":"ghcr.io/github/github-mcp-server:v0.32.0"},{"image":"node:lts-alpine"}]}
# ___ _ _ # ___ _ _
# / _ \ | | (_) # / _ \ | | (_)
# | |_| | __ _ ___ _ __ | |_ _ ___ # | |_| | __ _ ___ _ __ | |_ _ ___
@ -14,7 +14,7 @@
# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ # \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \
# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ # \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/
# #
# This file was automatically generated by gh-aw (v0.68.1). DO NOT EDIT. # This file was automatically generated by gh-aw (v0.68.3). DO NOT EDIT.
# #
# To update this file, edit the corresponding .md file and run: # To update this file, edit the corresponding .md file and run:
# gh aw compile # gh aw compile
@ -34,9 +34,16 @@
# - actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 # - actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# - actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 # - actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
# - actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 # - actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
# - actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 # - actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
# - actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 # - github/gh-aw-actions/setup@v0.68.3
# - github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 #
# Container images used:
# - ghcr.io/github/gh-aw-firewall/agent:0.25.20
# - ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20
# - ghcr.io/github/gh-aw-firewall/squid:0.25.20
# - ghcr.io/github/gh-aw-mcpg:v0.2.19
# - ghcr.io/github/github-mcp-server:v0.32.0
# - node:lts-alpine
name: "Ostrich Benchmark: Z3 c3 branch vs ZIPT" name: "Ostrich Benchmark: Z3 c3 branch vs ZIPT"
"on": "on":
@ -74,7 +81,7 @@ jobs:
steps: steps:
- name: Setup Scripts - name: Setup Scripts
id: setup id: setup
uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 uses: github/gh-aw-actions/setup@v0.68.3
with: with:
destination: ${{ runner.temp }}/gh-aw/actions destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }} job-name: ${{ github.job }}
@ -86,18 +93,18 @@ jobs:
GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }} GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }}
GH_AW_INFO_VERSION: "1.0.21" GH_AW_INFO_VERSION: "1.0.21"
GH_AW_INFO_AGENT_VERSION: "1.0.21" GH_AW_INFO_AGENT_VERSION: "1.0.21"
GH_AW_INFO_CLI_VERSION: "v0.68.1" GH_AW_INFO_CLI_VERSION: "v0.68.3"
GH_AW_INFO_WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT" GH_AW_INFO_WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT"
GH_AW_INFO_EXPERIMENTAL: "false" GH_AW_INFO_EXPERIMENTAL: "false"
GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true" GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true"
GH_AW_INFO_STAGED: "false" GH_AW_INFO_STAGED: "false"
GH_AW_INFO_ALLOWED_DOMAINS: '["defaults","api.nuget.org"]' GH_AW_INFO_ALLOWED_DOMAINS: '["defaults","api.nuget.org"]'
GH_AW_INFO_FIREWALL_ENABLED: "true" GH_AW_INFO_FIREWALL_ENABLED: "true"
GH_AW_INFO_AWF_VERSION: "v0.25.18" GH_AW_INFO_AWF_VERSION: "v0.25.20"
GH_AW_INFO_AWMG_VERSION: "" GH_AW_INFO_AWMG_VERSION: ""
GH_AW_INFO_FIREWALL_TYPE: "squid" GH_AW_INFO_FIREWALL_TYPE: "squid"
GH_AW_COMPILED_STRICT: "true" GH_AW_COMPILED_STRICT: "true"
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
with: with:
script: | script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
@ -120,7 +127,7 @@ jobs:
fetch-depth: 1 fetch-depth: 1
- name: Check workflow lock file - name: Check workflow lock file
id: check-lock-file id: check-lock-file
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_WORKFLOW_FILE: "ostrich-benchmark.lock.yml" GH_AW_WORKFLOW_FILE: "ostrich-benchmark.lock.yml"
GH_AW_CONTEXT_WORKFLOW_REF: "${{ github.workflow_ref }}" GH_AW_CONTEXT_WORKFLOW_REF: "${{ github.workflow_ref }}"
@ -131,9 +138,9 @@ jobs:
const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs'); const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs');
await main(); await main();
- name: Check compile-agentic version - name: Check compile-agentic version
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_COMPILED_VERSION: "v0.68.1" GH_AW_COMPILED_VERSION: "v0.68.3"
with: with:
script: | script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
@ -203,7 +210,7 @@ jobs:
GH_AW_PROMPT_b9636aa328031c49_EOF GH_AW_PROMPT_b9636aa328031c49_EOF
} > "$GH_AW_PROMPT" } > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates - name: Interpolate variables and render templates
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} GH_AW_GITHUB_REPOSITORY: ${{ github.repository }}
@ -215,7 +222,7 @@ jobs:
const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs'); const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs');
await main(); await main();
- name: Substitute placeholders - name: Substitute placeholders
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
GH_AW_GITHUB_ACTOR: ${{ github.actor }} GH_AW_GITHUB_ACTOR: ${{ github.actor }}
@ -259,7 +266,7 @@ jobs:
run: bash "${RUNNER_TEMP}/gh-aw/actions/print_prompt_summary.sh" run: bash "${RUNNER_TEMP}/gh-aw/actions/print_prompt_summary.sh"
- name: Upload activation artifact - name: Upload activation artifact
if: success() if: success()
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with: with:
name: activation name: activation
path: | path: |
@ -283,18 +290,21 @@ jobs:
GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs
GH_AW_WORKFLOW_ID_SANITIZED: ostrichbenchmark GH_AW_WORKFLOW_ID_SANITIZED: ostrichbenchmark
outputs: outputs:
agentic_engine_timeout: ${{ steps.detect-copilot-errors.outputs.agentic_engine_timeout || 'false' }}
checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }} checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }}
effective_tokens: ${{ steps.parse-mcp-gateway.outputs.effective_tokens }} effective_tokens: ${{ steps.parse-mcp-gateway.outputs.effective_tokens }}
has_patch: ${{ steps.collect_output.outputs.has_patch }} has_patch: ${{ steps.collect_output.outputs.has_patch }}
inference_access_error: ${{ steps.detect-inference-error.outputs.inference_access_error || 'false' }} inference_access_error: ${{ steps.detect-copilot-errors.outputs.inference_access_error || 'false' }}
mcp_policy_error: ${{ steps.detect-copilot-errors.outputs.mcp_policy_error || 'false' }}
model: ${{ needs.activation.outputs.model }} model: ${{ needs.activation.outputs.model }}
model_not_supported_error: ${{ steps.detect-copilot-errors.outputs.model_not_supported_error || 'false' }}
output: ${{ steps.collect_output.outputs.output }} output: ${{ steps.collect_output.outputs.output }}
output_types: ${{ steps.collect_output.outputs.output_types }} output_types: ${{ steps.collect_output.outputs.output_types }}
setup-trace-id: ${{ steps.setup.outputs.trace-id }} setup-trace-id: ${{ steps.setup.outputs.trace-id }}
steps: steps:
- name: Setup Scripts - name: Setup Scripts
id: setup id: setup
uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 uses: github/gh-aw-actions/setup@v0.68.3
with: with:
destination: ${{ runner.temp }}/gh-aw/actions destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }} job-name: ${{ github.job }}
@ -302,9 +312,11 @@ jobs:
- name: Set runtime paths - name: Set runtime paths
id: set-runtime-paths id: set-runtime-paths
run: | run: |
echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl" >> "$GITHUB_OUTPUT" {
echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" >> "$GITHUB_OUTPUT" echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl"
echo "GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json" >> "$GITHUB_OUTPUT" echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json"
echo "GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json"
} >> "$GITHUB_OUTPUT"
- name: Create gh-aw temp directory - name: Create gh-aw temp directory
run: bash "${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh" run: bash "${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh"
- name: Configure gh CLI for GitHub Enterprise - name: Configure gh CLI for GitHub Enterprise
@ -335,7 +347,7 @@ jobs:
id: checkout-pr id: checkout-pr
if: | if: |
github.event.pull_request || github.event.issue.pull_request github.event.pull_request || github.event.issue.pull_request
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
with: with:
@ -350,7 +362,7 @@ jobs:
env: env:
GH_HOST: github.com GH_HOST: github.com
- name: Install AWF binary - name: Install AWF binary
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.18 run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.20
- name: Determine automatic lockdown mode for GitHub MCP Server - name: Determine automatic lockdown mode for GitHub MCP Server
id: determine-automatic-lockdown id: determine-automatic-lockdown
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
@ -362,7 +374,7 @@ jobs:
const determineAutomaticLockdown = require('${{ runner.temp }}/gh-aw/actions/determine_automatic_lockdown.cjs'); const determineAutomaticLockdown = require('${{ runner.temp }}/gh-aw/actions/determine_automatic_lockdown.cjs');
await determineAutomaticLockdown(github, context, core); await determineAutomaticLockdown(github, context, core);
- name: Download container images - name: Download container images
run: bash "${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh" ghcr.io/github/gh-aw-firewall/agent:0.25.18 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.18 ghcr.io/github/gh-aw-firewall/squid:0.25.18 ghcr.io/github/gh-aw-mcpg:v0.2.17 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine run: bash "${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh" ghcr.io/github/gh-aw-firewall/agent:0.25.20 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20 ghcr.io/github/gh-aw-firewall/squid:0.25.20 ghcr.io/github/gh-aw-mcpg:v0.2.19 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine
- name: Write Safe Outputs Config - name: Write Safe Outputs Config
run: | run: |
mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs" mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs"
@ -483,7 +495,7 @@ jobs:
} }
} }
} }
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
with: with:
script: | script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
@ -555,7 +567,7 @@ jobs:
export DEBUG="*" export DEBUG="*"
export GH_AW_ENGINE="copilot" export GH_AW_ENGINE="copilot"
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17' export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.19'
mkdir -p /home/runner/.copilot mkdir -p /home/runner/.copilot
cat << GH_AW_MCP_CONFIG_bae047c495059a7a_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh" cat << GH_AW_MCP_CONFIG_bae047c495059a7a_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh"
@ -617,8 +629,8 @@ jobs:
touch /tmp/gh-aw/agent-step-summary.md touch /tmp/gh-aw/agent-step-summary.md
(umask 177 && touch /tmp/gh-aw/agent-stdio.log) (umask 177 && touch /tmp/gh-aw/agent-stdio.log)
# shellcheck disable=SC1003 # shellcheck disable=SC1003
sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.18 --skip-pull --enable-api-proxy \ sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.20 --skip-pull --enable-api-proxy \
-- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log -- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log
env: env:
COPILOT_AGENT_RUNNER_TYPE: STANDALONE COPILOT_AGENT_RUNNER_TYPE: STANDALONE
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
@ -627,7 +639,7 @@ jobs:
GH_AW_PHASE: agent GH_AW_PHASE: agent
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }}
GH_AW_VERSION: v0.68.1 GH_AW_VERSION: v0.68.3
GITHUB_API_URL: ${{ github.api_url }} GITHUB_API_URL: ${{ github.api_url }}
GITHUB_AW: true GITHUB_AW: true
GITHUB_HEAD_REF: ${{ github.head_ref }} GITHUB_HEAD_REF: ${{ github.head_ref }}
@ -641,11 +653,11 @@ jobs:
GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com
GIT_COMMITTER_NAME: github-actions[bot] GIT_COMMITTER_NAME: github-actions[bot]
XDG_CONFIG_HOME: /home/runner XDG_CONFIG_HOME: /home/runner
- name: Detect inference access error - name: Detect Copilot errors
id: detect-inference-error id: detect-copilot-errors
if: always() if: always()
continue-on-error: true continue-on-error: true
run: bash "${RUNNER_TEMP}/gh-aw/actions/detect_inference_access_error.sh" run: node "${RUNNER_TEMP}/gh-aw/actions/detect_copilot_errors.cjs"
- name: Configure Git credentials - name: Configure Git credentials
env: env:
REPO_NAME: ${{ github.repository }} REPO_NAME: ${{ github.repository }}
@ -674,7 +686,7 @@ jobs:
bash "${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh" "$GATEWAY_PID" bash "${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh" "$GATEWAY_PID"
- name: Redact secrets in logs - name: Redact secrets in logs
if: always() if: always()
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
with: with:
script: | script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
@ -700,7 +712,7 @@ jobs:
- name: Ingest agent output - name: Ingest agent output
id: collect_output id: collect_output
if: always() if: always()
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }} GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }}
GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
@ -714,7 +726,7 @@ jobs:
await main(); await main();
- name: Parse agent logs for step summary - name: Parse agent logs for step summary
if: always() if: always()
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/
with: with:
@ -726,7 +738,7 @@ jobs:
- name: Parse MCP Gateway logs for step summary - name: Parse MCP Gateway logs for step summary
if: always() if: always()
id: parse-mcp-gateway id: parse-mcp-gateway
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
with: with:
script: | script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
@ -751,7 +763,7 @@ jobs:
- name: Parse token usage for step summary - name: Parse token usage for step summary
if: always() if: always()
continue-on-error: true continue-on-error: true
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
with: with:
script: | script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
@ -767,7 +779,7 @@ jobs:
- name: Upload agent artifacts - name: Upload agent artifacts
if: always() if: always()
continue-on-error: true continue-on-error: true
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with: with:
name: agent name: agent
path: | path: |
@ -783,14 +795,6 @@ jobs:
/tmp/gh-aw/agent_output.json /tmp/gh-aw/agent_output.json
/tmp/gh-aw/aw-*.patch /tmp/gh-aw/aw-*.patch
/tmp/gh-aw/aw-*.bundle /tmp/gh-aw/aw-*.bundle
if-no-files-found: ignore
- name: Upload firewall audit logs
if: always()
continue-on-error: true
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7
with:
name: firewall-audit-logs
path: |
/tmp/gh-aw/sandbox/firewall/logs/ /tmp/gh-aw/sandbox/firewall/logs/
/tmp/gh-aw/sandbox/firewall/audit/ /tmp/gh-aw/sandbox/firewall/audit/
if-no-files-found: ignore if-no-files-found: ignore
@ -820,7 +824,7 @@ jobs:
steps: steps:
- name: Setup Scripts - name: Setup Scripts
id: setup id: setup
uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 uses: github/gh-aw-actions/setup@v0.68.3
with: with:
destination: ${{ runner.temp }}/gh-aw/actions destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }} job-name: ${{ github.job }}
@ -839,9 +843,9 @@ jobs:
mkdir -p /tmp/gh-aw/ mkdir -p /tmp/gh-aw/
find "/tmp/gh-aw/" -type f -print find "/tmp/gh-aw/" -type f -print
echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT" echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT"
- name: Process No-Op Messages - name: Process no-op messages
id: noop id: noop
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_NOOP_MAX: "1" GH_AW_NOOP_MAX: "1"
@ -856,9 +860,25 @@ jobs:
setupGlobals(core, github, context, exec, io, getOctokit); setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs'); const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs');
await main(); await main();
- name: Log detection run
id: detection_runs
uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT"
GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
GH_AW_DETECTION_CONCLUSION: ${{ needs.detection.outputs.detection_conclusion }}
GH_AW_DETECTION_REASON: ${{ needs.detection.outputs.detection_reason }}
with:
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_detection_runs.cjs');
await main();
- name: Record missing tool - name: Record missing tool
id: missing_tool id: missing_tool
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_MISSING_TOOL_CREATE_ISSUE: "true" GH_AW_MISSING_TOOL_CREATE_ISSUE: "true"
@ -873,7 +893,7 @@ jobs:
await main(); await main();
- name: Record incomplete - name: Record incomplete
id: report_incomplete id: report_incomplete
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_REPORT_INCOMPLETE_CREATE_ISSUE: "true" GH_AW_REPORT_INCOMPLETE_CREATE_ISSUE: "true"
@ -888,7 +908,7 @@ jobs:
- name: Handle agent failure - name: Handle agent failure
id: handle_agent_failure id: handle_agent_failure
if: always() if: always()
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT" GH_AW_WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT"
@ -899,6 +919,9 @@ jobs:
GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }} GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }}
GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }} GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }}
GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }} GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }}
GH_AW_MCP_POLICY_ERROR: ${{ needs.agent.outputs.mcp_policy_error }}
GH_AW_AGENTIC_ENGINE_TIMEOUT: ${{ needs.agent.outputs.agentic_engine_timeout }}
GH_AW_MODEL_NOT_SUPPORTED_ERROR: ${{ needs.agent.outputs.model_not_supported_error }}
GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }} GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }}
GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }} GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }}
GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }} GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }}
@ -925,11 +948,12 @@ jobs:
contents: read contents: read
outputs: outputs:
detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }} detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }}
detection_reason: ${{ steps.detection_conclusion.outputs.reason }}
detection_success: ${{ steps.detection_conclusion.outputs.success }} detection_success: ${{ steps.detection_conclusion.outputs.success }}
steps: steps:
- name: Setup Scripts - name: Setup Scripts
id: setup id: setup
uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 uses: github/gh-aw-actions/setup@v0.68.3
with: with:
destination: ${{ runner.temp }}/gh-aw/actions destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }} job-name: ${{ github.job }}
@ -954,8 +978,12 @@ jobs:
with: with:
persist-credentials: false persist-credentials: false
# --- Threat Detection --- # --- Threat Detection ---
- name: Clean stale firewall files from agent artifact
run: |
rm -rf /tmp/gh-aw/sandbox/firewall/logs
rm -rf /tmp/gh-aw/sandbox/firewall/audit
- name: Download container images - name: Download container images
run: bash "${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh" ghcr.io/github/gh-aw-firewall/agent:0.25.18 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.18 ghcr.io/github/gh-aw-firewall/squid:0.25.18 run: bash "${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh" ghcr.io/github/gh-aw-firewall/agent:0.25.20 ghcr.io/github/gh-aw-firewall/api-proxy:0.25.20 ghcr.io/github/gh-aw-firewall/squid:0.25.20
- name: Check if detection needed - name: Check if detection needed
id: detection_guard id: detection_guard
if: always() if: always()
@ -992,7 +1020,7 @@ jobs:
ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true
- name: Setup threat detection - name: Setup threat detection
if: always() && steps.detection_guard.outputs.run_detection == 'true' if: always() && steps.detection_guard.outputs.run_detection == 'true'
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT" WORKFLOW_NAME: "Ostrich Benchmark: Z3 c3 branch vs ZIPT"
WORKFLOW_DESCRIPTION: "Run Z3 string solver benchmarks (seq vs nseq) and ZIPT on all Ostrich benchmarks from tests/ostrich.zip on the c3 branch and post results as a GitHub discussion" WORKFLOW_DESCRIPTION: "Run Z3 string solver benchmarks (seq vs nseq) and ZIPT on all Ostrich benchmarks from tests/ostrich.zip on the c3 branch and post results as a GitHub discussion"
@ -1013,7 +1041,7 @@ jobs:
env: env:
GH_HOST: github.com GH_HOST: github.com
- name: Install AWF binary - name: Install AWF binary
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.18 run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.20
- name: Execute GitHub Copilot CLI - name: Execute GitHub Copilot CLI
if: always() && steps.detection_guard.outputs.run_detection == 'true' if: always() && steps.detection_guard.outputs.run_detection == 'true'
id: detection_agentic_execution id: detection_agentic_execution
@ -1024,15 +1052,15 @@ jobs:
touch /tmp/gh-aw/agent-step-summary.md touch /tmp/gh-aw/agent-step-summary.md
(umask 177 && touch /tmp/gh-aw/threat-detection/detection.log) (umask 177 && touch /tmp/gh-aw/threat-detection/detection.log)
# shellcheck disable=SC1003 # shellcheck disable=SC1003
sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,telemetry.enterprise.githubcopilot.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.18 --skip-pull --enable-api-proxy \ sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,telemetry.enterprise.githubcopilot.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.20 --skip-pull --enable-api-proxy \
-- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-all-tools --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log -- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --no-ask-user --allow-all-tools --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log
env: env:
COPILOT_AGENT_RUNNER_TYPE: STANDALONE COPILOT_AGENT_RUNNER_TYPE: STANDALONE
COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }}
GH_AW_PHASE: detection GH_AW_PHASE: detection
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
GH_AW_VERSION: v0.68.1 GH_AW_VERSION: v0.68.3
GITHUB_API_URL: ${{ github.api_url }} GITHUB_API_URL: ${{ github.api_url }}
GITHUB_AW: true GITHUB_AW: true
GITHUB_HEAD_REF: ${{ github.head_ref }} GITHUB_HEAD_REF: ${{ github.head_ref }}
@ -1047,7 +1075,7 @@ jobs:
XDG_CONFIG_HOME: /home/runner XDG_CONFIG_HOME: /home/runner
- name: Upload threat detection log - name: Upload threat detection log
if: always() && steps.detection_guard.outputs.run_detection == 'true' if: always() && steps.detection_guard.outputs.run_detection == 'true'
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with: with:
name: detection name: detection
path: /tmp/gh-aw/threat-detection/detection.log path: /tmp/gh-aw/threat-detection/detection.log
@ -1055,9 +1083,10 @@ jobs:
- name: Parse and conclude threat detection - name: Parse and conclude threat detection
id: detection_conclusion id: detection_conclusion
if: always() if: always()
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }} RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }}
GH_AW_DETECTION_CONTINUE_ON_ERROR: "true"
with: with:
script: | script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs'); const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
@ -1079,6 +1108,8 @@ jobs:
timeout-minutes: 15 timeout-minutes: 15
env: env:
GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/ostrich-benchmark" GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/ostrich-benchmark"
GH_AW_DETECTION_CONCLUSION: ${{ needs.detection.outputs.detection_conclusion }}
GH_AW_DETECTION_REASON: ${{ needs.detection.outputs.detection_reason }}
GH_AW_EFFECTIVE_TOKENS: ${{ needs.agent.outputs.effective_tokens }} GH_AW_EFFECTIVE_TOKENS: ${{ needs.agent.outputs.effective_tokens }}
GH_AW_ENGINE_ID: "copilot" GH_AW_ENGINE_ID: "copilot"
GH_AW_ENGINE_MODEL: ${{ needs.agent.outputs.model }} GH_AW_ENGINE_MODEL: ${{ needs.agent.outputs.model }}
@ -1094,7 +1125,7 @@ jobs:
steps: steps:
- name: Setup Scripts - name: Setup Scripts
id: setup id: setup
uses: github/gh-aw-actions/setup@2fe53acc038ba01c3bbdc767d4b25df31ca5bdfc # v0.68.1 uses: github/gh-aw-actions/setup@v0.68.3
with: with:
destination: ${{ runner.temp }}/gh-aw/actions destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }} job-name: ${{ github.job }}
@ -1124,7 +1155,7 @@ jobs:
echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV" echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV"
- name: Process Safe Outputs - name: Process Safe Outputs
id: process_safe_outputs id: process_safe_outputs
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9 uses: actions/github-script@373c709c69115d41ff229c7e5df9f8788daa9553 # v9
env: env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }} GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.nuget.org,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
@ -1140,9 +1171,11 @@ jobs:
await main(); await main();
- name: Upload Safe Outputs Items - name: Upload Safe Outputs Items
if: always() if: always()
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7 uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with: with:
name: safe-outputs-items name: safe-outputs-items
path: /tmp/gh-aw/safe-output-items.jsonl path: |
/tmp/gh-aw/safe-output-items.jsonl
/tmp/gh-aw/temporary-id-map.json
if-no-files-found: ignore if-no-files-found: ignore

View file

@ -88,6 +88,8 @@ echo "Found Microsoft.Z3.dll at: $Z3_DOTNET_DLL"
If the build fails, report the error clearly and exit without proceeding. If the build fails, report the error clearly and exit without proceeding.
Once the binary is confirmed working, call the `noop` safe-output tool with the message `"Z3 built successfully from the c3 branch. Starting ZIPT build and benchmark — results will be posted as a GitHub Discussion once complete."` This keepalive call refreshes the safe-output MCP session before the long build and benchmark phases begin, preventing a session timeout.
## Phase 2a: Clone and Build ZIPT ## Phase 2a: Clone and Build ZIPT
Clone the ZIPT solver from the `parikh` branch and compile it against the Z3 .NET bindings built in Phase 1. Clone the ZIPT solver from the `parikh` branch and compile it against the Z3 .NET bindings built in Phase 1.
@ -153,6 +155,8 @@ if [ "$TOTAL_FILES" -eq 0 ]; then
fi fi
``` ```
Once the benchmark files are confirmed, call the `noop` safe-output tool with the message `"Benchmark files ready: <TOTAL_FILES> Ostrich .smt2 files extracted. Starting benchmark run — this may take over an hour."` This second keepalive refreshes the safe-output MCP session immediately before the long per-file benchmark loop begins.
## Phase 3: Run Benchmarks ## Phase 3: Run Benchmarks
Run every file from `/tmp/all_ostrich_files.txt` with both Z3 string solvers and ZIPT. Use a **5-second timeout** per run. Run every file from `/tmp/all_ostrich_files.txt` with both Z3 string solvers and ZIPT. Use a **5-second timeout** per run.