3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-04-15 00:35:11 +00:00

fix qf-s-benchmark: Release mode build, fix broken code fence, reduce timeouts

Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/8eaace11-bbc1-49d9-993d-67290f5b1841

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-04-10 21:38:10 +00:00 committed by GitHub
parent b22d44867b
commit 6ca1bab43d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 88 additions and 85 deletions

View file

@ -1,5 +1,5 @@
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"e5e5c332eb206c2bde9e7e5cb0bb1babe7b1c50e0437a00b4093ddb8b5ab80cf","compiler_version":"v0.67.4","strict":true,"agent_id":"copilot"}
# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"ed597411d8f924073f98dfc5c65a23a2325f34cd","version":"v8"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"v0.67.4","version":"v0.67.4"}]}
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"d49708d2a52ea8ed02a11700663bca11b8d1a61414630351523fef60d589ae56","compiler_version":"v0.68.1","strict":true,"agent_id":"copilot"}
# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"v0.68.1","version":"v0.68.1"}]}
# ___ _ _
# / _ \ | | (_)
# | |_| | __ _ ___ _ __ | |_ _ ___
@ -14,7 +14,7 @@
# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \
# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/
#
# This file was automatically generated by gh-aw (v0.67.4). DO NOT EDIT.
# This file was automatically generated by gh-aw (v0.68.1). DO NOT EDIT.
#
# To update this file, edit the corresponding .md file and run:
# gh aw compile
@ -33,9 +33,9 @@
# Custom actions used:
# - actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
# - actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
# - actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
# - actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
# - actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7
# - github/gh-aw-actions/setup@v0.67.4
# - github/gh-aw-actions/setup@v0.68.1
name: "QF_S String Solver Benchmark"
"on":
@ -69,10 +69,11 @@ jobs:
model: ${{ steps.generate_aw_info.outputs.model }}
secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }}
setup-trace-id: ${{ steps.setup.outputs.trace-id }}
stale_lock_file_failed: ${{ steps.check-lock-file.outputs.stale_lock_file_failed == 'true' }}
steps:
- name: Setup Scripts
id: setup
uses: github/gh-aw-actions/setup@v0.67.4
uses: github/gh-aw-actions/setup@v0.68.1
with:
destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }}
@ -82,9 +83,9 @@ jobs:
GH_AW_INFO_ENGINE_ID: "copilot"
GH_AW_INFO_ENGINE_NAME: "GitHub Copilot CLI"
GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }}
GH_AW_INFO_VERSION: "1.0.20"
GH_AW_INFO_AGENT_VERSION: "1.0.20"
GH_AW_INFO_CLI_VERSION: "v0.67.4"
GH_AW_INFO_VERSION: "1.0.21"
GH_AW_INFO_AGENT_VERSION: "1.0.21"
GH_AW_INFO_CLI_VERSION: "v0.68.1"
GH_AW_INFO_WORKFLOW_NAME: "QF_S String Solver Benchmark"
GH_AW_INFO_EXPERIMENTAL: "false"
GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true"
@ -95,11 +96,11 @@ jobs:
GH_AW_INFO_AWMG_VERSION: ""
GH_AW_INFO_FIREWALL_TYPE: "squid"
GH_AW_COMPILED_STRICT: "true"
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_aw_info.cjs');
await main(core, context);
- name: Validate COPILOT_GITHUB_TOKEN secret
@ -117,24 +118,25 @@ jobs:
sparse-checkout-cone-mode: true
fetch-depth: 1
- name: Check workflow lock file
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
id: check-lock-file
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_WORKFLOW_FILE: "qf-s-benchmark.lock.yml"
GH_AW_CONTEXT_WORKFLOW_REF: "${{ github.workflow_ref }}"
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs');
await main();
- name: Check compile-agentic version
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_COMPILED_VERSION: "v0.67.4"
GH_AW_COMPILED_VERSION: "v0.68.1"
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/check_version_updates.cjs');
await main();
- name: Create prompt with built-in context
@ -154,14 +156,14 @@ jobs:
run: |
bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh"
{
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
cat << 'GH_AW_PROMPT_78ca018fd0ee9e53_EOF'
<system>
GH_AW_PROMPT_c25676ba2ab40d85_EOF
GH_AW_PROMPT_78ca018fd0ee9e53_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
cat << 'GH_AW_PROMPT_78ca018fd0ee9e53_EOF'
<safe-output-tools>
Tools: create_discussion, missing_tool, missing_data, noop
</safe-output-tools>
@ -193,15 +195,15 @@ jobs:
{{/if}}
</github-context>
GH_AW_PROMPT_c25676ba2ab40d85_EOF
GH_AW_PROMPT_78ca018fd0ee9e53_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
cat << 'GH_AW_PROMPT_78ca018fd0ee9e53_EOF'
</system>
{{#runtime-import .github/workflows/qf-s-benchmark.md}}
GH_AW_PROMPT_c25676ba2ab40d85_EOF
GH_AW_PROMPT_78ca018fd0ee9e53_EOF
} > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
GH_AW_GITHUB_REPOSITORY: ${{ github.repository }}
@ -210,11 +212,11 @@ jobs:
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs');
await main();
- name: Substitute placeholders
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
GH_AW_GITHUB_ACTOR: ${{ github.actor }}
@ -229,7 +231,7 @@ jobs:
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const substitutePlaceholders = require('${{ runner.temp }}/gh-aw/actions/substitute_placeholders.cjs');
@ -295,7 +297,7 @@ jobs:
steps:
- name: Setup Scripts
id: setup
uses: github/gh-aw-actions/setup@v0.67.4
uses: github/gh-aw-actions/setup@v0.68.1
with:
destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }}
@ -336,25 +338,25 @@ jobs:
id: checkout-pr
if: |
github.event.pull_request || github.event.issue.pull_request
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
with:
github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/checkout_pr_branch.cjs');
await main();
- name: Install GitHub Copilot CLI
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh" 1.0.20
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh" 1.0.21
env:
GH_HOST: github.com
- name: Install AWF binary
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.18
- name: Determine automatic lockdown mode for GitHub MCP Server
id: determine-automatic-lockdown
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }}
GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }}
@ -369,9 +371,9 @@ jobs:
mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs"
mkdir -p /tmp/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF'
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_7b004ca92a18e849_EOF'
{"create_discussion":{"category":"agentic workflows","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[QF_S Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"},"report_incomplete":{}}
GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF
GH_AW_SAFE_OUTPUTS_CONFIG_7b004ca92a18e849_EOF
- name: Write Safe Outputs Tools
env:
GH_AW_TOOLS_META_JSON: |
@ -484,11 +486,11 @@ jobs:
}
}
}
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_safe_outputs_tools.cjs');
await main();
- name: Generate Safe Outputs MCP Server Config
@ -559,7 +561,7 @@ jobs:
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17'
mkdir -p /home/runner/.copilot
cat << GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh"
cat << GH_AW_MCP_CONFIG_dd4b678d464df877_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh"
{
"mcpServers": {
"github": {
@ -600,7 +602,7 @@ jobs:
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
}
}
GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF
GH_AW_MCP_CONFIG_dd4b678d464df877_EOF
- name: Download activation artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
@ -612,10 +614,11 @@ jobs:
- name: Execute GitHub Copilot CLI
id: agentic_execution
# Copilot CLI tool arguments (sorted):
timeout-minutes: 90
timeout-minutes: 120
run: |
set -o pipefail
touch /tmp/gh-aw/agent-step-summary.md
(umask 177 && touch /tmp/gh-aw/agent-stdio.log)
# shellcheck disable=SC1003
sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.18 --skip-pull --enable-api-proxy \
-- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log
@ -627,10 +630,9 @@ jobs:
GH_AW_PHASE: agent
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }}
GH_AW_VERSION: v0.67.4
GH_AW_VERSION: v0.68.1
GITHUB_API_URL: ${{ github.api_url }}
GITHUB_AW: true
GITHUB_COPILOT_INTEGRATION_ID: agentic-workflows
GITHUB_HEAD_REF: ${{ github.head_ref }}
GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
GITHUB_REF_NAME: ${{ github.ref_name }}
@ -675,11 +677,11 @@ jobs:
bash "${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh" "$GATEWAY_PID"
- name: Redact secrets in logs
if: always()
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/redact_secrets.cjs');
await main();
env:
@ -701,7 +703,7 @@ jobs:
- name: Ingest agent output
id: collect_output
if: always()
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }}
GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
@ -710,28 +712,28 @@ jobs:
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/collect_ndjson_output.cjs');
await main();
- name: Parse agent logs for step summary
if: always()
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_copilot_log.cjs');
await main();
- name: Parse MCP Gateway logs for step summary
if: always()
id: parse-mcp-gateway
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_mcp_gateway_log.cjs');
await main();
- name: Print firewall logs
@ -752,11 +754,11 @@ jobs:
- name: Parse token usage for step summary
if: always()
continue-on-error: true
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_token_usage.cjs');
await main();
- name: Write agent output placeholder if missing
@ -802,7 +804,9 @@ jobs:
- agent
- detection
- safe_outputs
if: always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true')
if: >
always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' ||
needs.activation.outputs.stale_lock_file_failed == 'true')
runs-on: ubuntu-slim
permissions:
contents: read
@ -819,7 +823,7 @@ jobs:
steps:
- name: Setup Scripts
id: setup
uses: github/gh-aw-actions/setup@v0.67.4
uses: github/gh-aw-actions/setup@v0.68.1
with:
destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }}
@ -840,7 +844,7 @@ jobs:
echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT"
- name: Process No-Op Messages
id: noop
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_NOOP_MAX: "1"
@ -852,12 +856,12 @@ jobs:
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs');
await main();
- name: Record missing tool
id: missing_tool
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_MISSING_TOOL_CREATE_ISSUE: "true"
@ -867,12 +871,12 @@ jobs:
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/missing_tool.cjs');
await main();
- name: Record incomplete
id: report_incomplete
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_REPORT_INCOMPLETE_CREATE_ISSUE: "true"
@ -881,13 +885,13 @@ jobs:
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/report_incomplete_handler.cjs');
await main();
- name: Handle agent failure
id: handle_agent_failure
if: always()
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_WORKFLOW_NAME: "QF_S String Solver Benchmark"
@ -901,14 +905,15 @@ jobs:
GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }}
GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }}
GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }}
GH_AW_STALE_LOCK_FILE_FAILED: ${{ needs.activation.outputs.stale_lock_file_failed }}
GH_AW_GROUP_REPORTS: "false"
GH_AW_FAILURE_REPORT_AS_ISSUE: "true"
GH_AW_TIMEOUT_MINUTES: "90"
GH_AW_TIMEOUT_MINUTES: "120"
with:
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_agent_failure.cjs');
await main();
@ -927,7 +932,7 @@ jobs:
steps:
- name: Setup Scripts
id: setup
uses: github/gh-aw-actions/setup@v0.67.4
uses: github/gh-aw-actions/setup@v0.68.1
with:
destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }}
@ -990,7 +995,7 @@ jobs:
ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true
- name: Setup threat detection
if: always() && steps.detection_guard.outputs.run_detection == 'true'
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
WORKFLOW_NAME: "QF_S String Solver Benchmark"
WORKFLOW_DESCRIPTION: "Benchmark Z3 seq vs nseq string solvers on QF_S test suite from the c3 branch and post results as a GitHub discussion"
@ -998,7 +1003,7 @@ jobs:
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/setup_threat_detection.cjs');
await main();
- name: Ensure threat-detection directory and log
@ -1007,7 +1012,7 @@ jobs:
mkdir -p /tmp/gh-aw/threat-detection
touch /tmp/gh-aw/threat-detection/detection.log
- name: Install GitHub Copilot CLI
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh" 1.0.20
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh" 1.0.21
env:
GH_HOST: github.com
- name: Install AWF binary
@ -1020,6 +1025,7 @@ jobs:
run: |
set -o pipefail
touch /tmp/gh-aw/agent-step-summary.md
(umask 177 && touch /tmp/gh-aw/threat-detection/detection.log)
# shellcheck disable=SC1003
sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,telemetry.enterprise.githubcopilot.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.18 --skip-pull --enable-api-proxy \
-- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-all-tools --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log
@ -1029,10 +1035,9 @@ jobs:
COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }}
GH_AW_PHASE: detection
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
GH_AW_VERSION: v0.67.4
GH_AW_VERSION: v0.68.1
GITHUB_API_URL: ${{ github.api_url }}
GITHUB_AW: true
GITHUB_COPILOT_INTEGRATION_ID: agentic-workflows
GITHUB_HEAD_REF: ${{ github.head_ref }}
GITHUB_REF_NAME: ${{ github.ref_name }}
GITHUB_SERVER_URL: ${{ github.server_url }}
@ -1053,13 +1058,13 @@ jobs:
- name: Parse and conclude threat detection
id: detection_conclusion
if: always()
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }}
with:
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_threat_detection_results.cjs');
await main();
@ -1092,7 +1097,7 @@ jobs:
steps:
- name: Setup Scripts
id: setup
uses: github/gh-aw-actions/setup@v0.67.4
uses: github/gh-aw-actions/setup@v0.68.1
with:
destination: ${{ runner.temp }}/gh-aw/actions
job-name: ${{ github.job }}
@ -1122,7 +1127,7 @@ jobs:
echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV"
- name: Process Safe Outputs
id: process_safe_outputs
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
env:
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
@ -1133,7 +1138,7 @@ jobs:
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
script: |
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
setupGlobals(core, github, context, exec, io);
setupGlobals(core, github, context, exec, io, getOctokit);
const { main } = require('${{ runner.temp }}/gh-aw/actions/safe_output_handler_manager.cjs');
await main();
- name: Upload Safe Outputs Items

View file

@ -25,7 +25,7 @@ safe-outputs:
noop:
report-as-issue: false
timeout-minutes: 90
timeout-minutes: 120
steps:
- name: Checkout c3 branch
@ -62,19 +62,19 @@ ninja --version
python3 --version
```
## Phase 2: Build Z3 in Debug Mode with Seq Tracing
## Phase 2: Build Z3 in Release Mode
Build Z3 with debug symbols so that tracing and timing data are meaningful.
Build Z3 in Release mode for accurate benchmark performance numbers and lower memory usage. Running `ninja` in the background with `&` is not allowed — concurrent C++ compilation and LLM inference can exhaust available RAM and kill the agent process.
```bash
mkdir -p /tmp/z3-build
cd /tmp/z3-build
cmake "$GITHUB_WORKSPACE" \
-G Ninja \
-DCMAKE_BUILD_TYPE=Debug \
-DCMAKE_BUILD_TYPE=Release \
-DZ3_BUILD_TEST_EXECUTABLES=OFF \
2>&1 | tee /tmp/z3-cmake.log
ninja z3 2>&1 | tee /tmp/z3-build.log
ninja -j2 z3 2>&1 | tee /tmp/z3-build.log
```
Verify the binary was built:
@ -133,12 +133,12 @@ echo "Running benchmarks on $SAMPLE files"
## Phase 4: Run Benchmarks — seq vs nseq
Run each benchmark with both solvers. Use a per-file timeout of 10 seconds. Set Z3's internal timeout to 9 seconds so it exits cleanly before the shell timeout fires.
Run each benchmark with both solvers. Use a per-file timeout of 5 seconds. Set Z3's internal timeout to 4 seconds so it exits cleanly before the shell timeout fires.
```bash
Z3=/tmp/z3-build/z3
TIMEOUT_SEC=10
Z3_TIMEOUT_SEC=9
TIMEOUT_SEC=5
Z3_TIMEOUT_SEC=4
RESULTS=/tmp/benchmark-results.csv
echo "file,seq_result,seq_time_ms,nseq_result,nseq_time_ms" > "$RESULTS"
@ -208,10 +208,7 @@ done
## Phase 6: Analyze Results
Compute summary statistics from the CSV:
```bash
Save the analysis script to a file and run it:
Compute summary statistics from the CSV. Save the analysis script to a file and run it:
```bash
cat > /tmp/analyze_benchmark.py << 'PYEOF'
@ -397,7 +394,8 @@ These are benchmarks where `nseq` shows a performance advantage.
## Important Notes
- **DO NOT** modify any source files or create pull requests.
- **DO NOT** run benchmarks for longer than 80 minutes total (leave buffer for posting).
- **DO NOT** run `ninja` or any build command in the background with `&` — concurrent C++ compilation and LLM inference can exhaust available RAM and kill the agent process. Always wait for build commands to complete before proceeding.
- **DO NOT** run benchmarks for longer than 100 minutes total (leave buffer for posting).
- **DO** always report the commit SHA so results can be correlated with specific code versions.
- **DO** close older QF_S Benchmark discussions automatically (configured via `close-older-discussions: true`).
- **DO** highlight disagreements prominently — these are potential correctness bugs.