mirror of
https://github.com/Z3Prover/z3
synced 2026-04-15 00:35:11 +00:00
fix qf-s-benchmark: Release mode build, fix broken code fence, reduce timeouts
Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/8eaace11-bbc1-49d9-993d-67290f5b1841 Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
parent
b22d44867b
commit
6ca1bab43d
2 changed files with 88 additions and 85 deletions
149
.github/workflows/qf-s-benchmark.lock.yml
generated
vendored
149
.github/workflows/qf-s-benchmark.lock.yml
generated
vendored
|
|
@ -1,5 +1,5 @@
|
|||
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"e5e5c332eb206c2bde9e7e5cb0bb1babe7b1c50e0437a00b4093ddb8b5ab80cf","compiler_version":"v0.67.4","strict":true,"agent_id":"copilot"}
|
||||
# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"ed597411d8f924073f98dfc5c65a23a2325f34cd","version":"v8"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"v0.67.4","version":"v0.67.4"}]}
|
||||
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"d49708d2a52ea8ed02a11700663bca11b8d1a61414630351523fef60d589ae56","compiler_version":"v0.68.1","strict":true,"agent_id":"copilot"}
|
||||
# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"3a2844b7e9c422d3c10d287c895573f7108da1b3","version":"v9"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"v0.68.1","version":"v0.68.1"}]}
|
||||
# ___ _ _
|
||||
# / _ \ | | (_)
|
||||
# | |_| | __ _ ___ _ __ | |_ _ ___
|
||||
|
|
@ -14,7 +14,7 @@
|
|||
# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \
|
||||
# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/
|
||||
#
|
||||
# This file was automatically generated by gh-aw (v0.67.4). DO NOT EDIT.
|
||||
# This file was automatically generated by gh-aw (v0.68.1). DO NOT EDIT.
|
||||
#
|
||||
# To update this file, edit the corresponding .md file and run:
|
||||
# gh aw compile
|
||||
|
|
@ -33,9 +33,9 @@
|
|||
# Custom actions used:
|
||||
# - actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
# - actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
# - actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
# - actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
# - actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7
|
||||
# - github/gh-aw-actions/setup@v0.67.4
|
||||
# - github/gh-aw-actions/setup@v0.68.1
|
||||
|
||||
name: "QF_S String Solver Benchmark"
|
||||
"on":
|
||||
|
|
@ -69,10 +69,11 @@ jobs:
|
|||
model: ${{ steps.generate_aw_info.outputs.model }}
|
||||
secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }}
|
||||
setup-trace-id: ${{ steps.setup.outputs.trace-id }}
|
||||
stale_lock_file_failed: ${{ steps.check-lock-file.outputs.stale_lock_file_failed == 'true' }}
|
||||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@v0.67.4
|
||||
uses: github/gh-aw-actions/setup@v0.68.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
|
|
@ -82,9 +83,9 @@ jobs:
|
|||
GH_AW_INFO_ENGINE_ID: "copilot"
|
||||
GH_AW_INFO_ENGINE_NAME: "GitHub Copilot CLI"
|
||||
GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || 'auto' }}
|
||||
GH_AW_INFO_VERSION: "1.0.20"
|
||||
GH_AW_INFO_AGENT_VERSION: "1.0.20"
|
||||
GH_AW_INFO_CLI_VERSION: "v0.67.4"
|
||||
GH_AW_INFO_VERSION: "1.0.21"
|
||||
GH_AW_INFO_AGENT_VERSION: "1.0.21"
|
||||
GH_AW_INFO_CLI_VERSION: "v0.68.1"
|
||||
GH_AW_INFO_WORKFLOW_NAME: "QF_S String Solver Benchmark"
|
||||
GH_AW_INFO_EXPERIMENTAL: "false"
|
||||
GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true"
|
||||
|
|
@ -95,11 +96,11 @@ jobs:
|
|||
GH_AW_INFO_AWMG_VERSION: ""
|
||||
GH_AW_INFO_FIREWALL_TYPE: "squid"
|
||||
GH_AW_COMPILED_STRICT: "true"
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_aw_info.cjs');
|
||||
await main(core, context);
|
||||
- name: Validate COPILOT_GITHUB_TOKEN secret
|
||||
|
|
@ -117,24 +118,25 @@ jobs:
|
|||
sparse-checkout-cone-mode: true
|
||||
fetch-depth: 1
|
||||
- name: Check workflow lock file
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
id: check-lock-file
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_WORKFLOW_FILE: "qf-s-benchmark.lock.yml"
|
||||
GH_AW_CONTEXT_WORKFLOW_REF: "${{ github.workflow_ref }}"
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs');
|
||||
await main();
|
||||
- name: Check compile-agentic version
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_COMPILED_VERSION: "v0.67.4"
|
||||
GH_AW_COMPILED_VERSION: "v0.68.1"
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/check_version_updates.cjs');
|
||||
await main();
|
||||
- name: Create prompt with built-in context
|
||||
|
|
@ -154,14 +156,14 @@ jobs:
|
|||
run: |
|
||||
bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh"
|
||||
{
|
||||
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
|
||||
cat << 'GH_AW_PROMPT_78ca018fd0ee9e53_EOF'
|
||||
<system>
|
||||
GH_AW_PROMPT_c25676ba2ab40d85_EOF
|
||||
GH_AW_PROMPT_78ca018fd0ee9e53_EOF
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
|
||||
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
|
||||
cat << 'GH_AW_PROMPT_78ca018fd0ee9e53_EOF'
|
||||
<safe-output-tools>
|
||||
Tools: create_discussion, missing_tool, missing_data, noop
|
||||
</safe-output-tools>
|
||||
|
|
@ -193,15 +195,15 @@ jobs:
|
|||
{{/if}}
|
||||
</github-context>
|
||||
|
||||
GH_AW_PROMPT_c25676ba2ab40d85_EOF
|
||||
GH_AW_PROMPT_78ca018fd0ee9e53_EOF
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
|
||||
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
|
||||
cat << 'GH_AW_PROMPT_78ca018fd0ee9e53_EOF'
|
||||
</system>
|
||||
{{#runtime-import .github/workflows/qf-s-benchmark.md}}
|
||||
GH_AW_PROMPT_c25676ba2ab40d85_EOF
|
||||
GH_AW_PROMPT_78ca018fd0ee9e53_EOF
|
||||
} > "$GH_AW_PROMPT"
|
||||
- name: Interpolate variables and render templates
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
|
||||
GH_AW_GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
|
|
@ -210,11 +212,11 @@ jobs:
|
|||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs');
|
||||
await main();
|
||||
- name: Substitute placeholders
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
|
||||
GH_AW_GITHUB_ACTOR: ${{ github.actor }}
|
||||
|
|
@ -229,7 +231,7 @@ jobs:
|
|||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
|
||||
const substitutePlaceholders = require('${{ runner.temp }}/gh-aw/actions/substitute_placeholders.cjs');
|
||||
|
||||
|
|
@ -295,7 +297,7 @@ jobs:
|
|||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@v0.67.4
|
||||
uses: github/gh-aw-actions/setup@v0.68.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
|
|
@ -336,25 +338,25 @@ jobs:
|
|||
id: checkout-pr
|
||||
if: |
|
||||
github.event.pull_request || github.event.issue.pull_request
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/checkout_pr_branch.cjs');
|
||||
await main();
|
||||
- name: Install GitHub Copilot CLI
|
||||
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh" 1.0.20
|
||||
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh" 1.0.21
|
||||
env:
|
||||
GH_HOST: github.com
|
||||
- name: Install AWF binary
|
||||
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh" v0.25.18
|
||||
- name: Determine automatic lockdown mode for GitHub MCP Server
|
||||
id: determine-automatic-lockdown
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }}
|
||||
GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }}
|
||||
|
|
@ -369,9 +371,9 @@ jobs:
|
|||
mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs"
|
||||
mkdir -p /tmp/gh-aw/safeoutputs
|
||||
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
|
||||
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF'
|
||||
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_7b004ca92a18e849_EOF'
|
||||
{"create_discussion":{"category":"agentic workflows","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[QF_S Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"},"report_incomplete":{}}
|
||||
GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF
|
||||
GH_AW_SAFE_OUTPUTS_CONFIG_7b004ca92a18e849_EOF
|
||||
- name: Write Safe Outputs Tools
|
||||
env:
|
||||
GH_AW_TOOLS_META_JSON: |
|
||||
|
|
@ -484,11 +486,11 @@ jobs:
|
|||
}
|
||||
}
|
||||
}
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_safe_outputs_tools.cjs');
|
||||
await main();
|
||||
- name: Generate Safe Outputs MCP Server Config
|
||||
|
|
@ -559,7 +561,7 @@ jobs:
|
|||
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17'
|
||||
|
||||
mkdir -p /home/runner/.copilot
|
||||
cat << GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh"
|
||||
cat << GH_AW_MCP_CONFIG_dd4b678d464df877_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh"
|
||||
{
|
||||
"mcpServers": {
|
||||
"github": {
|
||||
|
|
@ -600,7 +602,7 @@ jobs:
|
|||
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
|
||||
}
|
||||
}
|
||||
GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF
|
||||
GH_AW_MCP_CONFIG_dd4b678d464df877_EOF
|
||||
- name: Download activation artifact
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
with:
|
||||
|
|
@ -612,10 +614,11 @@ jobs:
|
|||
- name: Execute GitHub Copilot CLI
|
||||
id: agentic_execution
|
||||
# Copilot CLI tool arguments (sorted):
|
||||
timeout-minutes: 90
|
||||
timeout-minutes: 120
|
||||
run: |
|
||||
set -o pipefail
|
||||
touch /tmp/gh-aw/agent-step-summary.md
|
||||
(umask 177 && touch /tmp/gh-aw/agent-stdio.log)
|
||||
# shellcheck disable=SC1003
|
||||
sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --exclude-env GITHUB_MCP_SERVER_TOKEN --exclude-env MCP_GATEWAY_API_KEY --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.18 --skip-pull --enable-api-proxy \
|
||||
-- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-all-tools --allow-all-paths --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log
|
||||
|
|
@ -627,10 +630,9 @@ jobs:
|
|||
GH_AW_PHASE: agent
|
||||
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
|
||||
GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }}
|
||||
GH_AW_VERSION: v0.67.4
|
||||
GH_AW_VERSION: v0.68.1
|
||||
GITHUB_API_URL: ${{ github.api_url }}
|
||||
GITHUB_AW: true
|
||||
GITHUB_COPILOT_INTEGRATION_ID: agentic-workflows
|
||||
GITHUB_HEAD_REF: ${{ github.head_ref }}
|
||||
GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
GITHUB_REF_NAME: ${{ github.ref_name }}
|
||||
|
|
@ -675,11 +677,11 @@ jobs:
|
|||
bash "${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh" "$GATEWAY_PID"
|
||||
- name: Redact secrets in logs
|
||||
if: always()
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/redact_secrets.cjs');
|
||||
await main();
|
||||
env:
|
||||
|
|
@ -701,7 +703,7 @@ jobs:
|
|||
- name: Ingest agent output
|
||||
id: collect_output
|
||||
if: always()
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_SAFE_OUTPUTS: ${{ steps.set-runtime-paths.outputs.GH_AW_SAFE_OUTPUTS }}
|
||||
GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
|
||||
|
|
@ -710,28 +712,28 @@ jobs:
|
|||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/collect_ndjson_output.cjs');
|
||||
await main();
|
||||
- name: Parse agent logs for step summary
|
||||
if: always()
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_copilot_log.cjs');
|
||||
await main();
|
||||
- name: Parse MCP Gateway logs for step summary
|
||||
if: always()
|
||||
id: parse-mcp-gateway
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_mcp_gateway_log.cjs');
|
||||
await main();
|
||||
- name: Print firewall logs
|
||||
|
|
@ -752,11 +754,11 @@ jobs:
|
|||
- name: Parse token usage for step summary
|
||||
if: always()
|
||||
continue-on-error: true
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_token_usage.cjs');
|
||||
await main();
|
||||
- name: Write agent output placeholder if missing
|
||||
|
|
@ -802,7 +804,9 @@ jobs:
|
|||
- agent
|
||||
- detection
|
||||
- safe_outputs
|
||||
if: always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true')
|
||||
if: >
|
||||
always() && (needs.agent.result != 'skipped' || needs.activation.outputs.lockdown_check_failed == 'true' ||
|
||||
needs.activation.outputs.stale_lock_file_failed == 'true')
|
||||
runs-on: ubuntu-slim
|
||||
permissions:
|
||||
contents: read
|
||||
|
|
@ -819,7 +823,7 @@ jobs:
|
|||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@v0.67.4
|
||||
uses: github/gh-aw-actions/setup@v0.68.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
|
|
@ -840,7 +844,7 @@ jobs:
|
|||
echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_OUTPUT"
|
||||
- name: Process No-Op Messages
|
||||
id: noop
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_NOOP_MAX: "1"
|
||||
|
|
@ -852,12 +856,12 @@ jobs:
|
|||
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs');
|
||||
await main();
|
||||
- name: Record missing tool
|
||||
id: missing_tool
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_MISSING_TOOL_CREATE_ISSUE: "true"
|
||||
|
|
@ -867,12 +871,12 @@ jobs:
|
|||
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/missing_tool.cjs');
|
||||
await main();
|
||||
- name: Record incomplete
|
||||
id: report_incomplete
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_REPORT_INCOMPLETE_CREATE_ISSUE: "true"
|
||||
|
|
@ -881,13 +885,13 @@ jobs:
|
|||
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/report_incomplete_handler.cjs');
|
||||
await main();
|
||||
- name: Handle agent failure
|
||||
id: handle_agent_failure
|
||||
if: always()
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_WORKFLOW_NAME: "QF_S String Solver Benchmark"
|
||||
|
|
@ -901,14 +905,15 @@ jobs:
|
|||
GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }}
|
||||
GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }}
|
||||
GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }}
|
||||
GH_AW_STALE_LOCK_FILE_FAILED: ${{ needs.activation.outputs.stale_lock_file_failed }}
|
||||
GH_AW_GROUP_REPORTS: "false"
|
||||
GH_AW_FAILURE_REPORT_AS_ISSUE: "true"
|
||||
GH_AW_TIMEOUT_MINUTES: "90"
|
||||
GH_AW_TIMEOUT_MINUTES: "120"
|
||||
with:
|
||||
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_agent_failure.cjs');
|
||||
await main();
|
||||
|
||||
|
|
@ -927,7 +932,7 @@ jobs:
|
|||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@v0.67.4
|
||||
uses: github/gh-aw-actions/setup@v0.68.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
|
|
@ -990,7 +995,7 @@ jobs:
|
|||
ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true
|
||||
- name: Setup threat detection
|
||||
if: always() && steps.detection_guard.outputs.run_detection == 'true'
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
WORKFLOW_NAME: "QF_S String Solver Benchmark"
|
||||
WORKFLOW_DESCRIPTION: "Benchmark Z3 seq vs nseq string solvers on QF_S test suite from the c3 branch and post results as a GitHub discussion"
|
||||
|
|
@ -998,7 +1003,7 @@ jobs:
|
|||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/setup_threat_detection.cjs');
|
||||
await main();
|
||||
- name: Ensure threat-detection directory and log
|
||||
|
|
@ -1007,7 +1012,7 @@ jobs:
|
|||
mkdir -p /tmp/gh-aw/threat-detection
|
||||
touch /tmp/gh-aw/threat-detection/detection.log
|
||||
- name: Install GitHub Copilot CLI
|
||||
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh" 1.0.20
|
||||
run: bash "${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh" 1.0.21
|
||||
env:
|
||||
GH_HOST: github.com
|
||||
- name: Install AWF binary
|
||||
|
|
@ -1020,6 +1025,7 @@ jobs:
|
|||
run: |
|
||||
set -o pipefail
|
||||
touch /tmp/gh-aw/agent-step-summary.md
|
||||
(umask 177 && touch /tmp/gh-aw/threat-detection/detection.log)
|
||||
# shellcheck disable=SC1003
|
||||
sudo -E awf --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --env-all --exclude-env COPILOT_GITHUB_TOKEN --allow-domains api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,telemetry.enterprise.githubcopilot.com --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --audit-dir /tmp/gh-aw/sandbox/firewall/audit --enable-host-access --image-tag 0.25.18 --skip-pull --enable-api-proxy \
|
||||
-- /bin/bash -c 'node ${RUNNER_TEMP}/gh-aw/actions/copilot_driver.cjs /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --disable-builtin-mcps --allow-all-tools --add-dir "${GITHUB_WORKSPACE}" --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log
|
||||
|
|
@ -1029,10 +1035,9 @@ jobs:
|
|||
COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }}
|
||||
GH_AW_PHASE: detection
|
||||
GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
|
||||
GH_AW_VERSION: v0.67.4
|
||||
GH_AW_VERSION: v0.68.1
|
||||
GITHUB_API_URL: ${{ github.api_url }}
|
||||
GITHUB_AW: true
|
||||
GITHUB_COPILOT_INTEGRATION_ID: agentic-workflows
|
||||
GITHUB_HEAD_REF: ${{ github.head_ref }}
|
||||
GITHUB_REF_NAME: ${{ github.ref_name }}
|
||||
GITHUB_SERVER_URL: ${{ github.server_url }}
|
||||
|
|
@ -1053,13 +1058,13 @@ jobs:
|
|||
- name: Parse and conclude threat detection
|
||||
id: detection_conclusion
|
||||
if: always()
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }}
|
||||
with:
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_threat_detection_results.cjs');
|
||||
await main();
|
||||
|
||||
|
|
@ -1092,7 +1097,7 @@ jobs:
|
|||
steps:
|
||||
- name: Setup Scripts
|
||||
id: setup
|
||||
uses: github/gh-aw-actions/setup@v0.67.4
|
||||
uses: github/gh-aw-actions/setup@v0.68.1
|
||||
with:
|
||||
destination: ${{ runner.temp }}/gh-aw/actions
|
||||
job-name: ${{ github.job }}
|
||||
|
|
@ -1122,7 +1127,7 @@ jobs:
|
|||
echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV"
|
||||
- name: Process Safe Outputs
|
||||
id: process_safe_outputs
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
uses: actions/github-script@3a2844b7e9c422d3c10d287c895573f7108da1b3 # v9
|
||||
env:
|
||||
GH_AW_AGENT_OUTPUT: ${{ steps.setup-agent-output-env.outputs.GH_AW_AGENT_OUTPUT }}
|
||||
GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
|
||||
|
|
@ -1133,7 +1138,7 @@ jobs:
|
|||
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
|
||||
setupGlobals(core, github, context, exec, io);
|
||||
setupGlobals(core, github, context, exec, io, getOctokit);
|
||||
const { main } = require('${{ runner.temp }}/gh-aw/actions/safe_output_handler_manager.cjs');
|
||||
await main();
|
||||
- name: Upload Safe Outputs Items
|
||||
|
|
|
|||
24
.github/workflows/qf-s-benchmark.md
vendored
24
.github/workflows/qf-s-benchmark.md
vendored
|
|
@ -25,7 +25,7 @@ safe-outputs:
|
|||
noop:
|
||||
report-as-issue: false
|
||||
|
||||
timeout-minutes: 90
|
||||
timeout-minutes: 120
|
||||
|
||||
steps:
|
||||
- name: Checkout c3 branch
|
||||
|
|
@ -62,19 +62,19 @@ ninja --version
|
|||
python3 --version
|
||||
```
|
||||
|
||||
## Phase 2: Build Z3 in Debug Mode with Seq Tracing
|
||||
## Phase 2: Build Z3 in Release Mode
|
||||
|
||||
Build Z3 with debug symbols so that tracing and timing data are meaningful.
|
||||
Build Z3 in Release mode for accurate benchmark performance numbers and lower memory usage. Running `ninja` in the background with `&` is not allowed — concurrent C++ compilation and LLM inference can exhaust available RAM and kill the agent process.
|
||||
|
||||
```bash
|
||||
mkdir -p /tmp/z3-build
|
||||
cd /tmp/z3-build
|
||||
cmake "$GITHUB_WORKSPACE" \
|
||||
-G Ninja \
|
||||
-DCMAKE_BUILD_TYPE=Debug \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DZ3_BUILD_TEST_EXECUTABLES=OFF \
|
||||
2>&1 | tee /tmp/z3-cmake.log
|
||||
ninja z3 2>&1 | tee /tmp/z3-build.log
|
||||
ninja -j2 z3 2>&1 | tee /tmp/z3-build.log
|
||||
```
|
||||
|
||||
Verify the binary was built:
|
||||
|
|
@ -133,12 +133,12 @@ echo "Running benchmarks on $SAMPLE files"
|
|||
|
||||
## Phase 4: Run Benchmarks — seq vs nseq
|
||||
|
||||
Run each benchmark with both solvers. Use a per-file timeout of 10 seconds. Set Z3's internal timeout to 9 seconds so it exits cleanly before the shell timeout fires.
|
||||
Run each benchmark with both solvers. Use a per-file timeout of 5 seconds. Set Z3's internal timeout to 4 seconds so it exits cleanly before the shell timeout fires.
|
||||
|
||||
```bash
|
||||
Z3=/tmp/z3-build/z3
|
||||
TIMEOUT_SEC=10
|
||||
Z3_TIMEOUT_SEC=9
|
||||
TIMEOUT_SEC=5
|
||||
Z3_TIMEOUT_SEC=4
|
||||
RESULTS=/tmp/benchmark-results.csv
|
||||
|
||||
echo "file,seq_result,seq_time_ms,nseq_result,nseq_time_ms" > "$RESULTS"
|
||||
|
|
@ -208,10 +208,7 @@ done
|
|||
|
||||
## Phase 6: Analyze Results
|
||||
|
||||
Compute summary statistics from the CSV:
|
||||
|
||||
```bash
|
||||
Save the analysis script to a file and run it:
|
||||
Compute summary statistics from the CSV. Save the analysis script to a file and run it:
|
||||
|
||||
```bash
|
||||
cat > /tmp/analyze_benchmark.py << 'PYEOF'
|
||||
|
|
@ -397,7 +394,8 @@ These are benchmarks where `nseq` shows a performance advantage.
|
|||
## Important Notes
|
||||
|
||||
- **DO NOT** modify any source files or create pull requests.
|
||||
- **DO NOT** run benchmarks for longer than 80 minutes total (leave buffer for posting).
|
||||
- **DO NOT** run `ninja` or any build command in the background with `&` — concurrent C++ compilation and LLM inference can exhaust available RAM and kill the agent process. Always wait for build commands to complete before proceeding.
|
||||
- **DO NOT** run benchmarks for longer than 100 minutes total (leave buffer for posting).
|
||||
- **DO** always report the commit SHA so results can be correlated with specific code versions.
|
||||
- **DO** close older QF_S Benchmark discussions automatically (configured via `close-older-discussions: true`).
|
||||
- **DO** highlight disagreements prominently — these are potential correctness bugs.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue