3
0
Fork 0
mirror of https://github.com/Z3Prover/z3 synced 2026-04-15 08:44:10 +00:00

Update qf-s-benchmark title prefix and note to QF_S Benchmark

Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/c36bada5-c222-4b97-99c4-08392955b32d

Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot] 2026-04-10 21:21:25 +00:00 committed by GitHub
parent 11e02bda99
commit fafe159447
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 16 additions and 16 deletions

View file

@ -1,4 +1,4 @@
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"3e0c4769aa2ace55f3d5fad7d9e2d252cce85e8d983cfd11b363c2766ed651ba","compiler_version":"v0.67.4","strict":true,"agent_id":"copilot"}
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"e5e5c332eb206c2bde9e7e5cb0bb1babe7b1c50e0437a00b4093ddb8b5ab80cf","compiler_version":"v0.67.4","strict":true,"agent_id":"copilot"}
# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"ed597411d8f924073f98dfc5c65a23a2325f34cd","version":"v8"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"v0.67.4","version":"v0.67.4"}]}
# ___ _ _
# / _ \ | | (_)
@ -154,14 +154,14 @@ jobs:
run: |
bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh"
{
cat << 'GH_AW_PROMPT_1f1e9add9d73f7ee_EOF'
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
<system>
GH_AW_PROMPT_1f1e9add9d73f7ee_EOF
GH_AW_PROMPT_c25676ba2ab40d85_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
cat << 'GH_AW_PROMPT_1f1e9add9d73f7ee_EOF'
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
<safe-output-tools>
Tools: create_discussion, missing_tool, missing_data, noop
</safe-output-tools>
@ -193,12 +193,12 @@ jobs:
{{/if}}
</github-context>
GH_AW_PROMPT_1f1e9add9d73f7ee_EOF
GH_AW_PROMPT_c25676ba2ab40d85_EOF
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
cat << 'GH_AW_PROMPT_1f1e9add9d73f7ee_EOF'
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
</system>
{{#runtime-import .github/workflows/qf-s-benchmark.md}}
GH_AW_PROMPT_1f1e9add9d73f7ee_EOF
GH_AW_PROMPT_c25676ba2ab40d85_EOF
} > "$GH_AW_PROMPT"
- name: Interpolate variables and render templates
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
@ -369,15 +369,15 @@ jobs:
mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs"
mkdir -p /tmp/gh-aw/safeoutputs
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_cf3aae02d12a091b_EOF'
{"create_discussion":{"category":"agentic workflows","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[ZIPT Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"},"report_incomplete":{}}
GH_AW_SAFE_OUTPUTS_CONFIG_cf3aae02d12a091b_EOF
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF'
{"create_discussion":{"category":"agentic workflows","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[QF_S Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"},"report_incomplete":{}}
GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF
- name: Write Safe Outputs Tools
env:
GH_AW_TOOLS_META_JSON: |
{
"description_suffixes": {
"create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[ZIPT Benchmark] \". Discussions will be created in category \"agentic workflows\"."
"create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[QF_S Benchmark] \". Discussions will be created in category \"agentic workflows\"."
},
"repo_params": {},
"dynamic_tools": []
@ -559,7 +559,7 @@ jobs:
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17'
mkdir -p /home/runner/.copilot
cat << GH_AW_MCP_CONFIG_e63e2ef76683cb23_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh"
cat << GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh"
{
"mcpServers": {
"github": {
@ -600,7 +600,7 @@ jobs:
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
}
}
GH_AW_MCP_CONFIG_e63e2ef76683cb23_EOF
GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF
- name: Download activation artifact
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
with:
@ -1128,7 +1128,7 @@ jobs:
GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
GITHUB_SERVER_URL: ${{ github.server_url }}
GITHUB_API_URL: ${{ github.api_url }}
GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"agentic workflows\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[ZIPT Benchmark] \"},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"},\"report_incomplete\":{}}"
GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"agentic workflows\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[QF_S Benchmark] \"},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"},\"report_incomplete\":{}}"
with:
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
script: |

View file

@ -17,7 +17,7 @@ tools:
safe-outputs:
create-discussion:
title-prefix: "[ZIPT Benchmark] "
title-prefix: "[QF_S Benchmark] "
category: "Agentic Workflows"
close-older-discussions: true
missing-tool:
@ -399,5 +399,5 @@ These are benchmarks where `nseq` shows a performance advantage.
- **DO NOT** modify any source files or create pull requests.
- **DO NOT** run benchmarks for longer than 80 minutes total (leave buffer for posting).
- **DO** always report the commit SHA so results can be correlated with specific code versions.
- **DO** close older ZIPT Benchmark discussions automatically (configured via `close-older-discussions: true`).
- **DO** close older QF_S Benchmark discussions automatically (configured via `close-older-discussions: true`).
- **DO** highlight disagreements prominently — these are potential correctness bugs.