From fafe159447709b7810756c2defeed6f8fe5f401d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 21:21:25 +0000 Subject: [PATCH] Update qf-s-benchmark title prefix and note to QF_S Benchmark Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/c36bada5-c222-4b97-99c4-08392955b32d Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- .github/workflows/qf-s-benchmark.lock.yml | 28 +++++++++++------------ .github/workflows/qf-s-benchmark.md | 4 ++-- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/qf-s-benchmark.lock.yml b/.github/workflows/qf-s-benchmark.lock.yml index 8bf532412..8470aa0a4 100644 --- a/.github/workflows/qf-s-benchmark.lock.yml +++ b/.github/workflows/qf-s-benchmark.lock.yml @@ -1,4 +1,4 @@ -# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"3e0c4769aa2ace55f3d5fad7d9e2d252cce85e8d983cfd11b363c2766ed651ba","compiler_version":"v0.67.4","strict":true,"agent_id":"copilot"} +# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"e5e5c332eb206c2bde9e7e5cb0bb1babe7b1c50e0437a00b4093ddb8b5ab80cf","compiler_version":"v0.67.4","strict":true,"agent_id":"copilot"} # gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"ed597411d8f924073f98dfc5c65a23a2325f34cd","version":"v8"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"v0.67.4","version":"v0.67.4"}]} # ___ _ _ # / _ \ | | (_) @@ -154,14 +154,14 @@ jobs: run: | bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh" { - cat << 'GH_AW_PROMPT_1f1e9add9d73f7ee_EOF' + cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF' - GH_AW_PROMPT_1f1e9add9d73f7ee_EOF + GH_AW_PROMPT_c25676ba2ab40d85_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md" cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md" cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md" cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md" - cat << 'GH_AW_PROMPT_1f1e9add9d73f7ee_EOF' + cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF' Tools: create_discussion, missing_tool, missing_data, noop @@ -193,12 +193,12 @@ jobs: {{/if}} - GH_AW_PROMPT_1f1e9add9d73f7ee_EOF + GH_AW_PROMPT_c25676ba2ab40d85_EOF cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md" - cat << 'GH_AW_PROMPT_1f1e9add9d73f7ee_EOF' + cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF' {{#runtime-import .github/workflows/qf-s-benchmark.md}} - GH_AW_PROMPT_1f1e9add9d73f7ee_EOF + GH_AW_PROMPT_c25676ba2ab40d85_EOF } > "$GH_AW_PROMPT" - name: Interpolate variables and render templates uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8 @@ -369,15 +369,15 @@ jobs: mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs" mkdir -p /tmp/gh-aw/safeoutputs mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs - cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_cf3aae02d12a091b_EOF' - {"create_discussion":{"category":"agentic workflows","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[ZIPT Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"},"report_incomplete":{}} - GH_AW_SAFE_OUTPUTS_CONFIG_cf3aae02d12a091b_EOF + cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF' + {"create_discussion":{"category":"agentic workflows","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[QF_S Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"},"report_incomplete":{}} + GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF - name: Write Safe Outputs Tools env: GH_AW_TOOLS_META_JSON: | { "description_suffixes": { - "create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[ZIPT Benchmark] \". Discussions will be created in category \"agentic workflows\"." + "create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[QF_S Benchmark] \". Discussions will be created in category \"agentic workflows\"." }, "repo_params": {}, "dynamic_tools": [] @@ -559,7 +559,7 @@ jobs: export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17' mkdir -p /home/runner/.copilot - cat << GH_AW_MCP_CONFIG_e63e2ef76683cb23_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh" + cat << GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh" { "mcpServers": { "github": { @@ -600,7 +600,7 @@ jobs: "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}" } } - GH_AW_MCP_CONFIG_e63e2ef76683cb23_EOF + GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF - name: Download activation artifact uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: @@ -1128,7 +1128,7 @@ jobs: GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" GITHUB_SERVER_URL: ${{ github.server_url }} GITHUB_API_URL: ${{ github.api_url }} - GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"agentic workflows\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[ZIPT Benchmark] \"},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"},\"report_incomplete\":{}}" + GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"agentic workflows\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[QF_S Benchmark] \"},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"},\"report_incomplete\":{}}" with: github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} script: | diff --git a/.github/workflows/qf-s-benchmark.md b/.github/workflows/qf-s-benchmark.md index 9f696769e..fcd3050a6 100644 --- a/.github/workflows/qf-s-benchmark.md +++ b/.github/workflows/qf-s-benchmark.md @@ -17,7 +17,7 @@ tools: safe-outputs: create-discussion: - title-prefix: "[ZIPT Benchmark] " + title-prefix: "[QF_S Benchmark] " category: "Agentic Workflows" close-older-discussions: true missing-tool: @@ -399,5 +399,5 @@ These are benchmarks where `nseq` shows a performance advantage. - **DO NOT** modify any source files or create pull requests. - **DO NOT** run benchmarks for longer than 80 minutes total (leave buffer for posting). - **DO** always report the commit SHA so results can be correlated with specific code versions. -- **DO** close older ZIPT Benchmark discussions automatically (configured via `close-older-discussions: true`). +- **DO** close older QF_S Benchmark discussions automatically (configured via `close-older-discussions: true`). - **DO** highlight disagreements prominently — these are potential correctness bugs.