mirror of
https://github.com/Z3Prover/z3
synced 2026-04-15 08:44:10 +00:00
Update qf-s-benchmark title prefix and note to QF_S Benchmark
Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/c36bada5-c222-4b97-99c4-08392955b32d Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
This commit is contained in:
parent
11e02bda99
commit
fafe159447
2 changed files with 16 additions and 16 deletions
28
.github/workflows/qf-s-benchmark.lock.yml
generated
vendored
28
.github/workflows/qf-s-benchmark.lock.yml
generated
vendored
|
|
@ -1,4 +1,4 @@
|
|||
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"3e0c4769aa2ace55f3d5fad7d9e2d252cce85e8d983cfd11b363c2766ed651ba","compiler_version":"v0.67.4","strict":true,"agent_id":"copilot"}
|
||||
# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"e5e5c332eb206c2bde9e7e5cb0bb1babe7b1c50e0437a00b4093ddb8b5ab80cf","compiler_version":"v0.67.4","strict":true,"agent_id":"copilot"}
|
||||
# gh-aw-manifest: {"version":1,"secrets":["COPILOT_GITHUB_TOKEN","GH_AW_GITHUB_MCP_SERVER_TOKEN","GH_AW_GITHUB_TOKEN","GITHUB_TOKEN"],"actions":[{"repo":"actions/checkout","sha":"de0fac2e4500dabe0009e67214ff5f5447ce83dd","version":"v6.0.2"},{"repo":"actions/download-artifact","sha":"3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c","version":"v8.0.1"},{"repo":"actions/github-script","sha":"ed597411d8f924073f98dfc5c65a23a2325f34cd","version":"v8"},{"repo":"actions/upload-artifact","sha":"bbbca2ddaa5d8feaa63e36b76fdaad77386f024f","version":"v7"},{"repo":"github/gh-aw-actions/setup","sha":"v0.67.4","version":"v0.67.4"}]}
|
||||
# ___ _ _
|
||||
# / _ \ | | (_)
|
||||
|
|
@ -154,14 +154,14 @@ jobs:
|
|||
run: |
|
||||
bash "${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh"
|
||||
{
|
||||
cat << 'GH_AW_PROMPT_1f1e9add9d73f7ee_EOF'
|
||||
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
|
||||
<system>
|
||||
GH_AW_PROMPT_1f1e9add9d73f7ee_EOF
|
||||
GH_AW_PROMPT_c25676ba2ab40d85_EOF
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
|
||||
cat << 'GH_AW_PROMPT_1f1e9add9d73f7ee_EOF'
|
||||
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
|
||||
<safe-output-tools>
|
||||
Tools: create_discussion, missing_tool, missing_data, noop
|
||||
</safe-output-tools>
|
||||
|
|
@ -193,12 +193,12 @@ jobs:
|
|||
{{/if}}
|
||||
</github-context>
|
||||
|
||||
GH_AW_PROMPT_1f1e9add9d73f7ee_EOF
|
||||
GH_AW_PROMPT_c25676ba2ab40d85_EOF
|
||||
cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
|
||||
cat << 'GH_AW_PROMPT_1f1e9add9d73f7ee_EOF'
|
||||
cat << 'GH_AW_PROMPT_c25676ba2ab40d85_EOF'
|
||||
</system>
|
||||
{{#runtime-import .github/workflows/qf-s-benchmark.md}}
|
||||
GH_AW_PROMPT_1f1e9add9d73f7ee_EOF
|
||||
GH_AW_PROMPT_c25676ba2ab40d85_EOF
|
||||
} > "$GH_AW_PROMPT"
|
||||
- name: Interpolate variables and render templates
|
||||
uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
|
||||
|
|
@ -369,15 +369,15 @@ jobs:
|
|||
mkdir -p "${RUNNER_TEMP}/gh-aw/safeoutputs"
|
||||
mkdir -p /tmp/gh-aw/safeoutputs
|
||||
mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
|
||||
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_cf3aae02d12a091b_EOF'
|
||||
{"create_discussion":{"category":"agentic workflows","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[ZIPT Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"},"report_incomplete":{}}
|
||||
GH_AW_SAFE_OUTPUTS_CONFIG_cf3aae02d12a091b_EOF
|
||||
cat > "${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" << 'GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF'
|
||||
{"create_discussion":{"category":"agentic workflows","close_older_discussions":true,"expires":168,"fallback_to_issue":true,"max":1,"title_prefix":"[QF_S Benchmark] "},"create_report_incomplete_issue":{},"missing_data":{},"missing_tool":{},"noop":{"max":1,"report-as-issue":"false"},"report_incomplete":{}}
|
||||
GH_AW_SAFE_OUTPUTS_CONFIG_ef274886e4650c6c_EOF
|
||||
- name: Write Safe Outputs Tools
|
||||
env:
|
||||
GH_AW_TOOLS_META_JSON: |
|
||||
{
|
||||
"description_suffixes": {
|
||||
"create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[ZIPT Benchmark] \". Discussions will be created in category \"agentic workflows\"."
|
||||
"create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[QF_S Benchmark] \". Discussions will be created in category \"agentic workflows\"."
|
||||
},
|
||||
"repo_params": {},
|
||||
"dynamic_tools": []
|
||||
|
|
@ -559,7 +559,7 @@ jobs:
|
|||
export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.2.17'
|
||||
|
||||
mkdir -p /home/runner/.copilot
|
||||
cat << GH_AW_MCP_CONFIG_e63e2ef76683cb23_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh"
|
||||
cat << GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF | bash "${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh"
|
||||
{
|
||||
"mcpServers": {
|
||||
"github": {
|
||||
|
|
@ -600,7 +600,7 @@ jobs:
|
|||
"payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
|
||||
}
|
||||
}
|
||||
GH_AW_MCP_CONFIG_e63e2ef76683cb23_EOF
|
||||
GH_AW_MCP_CONFIG_c990404fbee0ddc5_EOF
|
||||
- name: Download activation artifact
|
||||
uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
|
||||
with:
|
||||
|
|
@ -1128,7 +1128,7 @@ jobs:
|
|||
GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
|
||||
GITHUB_SERVER_URL: ${{ github.server_url }}
|
||||
GITHUB_API_URL: ${{ github.api_url }}
|
||||
GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"agentic workflows\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[ZIPT Benchmark] \"},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"},\"report_incomplete\":{}}"
|
||||
GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"agentic workflows\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[QF_S Benchmark] \"},\"create_report_incomplete_issue\":{},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"},\"report_incomplete\":{}}"
|
||||
with:
|
||||
github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
script: |
|
||||
|
|
|
|||
4
.github/workflows/qf-s-benchmark.md
vendored
4
.github/workflows/qf-s-benchmark.md
vendored
|
|
@ -17,7 +17,7 @@ tools:
|
|||
|
||||
safe-outputs:
|
||||
create-discussion:
|
||||
title-prefix: "[ZIPT Benchmark] "
|
||||
title-prefix: "[QF_S Benchmark] "
|
||||
category: "Agentic Workflows"
|
||||
close-older-discussions: true
|
||||
missing-tool:
|
||||
|
|
@ -399,5 +399,5 @@ These are benchmarks where `nseq` shows a performance advantage.
|
|||
- **DO NOT** modify any source files or create pull requests.
|
||||
- **DO NOT** run benchmarks for longer than 80 minutes total (leave buffer for posting).
|
||||
- **DO** always report the commit SHA so results can be correlated with specific code versions.
|
||||
- **DO** close older ZIPT Benchmark discussions automatically (configured via `close-older-discussions: true`).
|
||||
- **DO** close older QF_S Benchmark discussions automatically (configured via `close-older-discussions: true`).
|
||||
- **DO** highlight disagreements prominently — these are potential correctness bugs.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue