From 4a8c9729bf1b6ff91320fb0c59a894fea95dc71b Mon Sep 17 00:00:00 2001
From: Copilot <198982749+Copilot@users.noreply.github.com>
Date: Fri, 20 Mar 2026 11:19:37 -0700
Subject: [PATCH] Add ostrich-benchmark agentic workflow for ZIPT/Z3 c3 branch
benchmarking (#9064)
Agent-Logs-Url: https://github.com/Z3Prover/z3/sessions/bfaec259-86d9-4b56-ab04-182835e3563b
Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com>
---
.github/agentics/ostrich-benchmark.md | 363 +++++++
.github/workflows/ostrich-benchmark.lock.yml | 1011 ++++++++++++++++++
.github/workflows/ostrich-benchmark.md | 41 +
3 files changed, 1415 insertions(+)
create mode 100644 .github/agentics/ostrich-benchmark.md
create mode 100644 .github/workflows/ostrich-benchmark.lock.yml
create mode 100644 .github/workflows/ostrich-benchmark.md
diff --git a/.github/agentics/ostrich-benchmark.md b/.github/agentics/ostrich-benchmark.md
new file mode 100644
index 000000000..d498ee125
--- /dev/null
+++ b/.github/agentics/ostrich-benchmark.md
@@ -0,0 +1,363 @@
+
+
+
+# Ostrich Benchmark: Z3 c3 branch vs ZIPT
+
+You are an AI agent that benchmarks Z3 string solvers (`seq` and `nseq`) and the standalone ZIPT solver on all SMT-LIB2 benchmarks from the `tests/ostrich.zip` archive on the `c3` branch, and publishes a summary report as a GitHub discussion.
+
+## Context
+
+- **Repository**: ${{ github.repository }}
+- **Workspace**: ${{ github.workspace }}
+- **Branch**: c3 (already checked out by the workflow setup step)
+
+## Phase 1: Build Z3
+
+Build Z3 from the checked-out `c3` branch using CMake + Ninja, including the .NET bindings required by ZIPT.
+
+```bash
+cd ${{ github.workspace }}
+
+# Install build dependencies if missing
+sudo apt-get install -y ninja-build cmake python3 zstd dotnet-sdk-8.0 unzip 2>/dev/null || true
+
+# Configure the build in Debug mode to enable assertions and tracing
+# (Debug mode is required for -tr: trace flags to produce meaningful output)
+mkdir -p build
+cd build
+cmake .. -G Ninja -DCMAKE_BUILD_TYPE=Debug -DZ3_BUILD_DOTNET_BINDINGS=ON 2>&1 | tail -20
+
+# Build z3 binary and .NET bindings (this takes ~15-17 minutes)
+ninja z3 2>&1 | tail -30
+ninja build_z3_dotnet_bindings 2>&1 | tail -20
+
+# Verify the build succeeded
+./z3 --version
+
+# Locate the Microsoft.Z3.dll produced by the build
+Z3_DOTNET_DLL=$(find . -name "Microsoft.Z3.dll" -not -path "*/obj/*" | head -1)
+if [ -z "$Z3_DOTNET_DLL" ]; then
+ echo "ERROR: Microsoft.Z3.dll not found after build"
+ exit 1
+fi
+echo "Found Microsoft.Z3.dll at: $Z3_DOTNET_DLL"
+```
+
+If the build fails, report the error clearly and exit without proceeding.
+
+## Phase 2a: Clone and Build ZIPT
+
+Clone the ZIPT solver from the `parikh` branch and compile it against the Z3 .NET bindings built in Phase 1.
+
+```bash
+cd ${{ github.workspace }}
+
+# Re-locate the Microsoft.Z3.dll if needed
+Z3_DOTNET_DLL=$(find build -name "Microsoft.Z3.dll" -not -path "*/obj/*" | head -1)
+Z3_LIB_DIR=${{ github.workspace }}/build
+
+# Clone ZIPT (parikh branch)
+git clone --depth=1 --branch parikh https://github.com/CEisenhofer/ZIPT.git /tmp/zipt
+
+# Patch ZIPT.csproj to point at the freshly built Microsoft.Z3.dll
+# (the repo has a Windows-relative hardcoded path that won't exist here)
+sed -i "s|.*|$Z3_DOTNET_DLL|" /tmp/zipt/ZIPT/ZIPT.csproj
+
+# Build ZIPT in Release mode
+cd /tmp/zipt/ZIPT
+dotnet build --configuration Release 2>&1 | tail -20
+
+# Locate the built ZIPT.dll
+ZIPT_DLL=$(find /tmp/zipt/ZIPT/bin/Release -name "ZIPT.dll" | head -1)
+if [ -z "$ZIPT_DLL" ]; then
+ echo "ERROR: ZIPT.dll not found after build"
+ exit 1
+fi
+echo "ZIPT binary: $ZIPT_DLL"
+
+# Make libz3.so visible to the .NET runtime at ZIPT startup
+ZIPT_OUT_DIR=$(dirname "$ZIPT_DLL")
+if cp "$Z3_LIB_DIR/libz3.so" "$ZIPT_OUT_DIR/" 2>/dev/null; then
+ echo "Copied libz3.so to $ZIPT_OUT_DIR"
+else
+ echo "WARNING: could not copy libz3.so to $ZIPT_OUT_DIR — setting LD_LIBRARY_PATH fallback"
+fi
+export LD_LIBRARY_PATH="$Z3_LIB_DIR${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+echo "ZIPT build complete."
+```
+
+If the ZIPT build fails, note the error in the report but continue with the Z3-only benchmark columns.
+
+## Phase 2b: Extract Benchmark Files
+
+Extract all SMT-LIB2 files from the `tests/ostrich.zip` archive.
+
+```bash
+cd ${{ github.workspace }}
+
+# Extract the zip archive
+mkdir -p /tmp/ostrich_benchmarks
+unzip -q tests/ostrich.zip -d /tmp/ostrich_benchmarks
+
+# List all .smt2 files
+find /tmp/ostrich_benchmarks -name "*.smt2" -type f | sort > /tmp/all_ostrich_files.txt
+TOTAL_FILES=$(wc -l < /tmp/all_ostrich_files.txt)
+echo "Total Ostrich .smt2 files: $TOTAL_FILES"
+
+if [ "$TOTAL_FILES" -eq 0 ]; then
+ echo "ERROR: No .smt2 files found in tests/ostrich.zip"
+ exit 1
+fi
+```
+
+## Phase 3: Run Benchmarks
+
+Run every file from `/tmp/all_ostrich_files.txt` with both Z3 string solvers and ZIPT. Use a **5-second timeout** per run.
+
+For each file, run:
+1. `z3 smt.string_solver=seq -T:5 ` — seq solver
+2. `z3 smt.string_solver=nseq -T:5 ` — nseq (ZIPT) solver
+3. `dotnet -t:5000 ` — standalone ZIPT solver (milliseconds)
+
+Capture:
+- **Verdict**: `sat`, `unsat`, `unknown`, `timeout` (if exit code indicates timeout or process is killed), or `bug` (if a solver crashes / produces a non-standard result)
+- **Time** (seconds): wall-clock time for the run
+- A row is flagged `SOUNDNESS_DISAGREEMENT` when any two solvers that both produced a definitive answer (sat/unsat) disagree
+
+Use a bash script to automate this:
+
+```bash
+#!/usr/bin/env bash
+set -euo pipefail
+
+Z3=${{ github.workspace }}/build/z3
+ZIPT_DLL=$(find /tmp/zipt/ZIPT/bin/Release -name "ZIPT.dll" 2>/dev/null | head -1)
+ZIPT_AVAILABLE=false
+[ -n "$ZIPT_DLL" ] && ZIPT_AVAILABLE=true
+
+# Ensure libz3.so is on the dynamic-linker path for the .NET runtime
+export LD_LIBRARY_PATH=${{ github.workspace }}/build${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+
+RESULTS=/tmp/benchmark_results.tsv
+mkdir -p /tmp/ostrich_run
+
+echo -e "file\tseq_verdict\tseq_time\tnseq_verdict\tnseq_time\tzipt_verdict\tzipt_time\tnotes" > "$RESULTS"
+
+run_z3_seq() {
+ local file="$1"
+ local start end elapsed verdict output exit_code
+
+ start=$(date +%s%3N)
+ output=$(timeout 7 "$Z3" "smt.string_solver=seq" -T:5 "$file" 2>&1)
+ exit_code=$?
+ end=$(date +%s%3N)
+ elapsed=$(echo "scale=3; ($end - $start) / 1000" | bc)
+
+ if echo "$output" | grep -q "^unsat"; then
+ verdict="unsat"
+ elif echo "$output" | grep -q "^sat"; then
+ verdict="sat"
+ elif echo "$output" | grep -q "^unknown"; then
+ verdict="unknown"
+ elif [ "$exit_code" -eq 124 ]; then
+ verdict="timeout"
+ elif echo "$output" | grep -qi "error\|assertion\|segfault\|SIGABRT\|exception"; then
+ verdict="bug"
+ else
+ verdict="unknown"
+ fi
+
+ echo "$verdict $elapsed"
+}
+
+run_z3_nseq() {
+ local file="$1"
+ local start end elapsed verdict output exit_code
+
+ start=$(date +%s%3N)
+ output=$(timeout 7 "$Z3" "smt.string_solver=nseq" -T:5 "$file" 2>&1)
+ exit_code=$?
+ end=$(date +%s%3N)
+ elapsed=$(echo "scale=3; ($end - $start) / 1000" | bc)
+
+ if echo "$output" | grep -q "^unsat"; then
+ verdict="unsat"
+ elif echo "$output" | grep -q "^sat"; then
+ verdict="sat"
+ elif echo "$output" | grep -q "^unknown"; then
+ verdict="unknown"
+ elif [ "$exit_code" -eq 124 ]; then
+ verdict="timeout"
+ elif echo "$output" | grep -qi "error\|assertion\|segfault\|SIGABRT\|exception"; then
+ verdict="bug"
+ else
+ verdict="unknown"
+ fi
+
+ echo "$verdict $elapsed"
+}
+
+run_zipt() {
+ local file="$1"
+ local start end elapsed verdict output exit_code
+
+ if [ "$ZIPT_AVAILABLE" != "true" ]; then
+ echo "n/a 0.000"
+ return
+ fi
+
+ start=$(date +%s%3N)
+ # ZIPT prints the filename on the first line, then SAT/UNSAT/UNKNOWN on subsequent lines
+ output=$(timeout 7 dotnet "$ZIPT_DLL" -t:5000 "$file" 2>&1)
+ exit_code=$?
+ end=$(date +%s%3N)
+ elapsed=$(echo "scale=3; ($end - $start) / 1000" | bc)
+
+ if echo "$output" | grep -qi "^UNSAT$"; then
+ verdict="unsat"
+ elif echo "$output" | grep -qi "^SAT$"; then
+ verdict="sat"
+ elif echo "$output" | grep -qi "^UNKNOWN$"; then
+ verdict="unknown"
+ elif [ "$exit_code" -eq 124 ]; then
+ verdict="timeout"
+ elif echo "$output" | grep -qi "error\|crash\|exception\|Unsupported"; then
+ verdict="bug"
+ else
+ verdict="unknown"
+ fi
+
+ echo "$verdict $elapsed"
+}
+
+COUNTER=0
+while IFS= read -r file; do
+ COUNTER=$((COUNTER + 1))
+ fname=$(basename "$file")
+
+ seq_result=$(run_z3_seq "$file")
+ nseq_result=$(run_z3_nseq "$file")
+ zipt_result=$(run_zipt "$file")
+
+ seq_verdict=$(echo "$seq_result" | cut -d' ' -f1)
+ seq_time=$(echo "$seq_result" | cut -d' ' -f2)
+ nseq_verdict=$(echo "$nseq_result" | cut -d' ' -f1)
+ nseq_time=$(echo "$nseq_result" | cut -d' ' -f2)
+ zipt_verdict=$(echo "$zipt_result" | cut -d' ' -f1)
+ zipt_time=$(echo "$zipt_result" | cut -d' ' -f2)
+
+ # Flag soundness disagreement when any two definitive verdicts disagree
+ notes=""
+ declare -A definitive_map
+ [ "$seq_verdict" = "sat" ] || [ "$seq_verdict" = "unsat" ] && definitive_map[seq]="$seq_verdict"
+ [ "$nseq_verdict" = "sat" ] || [ "$nseq_verdict" = "unsat" ] && definitive_map[nseq]="$nseq_verdict"
+ [ "$zipt_verdict" = "sat" ] || [ "$zipt_verdict" = "unsat" ] && definitive_map[zipt]="$zipt_verdict"
+ has_sat=false; has_unsat=false
+ for v in "${definitive_map[@]}"; do
+ [ "$v" = "sat" ] && has_sat=true
+ [ "$v" = "unsat" ] && has_unsat=true
+ done
+ if $has_sat && $has_unsat; then
+ notes="SOUNDNESS_DISAGREEMENT"
+ fi
+
+ echo -e "$fname\t$seq_verdict\t$seq_time\t$nseq_verdict\t$nseq_time\t$zipt_verdict\t$zipt_time\t$notes" >> "$RESULTS"
+ echo "[$COUNTER] [$fname] seq=$seq_verdict(${seq_time}s) nseq=$nseq_verdict(${nseq_time}s) zipt=$zipt_verdict(${zipt_time}s) $notes"
+done < /tmp/all_ostrich_files.txt
+
+echo "Benchmark run complete. Results saved to $RESULTS"
+```
+
+Save this script to `/tmp/run_ostrich_benchmarks.sh`, make it executable, and run it. Do not skip any file.
+
+## Phase 4: Generate Summary Report
+
+Read `/tmp/benchmark_results.tsv` and compute statistics. Then generate a Markdown report.
+
+Compute:
+- **Total benchmarks**: total number of files run
+- **Per solver (seq, nseq, and ZIPT)**: count of sat / unsat / unknown / timeout / bug verdicts
+- **Total time used**: sum of all times for each solver
+- **Average time per benchmark**: total_time / total_files
+- **Soundness disagreements**: files where any two solvers that both returned a definitive answer disagree
+- **Bugs / crashes**: files with error/crash verdicts
+
+Format the report as a GitHub Discussion post (GitHub-flavored Markdown):
+
+```markdown
+### Ostrich Benchmark Report — Z3 c3 branch
+
+**Date**:
+**Branch**: c3
+**Benchmark set**: Ostrich (all files from tests/ostrich.zip)
+**Timeout**: 5 seconds per benchmark (`-T:5` for Z3; `-t:5000` for ZIPT)
+
+---
+
+### Summary
+
+| Metric | seq solver | nseq solver | ZIPT solver |
+|--------|-----------|-------------|-------------|
+| sat | X | X | X |
+| unsat | X | X | X |
+| unknown | X | X | X |
+| timeout | X | X | X |
+| bug/crash | X | X | X |
+| **Total time (s)** | X.XXX | X.XXX | X.XXX |
+| **Avg time/benchmark (s)** | X.XXX | X.XXX | X.XXX |
+
+**Soundness disagreements** (any two solvers return conflicting sat/unsat): N
+
+---
+
+### Per-File Results
+
+
+Click to expand full per-file table
+
+| # | File | seq verdict | seq time (s) | nseq verdict | nseq time (s) | ZIPT verdict | ZIPT time (s) | Notes |
+|---|------|-------------|-------------|--------------|--------------|--------------|--------------|-------|
+| 1 | benchmark_0001.smt2 | sat | 0.123 | sat | 0.456 | sat | 0.789 | |
+| ... | ... | ... | ... | ... | ... | ... | ... | ... |
+
+
+
+---
+
+### Notable Issues
+
+#### Soundness Disagreements (Critical)
+
+
+#### Crashes / Bugs
+
+
+#### Slow Benchmarks (> 4s)
+
+
+---
+
+*Generated automatically by the Ostrich Benchmark workflow on the c3 branch.*
+```
+
+## Phase 5: Post to GitHub Discussion
+
+Post the Markdown report as a new GitHub Discussion using the `create-discussion` safe output.
+
+- **Category**: "Agentic Workflows"
+- **Title**: `[Ostrich Benchmark] Z3 c3 branch — `
+- Close older discussions with the same title prefix to avoid clutter.
+
+## Guidelines
+
+- **Always build from c3 branch**: The workspace is already checked out on c3; don't change branches.
+- **Debug build required**: The build must use `CMAKE_BUILD_TYPE=Debug` so that Z3's internal assertions are active.
+- **Run all benchmarks**: Unlike the QF_S workflow, run every file in the archive — do not randomly sample.
+- **5-second timeout**: Pass `-T:5` to Z3 (both seq and nseq) and `-t:5000` to ZIPT (milliseconds). Use `timeout 7` as the outer OS-level guard to allow the solver to exit cleanly before being killed.
+- **Be precise with timing**: Use millisecond-precision timestamps and report times in seconds with 3 decimal places.
+- **Distinguish timeout from unknown**: A timeout is different from `(unknown)` returned by a solver within its time budget.
+- **ZIPT output format**: ZIPT prints the input filename on the first line, then `SAT`, `UNSAT`, or `UNKNOWN` on subsequent lines. Parse accordingly.
+- **Report soundness bugs prominently**: If any benchmark shows a conflict between any two solvers that both returned a definitive sat/unsat answer, highlight it as a critical finding and name which pair disagrees.
+- **Handle build failures gracefully**: If Z3 fails to build, report the error and create a brief discussion noting the build failure. If ZIPT fails to build, continue with only the seq/nseq columns and note `n/a` for ZIPT results.
+- **Large report**: Always put the per-file table in a `` collapsible section since there may be many files.
+- **Progress logging**: Print a line per file as you run it (e.g., `[N] [filename] seq=...`) so the workflow log shows progress even for large benchmark sets.
diff --git a/.github/workflows/ostrich-benchmark.lock.yml b/.github/workflows/ostrich-benchmark.lock.yml
new file mode 100644
index 000000000..adebe18e4
--- /dev/null
+++ b/.github/workflows/ostrich-benchmark.lock.yml
@@ -0,0 +1,1011 @@
+# ___ _ _
+# / _ \ | | (_)
+# | |_| | __ _ ___ _ __ | |_ _ ___
+# | _ |/ _` |/ _ \ '_ \| __| |/ __|
+# | | | | (_| | __/ | | | |_| | (__
+# \_| |_/\__, |\___|_| |_|\__|_|\___|
+# __/ |
+# _ _ |___/
+# | | | | / _| |
+# | | | | ___ _ __ _ __| |_| | _____ ____
+# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___|
+# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \
+# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/
+#
+# This file was automatically generated by gh-aw (v0.62.4). DO NOT EDIT.
+#
+# To update this file, edit the corresponding .md file and run:
+# gh aw compile
+# Not all edits will cause changes to this file.
+#
+# For more information: https://github.github.com/gh-aw/introduction/overview/
+#
+# Run Z3 string solver benchmarks (seq vs nseq) and ZIPT on all Ostrich benchmarks from tests/ostrich.zip on the c3 branch and post results as a GitHub discussion
+#
+# gh-aw-metadata: {"schema_version":"v3","frontmatter_hash":"3ac70e9acd74c08c55c4c8e60b61e24db0f1e0dbd5bc8e25c62af0279aea4d6b","compiler_version":"v0.62.4","strict":true,"agent_id":"copilot"}
+
+name: "Ostrich Benchmark"
+"on":
+ schedule:
+ - cron: "0 6 * * *"
+ workflow_dispatch:
+
+permissions: {}
+
+concurrency:
+ group: "gh-aw-${{ github.workflow }}"
+
+run-name: "Ostrich Benchmark"
+
+jobs:
+ activation:
+ runs-on: ubuntu-slim
+ permissions:
+ contents: read
+ outputs:
+ comment_id: ""
+ comment_repo: ""
+ lockdown_check_failed: ${{ steps.generate_aw_info.outputs.lockdown_check_failed == 'true' }}
+ model: ${{ steps.generate_aw_info.outputs.model }}
+ secret_verification_result: ${{ steps.validate-secret.outputs.verification_result }}
+ steps:
+ - name: Setup Scripts
+ uses: github/gh-aw-actions/setup@v0.62.4
+ with:
+ destination: ${{ runner.temp }}/gh-aw/actions
+ - name: Generate agentic run info
+ id: generate_aw_info
+ env:
+ GH_AW_INFO_ENGINE_ID: "copilot"
+ GH_AW_INFO_ENGINE_NAME: "GitHub Copilot CLI"
+ GH_AW_INFO_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }}
+ GH_AW_INFO_VERSION: ""
+ GH_AW_INFO_AGENT_VERSION: "latest"
+ GH_AW_INFO_CLI_VERSION: "v0.62.4"
+ GH_AW_INFO_WORKFLOW_NAME: "Ostrich Benchmark"
+ GH_AW_INFO_EXPERIMENTAL: "false"
+ GH_AW_INFO_SUPPORTS_TOOLS_ALLOWLIST: "true"
+ GH_AW_INFO_STAGED: "false"
+ GH_AW_INFO_ALLOWED_DOMAINS: '["defaults"]'
+ GH_AW_INFO_FIREWALL_ENABLED: "true"
+ GH_AW_INFO_AWF_VERSION: "v0.24.5"
+ GH_AW_INFO_AWMG_VERSION: ""
+ GH_AW_INFO_FIREWALL_TYPE: "squid"
+ GH_AW_COMPILED_STRICT: "true"
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/generate_aw_info.cjs');
+ await main(core, context);
+ - name: Validate COPILOT_GITHUB_TOKEN secret
+ id: validate-secret
+ run: ${RUNNER_TEMP}/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN 'GitHub Copilot CLI' https://github.github.com/gh-aw/reference/engines/#github-copilot-default
+ env:
+ COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+ - name: Checkout .github and .agents folders
+ uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+ with:
+ persist-credentials: false
+ sparse-checkout: |
+ .github
+ .agents
+ sparse-checkout-cone-mode: true
+ fetch-depth: 1
+ - name: Check workflow file timestamps
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_WORKFLOW_FILE: "ostrich-benchmark.lock.yml"
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/check_workflow_timestamp_api.cjs');
+ await main();
+ - name: Create prompt with built-in context
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }}
+ GH_AW_GITHUB_ACTOR: ${{ github.actor }}
+ GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }}
+ GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }}
+ GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }}
+ GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }}
+ GH_AW_GITHUB_REPOSITORY: ${{ github.repository }}
+ GH_AW_GITHUB_RUN_ID: ${{ github.run_id }}
+ GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }}
+ run: |
+ bash ${RUNNER_TEMP}/gh-aw/actions/create_prompt_first.sh
+ {
+ cat << 'GH_AW_PROMPT_EOF'
+
+ GH_AW_PROMPT_EOF
+ cat "${RUNNER_TEMP}/gh-aw/prompts/xpia.md"
+ cat "${RUNNER_TEMP}/gh-aw/prompts/temp_folder_prompt.md"
+ cat "${RUNNER_TEMP}/gh-aw/prompts/markdown.md"
+ cat "${RUNNER_TEMP}/gh-aw/prompts/safe_outputs_prompt.md"
+ cat << 'GH_AW_PROMPT_EOF'
+
+ Tools: create_discussion, missing_tool, missing_data, noop
+
+
+ The following GitHub context information is available for this workflow:
+ {{#if __GH_AW_GITHUB_ACTOR__ }}
+ - **actor**: __GH_AW_GITHUB_ACTOR__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_REPOSITORY__ }}
+ - **repository**: __GH_AW_GITHUB_REPOSITORY__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_WORKSPACE__ }}
+ - **workspace**: __GH_AW_GITHUB_WORKSPACE__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }}
+ - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }}
+ - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }}
+ - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }}
+ - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__
+ {{/if}}
+ {{#if __GH_AW_GITHUB_RUN_ID__ }}
+ - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__
+ {{/if}}
+
+
+ GH_AW_PROMPT_EOF
+ cat "${RUNNER_TEMP}/gh-aw/prompts/github_mcp_tools_with_safeoutputs_prompt.md"
+ cat << 'GH_AW_PROMPT_EOF'
+
+ GH_AW_PROMPT_EOF
+ cat << 'GH_AW_PROMPT_EOF'
+ {{#runtime-import .github/workflows/ostrich-benchmark.md}}
+ GH_AW_PROMPT_EOF
+ } > "$GH_AW_PROMPT"
+ - name: Interpolate variables and render templates
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/interpolate_prompt.cjs');
+ await main();
+ - name: Substitute placeholders
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ GH_AW_GITHUB_ACTOR: ${{ github.actor }}
+ GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }}
+ GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }}
+ GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }}
+ GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }}
+ GH_AW_GITHUB_REPOSITORY: ${{ github.repository }}
+ GH_AW_GITHUB_RUN_ID: ${{ github.run_id }}
+ GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }}
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+
+ const substitutePlaceholders = require('${{ runner.temp }}/gh-aw/actions/substitute_placeholders.cjs');
+
+ // Call the substitution function
+ return await substitutePlaceholders({
+ file: process.env.GH_AW_PROMPT,
+ substitutions: {
+ GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR,
+ GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID,
+ GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER,
+ GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER,
+ GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER,
+ GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY,
+ GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID,
+ GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE
+ }
+ });
+ - name: Validate prompt placeholders
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ run: bash ${RUNNER_TEMP}/gh-aw/actions/validate_prompt_placeholders.sh
+ - name: Print prompt
+ env:
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ run: bash ${RUNNER_TEMP}/gh-aw/actions/print_prompt_summary.sh
+ - name: Upload activation artifact
+ if: success()
+ uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+ with:
+ name: activation
+ path: |
+ /tmp/gh-aw/aw_info.json
+ /tmp/gh-aw/aw-prompts/prompt.txt
+ retention-days: 1
+
+ agent:
+ needs: activation
+ runs-on: ubuntu-latest
+ permissions: read-all
+ concurrency:
+ group: "gh-aw-copilot-${{ github.workflow }}"
+ env:
+ DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
+ GH_AW_ASSETS_ALLOWED_EXTS: ""
+ GH_AW_ASSETS_BRANCH: ""
+ GH_AW_ASSETS_MAX_SIZE_KB: 0
+ GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs
+ GH_AW_WORKFLOW_ID_SANITIZED: ostrichbenchmark
+ outputs:
+ checkout_pr_success: ${{ steps.checkout-pr.outputs.checkout_pr_success || 'true' }}
+ detection_conclusion: ${{ steps.detection_conclusion.outputs.conclusion }}
+ detection_success: ${{ steps.detection_conclusion.outputs.success }}
+ has_patch: ${{ steps.collect_output.outputs.has_patch }}
+ inference_access_error: ${{ steps.detect-inference-error.outputs.inference_access_error || 'false' }}
+ model: ${{ needs.activation.outputs.model }}
+ output: ${{ steps.collect_output.outputs.output }}
+ output_types: ${{ steps.collect_output.outputs.output_types }}
+ steps:
+ - name: Setup Scripts
+ uses: github/gh-aw-actions/setup@v0.62.4
+ with:
+ destination: ${{ runner.temp }}/gh-aw/actions
+ - name: Set runtime paths
+ run: |
+ echo "GH_AW_SAFE_OUTPUTS=${RUNNER_TEMP}/gh-aw/safeoutputs/outputs.jsonl" >> "$GITHUB_ENV"
+ echo "GH_AW_SAFE_OUTPUTS_CONFIG_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/config.json" >> "$GITHUB_ENV"
+ echo "GH_AW_SAFE_OUTPUTS_TOOLS_PATH=${RUNNER_TEMP}/gh-aw/safeoutputs/tools.json" >> "$GITHUB_ENV"
+ - name: Create gh-aw temp directory
+ run: bash ${RUNNER_TEMP}/gh-aw/actions/create_gh_aw_tmp_dir.sh
+ - name: Configure gh CLI for GitHub Enterprise
+ run: bash ${RUNNER_TEMP}/gh-aw/actions/configure_gh_for_ghe.sh
+ env:
+ GH_TOKEN: ${{ github.token }}
+ - name: Checkout c3 branch
+ uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5
+ with:
+ fetch-depth: 1
+ persist-credentials: false
+ ref: c3
+
+ - name: Configure Git credentials
+ env:
+ REPO_NAME: ${{ github.repository }}
+ SERVER_URL: ${{ github.server_url }}
+ run: |
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
+ git config --global user.name "github-actions[bot]"
+ git config --global am.keepcr true
+ # Re-authenticate git with GitHub token
+ SERVER_URL_STRIPPED="${SERVER_URL#https://}"
+ git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git"
+ echo "Git configured with standard GitHub Actions identity"
+ - name: Checkout PR branch
+ id: checkout-pr
+ if: |
+ (github.event.pull_request) || (github.event.issue.pull_request)
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/checkout_pr_branch.cjs');
+ await main();
+ - name: Install GitHub Copilot CLI
+ run: ${RUNNER_TEMP}/gh-aw/actions/install_copilot_cli.sh latest
+ env:
+ GH_HOST: github.com
+ - name: Install AWF binary
+ run: bash ${RUNNER_TEMP}/gh-aw/actions/install_awf_binary.sh v0.24.5
+ - name: Determine automatic lockdown mode for GitHub MCP Server
+ id: determine-automatic-lockdown
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }}
+ GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }}
+ with:
+ script: |
+ const determineAutomaticLockdown = require('${{ runner.temp }}/gh-aw/actions/determine_automatic_lockdown.cjs');
+ await determineAutomaticLockdown(github, context, core);
+ - name: Download container images
+ run: bash ${RUNNER_TEMP}/gh-aw/actions/download_docker_images.sh ghcr.io/github/gh-aw-firewall/agent:0.24.5 ghcr.io/github/gh-aw-firewall/api-proxy:0.24.5 ghcr.io/github/gh-aw-firewall/squid:0.24.5 ghcr.io/github/gh-aw-mcpg:v0.1.19 ghcr.io/github/github-mcp-server:v0.32.0 node:lts-alpine
+ - name: Write Safe Outputs Config
+ run: |
+ mkdir -p ${RUNNER_TEMP}/gh-aw/safeoutputs
+ mkdir -p /tmp/gh-aw/safeoutputs
+ mkdir -p /tmp/gh-aw/mcp-logs/safeoutputs
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/config.json << 'GH_AW_SAFE_OUTPUTS_CONFIG_EOF'
+ {"create_discussion":{"expires":168,"max":1},"create_missing_tool_issue":{"max":1,"title_prefix":"[missing tool]"},"missing_data":{},"missing_tool":{},"noop":{"max":1}}
+ GH_AW_SAFE_OUTPUTS_CONFIG_EOF
+ - name: Write Safe Outputs Tools
+ run: |
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/tools_meta.json << 'GH_AW_SAFE_OUTPUTS_TOOLS_META_EOF'
+ {
+ "description_suffixes": {
+ "create_discussion": " CONSTRAINTS: Maximum 1 discussion(s) can be created. Title will be prefixed with \"[Ostrich Benchmark] \". Discussions will be created in category \"agentic workflows\"."
+ },
+ "repo_params": {},
+ "dynamic_tools": []
+ }
+ GH_AW_SAFE_OUTPUTS_TOOLS_META_EOF
+ cat > ${RUNNER_TEMP}/gh-aw/safeoutputs/validation.json << 'GH_AW_SAFE_OUTPUTS_VALIDATION_EOF'
+ {
+ "create_discussion": {
+ "defaultMax": 1,
+ "fields": {
+ "body": {
+ "required": true,
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 65000
+ },
+ "category": {
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 128
+ },
+ "repo": {
+ "type": "string",
+ "maxLength": 256
+ },
+ "title": {
+ "required": true,
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 128
+ }
+ }
+ },
+ "missing_data": {
+ "defaultMax": 20,
+ "fields": {
+ "alternatives": {
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 256
+ },
+ "context": {
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 256
+ },
+ "data_type": {
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 128
+ },
+ "reason": {
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 256
+ }
+ }
+ },
+ "missing_tool": {
+ "defaultMax": 20,
+ "fields": {
+ "alternatives": {
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 512
+ },
+ "reason": {
+ "required": true,
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 256
+ },
+ "tool": {
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 128
+ }
+ }
+ },
+ "noop": {
+ "defaultMax": 1,
+ "fields": {
+ "message": {
+ "required": true,
+ "type": "string",
+ "sanitize": true,
+ "maxLength": 65000
+ }
+ }
+ }
+ }
+ GH_AW_SAFE_OUTPUTS_VALIDATION_EOF
+ node ${RUNNER_TEMP}/gh-aw/actions/generate_safe_outputs_tools.cjs
+ - name: Generate Safe Outputs MCP Server Config
+ id: safe-outputs-config
+ run: |
+ # Generate a secure random API key (360 bits of entropy, 40+ chars)
+ # Mask immediately to prevent timing vulnerabilities
+ API_KEY=$(openssl rand -base64 45 | tr -d '/+=')
+ echo "::add-mask::${API_KEY}"
+
+ PORT=3001
+
+ # Set outputs for next steps
+ {
+ echo "safe_outputs_api_key=${API_KEY}"
+ echo "safe_outputs_port=${PORT}"
+ } >> "$GITHUB_OUTPUT"
+
+ echo "Safe Outputs MCP server will run on port ${PORT}"
+
+ - name: Start Safe Outputs MCP HTTP Server
+ id: safe-outputs-start
+ env:
+ DEBUG: '*'
+ GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-config.outputs.safe_outputs_port }}
+ GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-config.outputs.safe_outputs_api_key }}
+ GH_AW_SAFE_OUTPUTS_TOOLS_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/tools.json
+ GH_AW_SAFE_OUTPUTS_CONFIG_PATH: ${{ runner.temp }}/gh-aw/safeoutputs/config.json
+ GH_AW_MCP_LOG_DIR: /tmp/gh-aw/mcp-logs/safeoutputs
+ run: |
+ # Environment variables are set above to prevent template injection
+ export DEBUG
+ export GH_AW_SAFE_OUTPUTS_PORT
+ export GH_AW_SAFE_OUTPUTS_API_KEY
+ export GH_AW_SAFE_OUTPUTS_TOOLS_PATH
+ export GH_AW_SAFE_OUTPUTS_CONFIG_PATH
+ export GH_AW_MCP_LOG_DIR
+
+ bash ${RUNNER_TEMP}/gh-aw/actions/start_safe_outputs_server.sh
+
+ - name: Start MCP Gateway
+ id: start-mcp-gateway
+ env:
+ GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }}
+ GH_AW_SAFE_OUTPUTS_API_KEY: ${{ steps.safe-outputs-start.outputs.api_key }}
+ GH_AW_SAFE_OUTPUTS_PORT: ${{ steps.safe-outputs-start.outputs.port }}
+ GITHUB_MCP_GUARD_MIN_INTEGRITY: ${{ steps.determine-automatic-lockdown.outputs.min_integrity }}
+ GITHUB_MCP_GUARD_REPOS: ${{ steps.determine-automatic-lockdown.outputs.repos }}
+ GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ run: |
+ set -eo pipefail
+ mkdir -p /tmp/gh-aw/mcp-config
+
+ # Export gateway environment variables for MCP config and gateway script
+ export MCP_GATEWAY_PORT="80"
+ export MCP_GATEWAY_DOMAIN="host.docker.internal"
+ MCP_GATEWAY_API_KEY=$(openssl rand -base64 45 | tr -d '/+=')
+ echo "::add-mask::${MCP_GATEWAY_API_KEY}"
+ export MCP_GATEWAY_API_KEY
+ export MCP_GATEWAY_PAYLOAD_DIR="/tmp/gh-aw/mcp-payloads"
+ mkdir -p "${MCP_GATEWAY_PAYLOAD_DIR}"
+ export MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD="524288"
+ export DEBUG="*"
+
+ export GH_AW_ENGINE="copilot"
+ export MCP_GATEWAY_DOCKER_COMMAND='docker run -i --rm --network host -v /var/run/docker.sock:/var/run/docker.sock -e MCP_GATEWAY_PORT -e MCP_GATEWAY_DOMAIN -e MCP_GATEWAY_API_KEY -e MCP_GATEWAY_PAYLOAD_DIR -e MCP_GATEWAY_PAYLOAD_SIZE_THRESHOLD -e DEBUG -e MCP_GATEWAY_LOG_DIR -e GH_AW_MCP_LOG_DIR -e GH_AW_SAFE_OUTPUTS -e GH_AW_SAFE_OUTPUTS_CONFIG_PATH -e GH_AW_SAFE_OUTPUTS_TOOLS_PATH -e GH_AW_ASSETS_BRANCH -e GH_AW_ASSETS_MAX_SIZE_KB -e GH_AW_ASSETS_ALLOWED_EXTS -e DEFAULT_BRANCH -e GITHUB_MCP_SERVER_TOKEN -e GITHUB_MCP_GUARD_MIN_INTEGRITY -e GITHUB_MCP_GUARD_REPOS -e GITHUB_REPOSITORY -e GITHUB_SERVER_URL -e GITHUB_SHA -e GITHUB_WORKSPACE -e GITHUB_TOKEN -e GITHUB_RUN_ID -e GITHUB_RUN_NUMBER -e GITHUB_RUN_ATTEMPT -e GITHUB_JOB -e GITHUB_ACTION -e GITHUB_EVENT_NAME -e GITHUB_EVENT_PATH -e GITHUB_ACTOR -e GITHUB_ACTOR_ID -e GITHUB_TRIGGERING_ACTOR -e GITHUB_WORKFLOW -e GITHUB_WORKFLOW_REF -e GITHUB_WORKFLOW_SHA -e GITHUB_REF -e GITHUB_REF_NAME -e GITHUB_REF_TYPE -e GITHUB_HEAD_REF -e GITHUB_BASE_REF -e GH_AW_SAFE_OUTPUTS_PORT -e GH_AW_SAFE_OUTPUTS_API_KEY -v /tmp/gh-aw/mcp-payloads:/tmp/gh-aw/mcp-payloads:rw -v /opt:/opt:ro -v /tmp:/tmp:rw -v '"${GITHUB_WORKSPACE}"':'"${GITHUB_WORKSPACE}"':rw ghcr.io/github/gh-aw-mcpg:v0.1.19'
+
+ mkdir -p /home/runner/.copilot
+ cat << GH_AW_MCP_CONFIG_EOF | bash ${RUNNER_TEMP}/gh-aw/actions/start_mcp_gateway.sh
+ {
+ "mcpServers": {
+ "github": {
+ "type": "stdio",
+ "container": "ghcr.io/github/github-mcp-server:v0.32.0",
+ "env": {
+ "GITHUB_HOST": "\${GITHUB_SERVER_URL}",
+ "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}",
+ "GITHUB_READ_ONLY": "1",
+ "GITHUB_TOOLSETS": "context,repos,issues,pull_requests"
+ },
+ "guard-policies": {
+ "allow-only": {
+ "min-integrity": "$GITHUB_MCP_GUARD_MIN_INTEGRITY",
+ "repos": "$GITHUB_MCP_GUARD_REPOS"
+ }
+ }
+ },
+ "safeoutputs": {
+ "type": "http",
+ "url": "http://host.docker.internal:$GH_AW_SAFE_OUTPUTS_PORT",
+ "headers": {
+ "Authorization": "\${GH_AW_SAFE_OUTPUTS_API_KEY}"
+ },
+ "guard-policies": {
+ "write-sink": {
+ "accept": [
+ "*"
+ ]
+ }
+ }
+ }
+ },
+ "gateway": {
+ "port": $MCP_GATEWAY_PORT,
+ "domain": "${MCP_GATEWAY_DOMAIN}",
+ "apiKey": "${MCP_GATEWAY_API_KEY}",
+ "payloadDir": "${MCP_GATEWAY_PAYLOAD_DIR}"
+ }
+ }
+ GH_AW_MCP_CONFIG_EOF
+ - name: Download activation artifact
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: activation
+ path: /tmp/gh-aw
+ - name: Clean git credentials
+ continue-on-error: true
+ run: bash ${RUNNER_TEMP}/gh-aw/actions/clean_git_credentials.sh
+ - name: Execute GitHub Copilot CLI
+ id: agentic_execution
+ # Copilot CLI tool arguments (sorted):
+ timeout-minutes: 180
+ run: |
+ set -o pipefail
+ touch /tmp/gh-aw/agent-step-summary.md
+ # shellcheck disable=SC1003
+ sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.24.5 --skip-pull --enable-api-proxy \
+ -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-all-tools --allow-all-paths --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/agent-stdio.log
+ env:
+ COPILOT_AGENT_RUNNER_TYPE: STANDALONE
+ COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+ COPILOT_MODEL: ${{ vars.GH_AW_MODEL_AGENT_COPILOT || '' }}
+ GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json
+ GH_AW_PHASE: agent
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }}
+ GH_AW_VERSION: v0.62.4
+ GITHUB_API_URL: ${{ github.api_url }}
+ GITHUB_AW: true
+ GITHUB_HEAD_REF: ${{ github.head_ref }}
+ GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ GITHUB_REF_NAME: ${{ github.ref_name }}
+ GITHUB_SERVER_URL: ${{ github.server_url }}
+ GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md
+ GITHUB_WORKSPACE: ${{ github.workspace }}
+ GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com
+ GIT_AUTHOR_NAME: github-actions[bot]
+ GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com
+ GIT_COMMITTER_NAME: github-actions[bot]
+ XDG_CONFIG_HOME: /home/runner
+ - name: Detect inference access error
+ id: detect-inference-error
+ if: always()
+ continue-on-error: true
+ run: bash ${RUNNER_TEMP}/gh-aw/actions/detect_inference_access_error.sh
+ - name: Configure Git credentials
+ env:
+ REPO_NAME: ${{ github.repository }}
+ SERVER_URL: ${{ github.server_url }}
+ run: |
+ git config --global user.email "github-actions[bot]@users.noreply.github.com"
+ git config --global user.name "github-actions[bot]"
+ git config --global am.keepcr true
+ # Re-authenticate git with GitHub token
+ SERVER_URL_STRIPPED="${SERVER_URL#https://}"
+ git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git"
+ echo "Git configured with standard GitHub Actions identity"
+ - name: Copy Copilot session state files to logs
+ if: always()
+ continue-on-error: true
+ run: |
+ # Copy Copilot session state files to logs folder for artifact collection
+ # This ensures they are in /tmp/gh-aw/ where secret redaction can scan them
+ SESSION_STATE_DIR="$HOME/.copilot/session-state"
+ LOGS_DIR="/tmp/gh-aw/sandbox/agent/logs"
+
+ if [ -d "$SESSION_STATE_DIR" ]; then
+ echo "Copying Copilot session state files from $SESSION_STATE_DIR to $LOGS_DIR"
+ mkdir -p "$LOGS_DIR"
+ cp -v "$SESSION_STATE_DIR"/*.jsonl "$LOGS_DIR/" 2>/dev/null || true
+ echo "Session state files copied successfully"
+ else
+ echo "No session-state directory found at $SESSION_STATE_DIR"
+ fi
+ - name: Stop MCP Gateway
+ if: always()
+ continue-on-error: true
+ env:
+ MCP_GATEWAY_PORT: ${{ steps.start-mcp-gateway.outputs.gateway-port }}
+ MCP_GATEWAY_API_KEY: ${{ steps.start-mcp-gateway.outputs.gateway-api-key }}
+ GATEWAY_PID: ${{ steps.start-mcp-gateway.outputs.gateway-pid }}
+ run: |
+ bash ${RUNNER_TEMP}/gh-aw/actions/stop_mcp_gateway.sh "$GATEWAY_PID"
+ - name: Redact secrets in logs
+ if: always()
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/redact_secrets.cjs');
+ await main();
+ env:
+ GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN'
+ SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+ SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }}
+ SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }}
+ SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ - name: Append agent step summary
+ if: always()
+ run: bash ${RUNNER_TEMP}/gh-aw/actions/append_agent_step_summary.sh
+ - name: Copy Safe Outputs
+ if: always()
+ run: |
+ mkdir -p /tmp/gh-aw
+ cp "$GH_AW_SAFE_OUTPUTS" /tmp/gh-aw/safeoutputs.jsonl 2>/dev/null || true
+ - name: Ingest agent output
+ id: collect_output
+ if: always()
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_SAFE_OUTPUTS: ${{ env.GH_AW_SAFE_OUTPUTS }}
+ GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
+ GITHUB_SERVER_URL: ${{ github.server_url }}
+ GITHUB_API_URL: ${{ github.api_url }}
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/collect_ndjson_output.cjs');
+ await main();
+ - name: Parse agent logs for step summary
+ if: always()
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_copilot_log.cjs');
+ await main();
+ - name: Parse MCP Gateway logs for step summary
+ if: always()
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_mcp_gateway_log.cjs');
+ await main();
+ - name: Print firewall logs
+ if: always()
+ continue-on-error: true
+ env:
+ AWF_LOGS_DIR: /tmp/gh-aw/sandbox/firewall/logs
+ run: |
+ # Fix permissions on firewall logs so they can be uploaded as artifacts
+ # AWF runs with sudo, creating files owned by root
+ sudo chmod -R a+r /tmp/gh-aw/sandbox/firewall/logs 2>/dev/null || true
+ # Only run awf logs summary if awf command exists (it may not be installed if workflow failed before install step)
+ if command -v awf &> /dev/null; then
+ awf logs summary | tee -a "$GITHUB_STEP_SUMMARY"
+ else
+ echo 'AWF binary not installed, skipping firewall log summary'
+ fi
+ - name: Upload agent artifacts
+ if: always()
+ continue-on-error: true
+ uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+ with:
+ name: agent
+ path: |
+ /tmp/gh-aw/aw-prompts/prompt.txt
+ /tmp/gh-aw/sandbox/agent/logs/
+ /tmp/gh-aw/redacted-urls.log
+ /tmp/gh-aw/mcp-logs/
+ /tmp/gh-aw/sandbox/firewall/logs/
+ /tmp/gh-aw/agent-stdio.log
+ /tmp/gh-aw/agent/
+ /tmp/gh-aw/safeoutputs.jsonl
+ /tmp/gh-aw/agent_output.json
+ if-no-files-found: ignore
+ # --- Threat Detection (inline) ---
+ - name: Check if detection needed
+ id: detection_guard
+ if: always()
+ env:
+ OUTPUT_TYPES: ${{ steps.collect_output.outputs.output_types }}
+ HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }}
+ run: |
+ if [[ -n "$OUTPUT_TYPES" || "$HAS_PATCH" == "true" ]]; then
+ echo "run_detection=true" >> "$GITHUB_OUTPUT"
+ echo "Detection will run: output_types=$OUTPUT_TYPES, has_patch=$HAS_PATCH"
+ else
+ echo "run_detection=false" >> "$GITHUB_OUTPUT"
+ echo "Detection skipped: no agent outputs or patches to analyze"
+ fi
+ - name: Clear MCP configuration for detection
+ if: always() && steps.detection_guard.outputs.run_detection == 'true'
+ run: |
+ rm -f /tmp/gh-aw/mcp-config/mcp-servers.json
+ rm -f /home/runner/.copilot/mcp-config.json
+ rm -f "$GITHUB_WORKSPACE/.gemini/settings.json"
+ - name: Prepare threat detection files
+ if: always() && steps.detection_guard.outputs.run_detection == 'true'
+ run: |
+ mkdir -p /tmp/gh-aw/threat-detection/aw-prompts
+ cp /tmp/gh-aw/aw-prompts/prompt.txt /tmp/gh-aw/threat-detection/aw-prompts/prompt.txt 2>/dev/null || true
+ cp /tmp/gh-aw/agent_output.json /tmp/gh-aw/threat-detection/agent_output.json 2>/dev/null || true
+ for f in /tmp/gh-aw/aw-*.patch; do
+ [ -f "$f" ] && cp "$f" /tmp/gh-aw/threat-detection/ 2>/dev/null || true
+ done
+ echo "Prepared threat detection files:"
+ ls -la /tmp/gh-aw/threat-detection/ 2>/dev/null || true
+ - name: Setup threat detection
+ if: always() && steps.detection_guard.outputs.run_detection == 'true'
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ WORKFLOW_NAME: "Ostrich Benchmark"
+ WORKFLOW_DESCRIPTION: "Run Z3 string solver benchmarks (seq vs nseq) and ZIPT on all Ostrich benchmarks from tests/ostrich.zip on the c3 branch and post results as a GitHub discussion"
+ HAS_PATCH: ${{ steps.collect_output.outputs.has_patch }}
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/setup_threat_detection.cjs');
+ await main();
+ - name: Ensure threat-detection directory and log
+ if: always() && steps.detection_guard.outputs.run_detection == 'true'
+ run: |
+ mkdir -p /tmp/gh-aw/threat-detection
+ touch /tmp/gh-aw/threat-detection/detection.log
+ - name: Execute GitHub Copilot CLI
+ if: always() && steps.detection_guard.outputs.run_detection == 'true'
+ id: detection_agentic_execution
+ # Copilot CLI tool arguments (sorted):
+ # --allow-tool shell(cat)
+ # --allow-tool shell(grep)
+ # --allow-tool shell(head)
+ # --allow-tool shell(jq)
+ # --allow-tool shell(ls)
+ # --allow-tool shell(tail)
+ # --allow-tool shell(wc)
+ timeout-minutes: 20
+ run: |
+ set -o pipefail
+ touch /tmp/gh-aw/agent-step-summary.md
+ # shellcheck disable=SC1003
+ sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount "${RUNNER_TEMP}/gh-aw:${RUNNER_TEMP}/gh-aw:ro" --mount "${RUNNER_TEMP}/gh-aw:/host${RUNNER_TEMP}/gh-aw:ro" --allow-domains "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,github.com,host.docker.internal,raw.githubusercontent.com,registry.npmjs.org,telemetry.enterprise.githubcopilot.com" --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --enable-host-access --image-tag 0.24.5 --skip-pull --enable-api-proxy \
+ -- /bin/bash -c '/usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-tool '\''shell(cat)'\'' --allow-tool '\''shell(grep)'\'' --allow-tool '\''shell(head)'\'' --allow-tool '\''shell(jq)'\'' --allow-tool '\''shell(ls)'\'' --allow-tool '\''shell(tail)'\'' --allow-tool '\''shell(wc)'\'' --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"' 2>&1 | tee -a /tmp/gh-aw/threat-detection/detection.log
+ env:
+ COPILOT_AGENT_RUNNER_TYPE: STANDALONE
+ COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
+ COPILOT_MODEL: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }}
+ GH_AW_PHASE: detection
+ GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt
+ GH_AW_VERSION: v0.62.4
+ GITHUB_API_URL: ${{ github.api_url }}
+ GITHUB_AW: true
+ GITHUB_HEAD_REF: ${{ github.head_ref }}
+ GITHUB_REF_NAME: ${{ github.ref_name }}
+ GITHUB_SERVER_URL: ${{ github.server_url }}
+ GITHUB_STEP_SUMMARY: /tmp/gh-aw/agent-step-summary.md
+ GITHUB_WORKSPACE: ${{ github.workspace }}
+ GIT_AUTHOR_EMAIL: github-actions[bot]@users.noreply.github.com
+ GIT_AUTHOR_NAME: github-actions[bot]
+ GIT_COMMITTER_EMAIL: github-actions[bot]@users.noreply.github.com
+ GIT_COMMITTER_NAME: github-actions[bot]
+ XDG_CONFIG_HOME: /home/runner
+ - name: Parse threat detection results
+ id: parse_detection_results
+ if: always() && steps.detection_guard.outputs.run_detection == 'true'
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ with:
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/parse_threat_detection_results.cjs');
+ await main();
+ - name: Upload threat detection log
+ if: always() && steps.detection_guard.outputs.run_detection == 'true'
+ uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+ with:
+ name: detection
+ path: /tmp/gh-aw/threat-detection/detection.log
+ if-no-files-found: ignore
+ - name: Set detection conclusion
+ id: detection_conclusion
+ if: always()
+ env:
+ RUN_DETECTION: ${{ steps.detection_guard.outputs.run_detection }}
+ DETECTION_SUCCESS: ${{ steps.parse_detection_results.outputs.success }}
+ run: |
+ if [[ "$RUN_DETECTION" != "true" ]]; then
+ echo "conclusion=skipped" >> "$GITHUB_OUTPUT"
+ echo "success=true" >> "$GITHUB_OUTPUT"
+ echo "Detection was not needed, marking as skipped"
+ elif [[ "$DETECTION_SUCCESS" == "true" ]]; then
+ echo "conclusion=success" >> "$GITHUB_OUTPUT"
+ echo "success=true" >> "$GITHUB_OUTPUT"
+ echo "Detection passed successfully"
+ else
+ echo "conclusion=failure" >> "$GITHUB_OUTPUT"
+ echo "success=false" >> "$GITHUB_OUTPUT"
+ echo "Detection found issues"
+ fi
+
+ conclusion:
+ needs:
+ - activation
+ - agent
+ - safe_outputs
+ if: (always()) && ((needs.agent.result != 'skipped') || (needs.activation.outputs.lockdown_check_failed == 'true'))
+ runs-on: ubuntu-slim
+ permissions:
+ contents: read
+ discussions: write
+ issues: write
+ concurrency:
+ group: "gh-aw-conclusion-ostrich-benchmark"
+ cancel-in-progress: false
+ outputs:
+ noop_message: ${{ steps.noop.outputs.noop_message }}
+ tools_reported: ${{ steps.missing_tool.outputs.tools_reported }}
+ total_count: ${{ steps.missing_tool.outputs.total_count }}
+ steps:
+ - name: Setup Scripts
+ uses: github/gh-aw-actions/setup@v0.62.4
+ with:
+ destination: ${{ runner.temp }}/gh-aw/actions
+ - name: Download agent output artifact
+ id: download-agent-output
+ continue-on-error: true
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: agent
+ path: /tmp/gh-aw/
+ - name: Setup agent output environment variable
+ if: steps.download-agent-output.outcome == 'success'
+ run: |
+ mkdir -p /tmp/gh-aw/
+ find "/tmp/gh-aw/" -type f -print
+ echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_ENV"
+ - name: Process No-Op Messages
+ id: noop
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_NOOP_MAX: "1"
+ GH_AW_WORKFLOW_NAME: "Ostrich Benchmark"
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/noop.cjs');
+ await main();
+ - name: Record Missing Tool
+ id: missing_tool
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_MISSING_TOOL_CREATE_ISSUE: "true"
+ GH_AW_MISSING_TOOL_TITLE_PREFIX: "[missing tool]"
+ GH_AW_WORKFLOW_NAME: "Ostrich Benchmark"
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/missing_tool.cjs');
+ await main();
+ - name: Handle Agent Failure
+ id: handle_agent_failure
+ if: always()
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_WORKFLOW_NAME: "Ostrich Benchmark"
+ GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }}
+ GH_AW_WORKFLOW_ID: "ostrich-benchmark"
+ GH_AW_SECRET_VERIFICATION_RESULT: ${{ needs.activation.outputs.secret_verification_result }}
+ GH_AW_CHECKOUT_PR_SUCCESS: ${{ needs.agent.outputs.checkout_pr_success }}
+ GH_AW_INFERENCE_ACCESS_ERROR: ${{ needs.agent.outputs.inference_access_error }}
+ GH_AW_CREATE_DISCUSSION_ERRORS: ${{ needs.safe_outputs.outputs.create_discussion_errors }}
+ GH_AW_CREATE_DISCUSSION_ERROR_COUNT: ${{ needs.safe_outputs.outputs.create_discussion_error_count }}
+ GH_AW_LOCKDOWN_CHECK_FAILED: ${{ needs.activation.outputs.lockdown_check_failed }}
+ GH_AW_GROUP_REPORTS: "false"
+ GH_AW_FAILURE_REPORT_AS_ISSUE: "true"
+ GH_AW_TIMEOUT_MINUTES: "180"
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_agent_failure.cjs');
+ await main();
+ - name: Handle No-Op Message
+ id: handle_noop_message
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_WORKFLOW_NAME: "Ostrich Benchmark"
+ GH_AW_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
+ GH_AW_AGENT_CONCLUSION: ${{ needs.agent.result }}
+ GH_AW_NOOP_MESSAGE: ${{ steps.noop.outputs.noop_message }}
+ GH_AW_NOOP_REPORT_AS_ISSUE: "false"
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/handle_noop_message.cjs');
+ await main();
+
+ safe_outputs:
+ needs: agent
+ if: ((!cancelled()) && (needs.agent.result != 'skipped')) && (needs.agent.outputs.detection_success == 'true')
+ runs-on: ubuntu-slim
+ permissions:
+ contents: read
+ discussions: write
+ issues: write
+ timeout-minutes: 15
+ env:
+ GH_AW_CALLER_WORKFLOW_ID: "${{ github.repository }}/ostrich-benchmark"
+ GH_AW_ENGINE_ID: "copilot"
+ GH_AW_WORKFLOW_ID: "ostrich-benchmark"
+ GH_AW_WORKFLOW_NAME: "Ostrich Benchmark"
+ outputs:
+ code_push_failure_count: ${{ steps.process_safe_outputs.outputs.code_push_failure_count }}
+ code_push_failure_errors: ${{ steps.process_safe_outputs.outputs.code_push_failure_errors }}
+ create_discussion_error_count: ${{ steps.process_safe_outputs.outputs.create_discussion_error_count }}
+ create_discussion_errors: ${{ steps.process_safe_outputs.outputs.create_discussion_errors }}
+ process_safe_outputs_processed_count: ${{ steps.process_safe_outputs.outputs.processed_count }}
+ process_safe_outputs_temporary_id_map: ${{ steps.process_safe_outputs.outputs.temporary_id_map }}
+ steps:
+ - name: Setup Scripts
+ uses: github/gh-aw-actions/setup@v0.62.4
+ with:
+ destination: ${{ runner.temp }}/gh-aw/actions
+ - name: Download agent output artifact
+ id: download-agent-output
+ continue-on-error: true
+ uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1
+ with:
+ name: agent
+ path: /tmp/gh-aw/
+ - name: Setup agent output environment variable
+ if: steps.download-agent-output.outcome == 'success'
+ run: |
+ mkdir -p /tmp/gh-aw/
+ find "/tmp/gh-aw/" -type f -print
+ echo "GH_AW_AGENT_OUTPUT=/tmp/gh-aw/agent_output.json" >> "$GITHUB_ENV"
+ - name: Configure GH_HOST for enterprise compatibility
+ shell: bash
+ run: |
+ # Derive GH_HOST from GITHUB_SERVER_URL so the gh CLI targets the correct
+ # GitHub instance (GHES/GHEC). On github.com this is a harmless no-op.
+ GH_HOST="${GITHUB_SERVER_URL#https://}"
+ GH_HOST="${GH_HOST#http://}"
+ echo "GH_HOST=${GH_HOST}" >> "$GITHUB_ENV"
+ - name: Process Safe Outputs
+ id: process_safe_outputs
+ uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8
+ env:
+ GH_AW_AGENT_OUTPUT: ${{ env.GH_AW_AGENT_OUTPUT }}
+ GH_AW_ALLOWED_DOMAINS: "api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,github.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.npmjs.org,s.symcb.com,s.symcd.com,security.ubuntu.com,telemetry.enterprise.githubcopilot.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.googleapis.com"
+ GITHUB_SERVER_URL: ${{ github.server_url }}
+ GITHUB_API_URL: ${{ github.api_url }}
+ GH_AW_SAFE_OUTPUTS_HANDLER_CONFIG: "{\"create_discussion\":{\"category\":\"agentic workflows\",\"close_older_discussions\":true,\"expires\":168,\"fallback_to_issue\":true,\"max\":1,\"title_prefix\":\"[Ostrich Benchmark] \"},\"missing_data\":{},\"missing_tool\":{},\"noop\":{\"max\":1,\"report-as-issue\":\"false\"}}"
+ with:
+ github-token: ${{ secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }}
+ script: |
+ const { setupGlobals } = require('${{ runner.temp }}/gh-aw/actions/setup_globals.cjs');
+ setupGlobals(core, github, context, exec, io);
+ const { main } = require('${{ runner.temp }}/gh-aw/actions/safe_output_handler_manager.cjs');
+ await main();
+ - name: Upload safe output items
+ if: always()
+ uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
+ with:
+ name: safe-output-items
+ path: /tmp/gh-aw/safe-output-items.jsonl
+ if-no-files-found: ignore
+
diff --git a/.github/workflows/ostrich-benchmark.md b/.github/workflows/ostrich-benchmark.md
new file mode 100644
index 000000000..140e899f1
--- /dev/null
+++ b/.github/workflows/ostrich-benchmark.md
@@ -0,0 +1,41 @@
+---
+description: Run Z3 string solver benchmarks (seq vs nseq) and ZIPT on all Ostrich benchmarks from tests/ostrich.zip on the c3 branch and post results as a GitHub discussion
+
+on:
+ schedule:
+ - cron: "0 6 * * *"
+ workflow_dispatch:
+
+permissions: read-all
+
+network: defaults
+
+tools:
+ bash: true
+ github:
+ toolsets: [default]
+
+safe-outputs:
+ create-discussion:
+ title-prefix: "[Ostrich Benchmark] "
+ category: "Agentic Workflows"
+ close-older-discussions: true
+ missing-tool:
+ create-issue: true
+ noop:
+ report-as-issue: false
+
+timeout-minutes: 180
+
+steps:
+ - name: Checkout c3 branch
+ uses: actions/checkout@v5
+ with:
+ ref: c3
+ fetch-depth: 1
+ persist-credentials: false
+
+---
+
+
+@./agentics/ostrich-benchmark.md