feat(release): add staging deploy workflow and smoke checks
Implements roadmap issue #29 foundation with a manual staging workflow, environment profile matrix, smoke automation script, and rollback/runbook documentation updates. Also adds docs/phase5-6-implementation-plan.md to capture the dependency-ordered plan for phases 5 and 6 from roadmap #35.
This commit is contained in:
189
.github/workflows/staging-deploy.yml
vendored
Normal file
189
.github/workflows/staging-deploy.yml
vendored
Normal file
@@ -0,0 +1,189 @@
|
||||
# Manually triggered staging deployment with post-deploy smoke verification.
name: Staging Deploy

# Manual trigger only; every input is optional and falls back to its default.
on:
  workflow_dispatch:
    inputs:
      ref:
        description: Git ref (branch, tag, or SHA) to deploy
        required: false
        default: main
      api_host_port:
        description: Host port for API container mapping
        required: false
        default: '3001'
      openrouter_api_key:
        # Both matrix profiles (mock and live) run on every dispatch, so an
        # empty key makes the live job fail its credential check; only the
        # mock job ignores this value.
        # NOTE(review): workflow_dispatch inputs are not handled as secrets by
        # GitHub; consider sourcing this from a repository/environment secret.
        description: OpenRouter key for the live profile (live job fails if empty; mock profile ignores it)
        required: false
        default: ''

# The workflow only needs to read repository contents.
permissions:
  contents: read

# All runs share a single group; an in-progress run is never cancelled by a
# newer dispatch.
concurrency:
  group: staging-deploy
  cancel-in-progress: false
|
||||
|
||||
jobs:
  # Builds the compose stack on the runner, waits for the API to become
  # healthy, runs the smoke script, archives the evidence, and tears the stack
  # down again. One job instance runs per matrix profile.
  deploy-staging:
    name: Deploy staging (${{ matrix.profile }})
    runs-on: ubuntu-latest
    strategy:
      # Let the other profile finish even when one profile fails.
      fail-fast: false
      matrix:
        include:
          - profile: mock
            llm_mock: 'true'
          - profile: live
            llm_mock: 'false'
    env:
      API_HOST_PORT: ${{ github.event.inputs.api_host_port }}
      DEPLOY_PROFILE: ${{ matrix.profile }}
      LLM_MOCK: ${{ matrix.llm_mock }}
      REQUESTED_REF: ${{ github.event.inputs.ref }}
      # OPENROUTER_API_KEY_INPUT is intentionally not declared here: a plain
      # job-level env value can be printed unmasked in step logs. The first
      # step masks the key and then exports it through GITHUB_ENV instead.

    steps:
      - name: Mask and export OpenRouter key
        run: |
          # Read the input from the event payload so the raw value never
          # passes through an env/with block before the mask is registered.
          openrouter_key="$(jq -r '.inputs.openrouter_api_key // ""' "$GITHUB_EVENT_PATH")"
          if [[ -n "${openrouter_key}" ]]; then
            echo "::add-mask::${openrouter_key}"
          fi
          echo "OPENROUTER_API_KEY_INPUT=${openrouter_key}" >> "$GITHUB_ENV"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.ref }}

      - name: Emit deploy start status
        run: |
          # GITHUB_SHA/GITHUB_REF describe the ref the workflow was dispatched
          # from, not the `ref` input that was just checked out, so resolve the
          # deployed revision from the working tree.
          deployed_sha="$(git rev-parse HEAD)"
          deployed_ref="${REQUESTED_REF:-${GITHUB_REF}}"
          echo "::notice title=deploy_start::profile=${DEPLOY_PROFILE} sha=${deployed_sha}"
          {
            echo "DEPLOYED_SHA=${deployed_sha}"
            echo "DEPLOYED_REF=${deployed_ref}"
            echo "deploy_started_at=$(date -u +%FT%TZ)"
          } >> "$GITHUB_ENV"

      - name: Validate staging credential contract
        run: |
          # The live profile cannot run without a key; the mock profile can.
          if [[ "${DEPLOY_PROFILE}" == "live" && -z "${OPENROUTER_API_KEY_INPUT}" ]]; then
            echo "openrouter_api_key input is required for live staging profile."
            exit 1
          fi

      - name: Set up Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22'
          cache: npm

      - name: Install dependencies
        run: npm ci

      - name: Prepare staging environment file
        run: |
          # Only the live profile receives the key; mock gets an empty value.
          openrouter_key=""
          if [[ "${DEPLOY_PROFILE}" == "live" ]]; then
            openrouter_key="${OPENROUTER_API_KEY_INPUT}"
          fi

          # Start from the example file and append the staging overrides.
          cp .env.example .env
          {
            echo "API_HOST_PORT=${API_HOST_PORT}"
            echo "LLM_MOCK=${LLM_MOCK}"
            echo "OPENROUTER_API_KEY=${openrouter_key}"
            echo "LOG_LEVEL=info"
            echo "TTS_PROVIDER=noop"
            echo "BROADCAST_PROVIDER=noop"
          } >> .env

      - name: Deploy compose stack
        run: docker compose up -d --build

      - name: Wait for API health
        run: |
          # Poll up to 30 times, 2 seconds apart, keeping the first successful
          # response as an artifact.
          rm -f smoke-health.json
          for _attempt in {1..30}; do
            if curl -fsS "http://localhost:${API_HOST_PORT}/api/health" > smoke-health.json; then
              cat smoke-health.json
              exit 0
            fi
            sleep 2
          done
          echo "API health check did not pass within timeout."
          exit 1

      - name: Run staging smoke checks
        run: |
          chmod +x scripts/staging-smoke.sh
          API_BASE_URL="http://localhost:${API_HOST_PORT}" \
          SMOKE_OUTPUT_PATH="smoke-results.json" \
          scripts/staging-smoke.sh

      - name: Capture deploy metadata
        if: ${{ success() }}
        run: |
          deploy_finished_at="$(date -u +%FT%TZ)"
          cat <<JSON > deploy-metadata.json
          {
            "workflow": "${GITHUB_WORKFLOW}",
            "runId": "${GITHUB_RUN_ID}",
            "runAttempt": "${GITHUB_RUN_ATTEMPT}",
            "profile": "${DEPLOY_PROFILE}",
            "ref": "${DEPLOYED_REF:-${GITHUB_REF}}",
            "sha": "${DEPLOYED_SHA:-${GITHUB_SHA}}",
            "deployedAt": "${deploy_started_at}",
            "finishedAt": "${deploy_finished_at}",
            "status": "success"
          }
          JSON
          cat deploy-metadata.json

      - name: Ensure deploy metadata exists on failure
        if: ${{ failure() }}
        run: |
          # The DEPLOYED_* values are missing when the job failed before the
          # start-status step ran, hence the fallbacks to the GitHub defaults.
          if [[ ! -f deploy-metadata.json ]]; then
            deploy_finished_at="$(date -u +%FT%TZ)"
            cat <<JSON > deploy-metadata.json
          {
            "workflow": "${GITHUB_WORKFLOW}",
            "runId": "${GITHUB_RUN_ID}",
            "runAttempt": "${GITHUB_RUN_ATTEMPT}",
            "profile": "${DEPLOY_PROFILE}",
            "ref": "${DEPLOYED_REF:-${GITHUB_REF}}",
            "sha": "${DEPLOYED_SHA:-${GITHUB_SHA}}",
            "deployedAt": "${deploy_started_at}",
            "finishedAt": "${deploy_finished_at}",
            "status": "failure"
          }
          JSON
          fi

      - name: Emit deploy end status (success)
        if: ${{ success() }}
        run: |
          echo "::notice title=deploy_end::profile=${DEPLOY_PROFILE} status=success sha=${DEPLOYED_SHA:-${GITHUB_SHA}}"

      - name: Emit deploy end status (failure)
        if: ${{ failure() }}
        run: |
          echo "::error title=deploy_end::profile=${DEPLOY_PROFILE} status=failure sha=${DEPLOYED_SHA:-${GITHUB_SHA}}"

      # Best-effort log capture: never let it change the job result.
      - name: Capture compose logs
        if: ${{ always() }}
        run: docker compose logs --no-color > docker-compose.log || true

      # Best-effort teardown; -v also removes the stack's volumes.
      - name: Tear down compose stack
        if: ${{ always() }}
        run: docker compose down -v || true

      - name: Upload deployment artifacts
        if: ${{ always() }}
        uses: actions/upload-artifact@v4
        with:
          name: staging-${{ matrix.profile }}-${{ github.run_id }}
          path: |
            smoke-health.json
            smoke-results.json
            deploy-metadata.json
            docker-compose.log

      - name: Write job summary
        if: ${{ always() }}
        run: |
          echo "### Staging deploy (${DEPLOY_PROFILE})" >> "$GITHUB_STEP_SUMMARY"
          echo "- Ref: \`${DEPLOYED_REF:-${GITHUB_REF}}\`" >> "$GITHUB_STEP_SUMMARY"
          echo "- SHA: \`${DEPLOYED_SHA:-${GITHUB_SHA}}\`" >> "$GITHUB_STEP_SUMMARY"
          # The smoke script writes its result file only after every check
          # passed, so the file's presence is used as the pass signal.
          if [[ -f smoke-results.json ]]; then
            echo "- Smoke checks: ✅ passed" >> "$GITHUB_STEP_SUMMARY"
          else
            echo "- Smoke checks: ❌ failed" >> "$GITHUB_STEP_SUMMARY"
          fi
|
||||
5
Makefile
5
Makefile
@@ -10,7 +10,7 @@ SHELL := /usr/bin/env bash
|
||||
NPM ?= npm
|
||||
DOCKER_COMPOSE ?= docker compose
|
||||
|
||||
.PHONY: help install dev dev-dashboard lint build build-dashboard test test-spec ci start migrate migrate-dist docker-up docker-down docker-restart clean status
|
||||
.PHONY: help install dev dev-dashboard lint build build-dashboard test test-spec ci smoke-staging start migrate migrate-dist docker-up docker-down docker-restart clean status
|
||||
|
||||
help: ## Show available commands
|
||||
@awk 'BEGIN {FS = ":.*##"; printf "\nImprov Court Make targets:\n\n"} /^[a-zA-Z0-9_.-]+:.*##/ { printf " %-18s %s\n", $$1, $$2 } END { printf "\n" }' $(MAKEFILE_LIST)
|
||||
@@ -44,6 +44,9 @@ ci: ## Run local CI parity checks (lint + build + test)
|
||||
$(MAKE) build
|
||||
$(MAKE) test
|
||||
|
||||
smoke-staging: ## Run staging smoke checks against a running API instance
|
||||
$(NPM) run smoke:staging
|
||||
|
||||
start: ## Run compiled app from dist/
|
||||
$(NPM) run start
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ It does **not** depend on `subcult-corp` at runtime.
|
||||
| [docs/operator-runbook.md](docs/operator-runbook.md) | Setup, configuration, deployment, and monitoring |
|
||||
| [docs/moderation-playbook.md](docs/moderation-playbook.md) | Content moderation system and incident procedures |
|
||||
| [docs/event-taxonomy.md](docs/event-taxonomy.md) | Canonical event taxonomy, payload schemas, and logging guidelines |
|
||||
| [docs/phase5-6-implementation-plan.md](docs/phase5-6-implementation-plan.md) | Dependency-ordered implementation plan for roadmap phases 5 and 6 |
|
||||
|
||||
## What is implemented
|
||||
|
||||
@@ -125,7 +126,9 @@ If you need host access to Postgres, add a `ports` mapping to the `db` service i
|
||||
|
||||
## Operations runbook (staging)
|
||||
|
||||
See `docs/ops-runbook.md` for the repeatable staging deploy path, core SLI dashboard definitions, alert thresholds, and incident drill/recovery steps.
|
||||
See `docs/ops-runbook.md` for the repeatable staging deploy path, GitHub Actions
|
||||
workflow (`Staging Deploy`), core SLI dashboard definitions, alert thresholds,
|
||||
and incident drill/recovery steps.
|
||||
|
||||
## API
|
||||
|
||||
|
||||
@@ -16,13 +16,42 @@ Run from the project root directory:
|
||||
4. Optional migration-only verification:
|
||||
- `docker compose exec api npm run migrate:dist`
|
||||
|
||||
Rollback (staging):
|
||||
### 1.1 GitHub Actions staging workflow
|
||||
|
||||
Use workflow **`Staging Deploy`** (`.github/workflows/staging-deploy.yml`) to
|
||||
run repeatable staging deploy + smoke verification with an environment matrix:
|
||||
|
||||
- `mock` profile (`LLM_MOCK=true`, no OpenRouter key required)
|
||||
- `live` profile (`LLM_MOCK=false`, requires `openrouter_api_key` workflow input)
|
||||
|
||||
Workflow smoke checks:
|
||||
|
||||
1. `GET /api/health`
|
||||
2. `POST /api/court/sessions`
|
||||
3. `GET /api/court/sessions/:id`
|
||||
|
||||
Artifacts captured per run:
|
||||
|
||||
- `smoke-health.json`
|
||||
- `smoke-results.json`
|
||||
- `deploy-metadata.json`
|
||||
- `docker-compose.log`
|
||||
|
||||
### 1.2 Rollback (staging)
|
||||
|
||||
1. Stop current stack: `npm run docker:down`
|
||||
2. Checkout previous known-good commit/tag.
|
||||
3. Start previous version: `npm run docker:up`
|
||||
4. Re-run the health-check `curl` command above.
|
||||
|
||||
Rollback trial checklist (verify once per release candidate):
|
||||
|
||||
- [ ] Deploy a known good revision via `Staging Deploy`.
|
||||
- [ ] Deploy a deliberately broken revision (or force a smoke-check failure through the workflow inputs).
|
||||
- [ ] Roll back to previous good revision.
|
||||
- [ ] Confirm smoke checks pass and artifact logs show healthy recovery.
|
||||
- [ ] Record run ID, operator, and timestamp in incident notes.
|
||||
|
||||
## 2) Core SLI dashboard definitions
|
||||
|
||||
Use these as dashboard panels (SQL via Postgres + synthetic HTTP check):
|
||||
|
||||
178
docs/phase5-6-implementation-plan.md
Normal file
178
docs/phase5-6-implementation-plan.md
Normal file
@@ -0,0 +1,178 @@
|
||||
# Phase 5 & 6 Implementation Plan (Roadmap #35)
|
||||
|
||||
Date: 2026-02-27
|
||||
|
||||
## Objective
|
||||
|
||||
Deliver all open roadmap work in **Phase 5 (Release & Operations)** and **Phase 6 (Post-launch Polish)** in dependency order, with production-grade quality gates and operational readiness.
|
||||
|
||||
## Dependency order
|
||||
|
||||
```text
|
||||
#29 -> #30 -> #31 -> (#33 + #34) -> #32
|
||||
```
|
||||
|
||||
- `#29` blocks `#30`, `#31`, and `#32`
|
||||
- `#30` blocks `#31` and `#33`
|
||||
- `#31` blocks `#34` and `#32`
|
||||
- `#33` blocks `#32`
|
||||
|
||||
## Phase 5 — Release & operations
|
||||
|
||||
### #29 Release: Staging deployment workflow + env matrix + smoke checks
|
||||
|
||||
#### Deliverables
|
||||
|
||||
- GitHub Actions workflow for staging deployment (manual trigger).
|
||||
- Environment matrix (`mock`, `live`) with credential contract enforcement.
|
||||
- Post-deploy smoke checks for:
|
||||
- `GET /api/health`
|
||||
- `POST /api/court/sessions` (bootstrap path)
- `GET /api/court/sessions/:id` (lookup of the bootstrapped session)
|
||||
- Deployment metadata capture (start/end status + revision context).
|
||||
- Rollback guidance + trial checklist in ops docs.
|
||||
|
||||
#### File targets
|
||||
|
||||
- `.github/workflows/staging-deploy.yml`
|
||||
- `scripts/staging-smoke.sh`
|
||||
- `docs/ops-runbook.md`
|
||||
- `README.md`
|
||||
- `package.json`
|
||||
|
||||
#### Verification
|
||||
|
||||
- Workflow logs contain smoke check output.
|
||||
- Artifact contains deploy metadata and compose logs.
|
||||
- Rollback trial checklist completed in docs.
|
||||
|
||||
---
|
||||
|
||||
### #30 Runtime dashboards and alerts for session health/moderation
|
||||
|
||||
#### Deliverables
|
||||
|
||||
- Dashboard definitions for core SLIs/SLO proxies:
|
||||
- session completion
|
||||
- vote latency
|
||||
- moderation actions
|
||||
- stream/API health
|
||||
- Alert threshold configurations with runbook links.
|
||||
- Synthetic alert validation instructions/tests.
|
||||
|
||||
#### File targets
|
||||
|
||||
- `ops/dashboards/*`
|
||||
- `ops/alerts/*`
|
||||
- `docs/ops-runbook.md`
|
||||
- `README.md`
|
||||
|
||||
#### Verification
|
||||
|
||||
- Simulated failure conditions trigger expected alert payloads.
|
||||
- Dashboard queries align with event taxonomy (`docs/event-taxonomy.md`).
|
||||
|
||||
---
|
||||
|
||||
### #31 Operator runbook: live controls + mistrial + incident response
|
||||
|
||||
#### Deliverables
|
||||
|
||||
- Expanded runbook covering startup, live operation, and shutdown.
|
||||
- Incident section with at least 5 common failure scenarios.
|
||||
- Mistrial fallback, emergency recap, and witness-swap procedures.
|
||||
- Dashboard/alert panel references embedded into procedures.
|
||||
|
||||
#### File targets
|
||||
|
||||
- `docs/operator-runbook.md`
|
||||
- `README.md`
|
||||
|
||||
#### Verification
|
||||
|
||||
- Tabletop drill notes captured and missing steps patched.
|
||||
|
||||
## Phase 6 — Post-launch polish
|
||||
|
||||
### #33 Token budget and summary cadence controls
|
||||
|
||||
#### Deliverables
|
||||
|
||||
- Runtime knobs for per-role token caps and recap cadence controls.
|
||||
- Safe defaults balancing quality and cost.
|
||||
- Session-level cost-estimate telemetry.
|
||||
- New telemetry events:
|
||||
- `token_budget_applied`
|
||||
- session token estimate event
|
||||
|
||||
#### File targets
|
||||
|
||||
- `src/court/orchestrator.ts`
|
||||
- `src/court/witness-caps.ts` (or dedicated budget module)
|
||||
- `src/types.ts`
|
||||
- `src/events.ts`
|
||||
- `dashboard/src/components/Analytics.tsx`
|
||||
- `docs/api.md`
|
||||
- `docs/event-taxonomy.md`
|
||||
|
||||
#### Verification
|
||||
|
||||
- Unit tests for budget enforcement.
|
||||
- Integration test: phase completion remains intact under stricter caps.
|
||||
|
||||
---
|
||||
|
||||
### #34 Onboarding/catch-up panel for new viewers
|
||||
|
||||
#### Deliverables
|
||||
|
||||
- Compact viewer-facing catch-up panel:
|
||||
- “case so far” summary
|
||||
- current phase/jury step status
|
||||
- Refresh on phase transitions.
|
||||
- Toggle without layout breakage.
|
||||
- Aggregate-only telemetry for toggle visibility usage.
|
||||
|
||||
#### File targets
|
||||
|
||||
- `public/index.html`
|
||||
- `public/app.js`
|
||||
- `docs/operator-runbook.md`
|
||||
|
||||
#### Verification
|
||||
|
||||
- Component/behavior tests for panel rendering and toggle.
|
||||
- Integration test for phase-change refresh behavior.
|
||||
|
||||
---
|
||||
|
||||
### #32 Post-launch retrospective template + technical debt queue
|
||||
|
||||
#### Deliverables
|
||||
|
||||
- Reusable retrospective template.
|
||||
- Structured debt intake queue format with triage rubric (P0-P3 + effort).
|
||||
- First filled-in example, drafted from a mock incident.
|
||||
|
||||
#### File targets
|
||||
|
||||
- `docs/templates/retrospective-template.md`
|
||||
- `docs/templates/technical-debt-queue.md`
|
||||
- `README.md`
|
||||
|
||||
#### Verification
|
||||
|
||||
- Trial retrospective run confirms template usability.
|
||||
|
||||
## PR slicing strategy
|
||||
|
||||
1. PR-A: `#29` staging workflow + smoke + rollback docs
|
||||
2. PR-B: `#30` dashboards + alerts + alert simulation checks
|
||||
3. PR-C: `#31` operator runbook expansion + drill checklist
|
||||
4. PR-D: `#33` token budget/cadence controls + telemetry + tests
|
||||
5. PR-E: `#34` onboarding/catch-up panel + telemetry + tests
|
||||
6. PR-F: `#32` retrospective/debt templates + example draft
|
||||
|
||||
## Current execution status
|
||||
|
||||
- Plan documented ✅
|
||||
- Implementation started with `#29` ✅
|
||||
@@ -10,6 +10,7 @@
|
||||
"build": "tsc -p tsconfig.json && vite build",
|
||||
"build:dashboard": "vite build",
|
||||
"test": "node --import tsx --test src/*.test.ts src/**/*.test.ts",
|
||||
"smoke:staging": "bash ./scripts/staging-smoke.sh",
|
||||
"start": "node dist/server.js",
|
||||
"migrate": "tsx src/scripts/migrate.ts",
|
||||
"migrate:dist": "node dist/scripts/migrate.js",
|
||||
|
||||
46
scripts/staging-smoke.sh
Normal file
46
scripts/staging-smoke.sh
Normal file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
# Staging smoke checks against a running API instance.
#
# Checks, in order:
#   1. GET  /api/health
#   2. POST /api/court/sessions        (bootstrap a session)
#   3. GET  /api/court/sessions/:id    (look the new session up again)
#
# Environment (all optional):
#   API_BASE_URL       Base URL of the API; defaults to
#                      http://localhost:${API_HOST_PORT:-3001}.
#   SMOKE_TOPIC        Topic sent in the bootstrap request.
#   SMOKE_CASE_TYPE    Case type sent in the bootstrap request (default: criminal).
#   SMOKE_OUTPUT_PATH  Where the JSON result is written (default: smoke-results.json).
#
# Exits non-zero on the first failing check. The result file is written only
# after every check has passed.
set -euo pipefail

API_BASE_URL="${API_BASE_URL:-http://localhost:${API_HOST_PORT:-3001}}"
SMOKE_TOPIC="${SMOKE_TOPIC:-Did the defendant weaponize office glitter in the break room?}"
SMOKE_CASE_TYPE="${SMOKE_CASE_TYPE:-criminal}"
SMOKE_OUTPUT_PATH="${SMOKE_OUTPUT_PATH:-smoke-results.json}"

# Drop any result left over from an earlier run so that the file's presence
# always means THIS run passed.
rm -f "${SMOKE_OUTPUT_PATH}"

echo "[smoke] starting checks for ${API_BASE_URL}"

# Check 1: health endpoint (curl -f turns HTTP errors into a non-zero exit).
health_response="$(curl -fsS "${API_BASE_URL}/api/health")"
echo "[smoke] /api/health response: ${health_response}"

# Build the request body with node so the topic is JSON-escaped correctly.
request_body="$(
  SMOKE_TOPIC="${SMOKE_TOPIC}" SMOKE_CASE_TYPE="${SMOKE_CASE_TYPE}" node -e "const topic = process.env.SMOKE_TOPIC; const caseType = process.env.SMOKE_CASE_TYPE || 'criminal'; process.stdout.write(JSON.stringify({ topic, caseType }));"
)"

# Check 2: bootstrap a session.
session_response="$(
  curl -fsS -X POST "${API_BASE_URL}/api/court/sessions" \
    -H 'Content-Type: application/json' \
    -d "${request_body}"
)"
echo "[smoke] POST /api/court/sessions response: ${session_response}"

# Extract session.id from the response. The extraction runs as an `if`
# condition because, under `set -e`, a failing command substitution in a bare
# assignment would abort the script before the error message could be printed.
# String(id) keeps a non-string id from crashing process.stdout.write.
if ! session_id="$(
  printf '%s' "${session_response}" | node -e "let input=''; process.stdin.on('data', chunk => input += chunk); process.stdin.on('end', () => { try { const parsed = JSON.parse(input); const id = parsed?.session?.id; if (!id) process.exit(1); process.stdout.write(String(id)); } catch { process.exit(1); } });"
)" || [[ -z "${session_id}" ]]; then
  echo "[smoke] failed to parse session id from bootstrap response" >&2
  exit 1
fi

# Check 3: the new session must be retrievable by id.
session_lookup="$(curl -fsS "${API_BASE_URL}/api/court/sessions/${session_id}")"
echo "[smoke] GET /api/court/sessions/${session_id} response: ${session_lookup}"

# Persist the result; values are passed through the environment so node does
# the JSON encoding.
SMOKE_OUTPUT_PATH="${SMOKE_OUTPUT_PATH}" \
SMOKE_TOPIC="${SMOKE_TOPIC}" \
SMOKE_CASE_TYPE="${SMOKE_CASE_TYPE}" \
API_BASE_URL="${API_BASE_URL}" \
SESSION_ID="${session_id}" \
HEALTH_RESPONSE="${health_response}" \
node -e "const fs = require('node:fs'); const outputPath = process.env.SMOKE_OUTPUT_PATH; const payload = { checkedAt: new Date().toISOString(), apiBaseUrl: process.env.API_BASE_URL, topic: process.env.SMOKE_TOPIC, caseType: process.env.SMOKE_CASE_TYPE, sessionId: process.env.SESSION_ID, health: process.env.HEALTH_RESPONSE }; fs.writeFileSync(outputPath, JSON.stringify(payload, null, 2));"

echo "[smoke] checks passed (session=${session_id})"
echo "[smoke] result artifact written to ${SMOKE_OUTPUT_PATH}"
|
||||
Reference in New Issue
Block a user