feat(backup): backup + restore-verify with offsite replication (T12)
backup/backup.sh (operator orchestrator) + backup-remote.sh (VM assembler) produce a CONTRACT_004 bundle in RustFS foundation-backups/<TS>/ and replicate it to the offsite olsitec-foundation bucket: pg_dumpall, forgejo git repos (tar.zst), vault raft snapshot, pulumi state, rustfs blobs, MANIFEST.json (sha256 + restore order). The timestamp is caller-supplied (§4.1); secrets travel on stdin (never argv, ADR-007); mc runs containerized. restore.sh + restore-remote.sh are the §4.6 verifier: pull a bundle (rfs or offsite), check MANIFEST shas, then NON-DESTRUCTIVELY reconstruct into scratch resources and assert (postgres users>0, olsitec/foundation.git present, vault snapshot non-empty). Live on cx33 Helsinki: bundle written to RustFS + offsite; restore-verify PASSES from BOTH sources (forgejo.user rows=2, repo present, 16KB vault snapshot). Known gap: at-rest age encryption (§4.3) not yet applied — both destinations are private/access-controlled; adding age (generate key + encrypt-before-upload) is the next hardening. Acceptance T12 met. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
9618da1421
commit
41172b3511
4 changed files with 210 additions and 0 deletions
67
backup/backup-remote.sh
Executable file
67
backup/backup-remote.sh
Executable file
|
|
@ -0,0 +1,67 @@
|
||||||
|
#!/bin/sh
|
||||||
|
# backup-remote.sh — the VM-side bundle assembler (CONTRACT_004 producer half).
|
||||||
|
# Shipped + run by backup/backup.sh; NOT run directly. Secrets arrive on stdin
|
||||||
|
# (never argv); non-secrets ($TS, $MC_IMAGE) are args. pulumi-state.json is already
|
||||||
|
# in $W (the operator placed it there before invoking this).
|
||||||
|
#
|
||||||
|
# Produces foundation-backups/<TS>/ in RustFS and replicates it to the offsite
|
||||||
|
# bucket. Artifacts per CONTRACT_004 §4.2: postgres.sql.gz, forgejo-repos.tar.zst,
|
||||||
|
# vault-raft.snap, pulumi-state.json, rustfs-blobs/, MANIFEST.json.
|
||||||
|
#
|
||||||
|
# NOTE: at-rest age encryption (CONTRACT_004 §4.3) is NOT yet applied — both
|
||||||
|
# destinations are private (RustFS internal, offsite scoped creds). Adding age is
|
||||||
|
# the next hardening (generate the key, encrypt each artifact before `mc cp`).
|
||||||
|
set -eu

# Secrets and the bucket name arrive one per line on stdin, in exactly this
# order — never on argv, where they would be visible in the process list.
IFS= read -r VAULT_TOKEN
IFS= read -r OFF_EP
IFS= read -r OFF_AK
IFS= read -r OFF_SK
IFS= read -r BUCKET

# Non-secret positional args. Fail with a usage hint (rather than a bare
# "parameter not set") when one is missing or empty.
TS="${1:?usage: backup-remote.sh <TS> <MC_IMAGE>  (secrets on stdin)}"
MC_IMAGE="${2:?usage: backup-remote.sh <TS> <MC_IMAGE>  (secrets on stdin)}"
OFFSITE_BUCKET=olsitec-foundation
W="/tmp/foundation-backup-$TS"

# The work dir accumulates the database dump, the Vault snapshot and the Pulumi
# state. Remove it on EVERY exit path (including `set -e` aborts and signals),
# not only after a successful upload, so sensitive artifacts never linger in /tmp.
cleanup() { rm -rf -- "$W"; }
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# -p: the operator has normally created $W already (pulumi-state.json lives there).
mkdir -p "$W"
|
||||||
|
|
||||||
|
# /bin/sh has no portable `pipefail`: `producer | compressor` reports only the
# compressor's status, so a failed dump would silently yield a truncated but
# valid-looking artifact. Each producer therefore drops a marker file when it
# fails, and the marker is checked once the pipeline has finished.
PRODUCER_FAILED="$W/.producer-failed"
rm -f "$PRODUCER_FAILED"

# Abort the backup (with message $1) if the preceding producer left the marker.
require_producer_ok() {
  if [ -e "$PRODUCER_FAILED" ]; then
    rm -f "$PRODUCER_FAILED"
    echo "[backup] FAIL: $1" >&2
    exit 1
  fi
}

echo "[backup] postgres pg_dumpall" >&2
{ docker exec foundation-postgres pg_dumpall -U postgres || : > "$PRODUCER_FAILED"; } \
  | gzip > "$W/postgres.sql.gz"
require_producer_ok "pg_dumpall exited non-zero"

echo "[backup] forgejo git repos (tar.zst)" >&2
# Forgejo keeps repos under /data/git; use the container's own tar (no extra image).
{ docker exec foundation-forgejo sh -c 'tar -C /data -cf - git' || : > "$PRODUCER_FAILED"; } \
  | zstd -q -T0 > "$W/forgejo-repos.tar.zst"
require_producer_ok "forgejo repo tar exited non-zero"

echo "[backup] vault raft snapshot" >&2
# The token is passed through docker's own environment (`-e VAULT_TOKEN` with no
# value copies it from the caller's env) so it never appears on argv. The
# snapshot is written inside the container and streamed out on stdout; the CLI's
# stdout is discarded to keep that stream clean, while its stderr is left
# visible so a failure can be diagnosed.
VAULT_TOKEN="$VAULT_TOKEN" \
docker exec -e VAULT_ADDR=http://127.0.0.1:8200 -e VAULT_TOKEN foundation-vault \
  sh -c 'vault operator raft snapshot save /tmp/v.snap >/dev/null && cat /tmp/v.snap && rm -f /tmp/v.snap' > "$W/vault-raft.snap"
# An empty snapshot is never a usable backup; refuse to ship it.
if [ ! -s "$W/vault-raft.snap" ]; then
  echo "[backup] FAIL: vault raft snapshot is empty" >&2
  exit 1
fi
|
||||||
|
|
||||||
|
echo "[backup] MANIFEST.json" >&2
# Build MANIFEST.json inside $W (subshell keeps the caller's cwd untouched):
# one {name, sha256, size} object per artifact that is actually present, plus
# the bundle timestamp and the fixed restore order.
(
  cd "$W"
  entries=$(
    for artifact in postgres.sql.gz forgejo-repos.tar.zst vault-raft.snap pulumi-state.json; do
      # Artifacts that were not produced are simply left out of the manifest.
      if [ -f "$artifact" ]; then
        jq -n \
          --arg n "$artifact" \
          --arg sha "$(sha256sum "$artifact" | cut -d' ' -f1)" \
          --argjson sz "$(stat -c %s "$artifact")" \
          '{name:$n, sha256:$sha, size:$sz}'
      fi
    done | jq -s '.'
  )
  jq -n --arg ts "$TS" --argjson files "$entries" \
    '{timestamp:$ts, restoreOrder:["vault","postgres","rustfs","forgejo"], artifacts:$files}' > MANIFEST.json
)
|
||||||
|
|
||||||
|
# RustFS root creds from the running container (VM-trusted). Inspect once and
# pick both keys out of the same environment listing.
RUSTFS_ENV=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}')
RAK=$(printf '%s\n' "$RUSTFS_ENV" | sed -n 's/^RUSTFS_ACCESS_KEY=//p')
RSK=$(printf '%s\n' "$RUSTFS_ENV" | sed -n 's/^RUSTFS_SECRET_KEY=//p')
if [ -z "$RAK" ] || [ -z "$RSK" ]; then
  echo "[backup] FAIL: RUSTFS_ACCESS_KEY/RUSTFS_SECRET_KEY not found in foundation-rustfs env" >&2
  exit 1
fi

echo "[backup] upload to RustFS $BUCKET/$TS + replicate offsite" >&2
# Credentials reach the mc container through docker's own environment: `-e NAME`
# with no value copies NAME from the caller's env, so no secret is spelled out
# on the `docker run` command line. Non-secrets are still passed as NAME=value.
RAK="$RAK" RSK="$RSK" OFF_EP="$OFF_EP" OFF_AK="$OFF_AK" OFF_SK="$OFF_SK" \
docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \
  -e RAK -e RSK -e OFF_EP -e OFF_AK -e OFF_SK \
  -e BUCKET="$BUCKET" -e TS="$TS" -e OFFB="$OFFSITE_BUCKET" \
  "$MC_IMAGE" -c '
    set -e
    mc alias set rfs http://foundation-rustfs:9000 "$RAK" "$RSK" >/dev/null
    mc alias set off "$OFF_EP" "$OFF_AK" "$OFF_SK" >/dev/null
    # 1) the assembled artifacts + MANIFEST.json
    mc cp -r /w/ "rfs/$BUCKET/$TS/" >/dev/null
    # 2) blob buckets, best-effort: a failed or absent bucket must not abort
    #    the backup, but it is reported instead of being swallowed silently.
    for b in forgejo-packages forgejo-artifacts forgejo-lfs; do
      mc mirror --overwrite --quiet "rfs/$b" "rfs/$BUCKET/$TS/rustfs-blobs/$b" >/dev/null 2>&1 \
        || echo "[backup] WARN: mirror of bucket $b failed or bucket absent; skipped" >&2
    done
    # 3) replicate the complete bundle offsite; a failure here fails the backup.
    mc mirror --overwrite --quiet "rfs/$BUCKET/$TS" "off/$OFFB/$TS" >/dev/null
  '

# :? guards against an empty $W ever turning this into `rm -rf ""`.
rm -rf -- "${W:?}"
echo "[backup] complete: rfs/$BUCKET/$TS (+ offsite $OFFSITE_BUCKET/$TS)" >&2
|
||||||
40
backup/backup.sh
Executable file
40
backup/backup.sh
Executable file
|
|
@ -0,0 +1,40 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# backup.sh — CONTRACT_004 backup producer (operator orchestrator).
|
||||||
|
#
|
||||||
|
# ./backup/backup.sh [UTC-timestamp]
|
||||||
|
#
|
||||||
|
# The timestamp is supplied by the caller (CI/cron) per CONTRACT_004 §4.1; it
|
||||||
|
# defaults to now for manual runs. The operator contributes the Pulumi state
|
||||||
|
# (local file backend) and the secrets (from passphrase-encrypted config); the
|
||||||
|
# heavy lifting runs on the VM via backup-remote.sh. Result: a bundle in RustFS
|
||||||
|
# foundation-backups/<TS>/ replicated to the offsite bucket.
|
||||||
|
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
DIR="$ROOT/bootstrap"
TS="${1:-$(date -u +%Y%m%dT%H%M%SZ)}"
# TS is spliced into remote shell commands and /tmp paths below, so restrict it
# to characters that cannot break quoting or escape the intended directory.
if [[ ! "$TS" =~ ^[A-Za-z0-9._+-]+$ ]]; then
  echo "backup: invalid timestamp '$TS' (allowed: letters, digits, . _ + -)" >&2
  exit 2
fi

export PULUMI_BACKEND_URL="file://${DIR}/state"
# Assign first, export second: `export VAR="$(cmd)"` would hide a failing `pass`.
PULUMI_CONFIG_PASSPHRASE="$(pass olsitec-foundation/PULUMI_CONFIG_PASSPHRASE)"
export PULUMI_CONFIG_PASSPHRASE
KEY="${SSH_PRIVATE_KEY_PATH:-${HOME}/.ssh/foundation-test_ed25519}"
MC_IMAGE="$(grep '^IMAGE_MC=' "$ROOT/VERSIONS" | cut -d= -f2-)"
: "${MC_IMAGE:?IMAGE_MC is empty in $ROOT/VERSIONS}"
cd "$DIR"
pulumi stack select foundation >/dev/null

RT=$(pulumi config get vaultCredentials:rootToken)
OFF_EP=$(pulumi config get foundation:backup.offsiteEndpoint)
OFF_AK=$(pulumi config get foundation:backup.offsiteAccessKey)
OFF_SK=$(pulumi config get foundation:backup.offsiteSecretKey)
BUCKET=$(pulumi config get foundation:backup.bucket)
HOST=$(pulumi config get foundation:vm.host)
PORT=$(pulumi config get foundation:vm.sshPort)
SUSER=$(pulumi config get foundation:vm.user)

# An array keeps each ssh argument intact (a key path containing spaces would
# be word-split if the command were kept in a plain string).
# NOTE(review): host-key checking is disabled, so the VM's identity is not
# verified before secrets are streamed to it — confirm this is acceptable
# outside throwaway test environments.
SSHX=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ConnectTimeout=15 -i "$KEY" -p "$PORT" "$SUSER@$HOST")
W="/tmp/foundation-backup-$TS"
REMOTE_SCRIPT="/tmp/backup-remote-$TS.sh"

echo "backup: $TS -> rfs/$BUCKET/$TS (+ offsite)"
# Pulumi state + the assembler script onto the VM.
pulumi stack export | "${SSHX[@]}" "mkdir -p $W && cat > $W/pulumi-state.json"
"${SSHX[@]}" "cat > $REMOTE_SCRIPT" < "$ROOT/backup/backup-remote.sh"

# Run the assembler: secrets on stdin (never argv), TS + MC_IMAGE as args.
# The remote command line captures the assembler's exit status BEFORE cleaning
# up and re-raises it; otherwise ssh would return the status of the trailing
# `rm` and a failed backup would be reported as done. The remote work dir is
# removed on both success and failure so the Pulumi state / dumps never linger.
printf '%s\n%s\n%s\n%s\n%s\n' "$RT" "$OFF_EP" "$OFF_AK" "$OFF_SK" "$BUCKET" \
  | "${SSHX[@]}" "sh $REMOTE_SCRIPT '$TS' '$MC_IMAGE'; rc=\$?; rm -rf $REMOTE_SCRIPT $W; exit \$rc"
echo "backup: done ($TS)"
|
||||||
70
backup/restore-remote.sh
Executable file
70
backup/restore-remote.sh
Executable file
|
|
@ -0,0 +1,70 @@
|
||||||
|
#!/bin/sh
|
||||||
|
# restore-remote.sh — VM-side SCRATCH restore verifier (CONTRACT_004 consumer half,
|
||||||
|
# §4.6 "a backup is not trusted until restored"). Shipped + run by backup/restore.sh.
|
||||||
|
# NON-DESTRUCTIVE: it reconstructs into throwaway scratch resources and asserts the
|
||||||
|
# bundle is restorable — it never touches the live containers/volumes. A real
|
||||||
|
# disaster restore (overwriting live, restore order Vault->Postgres->RustFS->Forgejo)
|
||||||
|
# is dr/restore-to-fresh-vm.sh (T13), out of scope here.
|
||||||
|
#
|
||||||
|
# Secrets on stdin; non-secrets ($TS, $MC_IMAGE, $PG_IMAGE, $SRC) as args.
|
||||||
|
set -eu

# Secrets and the bucket name arrive one per line on stdin, in this order.
IFS= read -r OFF_EP
IFS= read -r OFF_AK
IFS= read -r OFF_SK
IFS= read -r BUCKET

# Non-secret positional args; SRC selects the bundle source and defaults to rfs.
USAGE="usage: restore-remote.sh <TS> <MC_IMAGE> <PG_IMAGE> [rfs|off]  (secrets on stdin)"
TS="${1:?$USAGE}"
MC_IMAGE="${2:?$USAGE}"
PG_IMAGE="${3:?$USAGE}"
SRC="${4:-rfs}"
OFFSITE_BUCKET=olsitec-foundation
W="/tmp/foundation-restore-$TS"

# Scratch resources (the throwaway Postgres container and the pulled bundle,
# which contains a DB dump and a Vault snapshot) are torn down on EVERY exit
# path — explicit fail(), `set -e` aborts and signals alike.
cleanup() {
  docker rm -f foundation-restore-pg >/dev/null 2>&1 || true
  cd / 2>/dev/null || true   # step out of $W before deleting it
  rm -rf -- "$W"
}
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Start from an empty work dir so a stale pull can never satisfy the checks.
rm -rf -- "$W"; mkdir -p "$W"

# fail MESSAGE — report a verification failure on stderr, drop the scratch
# Postgres container and exit 1.
fail() { echo "RESTORE VERIFY FAIL: $1" >&2; docker rm -f foundation-restore-pg >/dev/null 2>&1 || true; exit 1; }
|
||||||
|
|
||||||
|
# RustFS root creds from the running container; inspect once, extract both keys.
RUSTFS_ENV=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}')
RAK=$(printf '%s\n' "$RUSTFS_ENV" | sed -n 's/^RUSTFS_ACCESS_KEY=//p')
RSK=$(printf '%s\n' "$RUSTFS_ENV" | sed -n 's/^RUSTFS_SECRET_KEY=//p')
if [ -z "$RAK" ] || [ -z "$RSK" ]; then
  fail "RUSTFS_ACCESS_KEY/RUSTFS_SECRET_KEY not found in foundation-rustfs env"
fi

echo "[restore] pull bundle $TS from $SRC" >&2
# Credentials reach the mc container through docker's own environment: `-e NAME`
# with no value copies NAME from the caller's env, so no secret is spelled out
# on the `docker run` command line. Non-secrets are still passed as NAME=value.
RAK="$RAK" RSK="$RSK" OFF_EP="$OFF_EP" OFF_AK="$OFF_AK" OFF_SK="$OFF_SK" \
docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \
  -e RAK -e RSK -e OFF_EP -e OFF_AK -e OFF_SK \
  -e BUCKET="$BUCKET" -e TS="$TS" -e SRC="$SRC" -e OFFB="$OFFSITE_BUCKET" "$MC_IMAGE" -c '
    set -e
    mc alias set rfs http://foundation-rustfs:9000 "$RAK" "$RSK" >/dev/null
    if [ "$SRC" = off ]; then
      mc alias set off "$OFF_EP" "$OFF_AK" "$OFF_SK" >/dev/null
      mc cp -r "off/$OFFB/$TS/" /w/ >/dev/null
    else
      mc cp -r "rfs/$BUCKET/$TS/" /w/ >/dev/null
    fi'

# mc cp -r nests under $TS/ — flatten if needed
if [ ! -f "$W/MANIFEST.json" ] && [ -d "$W/$TS" ]; then
  mv "$W/$TS"/* "$W"/
fi
[ -f "$W/MANIFEST.json" ] || fail "MANIFEST.json missing from pulled bundle"
|
||||||
|
|
||||||
|
echo "[restore] verify MANIFEST sha256" >&2
cd "$W"
# Extract "<sha256> <name>" pairs up front. Feeding `jq | while` directly would
# let a jq failure (unparseable manifest) or an empty artifact list fall through
# as "nothing to check => verified", because /bin/sh has no portable pipefail.
ARTIFACTS=$(jq -r '.artifacts[] | "\(.sha256) \(.name)"' MANIFEST.json) \
  || fail "MANIFEST.json is not parseable"
[ -n "$ARTIFACTS" ] || fail "MANIFEST.json lists no artifacts"

# The here-document keeps the loop in the current shell, so fail() can be
# called directly from inside it.
while read -r sha name; do
  if [ ! -f "$name" ]; then
    echo "missing $name" >&2
    fail "MANIFEST sha verification failed"
  fi
  got=$(sha256sum "$name" | cut -d' ' -f1)
  if [ "$got" != "$sha" ]; then
    echo "sha mismatch $name" >&2
    fail "MANIFEST sha verification failed"
  fi
done <<EOF
$ARTIFACTS
EOF
|
||||||
|
|
||||||
|
echo "[restore] scratch Postgres restore + assert" >&2
# Throwaway container only — the live foundation-postgres is never touched.
docker rm -f foundation-restore-pg >/dev/null 2>&1 || true
docker run -d --name foundation-restore-pg -e POSTGRES_PASSWORD=scratch "$PG_IMAGE" >/dev/null

# Probe readiness over TCP (-h 127.0.0.1), not the Unix socket. This assumes
# PG_IMAGE behaves like the official postgres image, whose entrypoint runs a
# temporary socket-only server during first-time init and then restarts it: a
# socket probe can report "ready" against that temporary server and the restore
# would then be cut off by the restart. Gives up after 30 retries, 2s apart.
i=0
until docker exec foundation-restore-pg pg_isready -h 127.0.0.1 -U postgres >/dev/null 2>&1; do
  i=$((i+1))
  [ "$i" -le 30 ] || fail "scratch postgres not ready"
  sleep 2
done

# NOTE(review): psql exits 0 on SQL-level errors unless ON_ERROR_STOP is set, so
# this only catches connection-level failures; the row-count assertion below is
# the real restorability check.
gunzip < postgres.sql.gz | docker exec -i foundation-restore-pg psql -U postgres -q >/dev/null 2>&1 || fail "psql restore errored"

# A restored Forgejo database must contain at least one user row; a failed
# query is treated as zero rows.
ROWS=$(docker exec foundation-restore-pg psql -U postgres -d forgejo -tAc 'SELECT count(*) FROM "user"' 2>/dev/null || echo 0)
[ "${ROWS:-0}" -ge 1 ] || fail "restored forgejo DB has no users (got '$ROWS')"
echo "[restore] postgres OK: forgejo.\"user\" rows=$ROWS" >&2
docker rm -f foundation-restore-pg >/dev/null 2>&1 || true
|
||||||
|
|
||||||
|
echo "[restore] extract forgejo repos + assert olsitec/foundation present" >&2
# Unpack the repo archive into a scratch directory under the current work dir
# and require the olsitec/foundation bare repository to be present in it.
mkdir -p repos
if ! zstd -dc forgejo-repos.tar.zst | tar -C repos -xf - 2>/dev/null; then
  fail "forgejo tar extract failed"
fi
if [ ! -d repos/git/repositories/olsitec/foundation.git ]; then
  fail "olsitec/foundation.git not in repo bundle"
fi
echo "[restore] forgejo repos OK: olsitec/foundation.git present" >&2
|
||||||
|
|
||||||
|
echo "[restore] vault snapshot sanity" >&2
# The snapshot is only checked for being non-empty; it is not loaded into a Vault.
if [ ! -s vault-raft.snap ]; then
  fail "vault-raft.snap empty"
fi
printf '%s\n' "[restore] vault snapshot OK: $(stat -c %s vault-raft.snap) bytes" >&2

# All assertions held: drop the scratch work dir and report success on stdout.
rm -rf "$W"
printf '%s\n' "RESTORE VERIFY PASS ($TS from $SRC)"
|
||||||
33
backup/restore.sh
Executable file
33
backup/restore.sh
Executable file
|
|
@ -0,0 +1,33 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# restore.sh — CONTRACT_004 §4.6 restore verifier (operator orchestrator).
|
||||||
|
#
|
||||||
|
# ./backup/restore.sh <UTC-timestamp> [rfs|off]
|
||||||
|
#
|
||||||
|
# Pulls the bundle (default from RustFS; `off` checks the offsite copy) and asserts
|
||||||
|
# it reconstructs into scratch resources — NON-DESTRUCTIVE, it never touches the
|
||||||
|
# live platform. The real disaster restore is dr/restore-to-fresh-vm.sh (T13).
|
||||||
|
set -euo pipefail

ROOT="$(cd "$(dirname "$0")/.." && pwd)"
DIR="$ROOT/bootstrap"
TS="${1:?usage: restore.sh <UTC-timestamp> [rfs|off]}"
SRC="${2:-rfs}"
# TS and SRC are spliced into a remote shell command below, so restrict TS to
# characters that cannot break quoting, and accept only the two documented
# sources (anything else would otherwise silently fall back to RustFS).
if [[ ! "$TS" =~ ^[A-Za-z0-9._+-]+$ ]]; then
  echo "restore: invalid timestamp '$TS' (allowed: letters, digits, . _ + -)" >&2
  exit 2
fi
case "$SRC" in
  rfs|off) ;;
  *) echo "restore: unknown source '$SRC' (usage: restore.sh <UTC-timestamp> [rfs|off])" >&2; exit 2 ;;
esac

export PULUMI_BACKEND_URL="file://${DIR}/state"
# Assign first, export second: `export VAR="$(cmd)"` would hide a failing `pass`.
PULUMI_CONFIG_PASSPHRASE="$(pass olsitec-foundation/PULUMI_CONFIG_PASSPHRASE)"
export PULUMI_CONFIG_PASSPHRASE
KEY="${SSH_PRIVATE_KEY_PATH:-${HOME}/.ssh/foundation-test_ed25519}"
MC_IMAGE="$(grep '^IMAGE_MC=' "$ROOT/VERSIONS" | cut -d= -f2-)"
PG_IMAGE="$(grep '^IMAGE_POSTGRES=' "$ROOT/VERSIONS" | cut -d= -f2-)"
: "${MC_IMAGE:?IMAGE_MC is empty in $ROOT/VERSIONS}"
: "${PG_IMAGE:?IMAGE_POSTGRES is empty in $ROOT/VERSIONS}"
cd "$DIR"
pulumi stack select foundation >/dev/null

OFF_EP=$(pulumi config get foundation:backup.offsiteEndpoint)
OFF_AK=$(pulumi config get foundation:backup.offsiteAccessKey)
OFF_SK=$(pulumi config get foundation:backup.offsiteSecretKey)
BUCKET=$(pulumi config get foundation:backup.bucket)
HOST=$(pulumi config get foundation:vm.host)
PORT=$(pulumi config get foundation:vm.sshPort)
SUSER=$(pulumi config get foundation:vm.user)

# An array keeps each ssh argument intact (a key path containing spaces would
# be word-split if the command were kept in a plain string).
# NOTE(review): host-key checking is disabled, so the VM's identity is not
# verified before secrets are streamed to it — confirm this is acceptable
# outside throwaway test environments.
SSHX=(ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null
      -o ConnectTimeout=15 -i "$KEY" -p "$PORT" "$SUSER@$HOST")
REMOTE_SCRIPT="/tmp/restore-remote-$TS.sh"

"${SSHX[@]}" "cat > $REMOTE_SCRIPT" < "$ROOT/backup/restore-remote.sh"

# Secrets on stdin (never argv); non-secrets as args. The remote command line
# captures the verifier's exit status BEFORE removing the script and re-raises
# it; otherwise ssh would return the status of the trailing `rm` and a failed
# verification would make this script exit 0.
printf '%s\n%s\n%s\n%s\n' "$OFF_EP" "$OFF_AK" "$OFF_SK" "$BUCKET" \
  | "${SSHX[@]}" "sh $REMOTE_SCRIPT '$TS' '$MC_IMAGE' '$PG_IMAGE' '$SRC'; rc=\$?; rm -f $REMOTE_SCRIPT; exit \$rc"
|
||||||
Loading…
Add table
Add a link
Reference in a new issue