feat(backup): age at-rest encryption of bundles (CONTRACT_004 §4.3)

Close the known gap: backup bundles were uploaded unencrypted, relying
solely on destination access control. Now every data artifact is
age-encrypted on the VM before upload and decrypted on restore.

- backup-remote.sh: assemble rustfs blobs into rustfs-blobs.tar.zst (so the
  whole bundle is one encrypted unit), then age -r <recipient> each artifact
  to <name>.age and drop the plaintext. MANIFEST.json stays cleartext — it
  is the inventory + integrity gate and carries no secrets; it records each
  artifact's PLAINTEXT sha256 so restore verifies after decrypt.
- restore-remote.sh: materialise the age identity to a 0600 file, decrypt
  each .age, then run the existing sha + scratch-restore asserts; add a
  rustfs-blobs extract+assert.
- backup.sh / restore.sh: pass the public recipient (arg) / secret identity
  (stdin, never argv) from passphrase-encrypted config.
- provision/index.ts: install age + zstd on the VM via cloud-init so a fresh
  DR VM (T13) has the backup tools from first boot.
- Pulumi.foundation.yaml: seed backup.ageRecipient (public) + backup.ageIdentity
  (secure:). The identity lives in config so {repo + passphrase} can decrypt a
  bundle even after total Vault loss (CONTRACT_004 §4.3).

Validated live: encrypted backup + restore-verify PASS from both RustFS and
offsite; bucket shows only *.age + cleartext MANIFEST.json.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Andreas Niemann 2026-06-30 23:23:38 +02:00
parent aabb50fb3b
commit 92e8f978a5
6 changed files with 79 additions and 25 deletions

View file

@ -1,16 +1,19 @@
#!/bin/sh
# backup-remote.sh — the VM-side bundle assembler (CONTRACT_004 producer half).
# Shipped + run by backup/backup.sh; NOT run directly. Secrets arrive on stdin
# (never argv); non-secrets ($TS, $MC_IMAGE) are args. pulumi-state.json is already
# in $W (the operator placed it there before invoking this).
# (never argv); non-secrets ($TS, $MC_IMAGE, $AGE_RECIPIENT) are args. pulumi-state.json
# is already in $W (the operator placed it there before invoking this).
#
# Produces foundation-backups/<TS>/ in RustFS and replicates it to the offsite
# bucket. Artifacts per CONTRACT_004 §4.2: postgres.sql.gz, forgejo-repos.tar.zst,
# vault-raft.snap, pulumi-state.json, rustfs-blobs/, MANIFEST.json.
# vault-raft.snap, pulumi-state.json, rustfs-blobs.tar.zst, MANIFEST.json.
#
# NOTE: at-rest age encryption (CONTRACT_004 §4.3) is NOT yet applied — both
# destinations are private (RustFS internal, offsite scoped creds). Adding age is
# the next hardening (generate the key, encrypt each artifact before `mc cp`).
# At-rest encryption (CONTRACT_004 §4.3): every DATA artifact is age-encrypted to
# $AGE_RECIPIENT before upload (`<name>` -> `<name>.age`); only MANIFEST.json travels
# in cleartext (it carries no secrets — it is the inventory + integrity gate, and
# lists each artifact's PLAINTEXT sha256 so restore verifies after decryption). The
# matching identity is in Vault + passphrase-encrypted config (CONTRACT_004 §4.3),
# so {repo + passphrase} can always decrypt even after total Vault loss.
set -eu
IFS= read -r VAULT_TOKEN
IFS= read -r OFF_EP
@ -19,6 +22,7 @@ IFS= read -r OFF_SK
IFS= read -r BUCKET
TS="$1"
MC_IMAGE="$2"
AGE_RECIPIENT="$3"
OFFSITE_BUCKET=olsitec-foundation
W="/tmp/foundation-backup-$TS"
mkdir -p "$W"
@ -34,20 +38,41 @@ echo "[backup] vault raft snapshot" >&2
docker exec -e VAULT_ADDR=http://127.0.0.1:8200 -e VAULT_TOKEN="$VAULT_TOKEN" foundation-vault \
sh -c 'vault operator raft snapshot save /tmp/v.snap >/dev/null 2>&1 && cat /tmp/v.snap && rm -f /tmp/v.snap' > "$W/vault-raft.snap"
# RustFS root creds from the running container (VM-trusted).
RAK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_ACCESS_KEY=//p')
RSK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_SECRET_KEY=//p')
echo "[backup] rustfs blobs -> rustfs-blobs.tar.zst" >&2
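# Pull the blob buckets onto the VM fs so the bundle is a single encrypted unit
# (CONTRACT_004 §4.3 "whole bundle"). Tiny at Layer 0; may be made incremental later.
# NOTE(review): mirror errors are swallowed (`|| true`) and the bucket dirs are
# pre-created, so a failed pull still yields a valid tarball of empty directories —
# confirm this best-effort behaviour is intended.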
mkdir -p "$W/blobs/forgejo-packages" "$W/blobs/forgejo-artifacts" "$W/blobs/forgejo-lfs"
docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \
-e RAK="$RAK" -e RSK="$RSK" "$MC_IMAGE" -c '
set -e
mc alias set rfs http://foundation-rustfs:9000 "$RAK" "$RSK" >/dev/null
for b in forgejo-packages forgejo-artifacts forgejo-lfs; do
mc mirror --overwrite --quiet "rfs/$b" "/w/blobs/$b" >/dev/null 2>&1 || true
done'
tar -C "$W/blobs" -cf - . | zstd -q -T0 > "$W/rustfs-blobs.tar.zst"
rm -rf "$W/blobs"
echo "[backup] MANIFEST.json" >&2
( cd "$W"
jq -n --arg ts "$TS" \
--argjson files "$(for f in postgres.sql.gz forgejo-repos.tar.zst vault-raft.snap pulumi-state.json; do
jq -n --arg ts "$TS" --arg rcpt "$AGE_RECIPIENT" \
--argjson files "$(for f in postgres.sql.gz forgejo-repos.tar.zst vault-raft.snap pulumi-state.json rustfs-blobs.tar.zst; do
[ -f "$f" ] || continue
jq -n --arg n "$f" --arg sha "$(sha256sum "$f" | cut -d' ' -f1)" --argjson sz "$(stat -c %s "$f")" \
'{name:$n, sha256:$sha, size:$sz}'
done | jq -s '.')" \
'{timestamp:$ts, restoreOrder:["vault","postgres","rustfs","forgejo"], artifacts:$files}' > MANIFEST.json
'{timestamp:$ts, encryption:"age", ageRecipient:$rcpt, restoreOrder:["vault","postgres","rustfs","forgejo"], artifacts:$files}' > MANIFEST.json
)
# RustFS root creds from the running container (VM-trusted).
RAK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_ACCESS_KEY=//p')
RSK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_SECRET_KEY=//p')
echo "[backup] age-encrypt artifacts (-> *.age)" >&2
for f in postgres.sql.gz forgejo-repos.tar.zst vault-raft.snap pulumi-state.json rustfs-blobs.tar.zst; do
[ -f "$W/$f" ] || continue
age -r "$AGE_RECIPIENT" -o "$W/$f.age" "$W/$f"
rm -f "$W/$f"
done
echo "[backup] upload to RustFS $BUCKET/$TS + replicate offsite" >&2
docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \
@ -58,10 +83,7 @@ docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \
mc alias set rfs http://foundation-rustfs:9000 "$RAK" "$RSK" >/dev/null
mc alias set off "$OFF_EP" "$OFF_AK" "$OFF_SK" >/dev/null
mc cp -r /w/ "rfs/$BUCKET/$TS/" >/dev/null
for b in forgejo-packages forgejo-artifacts forgejo-lfs; do
mc mirror --overwrite --quiet "rfs/$b" "rfs/$BUCKET/$TS/rustfs-blobs/$b" >/dev/null 2>&1 || true
done
mc mirror --overwrite --quiet "rfs/$BUCKET/$TS" "off/$OFFB/$TS" >/dev/null
'
rm -rf "$W"
echo "[backup] complete: rfs/$BUCKET/$TS (+ offsite $OFFSITE_BUCKET/$TS)" >&2
echo "[backup] complete: rfs/$BUCKET/$TS (+ offsite $OFFSITE_BUCKET/$TS), age-encrypted" >&2

View file

@ -24,6 +24,7 @@ OFF_EP=$(pulumi config get foundation:backup.offsiteEndpoint)
OFF_AK=$(pulumi config get foundation:backup.offsiteAccessKey)
OFF_SK=$(pulumi config get foundation:backup.offsiteSecretKey)
BUCKET=$(pulumi config get foundation:backup.bucket)
AGE_RECIPIENT=$(pulumi config get foundation:backup.ageRecipient) # public; CONTRACT_004 §4.3
HOST=$(pulumi config get foundation:vm.host)
PORT=$(pulumi config get foundation:vm.sshPort)
SUSER=$(pulumi config get foundation:vm.user)
@ -34,7 +35,7 @@ echo "backup: $TS -> rfs/$BUCKET/$TS (+ offsite)"
# Pulumi state + the assembler script onto the VM.
pulumi stack export | $SSHX "mkdir -p $W && cat > $W/pulumi-state.json"
$SSHX "cat > /tmp/backup-remote-$TS.sh" < "$ROOT/backup/backup-remote.sh"
# Run the assembler: secrets on stdin (never argv), TS + MC_IMAGE as args.
# Run the assembler: secrets on stdin (never argv); TS, MC_IMAGE, age recipient as args.
printf '%s\n%s\n%s\n%s\n%s\n' "$RT" "$OFF_EP" "$OFF_AK" "$OFF_SK" "$BUCKET" \
| $SSHX "sh /tmp/backup-remote-$TS.sh '$TS' '$MC_IMAGE'; rm -f /tmp/backup-remote-$TS.sh"
| $SSHX "sh /tmp/backup-remote-$TS.sh '$TS' '$MC_IMAGE' '$AGE_RECIPIENT'; rm -f /tmp/backup-remote-$TS.sh"
echo "backup: done ($TS)"

View file

@ -6,17 +6,24 @@
# disaster restore (overwriting live, restore order Vault->Postgres->RustFS->Forgejo)
# is dr/restore-to-fresh-vm.sh (T13), out of scope here.
#
# Secrets on stdin; non-secrets ($TS, $MC_IMAGE, $PG_IMAGE, $SRC) as args.
# Stdin, one value per line, in this order: OFF_EP, OFF_AK, OFF_SK (offsite creds),
# BUCKET, then the age IDENTITY (secret; never argv — it is consumed by a single
# `read`, so it must be exactly one line). Non-secrets ($TS, $MC_IMAGE, $PG_IMAGE,
# $SRC) are args. The bundle is age-encrypted (CONTRACT_004 §4.3): every artifact
# is pulled as <name>.age and decrypted with the identity BEFORE its MANIFEST
# sha256 (a PLAINTEXT sha) is verified.
set -eu
IFS= read -r OFF_EP
IFS= read -r OFF_AK
IFS= read -r OFF_SK
IFS= read -r BUCKET
IFS= read -r AGE_IDENTITY
TS="$1"; MC_IMAGE="$2"; PG_IMAGE="$3"; SRC="${4:-rfs}"
OFFSITE_BUCKET=olsitec-foundation
W="/tmp/foundation-restore-$TS"
rm -rf "$W"; mkdir -p "$W"
fail() { echo "RESTORE VERIFY FAIL: $1" >&2; docker rm -f foundation-restore-pg >/dev/null 2>&1 || true; exit 1; }
rm -rf "$W"; (umask 077; mkdir -p "$W")
fail() { echo "RESTORE VERIFY FAIL: $1" >&2; docker rm -f foundation-restore-pg >/dev/null 2>&1 || true; rm -f "$W/age.key" 2>/dev/null || true; exit 1; }
# Materialise the age identity to a 0600 file for `age -d -i`; it is removed again
# by fail() and by the final cleanup at the end of a successful run.
( umask 077; printf '%s\n' "$AGE_IDENTITY" > "$W/age.key" )
RAK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_ACCESS_KEY=//p')
RSK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_SECRET_KEY=//p')
@ -37,8 +44,14 @@ docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \
[ -f "$W/MANIFEST.json" ] || { [ -d "$W/$TS" ] && mv "$W/$TS"/* "$W"/; }
[ -f "$W/MANIFEST.json" ] || fail "MANIFEST.json missing from pulled bundle"
echo "[restore] verify MANIFEST sha256" >&2
cd "$W"
echo "[restore] age-decrypt artifacts" >&2
for name in $(jq -r '.artifacts[].name' MANIFEST.json); do
[ -f "$name.age" ] || fail "$name.age missing from bundle"
age -d -i age.key -o "$name" "$name.age" 2>/dev/null || fail "age decrypt failed: $name"
done
echo "[restore] verify MANIFEST sha256 (plaintext)" >&2
jq -r '.artifacts[] | "\(.sha256) \(.name)"' MANIFEST.json | while read -r sha name; do
[ -f "$name" ] || { echo "missing $name" >&2; exit 1; }
got=$(sha256sum "$name" | cut -d' ' -f1)
@ -62,9 +75,16 @@ zstd -dc forgejo-repos.tar.zst | tar -C repos -xf - 2>/dev/null || fail "forgejo
[ -d repos/git/repositories/olsitec/foundation.git ] || fail "olsitec/foundation.git not in repo bundle"
echo "[restore] forgejo repos OK: olsitec/foundation.git present" >&2
echo "[restore] extract rustfs blobs + assert packages present" >&2
mkdir -p blobs
zstd -dc rustfs-blobs.tar.zst | tar -C blobs -xf - 2>/dev/null || fail "rustfs-blobs tar extract failed"
[ -d blobs/forgejo-packages ] || fail "forgejo-packages not in blob bundle"
echo "[restore] rustfs blobs OK: $(find blobs -type f | wc -l | tr -d ' ') object(s)" >&2
echo "[restore] vault snapshot sanity" >&2
[ -s vault-raft.snap ] || fail "vault-raft.snap empty"
echo "[restore] vault snapshot OK: $(stat -c %s vault-raft.snap) bytes" >&2
rm -f "$W/age.key"
rm -rf "$W"
echo "RESTORE VERIFY PASS ($TS from $SRC)"
echo "RESTORE VERIFY PASS ($TS from $SRC, age-decrypted)"

View file

@ -23,11 +23,12 @@ OFF_EP=$(pulumi config get foundation:backup.offsiteEndpoint)
OFF_AK=$(pulumi config get foundation:backup.offsiteAccessKey)
OFF_SK=$(pulumi config get foundation:backup.offsiteSecretKey)
BUCKET=$(pulumi config get foundation:backup.bucket)
AGE_IDENTITY=$(pulumi config get foundation:backup.ageIdentity) # secret; CONTRACT_004 §4.3
HOST=$(pulumi config get foundation:vm.host)
PORT=$(pulumi config get foundation:vm.sshPort)
SUSER=$(pulumi config get foundation:vm.user)
SSHX="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=15 -i $KEY -p $PORT $SUSER@$HOST"
$SSHX "cat > /tmp/restore-remote-$TS.sh" < "$ROOT/backup/restore-remote.sh"
printf '%s\n%s\n%s\n%s\n' "$OFF_EP" "$OFF_AK" "$OFF_SK" "$BUCKET" \
printf '%s\n%s\n%s\n%s\n%s\n' "$OFF_EP" "$OFF_AK" "$OFF_SK" "$BUCKET" "$AGE_IDENTITY" \
| $SSHX "sh /tmp/restore-remote-$TS.sh '$TS' '$MC_IMAGE' '$PG_IMAGE' '$SRC'; rm -f /tmp/restore-remote-$TS.sh"