From 92e8f978a5d106ed5c614148ebdec0fe45b978e6 Mon Sep 17 00:00:00 2001 From: Andreas Niemann Date: Tue, 30 Jun 2026 23:23:38 +0200 Subject: [PATCH] =?UTF-8?q?feat(backup):=20age=20at-rest=20encryption=20of?= =?UTF-8?q?=20bundles=20(CONTRACT=5F004=20=C2=A74.3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Close the known gap: backup bundles were uploaded unencrypted, relying solely on destination access control. Now every data artifact is age-encrypted on the VM before upload and decrypted on restore. - backup-remote.sh: assemble rustfs blobs into rustfs-blobs.tar.zst (so the whole bundle is one encrypted unit), then age -r each artifact to .age and drop the plaintext. MANIFEST.json stays cleartext — it is the inventory + integrity gate and carries no secrets; it records each artifact's PLAINTEXT sha256 so restore verifies after decrypt. - restore-remote.sh: materialise the age identity to a 0600 file, decrypt each .age, then run the existing sha + scratch-restore asserts; add a rustfs-blobs extract+assert. - backup.sh / restore.sh: pass the public recipient (arg) / secret identity (stdin, never argv) from passphrase-encrypted config. - provision/index.ts: install age + zstd on the VM via cloud-init so a fresh DR VM (T13) has the backup tools from first boot. - Pulumi.foundation.yaml: seed backup.ageRecipient (public) + backup.ageIdentity (secure:). The identity lives in config so {repo + passphrase} can decrypt a bundle even after total Vault loss (CONTRACT_004 §4.3). Validated live: encrypted backup + restore-verify PASS from both RustFS and offsite; bucket shows only *.age + cleartext MANIFEST.json. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- backup/backup-remote.sh | 54 ++++++++++++++++++++++---------- backup/backup.sh | 5 +-- backup/restore-remote.sh | 30 +++++++++++++++--- backup/restore.sh | 3 +- bootstrap/Pulumi.foundation.yaml | 3 ++ provision/index.ts | 9 +++++- 6 files changed, 79 insertions(+), 25 deletions(-) diff --git a/backup/backup-remote.sh b/backup/backup-remote.sh index 0ec19f4..a32ce3b 100755 --- a/backup/backup-remote.sh +++ b/backup/backup-remote.sh @@ -1,16 +1,19 @@ #!/bin/sh # backup-remote.sh — the VM-side bundle assembler (CONTRACT_004 producer half). # Shipped + run by backup/backup.sh; NOT run directly. Secrets arrive on stdin -# (never argv); non-secrets ($TS, $MC_IMAGE) are args. pulumi-state.json is already -# in $W (the operator placed it there before invoking this). +# (never argv); non-secrets ($TS, $MC_IMAGE, $AGE_RECIPIENT) are args. pulumi-state.json +# is already in $W (the operator placed it there before invoking this). # # Produces foundation-backups/<TS>/ in RustFS and replicates it to the offsite # bucket. Artifacts per CONTRACT_004 §4.2: postgres.sql.gz, forgejo-repos.tar.zst, -# vault-raft.snap, pulumi-state.json, rustfs-blobs/, MANIFEST.json. +# vault-raft.snap, pulumi-state.json, rustfs-blobs.tar.zst, MANIFEST.json. # -# NOTE: at-rest age encryption (CONTRACT_004 §4.3) is NOT yet applied — both -# destinations are private (RustFS internal, offsite scoped creds). Adding age is -# the next hardening (generate the key, encrypt each artifact before `mc cp`). +# At-rest encryption (CONTRACT_004 §4.3): every DATA artifact is age-encrypted to +# $AGE_RECIPIENT before upload (`<name>` -> `<name>.age`); only MANIFEST.json travels +# in cleartext (it carries no secrets — it is the inventory + integrity gate, and +# lists each artifact's PLAINTEXT sha256 so restore verifies after decryption). 
The +# matching identity is in Vault + passphrase-encrypted config (CONTRACT_004 §4.3), +# so {repo + passphrase} can always decrypt even after total Vault loss. set -eu IFS= read -r VAULT_TOKEN IFS= read -r OFF_EP @@ -19,6 +22,7 @@ IFS= read -r OFF_SK IFS= read -r BUCKET TS="$1" MC_IMAGE="$2" +AGE_RECIPIENT="$3" OFFSITE_BUCKET=olsitec-foundation W="/tmp/foundation-backup-$TS" mkdir -p "$W" @@ -34,20 +38,41 @@ echo "[backup] vault raft snapshot" >&2 docker exec -e VAULT_ADDR=http://127.0.0.1:8200 -e VAULT_TOKEN="$VAULT_TOKEN" foundation-vault \ sh -c 'vault operator raft snapshot save /tmp/v.snap >/dev/null 2>&1 && cat /tmp/v.snap && rm -f /tmp/v.snap' > "$W/vault-raft.snap" +# RustFS root creds from the running container (VM-trusted). +RAK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_ACCESS_KEY=//p') +RSK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_SECRET_KEY=//p') + +echo "[backup] rustfs blobs -> rustfs-blobs.tar.zst" >&2 +# Pull the blob buckets onto the VM fs so the bundle is a single encrypted unit +# (CONTRACT_004 §4.3 "whole bundle"). Tiny at Layer 0; may be made incremental later. +mkdir -p "$W/blobs/forgejo-packages" "$W/blobs/forgejo-artifacts" "$W/blobs/forgejo-lfs" +docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \ + -e RAK="$RAK" -e RSK="$RSK" "$MC_IMAGE" -c ' + set -e + mc alias set rfs http://foundation-rustfs:9000 "$RAK" "$RSK" >/dev/null + for b in forgejo-packages forgejo-artifacts forgejo-lfs; do + mc mirror --overwrite --quiet "rfs/$b" "/w/blobs/$b" >/dev/null 2>&1 || true + done' +tar -C "$W/blobs" -cf - . 
| zstd -q -T0 > "$W/rustfs-blobs.tar.zst" +rm -rf "$W/blobs" + echo "[backup] MANIFEST.json" >&2 ( cd "$W" - jq -n --arg ts "$TS" \ - --argjson files "$(for f in postgres.sql.gz forgejo-repos.tar.zst vault-raft.snap pulumi-state.json; do + jq -n --arg ts "$TS" --arg rcpt "$AGE_RECIPIENT" \ + --argjson files "$(for f in postgres.sql.gz forgejo-repos.tar.zst vault-raft.snap pulumi-state.json rustfs-blobs.tar.zst; do [ -f "$f" ] || continue jq -n --arg n "$f" --arg sha "$(sha256sum "$f" | cut -d' ' -f1)" --argjson sz "$(stat -c %s "$f")" \ '{name:$n, sha256:$sha, size:$sz}' done | jq -s '.')" \ - '{timestamp:$ts, restoreOrder:["vault","postgres","rustfs","forgejo"], artifacts:$files}' > MANIFEST.json + '{timestamp:$ts, encryption:"age", ageRecipient:$rcpt, restoreOrder:["vault","postgres","rustfs","forgejo"], artifacts:$files}' > MANIFEST.json ) -# RustFS root creds from the running container (VM-trusted). -RAK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_ACCESS_KEY=//p') -RSK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_SECRET_KEY=//p') +echo "[backup] age-encrypt artifacts (-> *.age)" >&2 +for f in postgres.sql.gz forgejo-repos.tar.zst vault-raft.snap pulumi-state.json rustfs-blobs.tar.zst; do + [ -f "$W/$f" ] || continue + age -r "$AGE_RECIPIENT" -o "$W/$f.age" "$W/$f" + rm -f "$W/$f" +done echo "[backup] upload to RustFS $BUCKET/$TS + replicate offsite" >&2 docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \ @@ -58,10 +83,7 @@ docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \ mc alias set rfs http://foundation-rustfs:9000 "$RAK" "$RSK" >/dev/null mc alias set off "$OFF_EP" "$OFF_AK" "$OFF_SK" >/dev/null mc cp -r /w/ "rfs/$BUCKET/$TS/" >/dev/null - for b in forgejo-packages forgejo-artifacts forgejo-lfs; do - mc mirror --overwrite --quiet "rfs/$b" "rfs/$BUCKET/$TS/rustfs-blobs/$b" >/dev/null 2>&1 || true 
- done mc mirror --overwrite --quiet "rfs/$BUCKET/$TS" "off/$OFFB/$TS" >/dev/null ' rm -rf "$W" -echo "[backup] complete: rfs/$BUCKET/$TS (+ offsite $OFFSITE_BUCKET/$TS)" >&2 +echo "[backup] complete: rfs/$BUCKET/$TS (+ offsite $OFFSITE_BUCKET/$TS), age-encrypted" >&2 diff --git a/backup/backup.sh b/backup/backup.sh index c2190fb..6a99317 100755 --- a/backup/backup.sh +++ b/backup/backup.sh @@ -24,6 +24,7 @@ OFF_EP=$(pulumi config get foundation:backup.offsiteEndpoint) OFF_AK=$(pulumi config get foundation:backup.offsiteAccessKey) OFF_SK=$(pulumi config get foundation:backup.offsiteSecretKey) BUCKET=$(pulumi config get foundation:backup.bucket) +AGE_RECIPIENT=$(pulumi config get foundation:backup.ageRecipient) # public; CONTRACT_004 §4.3 HOST=$(pulumi config get foundation:vm.host) PORT=$(pulumi config get foundation:vm.sshPort) SUSER=$(pulumi config get foundation:vm.user) @@ -34,7 +35,7 @@ echo "backup: $TS -> rfs/$BUCKET/$TS (+ offsite)" # Pulumi state + the assembler script onto the VM. pulumi stack export | $SSHX "mkdir -p $W && cat > $W/pulumi-state.json" $SSHX "cat > /tmp/backup-remote-$TS.sh" < "$ROOT/backup/backup-remote.sh" -# Run the assembler: secrets on stdin (never argv), TS + MC_IMAGE as args. +# Run the assembler: secrets on stdin (never argv); TS, MC_IMAGE, age recipient as args. printf '%s\n%s\n%s\n%s\n%s\n' "$RT" "$OFF_EP" "$OFF_AK" "$OFF_SK" "$BUCKET" \ - | $SSHX "sh /tmp/backup-remote-$TS.sh '$TS' '$MC_IMAGE'; rm -f /tmp/backup-remote-$TS.sh" + | $SSHX "sh /tmp/backup-remote-$TS.sh '$TS' '$MC_IMAGE' '$AGE_RECIPIENT'; rm -f /tmp/backup-remote-$TS.sh" echo "backup: done ($TS)" diff --git a/backup/restore-remote.sh b/backup/restore-remote.sh index 12a10b0..68b507c 100755 --- a/backup/restore-remote.sh +++ b/backup/restore-remote.sh @@ -6,17 +6,24 @@ # disaster restore (overwriting live, restore order Vault->Postgres->RustFS->Forgejo) # is dr/restore-to-fresh-vm.sh (T13), out of scope here. 
# -# Secrets on stdin; non-secrets ($TS, $MC_IMAGE, $PG_IMAGE, $SRC) as args. +# Secrets on stdin (OFF_* offsite creds + the age IDENTITY); non-secrets ($TS, +# $MC_IMAGE, $PG_IMAGE, $SRC) as args. The bundle is age-encrypted (CONTRACT_004 +# §4.3): every artifact is pulled as <name>.age and decrypted with the identity +# BEFORE its MANIFEST sha256 (a PLAINTEXT sha) is verified. set -eu IFS= read -r OFF_EP IFS= read -r OFF_AK IFS= read -r OFF_SK IFS= read -r BUCKET +IFS= read -r AGE_IDENTITY TS="$1"; MC_IMAGE="$2"; PG_IMAGE="$3"; SRC="${4:-rfs}" OFFSITE_BUCKET=olsitec-foundation W="/tmp/foundation-restore-$TS" -rm -rf "$W"; mkdir -p "$W" -fail() { echo "RESTORE VERIFY FAIL: $1" >&2; docker rm -f foundation-restore-pg >/dev/null 2>&1 || true; exit 1; } +rm -rf "$W"; (umask 077; mkdir -p "$W") +fail() { echo "RESTORE VERIFY FAIL: $1" >&2; docker rm -f foundation-restore-pg >/dev/null 2>&1 || true; rm -f "$W/age.key" 2>/dev/null || true; exit 1; } + +# Materialise the age identity to a 0600 file for `age -d -i` (removed on exit). 
+( umask 077; printf '%s\n' "$AGE_IDENTITY" > "$W/age.key" ) RAK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_ACCESS_KEY=//p') RSK=$(docker inspect foundation-rustfs --format '{{range .Config.Env}}{{println .}}{{end}}' | sed -n 's/^RUSTFS_SECRET_KEY=//p') @@ -37,8 +44,14 @@ docker run --rm --network foundation-net --entrypoint sh -v "$W":/w \ [ -f "$W/MANIFEST.json" ] || { [ -d "$W/$TS" ] && mv "$W/$TS"/* "$W"/; } [ -f "$W/MANIFEST.json" ] || fail "MANIFEST.json missing from pulled bundle" -echo "[restore] verify MANIFEST sha256" >&2 cd "$W" +echo "[restore] age-decrypt artifacts" >&2 +for name in $(jq -r '.artifacts[].name' MANIFEST.json); do + [ -f "$name.age" ] || fail "$name.age missing from bundle" + age -d -i age.key -o "$name" "$name.age" 2>/dev/null || fail "age decrypt failed: $name" +done + +echo "[restore] verify MANIFEST sha256 (plaintext)" >&2 jq -r '.artifacts[] | "\(.sha256) \(.name)"' MANIFEST.json | while read -r sha name; do [ -f "$name" ] || { echo "missing $name" >&2; exit 1; } got=$(sha256sum "$name" | cut -d' ' -f1) @@ -62,9 +75,16 @@ zstd -dc forgejo-repos.tar.zst | tar -C repos -xf - 2>/dev/null || fail "forgejo [ -d repos/git/repositories/olsitec/foundation.git ] || fail "olsitec/foundation.git not in repo bundle" echo "[restore] forgejo repos OK: olsitec/foundation.git present" >&2 +echo "[restore] extract rustfs blobs + assert packages present" >&2 +mkdir -p blobs +zstd -dc rustfs-blobs.tar.zst | tar -C blobs -xf - 2>/dev/null || fail "rustfs-blobs tar extract failed" +[ -d blobs/forgejo-packages ] || fail "forgejo-packages not in blob bundle" +echo "[restore] rustfs blobs OK: $(find blobs -type f | wc -l | tr -d ' ') object(s)" >&2 + echo "[restore] vault snapshot sanity" >&2 [ -s vault-raft.snap ] || fail "vault-raft.snap empty" echo "[restore] vault snapshot OK: $(stat -c %s vault-raft.snap) bytes" >&2 +rm -f "$W/age.key" rm -rf "$W" -echo "RESTORE VERIFY PASS ($TS from 
$SRC)" +echo "RESTORE VERIFY PASS ($TS from $SRC, age-decrypted)" diff --git a/backup/restore.sh b/backup/restore.sh index d8efe1d..6206690 100755 --- a/backup/restore.sh +++ b/backup/restore.sh @@ -23,11 +23,12 @@ OFF_EP=$(pulumi config get foundation:backup.offsiteEndpoint) OFF_AK=$(pulumi config get foundation:backup.offsiteAccessKey) OFF_SK=$(pulumi config get foundation:backup.offsiteSecretKey) BUCKET=$(pulumi config get foundation:backup.bucket) +AGE_IDENTITY=$(pulumi config get foundation:backup.ageIdentity) # secret; CONTRACT_004 §4.3 HOST=$(pulumi config get foundation:vm.host) PORT=$(pulumi config get foundation:vm.sshPort) SUSER=$(pulumi config get foundation:vm.user) SSHX="ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=15 -i $KEY -p $PORT $SUSER@$HOST" $SSHX "cat > /tmp/restore-remote-$TS.sh" < "$ROOT/backup/restore-remote.sh" -printf '%s\n%s\n%s\n%s\n' "$OFF_EP" "$OFF_AK" "$OFF_SK" "$BUCKET" \ +printf '%s\n%s\n%s\n%s\n%s\n' "$OFF_EP" "$OFF_AK" "$OFF_SK" "$BUCKET" "$AGE_IDENTITY" \ | $SSHX "sh /tmp/restore-remote-$TS.sh '$TS' '$MC_IMAGE' '$PG_IMAGE' '$SRC'; rm -f /tmp/restore-remote-$TS.sh" diff --git a/bootstrap/Pulumi.foundation.yaml b/bootstrap/Pulumi.foundation.yaml index 5dba134..e1508e1 100644 --- a/bootstrap/Pulumi.foundation.yaml +++ b/bootstrap/Pulumi.foundation.yaml @@ -63,4 +63,7 @@ config: secure: v1:9YpTkFoQanMwxAQV:dJ4YmXS0aOTHPbuK1H6AJ0SAJ0CjYX0iIyLOQAUNfsOWLsSy5TXxPpGecieBWkzc4AALDkJNlQN9Xo6Q0ZcaSg== vaultCredentials:rootToken: secure: v1:OUpYMjnaftxMUKjv:2m+dydQopXGRleeX6ddhYSHgHP7HHZXYLAvQHXUvaA91qajoxU+VugDB/Rs= + foundation:backup.ageRecipient: age1x6dmgtt2eahpvyzkmy6j80rts28chw2lcam0rcxq3nhc8ld649sslzpsy4 + foundation:backup.ageIdentity: + secure: v1:VCFVXswrmMrXyFbr:p4pfG/Kp2lreetYX4O86rZqpU1xQugRycF+PBBiNGZnaD0c15R+mJuLNrl0rBXY5vJwyZTbNSpFY1zPQ7TwuQcVp9h8oiGcgVEobsbb4BBp3lFhsObllgYM9 encryptionsalt: v1:5YhUt8BVfH0=:v1:DPCHl+7zwn4RaMPj:A19tZzBlZ1NmDtTWrHreEKk5e8idyw== diff --git 
a/provision/index.ts b/provision/index.ts index 8bc5b53..d3eac95 100644 --- a/provision/index.ts +++ b/provision/index.ts @@ -44,6 +44,13 @@ const dockerInstall = [ "touch /root/.provision-done", ]; +// Host tools the backup/restore path runs directly on the VM (not in a container): +// `zstd` (forgejo-repos + rustfs-blobs compression) and `age` (CONTRACT_004 §4.3 +// at-rest encryption). `jq` is already in the cloud-init base packages. Declaring +// them here means a fresh DR VM (T13) has them from first boot — do not rely on +// them being present incidentally. +const backupTools = ["zstd", "age"]; + const dep = new HetznerDeployment(platformName, { platformName, hcloudToken: token, @@ -56,7 +63,7 @@ const dep = new HetznerDeployment(platformName, { type: "cx33", // 4c/8G/80GB (Helsinki); cx22 legacy, cx33 not in nbg1 image: "debian-12", labels: { purpose: "foundation-test", ephemeral: "true" }, - cloudInitConfig: { lateCommands: dockerInstall }, + cloudInitConfig: { extraPackages: backupTools, lateCommands: dockerInstall }, }, ], });