feat(runners): decoupled Pulumi stack for the fenced runner fleet (R5)
A separate, isolated Pulumi project (peer to bootstrap/provision/offsite-backup) that provisions runner VM(s) on a libvirt host and registers Forgejo Actions runners with a distinct `fenced` label — so ecosystem/untrusted jobs run OFF the forge VM. Decoupled ON PURPOSE: a @pulumi/libvirt provider dials the runner host on every up/refresh, so keeping it in `bootstrap` would make the foundation undeployable/unrefreshable whenever the host (crunchy01) is down or unreachable (the Terraform coupling trap). As its own stack, bootstrap never imports it — foundation ops never touch crunchy01, and this stack's health is independent. One-way dependency: it mints a runner token FROM the forge, i.e. runs after the foundation stands. Codifies what was built + hardened by hand this session (runners/README.md): Ubuntu VM on the LAN bridge (docker + qemu-guest-agent via cloud-init), the kube-router-proof FORWARD timer, and runner registration. Typechecked; the live `pulumi up` cutover from the hand-built VM is the remaining validation step. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
9bea030a47
commit
cfa71847ba
8 changed files with 402 additions and 1 deletions
272
runners/index.ts
Normal file
272
runners/index.ts
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
// runners/index.ts — Step-0-after-foundation: the fenced Actions runner fleet.
|
||||
//
|
||||
// ISOLATED STACK, decoupled from `bootstrap` on purpose (see Pulumi.yaml). A
|
||||
// @pulumi/libvirt provider dials the runner HOST (e.g. crunchy01) on every
|
||||
// up/refresh/preview; keeping that here — never imported by bootstrap — means
|
||||
// foundation ops never require, and are never blocked by, the runner host being
|
||||
// reachable. One-way dependency only: this stack mints a runner token FROM the
|
||||
// forge, so it runs after the foundation stands.
|
||||
//
|
||||
// It codifies what SESSION_2026-07-01_003 built by hand (see runners/README.md):
|
||||
// 1. an Ubuntu VM on the host's LAN bridge, with docker + qemu-guest-agent;
|
||||
// 2. a kube-router-proof FORWARD accept (the host is a k3s node whose FORWARD
|
||||
// policy is DROP — bridged VM traffic needs an idempotent, re-asserted rule);
|
||||
// 3. a Forgejo Actions runner registered with a distinct label ("fenced") so
|
||||
// ecosystem/untrusted jobs (runs-on: fenced) run OFF the forge VM (R5).
|
||||
//
|
||||
// PREREQUISITE (host, one-time, kept OUT of this stack so the libvirt provider
|
||||
// always has something to connect to): qemu-kvm + libvirt-daemon-system +
|
||||
// libvirt-clients + virtinst + cloud-image-utils installed on the host, libvirtd
|
||||
// enabled, and a LAN bridge (br0). See runners/README.md §Host prep.
|
||||
import * as pulumi from "@pulumi/pulumi";
|
||||
import * as libvirt from "@pulumi/libvirt";
|
||||
import * as command from "@pulumi/command";
|
||||
import * as fs from "fs";
|
||||
import * as yaml from "js-yaml";
|
||||
|
||||
// Stack configuration reader; every lookup below supplies its own fallback.
const cfg = new pulumi.Config();

// --- runner host (libvirt over qemu+ssh) ---
// Each field can be overridden through stack config under the `host.*` keys.
const host = {
  // LAN bridge the VM joins.
  bridge: cfg.get("host.bridge") ?? "br0",
  // libvirt storage pool (nvme).
  pool: cfg.get("host.pool") ?? "default",
  // Account used for both the libvirt URI and host-side SSH commands.
  user: cfg.get("host.user") ?? "root",
  // crunchy01
  address: cfg.get("host.address") ?? "192.168.1.2",
};
|
||||
// SSH key reaching the host AND the created VM (root). Path via ENV, never config.
// Resolution order: $RUNNER_SSH_KEY_PATH, else $HOME/.ssh/foundation-test_ed25519.
// Resolution fails fast with an actionable message: an unset HOME would otherwise
// produce the literal path "undefined/.ssh/…", and a missing key file would surface
// as a bare ENOENT from readFileSync.
const sshKeyPath = ((): string => {
  const explicitPath = process.env.RUNNER_SSH_KEY_PATH;
  if (explicitPath) {
    return explicitPath;
  }
  const homeDir = process.env.HOME;
  if (!homeDir) {
    throw new Error(
      "Cannot locate the runner SSH key: set RUNNER_SSH_KEY_PATH (or HOME) in the environment.",
    );
  }
  return `${homeDir}/.ssh/foundation-test_ed25519`;
})();

// Both halves of the key pair are required: the private key for SSH/libvirt, the
// public key for the VM's cloud-init authorized_keys.
for (const keyFile of [sshKeyPath, `${sshKeyPath}.pub`]) {
  if (!fs.existsSync(keyFile)) {
    throw new Error(
      `Runner SSH key file not found: ${keyFile} ` +
        "(point RUNNER_SSH_KEY_PATH at the private key; its .pub must sit beside it).",
    );
  }
}

// The private key is wrapped as a Pulumi secret so it is encrypted wherever it flows.
const sshPrivateKey = pulumi.secret(fs.readFileSync(sshKeyPath, "utf8"));
// The public key is trimmed so no trailing newline ends up inside the cloud-init YAML.
const sshPublicKey = fs.readFileSync(`${sshKeyPath}.pub`, "utf8").trim();
|
||||
// SSH connection for commands executed directly on the runner host.
// The port is overridable via `host.sshPort`; everything else comes from `host`.
const hostConn: command.types.input.remote.ConnectionArgs = {
  user: host.user,
  host: host.address,
  privateKey: sshPrivateKey,
  port: cfg.getNumber("host.sshPort") ?? 22,
};
|
||||
|
||||
// --- the forge (mint a runner registration token; reached over SSH via docker) ---
// All fields are overridable through the `forge.*` stack config keys.
const forge = {
  // Public URL the runner registers against.
  instanceUrl: cfg.get("forge.instanceUrl") ?? "https://forge.olsitec.net",
  // Name of the Forgejo container that `docker exec` targets on the forge machine.
  container: cfg.get("forge.container") ?? "foundation-forgejo",
  // SSH endpoint of the forge machine.
  address: cfg.get("forge.address") ?? "204.168.234.72",
  sshPort: cfg.getNumber("forge.sshPort") ?? 222,
  user: cfg.get("forge.user") ?? "root",
};

// SSH connection to the forge machine; uses the same key as the runner host.
const forgeConn: command.types.input.remote.ConnectionArgs = {
  user: forge.user,
  host: forge.address,
  port: forge.sshPort,
  privateKey: sshPrivateKey,
};
|
||||
|
||||
// --- the VM + runner shape ---
// Resolver list used when `vm.nameservers` is not configured.
const fallbackNameservers = ["192.168.1.251", "1.1.1.1"];

// Sizing, addressing and base image of the runner VM; all overridable via `vm.*`.
const vm = {
  name: cfg.get("vm.name") ?? "foundation-runner-01",
  vcpu: cfg.getNumber("vm.vcpu") ?? 4,
  memoryMiB: cfg.getNumber("vm.memoryMiB") ?? 8192,
  diskGiB: cfg.getNumber("vm.diskGiB") ?? 40,
  // LAN address for the VM. Empty → DHCP (the runner polls the forge outbound, so a
  // fixed address is optional). Default matches the hand-built VM.
  ipCidr: cfg.get("vm.ipCidr") ?? "192.168.1.15/24",
  gateway: cfg.get("vm.gateway") ?? "192.168.1.251",
  nameservers: cfg.getObject<string[]>("vm.nameservers") ?? fallbackNameservers,
  // Cloud image the base volume is created from.
  ubuntuImageUrl:
    cfg.get("vm.ubuntuImageUrl") ??
    "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img",
};
|
||||
// The runner label(s), in `label:schema` form. `fenced` routes `runs-on: fenced`
// jobs here; the schema part maps the label to a default job image on the VM's docker.
const configuredRunnerLabels = cfg.get("runner.labels");
const runnerLabels =
  configuredRunnerLabels ?? "fenced:docker://node:20-bookworm";
|
||||
|
||||
// =============================================================================
// libvirt provider — qemu+ssh to the host. Lazy-connects when a libvirt resource
// is created; the host prerequisite (libvirtd) must already be satisfied.
// =============================================================================
// Query string of the libvirt URI: private-key auth using the same key file as the
// SSH commands. NOTE: known_hosts_verify=ignore disables host-key verification.
const libvirtUriQuery = [
  "sshauth=privkey",
  `keyfile=${sshKeyPath}`,
  "known_hosts_verify=ignore",
].join("&");

const provider = new libvirt.Provider("runner-host", {
  uri: `qemu+ssh://${host.user}@${host.address}/system?${libvirtUriQuery}`,
});
|
||||
|
||||
// --- host prep: the kube-router-proof bridged-FORWARD accept (idempotent timer) ---
// The host is a k3s node; kube-router sets FORWARD policy DROP and re-syncs iptables,
// which drops bridged VM↔LAN traffic and can flush a hand-added rule. A 60s systemd
// timer re-asserts it. This is a control-plane op on the HOST (not a libvirt resource).

// Oneshot service: insert the ACCEPT rule only when `iptables -C` reports it missing.
const BRIDGE_FORWARD_SERVICE_UNIT = `[Unit]
Description=Ensure bridged VM traffic passes iptables FORWARD (libvirt on a kube-router host)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c 'iptables -C FORWARD -m physdev --physdev-is-bridged -j ACCEPT 2>/dev/null || iptables -I FORWARD 1 -m physdev --physdev-is-bridged -j ACCEPT'`;

// Timer: first run 30s after boot, then every 60s after the service last ran.
const BRIDGE_FORWARD_TIMER_UNIT = `[Unit]
Description=Re-assert the bridged-FORWARD accept rule (kube-router flushes iptables on resync)
[Timer]
OnBootSec=30s
OnUnitActiveSec=60s
AccuracySec=5s
[Install]
WantedBy=timers.target`;

// Host-side script: write both unit files, reload systemd, enable + start the timer.
const FIREWALL = `set -eu
cat > /etc/systemd/system/libvirt-bridge-forward.service <<'U'
${BRIDGE_FORWARD_SERVICE_UNIT}
U
cat > /etc/systemd/system/libvirt-bridge-forward.timer <<'U'
${BRIDGE_FORWARD_TIMER_UNIT}
U
systemctl daemon-reload
systemctl enable --now libvirt-bridge-forward.timer >/dev/null
echo "bridged-FORWARD timer active"`;

// The same script serves create and update, so a changed script is re-applied in place.
const firewall = new command.remote.Command("runner-host-firewall", {
  create: FIREWALL,
  update: FIREWALL,
  connection: hostConn,
});
|
||||
|
||||
// =============================================================================
// The VM: Ubuntu base volume → backed domain disk → cloud-init → domain.
// Ubuntu (not the Debian genericcloud image) because Debian's cloud-init wrote
// netplan the image never applied (no IPv4); Ubuntu renders + applies it cleanly.
// =============================================================================
const BYTES_PER_GIB = 1024 * 1024 * 1024;

// Base image volume, created in the host's storage pool from the Ubuntu cloud image URL.
const base = new libvirt.Volume(
  `${vm.name}-base`,
  {
    pool: host.pool,
    format: "qcow2",
    source: vm.ubuntuImageUrl,
    name: `${vm.name}-ubuntu-base.img`,
  },
  { provider },
);

// The VM's own disk: a qcow2 volume backed by the base image, sized to vm.diskGiB.
const disk = new libvirt.Volume(
  `${vm.name}-disk`,
  {
    pool: host.pool,
    format: "qcow2",
    name: `${vm.name}.qcow2`,
    baseVolumeId: base.id,
    size: vm.diskGiB * BYTES_PER_GIB,
  },
  { provider },
);
|
||||
|
||||
// cloud-init user-data: docker + qemu-guest-agent + our SSH key + a marker.
// Key order is kept stable because yaml.dump serialises keys in insertion order.
const cloudConfig = {
  hostname: vm.name,
  manage_etc_hosts: true,
  ssh_pwauth: false,
  users: [
    {
      name: "root",
      lock_passwd: false,
      ssh_authorized_keys: [sshPublicKey],
    },
  ],
  packages: ["ca-certificates", "curl", "jq", "qemu-guest-agent"],
  runcmd: [
    // Install docker via the upstream convenience script.
    ["sh", "-c", "curl -fsSL https://get.docker.com | sh"],
    "systemctl enable --now docker qemu-guest-agent",
    // Marker file the registration script polls for before touching docker.
    "touch /root/cloud-init-done",
  ],
};

// The `#cloud-config` header line must come first, ahead of the YAML document.
const userData = `#cloud-config\n${yaml.dump(cloudConfig)}`;
|
||||
// network-config v2: static on enp1s0 if ipCidr set, else DHCP. Ubuntu applies both.
// Static form: fixed address, default route via the gateway, explicit resolvers.
const staticEnp1s0 = {
  dhcp4: false,
  addresses: [vm.ipCidr],
  routes: [{ to: "default", via: vm.gateway }],
  nameservers: { addresses: vm.nameservers },
};

// DHCP form: used when vm.ipCidr is the empty string.
const dhcpEnp1s0 = { dhcp4: true };

const networkConfig = yaml.dump({
  version: 2,
  ethernets: {
    enp1s0: vm.ipCidr ? staticEnp1s0 : dhcpEnp1s0,
  },
});
|
||||
// Cloud-init ISO carrying the user-data and network-config, stored in the host's
// storage pool and attached to the domain.
const cloudinit = new libvirt.CloudInitDisk(
  `${vm.name}-cloudinit`,
  {
    name: `${vm.name}-cloudinit.iso`,
    pool: host.pool,
    userData,
    networkConfig,
  },
  { provider },
);
|
||||
|
||||
// A real PTY console so `virsh console <vm>` works (learned the hard way):
// one serial console on port 0 and one virtio console on port 1.
const domainConsoles = [
  { type: "pty", targetPort: "0", targetType: "serial" },
  { type: "pty", targetPort: "1", targetType: "virtio" },
];

// The runner VM itself. It is created only after the host firewall command has run
// (dependsOn), so the bridged-FORWARD timer is in place before the VM is defined.
const domain = new libvirt.Domain(
  vm.name,
  {
    name: vm.name,
    vcpu: vm.vcpu,
    memory: vm.memoryMiB,
    cpu: { mode: "host-passthrough" },
    autostart: true,
    qemuAgent: true,
    cloudinit: cloudinit.id,
    disks: [{ volumeId: disk.id }],
    networkInterfaces: [{ bridge: host.bridge }],
    consoles: domainConsoles,
  },
  { provider, dependsOn: [firewall] },
);
|
||||
|
||||
// =============================================================================
// Register the Forgejo runner: mint a token on the forge, then register + run
// act_runner inside the VM (docker), reachable once cloud-init has installed docker.
// =============================================================================
// 1) token — instance-scoped registration token, minted over SSH via docker exec.
const tokenCmd = new command.remote.Command(
  "runner-token",
  {
    connection: forgeConn,
    create: `docker exec -u git ${forge.container} forgejo actions generate-runner-token`,
    // Re-mint if the forge container (by name) or the label set changes. The
    // container is listed explicitly so the trigger matches this stated intent.
    triggers: [forge.container, runnerLabels],
  },
  {
    // stdout IS the registration token: mark the raw output secret so it is stored
    // encrypted in state, not just the trimmed value derived from it below.
    additionalSecretOutputs: ["stdout"],
  },
);

// Trimmed token (the CLI output ends with a newline), kept secret end to end.
const runnerToken = pulumi.secret(tokenCmd.stdout.apply((s) => s.trim()));
|
||||
|
||||
// 2) register + run — connect to the VM (its static/DHCP IP). The script waits for
// cloud-init (docker) to be ready, registers idempotently, and runs the daemon with
// the host docker gid so uid-1000 act_runner can reach the socket.
//
// VM address: a static vm.ipCidr gives a known IP. In DHCP mode the address is
// taken from what libvirt reports for the domain's interfaces; the deployment fails
// with a clear message if none is reported. It must NEVER fall back to the libvirt
// host's address, which would register the runner on the host and defeat the fencing.
// NOTE(review): reported addresses presumably require the guest agent / lease wait
// on the domain — confirm against the provider docs before relying on DHCP mode.
const vmIp = vm.ipCidr
  ? vm.ipCidr.split("/")[0] // static → known IP
  : domain.networkInterfaces.apply((nics) => {
      for (const nic of nics ?? []) {
        for (const addr of nic.addresses ?? []) {
          // Take the first IPv4 address; skip IPv6 entries.
          if (!addr.includes(":")) {
            return addr;
          }
        }
      }
      throw new Error(
        `No IPv4 address reported for VM ${vm.name}; set vm.ipCidr to a static address ` +
          "or make the domain report its DHCP lease.",
      );
    });

// Registration script, run as root inside the VM.
//  - Readiness: polls up to 60×5s for the cloud-init marker AND a working docker,
//    and aborts explicitly if that never happens rather than failing later on.
//  - Token: held in a shell variable and passed straight to `register`. It is not
//    staged through a file in a `--rm` container, because /tmp there is outside the
//    /data volume and the file would vanish before the next container could read it.
//  - Idempotence: registration is skipped when /data/.runner already exists.
//    NOTE(review): that also means a label change on an existing VM re-runs this
//    script but does not re-register — confirm whether that is intended.
const REGISTER = pulumi.interpolate`set -eu
IMG=code.forgejo.org/forgejo/runner:6
ready=0
for _ in $(seq 1 60); do
  if [ -f /root/cloud-init-done ] && docker info >/dev/null 2>&1; then ready=1; break; fi
  sleep 5
done
if [ "$ready" -ne 1 ]; then
  echo "cloud-init marker / docker not ready after 300s" >&2
  exit 1
fi
DGID=$(stat -c %g /var/run/docker.sock)
docker volume inspect crunchy-runner-data >/dev/null 2>&1 || docker volume create crunchy-runner-data >/dev/null
docker pull -q "$IMG" >/dev/null
if docker run --rm -v crunchy-runner-data:/data --entrypoint sh "$IMG" -c '[ -s /data/.runner ]'; then
  echo "already registered"
else
  TOKEN='${runnerToken}'
  docker run --rm -v crunchy-runner-data:/data --entrypoint /bin/forgejo-runner "$IMG" \
    register --no-interactive --instance '${forge.instanceUrl}' --token "$TOKEN" \
    --name '${vm.name}' --labels '${runnerLabels}' >/dev/null
  echo "registered"
fi
docker rm -f forgejo-runner >/dev/null 2>&1 || true
docker run -d --name forgejo-runner --restart unless-stopped --group-add "$DGID" \
  -v crunchy-runner-data:/data -v /var/run/docker.sock:/var/run/docker.sock \
  --entrypoint /bin/forgejo-runner "$IMG" daemon >/dev/null
echo "runner daemon up"`;

// Runs the script over SSH inside the VM. Re-runs whenever the VM is replaced, the
// token is re-minted, or the label set changes.
const register = new command.remote.Command(
  "runner-register",
  {
    connection: {
      host: vmIp,
      port: 22,
      user: "root",
      privateKey: sshPrivateKey,
    },
    create: REGISTER,
    triggers: [domain.id, runnerToken, runnerLabels],
  },
  { dependsOn: [domain, tokenCmd] },
);
|
||||
|
||||
// Stack outputs: where the runner lives and what it is registered as.
export const forgeInstance = forge.instanceUrl;
export const runnerLabelsOut = runnerLabels;
export const runnerHost = host.address;
export const runnerVmIp = vmIp;

// `register` is created for its side effect only; reference it so it is not
// reported as an unused binding.
void register;
|
||||
Loading…
Add table
Add a link
Reference in a new issue