// runners/index.ts — Step-0-after-foundation: the fenced Actions runner fleet.
//
// ISOLATED STACK, decoupled from `bootstrap` on purpose (see Pulumi.yaml). A
// @pulumi/libvirt provider dials the runner HOST (e.g. crunchy01) on every
// up/refresh/preview; keeping that here — never imported by bootstrap — means
// foundation ops never require, and are never blocked by, the runner host being
// reachable. One-way dependency only: this stack mints a runner token FROM the
// forge, so it runs after the foundation stands.
//
// It codifies what SESSION_2026-07-01_003 built by hand (see runners/README.md):
//   1. an Ubuntu VM on the host's LAN bridge, with docker + qemu-guest-agent;
//   2. a kube-router-proof FORWARD accept (the host is a k3s node whose FORWARD
//      policy is DROP — bridged VM traffic needs an idempotent, re-asserted rule);
//   3. a Forgejo Actions runner registered with a distinct label ("fenced") so
//      ecosystem/untrusted jobs (runs-on: fenced) run OFF the forge VM (R5).
//
// PREREQUISITE (host, one-time, kept OUT of this stack so the libvirt provider
// always has something to connect to): qemu-kvm + libvirt-daemon-system +
// libvirt-clients + virtinst + cloud-image-utils installed on the host, libvirtd
// enabled, and a LAN bridge (br0). See runners/README.md §Host prep.

import * as pulumi from "@pulumi/pulumi";
import * as libvirt from "@pulumi/libvirt";
import * as command from "@pulumi/command";
import * as fs from "fs";
import * as os from "os";
import * as yaml from "js-yaml";

const cfg = new pulumi.Config();

// --- runner host (libvirt over qemu+ssh) ---
const host = {
  address: cfg.get("host.address") ?? "192.168.1.2", // crunchy01
  user: cfg.get("host.user") ?? "root",
  bridge: cfg.get("host.bridge") ?? "br0", // LAN bridge the VM joins
  pool: cfg.get("host.pool") ?? "default", // libvirt storage pool (nvme)
};

// SSH key reaching the host AND the created VM (root). Path via ENV, never config.
// HOME can be unset (systemd units, some CI shells); fall back to os.homedir() so
// the default never degrades to the literal path "undefined/.ssh/...".
const sshKeyPath =
  process.env.RUNNER_SSH_KEY_PATH ??
  `${process.env.HOME ?? os.homedir()}/.ssh/foundation-test_ed25519`;
const sshPrivateKey = pulumi.secret(fs.readFileSync(sshKeyPath, "utf8"));
const sshPublicKey = fs.readFileSync(`${sshKeyPath}.pub`, "utf8").trim();

const hostConn: command.types.input.remote.ConnectionArgs = {
  host: host.address,
  port: cfg.getNumber("host.sshPort") ?? 22,
  user: host.user,
  privateKey: sshPrivateKey,
};

// --- the forge (mint a runner registration token; reached over SSH via docker) ---
const forge = {
  address: cfg.get("forge.address") ?? "204.168.234.72",
  sshPort: cfg.getNumber("forge.sshPort") ?? 222,
  user: cfg.get("forge.user") ?? "root",
  instanceUrl: cfg.get("forge.instanceUrl") ?? "https://forge.olsitec.net",
  container: cfg.get("forge.container") ?? "foundation-forgejo",
};

const forgeConn: command.types.input.remote.ConnectionArgs = {
  host: forge.address,
  port: forge.sshPort,
  user: forge.user,
  privateKey: sshPrivateKey,
};

// --- the VM + runner shape ---
const vm = {
  name: cfg.get("vm.name") ?? "foundation-runner-01",
  vcpu: cfg.getNumber("vm.vcpu") ?? 8,
  memoryMiB: cfg.getNumber("vm.memoryMiB") ?? 32768,
  diskGiB: cfg.getNumber("vm.diskGiB") ?? 40,
  // LAN address for the VM. Empty → DHCP (the runner polls the forge outbound, so a
  // fixed address is optional for the runner itself). Default matches the hand-built VM.
  ipCidr: cfg.get("vm.ipCidr") ?? "192.168.1.15/24",
  // Only consulted when ipCidr is empty (DHCP): the address the register step should
  // SSH to, e.g. a DHCP reservation. Unset by default.
  registerAddress: cfg.get("vm.registerAddress"),
  gateway: cfg.get("vm.gateway") ?? "192.168.1.251",
  nameservers: cfg.getObject<string[]>("vm.nameservers") ?? [
    "192.168.1.251",
    "1.1.1.1",
  ],
  ubuntuImageUrl:
    cfg.get("vm.ubuntuImageUrl") ??
    "https://cloud-images.ubuntu.com/noble/current/noble-server-cloudimg-amd64.img",
};

// The runner label(s). `fenced` routes runs-on: fenced here; the schema maps it to a
// default job image on the VM's docker.
const runnerLabels =
  cfg.get("runner.labels") ?? "fenced:docker://node:20-bookworm";

// =============================================================================
// libvirt provider — qemu+ssh to the host. Lazy-connects when a libvirt resource
// is created; the host prerequisite (libvirtd) must already be satisfied.
// =============================================================================
const provider = new libvirt.Provider("runner-host", {
  uri: `qemu+ssh://${host.user}@${host.address}/system?sshauth=privkey&keyfile=${sshKeyPath}&known_hosts_verify=ignore`,
});

// --- host prep: the kube-router-proof bridged-FORWARD accept (idempotent timer) ---
// The host is a k3s node; kube-router sets FORWARD policy DROP and re-syncs iptables,
// which drops bridged VM↔LAN traffic and can flush a hand-added rule. A 60s systemd
// timer re-asserts it. This is a control-plane op on the HOST (not a libvirt resource).
const FIREWALL = `set -eu
cat > /etc/systemd/system/libvirt-bridge-forward.service <<'U'
[Unit]
Description=Ensure bridged VM traffic passes iptables FORWARD (libvirt on a kube-router host)
After=network-online.target
[Service]
Type=oneshot
ExecStart=/bin/sh -c 'iptables -C FORWARD -m physdev --physdev-is-bridged -j ACCEPT 2>/dev/null || iptables -I FORWARD 1 -m physdev --physdev-is-bridged -j ACCEPT'
U
cat > /etc/systemd/system/libvirt-bridge-forward.timer <<'U'
[Unit]
Description=Re-assert the bridged-FORWARD accept rule (kube-router flushes iptables on resync)
[Timer]
OnBootSec=30s
OnUnitActiveSec=60s
AccuracySec=5s
[Install]
WantedBy=timers.target
U
systemctl daemon-reload
systemctl enable --now libvirt-bridge-forward.timer >/dev/null
echo "bridged-FORWARD timer active"`;

// The same script is used for create and update, so edits to it are re-applied on
// the host by the next `up`.
const firewall = new command.remote.Command("runner-host-firewall", {
  connection: hostConn,
  create: FIREWALL,
  update: FIREWALL,
});

// =============================================================================
// The VM: Ubuntu base volume → backed domain disk → cloud-init → domain.
// Ubuntu (not the Debian genericcloud image) because Debian's cloud-init wrote
// netplan the image never applied (no IPv4); Ubuntu renders + applies it cleanly.
// =============================================================================
const base = new libvirt.Volume(
  `${vm.name}-base`,
  {
    name: `${vm.name}-ubuntu-base.img`,
    source: vm.ubuntuImageUrl,
    pool: host.pool,
    format: "qcow2",
  },
  { provider },
);

const disk = new libvirt.Volume(
  `${vm.name}-disk`,
  {
    name: `${vm.name}.qcow2`,
    pool: host.pool,
    format: "qcow2",
    baseVolumeId: base.id,
    size: vm.diskGiB * 1024 * 1024 * 1024, // GiB → bytes
  },
  { provider },
);

// cloud-init user-data: docker + qemu-guest-agent + our SSH key + a marker.
// The marker file (/root/cloud-init-done) is what the register script polls for.
const userData =
  "#cloud-config\n" +
  yaml.dump({
    hostname: vm.name,
    manage_etc_hosts: true,
    ssh_pwauth: false,
    users: [
      {
        name: "root",
        lock_passwd: false,
        ssh_authorized_keys: [sshPublicKey],
      },
    ],
    packages: ["ca-certificates", "curl", "jq", "qemu-guest-agent"],
    runcmd: [
      ["sh", "-c", "curl -fsSL https://get.docker.com | sh"],
      "systemctl enable --now docker qemu-guest-agent",
      "touch /root/cloud-init-done",
    ],
  });

// network-config v2. Match ANY ethernet by name-glob (the NIC may enumerate as
// enp1s0/ens3/… depending on the machine type — hardcoding enp1s0 left the VM with
// no IP). Static if ipCidr set (needed so the register command knows where to
// connect), else DHCP. Ubuntu (netplan-native) applies this at first boot.
const networkConfig = yaml.dump({
  version: 2,
  ethernets: {
    primary: {
      match: { name: "e*" },
      ...(vm.ipCidr
        ? {
            dhcp4: false,
            addresses: [vm.ipCidr],
            routes: [{ to: "default", via: vm.gateway }],
            nameservers: { addresses: vm.nameservers },
          }
        : { dhcp4: true }),
    },
  },
});

const cloudinit = new libvirt.CloudInitDisk(
  `${vm.name}-cloudinit`,
  { name: `${vm.name}-cloudinit.iso`, pool: host.pool, userData, networkConfig },
  { provider },
);

const domain = new libvirt.Domain(
  vm.name,
  {
    name: vm.name,
    memory: vm.memoryMiB,
    vcpu: vm.vcpu,
    cpu: { mode: "host-passthrough" },
    autostart: true,
    // NB: do NOT set qemuAgent:true — it makes the provider block on the guest agent
    // (not up on a fresh boot) during create. We register the runner over the VM's
    // STATIC IP, so we don't need agent-discovered addresses. (guest-agent is still
    // installed via cloud-init for `virsh domifaddr --source agent` convenience.)
    cloudinit: cloudinit.id,
    disks: [{ volumeId: disk.id }],
    networkInterfaces: [{ bridge: host.bridge }],
    // A real PTY console so `virsh console <domain>` works (learned the hard way).
    consoles: [
      { type: "pty", targetPort: "0", targetType: "serial" },
      { type: "pty", targetPort: "1", targetType: "virtio" },
    ],
  },
  // The FORWARD accept must be in place before the bridged VM boots.
  { provider, dependsOn: [firewall] },
);

// =============================================================================
// Register the Forgejo runner: mint a token on the forge, then register + run
// act_runner inside the VM (docker), reachable once cloud-init has installed docker.
// =============================================================================

// 1) token — instance-scoped registration token, minted over SSH via docker exec.
const tokenCmd = new command.remote.Command("runner-token", {
  connection: forgeConn,
  create: `docker exec -u git ${forge.container} forgejo actions generate-runner-token`,
  // Re-mint only when the label set changes — that is the sole trigger here.
  triggers: [runnerLabels],
});
const runnerToken = pulumi.secret(tokenCmd.stdout.apply((s) => s.trim()));

/**
 * Resolve the address the register step SSHes to — always the VM, never the host.
 *
 * - Static mode (`vm.ipCidr` set): the address part of the CIDR.
 * - DHCP mode (`vm.ipCidr` empty): the explicit `vm.registerAddress` override.
 *
 * @throws Error when in DHCP mode without `vm.registerAddress`. Falling back to the
 *   hypervisor host's address would run the register script — and start the
 *   "fenced" runner — on the host itself, defeating the isolation this stack exists for.
 */
function resolveVmAddress(): string {
  if (vm.ipCidr) {
    return vm.ipCidr.split("/")[0];
  }
  if (vm.registerAddress) {
    return vm.registerAddress;
  }
  throw new Error(
    "vm.ipCidr is empty (DHCP) and vm.registerAddress is not set: the VM's address " +
      "is unknown, so the runner cannot be registered. Set vm.ipCidr to a static " +
      "CIDR, or set vm.registerAddress to the VM's reserved DHCP address.",
  );
}

// 2) register + run — connect to the VM. The script waits for cloud-init (docker)
// to be ready, registers idempotently, and runs the daemon with the host docker gid
// so uid-1000 act_runner can reach the socket.
const vmIp = resolveVmAddress();

const REGISTER = pulumi.interpolate`set -eu
IMG=code.forgejo.org/forgejo/runner:6
TOKEN='${runnerToken}'
for _ in $(seq 1 60); do [ -f /root/cloud-init-done ] && docker info >/dev/null 2>&1 && break; sleep 5; done
DGID=$(stat -c %g /var/run/docker.sock)
docker volume inspect crunchy-runner-data >/dev/null 2>&1 || docker volume create crunchy-runner-data >/dev/null
docker pull -q "$IMG" >/dev/null
# (Re)register — this command only re-runs when the token/domain/labels change
# (triggers), so a clean re-register each time is safe. Token passed directly
# (pulumi redacts it in its own output as a secret; short-lived on the VM's argv).
docker run --rm -v crunchy-runner-data:/data --entrypoint sh "$IMG" -c 'rm -f /data/.runner'
docker run --rm -v crunchy-runner-data:/data --entrypoint /bin/forgejo-runner "$IMG" \
  register --no-interactive --instance ${forge.instanceUrl} --token "$TOKEN" \
  --name ${vm.name} --labels '${runnerLabels}' >/dev/null
echo "registered"
docker rm -f forgejo-runner >/dev/null 2>&1 || true
docker run -d --name forgejo-runner --restart unless-stopped --group-add "$DGID" \
  -v crunchy-runner-data:/data -v /var/run/docker.sock:/var/run/docker.sock \
  --entrypoint /bin/forgejo-runner "$IMG" daemon >/dev/null
echo "runner daemon up"`;

const register = new command.remote.Command(
  "runner-register",
  {
    connection: {
      host: vmIp,
      port: 22,
      user: "root",
      privateKey: sshPrivateKey,
      // The VM needs ~60-90s to boot + apply the static IP before sshd answers;
      // wait up to ~5min rather than the default ~10 dials so a fresh create lands
      // the runner in one `up`.
      dialErrorLimit: 30,
      perDialTimeout: 10,
    },
    create: REGISTER,
    // Re-register when the VM is rebuilt, the token is re-minted, or labels change.
    triggers: [domain.id, runnerToken, runnerLabels],
  },
  { dependsOn: [domain, tokenCmd] },
);

export const runnerHost = host.address;
export const runnerVmIp = vmIp;
export const runnerLabelsOut = runnerLabels;
export const forgeInstance = forge.instanceUrl;

// `register` is created for its side effects only; reference it so it is not
// flagged as an unused binding.
void register;