fix(ci): fix ccache strategy and add runner cleanup

- Cache key: replace run_id (unique per build) with date-based key
  so entries are reused within the same day and rotate daily
- Skip cache save on exact hit (act_runner can't overwrite keys)
- build.sh: set CCACHE_DIR/CCACHE_BASEDIR inside rattler-build's
  isolated env so release builds actually use the cached directory
- build.sh: print ccache stats at end for diagnostics
- Add disk space cleanup step to build.yml (matching release.yml)
- Remove cross-build cache fallback in release.yml (different -O flags)
- Add runner cleanup daemon (.gitea/runner/) with systemd timer
  to purge stale cache entries, Docker data, and old workspaces
This commit is contained in:
forbes
2026-02-07 08:18:00 -06:00
parent 1e4deea130
commit 8ba7b73aa8
6 changed files with 285 additions and 7 deletions

View File

@@ -0,0 +1,11 @@
# One-shot service that runs the CI runner disk-cleanup script.
# NOTE(review): presumably activated by the companion timer unit rather than
# enabled on its own -- confirm against the installed unit names.
[Unit]
Description=Kindred Create CI runner disk cleanup
# Ordering only: if docker.service is being started too, start after it.
After=docker.service
[Service]
# The script runs to completion and exits; there is no long-lived process.
Type=oneshot
ExecStart=/opt/runner/cleanup.sh
# Read by cleanup.sh as the disk-usage percentage that enables aggressive cleanup.
Environment=CLEANUP_THRESHOLD=85
# Read by cleanup.sh as the age (days) past which cache files are deleted.
Environment=CACHE_MAX_AGE_DAYS=7
# Append both output streams to the same log file.
StandardOutput=append:/var/log/runner-cleanup.log
StandardError=append:/var/log/runner-cleanup.log

220
.gitea/runner/cleanup.sh Executable file
View File

@@ -0,0 +1,220 @@
#!/usr/bin/env bash
# Runner disk cleanup script for Kindred Create CI/CD
#
# Designed to run as a cron job on the pubworker host:
# */30 * * * * /path/to/cleanup.sh >> /var/log/runner-cleanup.log 2>&1
#
# Or install the systemd timer (see cleanup.timer / cleanup.service).
#
# What it cleans:
# 1. Docker: stopped containers, dangling images, build cache
# 2. act_runner action cache: deletes files older than CACHE_MAX_AGE_DAYS
# 3. act_runner workspaces: removes leftover build workspaces
# 4. System: apt cache, old logs
#
# What it preserves:
# - The current runner container and its image
# - Cache files newer than CACHE_MAX_AGE_DAYS (so recent ccache hits still
#   work), unless the emergency phase has to clear the whole cache
# - Everything outside of known CI paths
# Strict mode: abort on unhandled errors, unset variables, and failures in
# any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# ---------------------------------------------------------------------------
# Configuration -- every value except WORKSPACES and LOG_PREFIX can be
# overridden from the environment
# ---------------------------------------------------------------------------

# Disk usage (percent) at or above which the aggressive cleanup steps run.
THRESHOLD="${CLEANUP_THRESHOLD:-85}"

# Directory holding the act_runner action cache.
: "${CACHE_DIR:=/root/.cache/actcache}"

# Directories whose immediate sub-directories are treated as build workspaces.
WORKSPACES=("/root/.cache/act" "/workspace")

# Cache files older than this many days are deleted unconditionally.
: "${CACHE_MAX_AGE_DAYS:=7}"

# Age filter handed to `docker builder prune --filter until=...`.
# This is a duration such as 48h, not a number of days.
: "${DOCKER_IMAGE_MAX_AGE:=48h}"

# Tag placed in front of every log message.
LOG_PREFIX='[runner-cleanup]'
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Print a log line to stdout: "<YYYY-MM-DD HH:MM:SS> <LOG_PREFIX> <message>".
# All arguments are joined into a single message.
log() {
  printf '%s %s %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${LOG_PREFIX}" "$*"
}
# Print the usage percentage of the root filesystem as bare digits
# (no "%" sign, no trailing newline).
disk_usage_pct() {
  local last_row
  # Keep the pipeline's exit status so a failing df is still reported.
  last_row=$(df --output=pcent / | tail -n 1) || return
  # Drop everything that is not a digit (padding and the percent sign).
  printf '%s' "${last_row//[!0-9]/}"
}
# Format a byte count as a human-readable IEC string (e.g. 1536 -> "1.5KiB").
# Arguments: $1 - byte count; may be negative; defaults to 0 when omitted.
# Outputs:   the formatted value on stdout. If numfmt is unavailable or
#            rejects the input, the raw value with a "B" suffix is printed.
bytes_to_human() {
  local bytes=${1:-0}
  # "--" stops option parsing so a negative count (e.g. a directory that grew
  # between two measurements) is not mistaken for a command-line flag.
  numfmt --to=iec-i --suffix=B -- "$bytes" 2>/dev/null || echo "${bytes}B"
}
# ---------------------------------------------------------------------------
# Phase 1: Check if cleanup is needed
# ---------------------------------------------------------------------------
# Measure root filesystem usage and decide between light and aggressive mode.
usage=$(disk_usage_pct)

# Refuse to continue on a nonsensical measurement instead of guessing a mode.
if ! [[ "$usage" =~ ^[0-9]+$ ]]; then
  log "ERROR: could not determine disk usage (got '${usage}')"
  exit 1
fi

# A non-integer threshold would make the numeric test below error out, and the
# error path falls into the aggressive branch. Fall back to the default.
if ! [[ "$THRESHOLD" =~ ^[0-9]+$ ]]; then
  log "WARNING: invalid threshold '${THRESHOLD}', falling back to 85"
  THRESHOLD=85
fi

log "Disk usage: ${usage}% (threshold: ${THRESHOLD}%)"
if [ "$usage" -lt "$THRESHOLD" ]; then
  log "Below threshold, running light cleanup only"
  AGGRESSIVE=false
else
  log "Above threshold, running aggressive cleanup"
  AGGRESSIVE=true
fi
# ---------------------------------------------------------------------------
# Phase 2: Docker cleanup (always runs, safe)
# ---------------------------------------------------------------------------
log "--- Docker cleanup ---"
# Every Docker step is best-effort. When the daemon cannot be reached the
# whole phase is skipped, so that a failing `docker` call inside a command
# substitution cannot abort the script (errexit + pipefail) before the
# cache, workspace and system phases run.
if ! docker info >/dev/null 2>&1; then
  log "Docker is not available, skipping Docker cleanup"
else
  # Remove stopped containers. The IDs are listed once so the reported count
  # matches what is actually passed to `docker rm`.
  mapfile -t stopped_ids < <(docker ps -aq --filter status=exited --filter status=dead 2>/dev/null)
  if [ "${#stopped_ids[@]}" -gt 0 ]; then
    docker rm "${stopped_ids[@]}" 2>/dev/null || true
    log "Removed ${#stopped_ids[@]} stopped containers"
  fi

  # Remove dangling images (untagged layers)
  mapfile -t dangling_ids < <(docker images -q --filter dangling=true 2>/dev/null)
  if [ "${#dangling_ids[@]}" -gt 0 ]; then
    docker rmi "${dangling_ids[@]}" 2>/dev/null || true
    log "Removed ${#dangling_ids[@]} dangling images"
  fi

  # Prune build cache
  docker builder prune -f --filter "until=${DOCKER_IMAGE_MAX_AGE}" 2>/dev/null || true
  log "Pruned Docker build cache older than ${DOCKER_IMAGE_MAX_AGE}"

  if [ "$AGGRESSIVE" = true ]; then
    # Remove all images not used by running containers. If the set of in-use
    # images cannot be determined, skip removal rather than treat every image
    # as unused.
    if running_images=$(docker ps -q 2>/dev/null \
      | xargs -r docker inspect --format='{{.Image}}' 2>/dev/null \
      | sort -u); then
      while IFS= read -r img; do
        [ -n "$img" ] || continue
        # Fixed-string match fed from a here-string: no regex interpretation
        # and no pipeline whose status pipefail could distort.
        if ! grep -qF -- "$img" <<<"$running_images"; then
          docker rmi -f "$img" 2>/dev/null || true
        fi
      done < <(docker images -q 2>/dev/null | sort -u)
      log "Removed unused Docker images (aggressive)"
    else
      log "Could not determine images in use, skipping image removal"
    fi

    # Prune volumes
    docker volume prune -f 2>/dev/null || true
    log "Pruned unused Docker volumes"
  fi
fi
# ---------------------------------------------------------------------------
# Phase 3: act_runner action cache cleanup
# ---------------------------------------------------------------------------
log "--- Action cache cleanup ---"
# Resolve the cache directory first: use the configured one, otherwise fall
# back to the first common alternative location that exists.
if [ ! -d "$CACHE_DIR" ]; then
  log "Cache directory not found: ${CACHE_DIR}"
  for alt in /var/lib/act_runner/.cache/actcache /home/*/.cache/actcache; do
    if [ -d "$alt" ]; then
      log "Found cache at: $alt (update CACHE_DIR config)"
      CACHE_DIR="$alt"
      break
    fi
  done
fi

if [ -d "$CACHE_DIR" ]; then
  # Size measurements are informational only; a du failure must not abort the
  # script under errexit/pipefail, so it is tolerated and treated as 0.
  before=$(du -sb "$CACHE_DIR" 2>/dev/null | cut -f1) || true

  # Delete cache files older than the maximum age, then any directories that
  # became empty. -mindepth 1 keeps CACHE_DIR itself in place even when it
  # ends up empty.
  find "$CACHE_DIR" -mindepth 1 -type f -mtime "+${CACHE_MAX_AGE_DAYS}" -delete 2>/dev/null || true
  find "$CACHE_DIR" -mindepth 1 -type d -empty -delete 2>/dev/null || true

  after=$(du -sb "$CACHE_DIR" 2>/dev/null | cut -f1) || true
  freed=$(( ${before:-0} - ${after:-0} ))
  log "Cache cleanup freed $(bytes_to_human "$freed") (entries older than ${CACHE_MAX_AGE_DAYS}d)"
fi
# ---------------------------------------------------------------------------
# Phase 4: Workspace cleanup
# ---------------------------------------------------------------------------
log "--- Workspace cleanup ---"
for ws in "${WORKSPACES[@]}"; do
  [ -d "$ws" ] || continue

  # Size measurements are informational; tolerate du failures (for example
  # files vanishing while a build is running) instead of aborting.
  before=$(du -sb "$ws" 2>/dev/null | cut -f1) || true

  # Candidates are top-level workspace dirs whose own mtime is older than
  # 2 hours. A directory's mtime only changes when entries are added to or
  # removed from it directly, so an active build writing deeper in the tree
  # can still look stale. Each candidate is therefore skipped when anything
  # inside it was modified within the last 2 hours.
  while IFS= read -r -d '' stale_dir; do
    if [ -n "$(find "$stale_dir" -mmin -120 -print -quit 2>/dev/null)" ]; then
      continue
    fi
    rm -rf -- "$stale_dir" 2>/dev/null || true
  done < <(find "$ws" -mindepth 1 -maxdepth 1 -type d -mmin +120 -print0 2>/dev/null)

  after=$(du -sb "$ws" 2>/dev/null | cut -f1) || true
  freed=$(( ${before:-0} - ${after:-0} ))
  if [ "$freed" -gt 0 ]; then
    log "Workspace $ws: freed $(bytes_to_human "$freed")"
  fi
done
# ---------------------------------------------------------------------------
# Phase 5: System cleanup
# ---------------------------------------------------------------------------
log "--- System cleanup ---"
# apt cache
apt-get clean 2>/dev/null || true

# Truncate large log files (keep last 1000 lines).
# The file is rewritten in place rather than replaced with mv: a daemon that
# still has the log open would otherwise keep writing to the unlinked inode
# (whose space is not released), and the replacement file would lose the
# original ownership and mode.
for logfile in /var/log/syslog /var/log/daemon.log /var/log/kern.log; do
  [ -f "$logfile" ] || continue
  size=$(stat -c%s "$logfile" 2>/dev/null) || size=0
  if [ "${size:-0}" -gt 104857600 ]; then
    # Unpredictable temp name next to the log; skip this file if it cannot
    # be created (for example when the disk is completely full).
    tmp=$(mktemp "${logfile}.XXXXXX") || continue
    if tail -n 1000 "$logfile" > "$tmp" && cat "$tmp" > "$logfile"; then
      log "Truncated $logfile (was >100MB)"
    fi
    rm -f -- "$tmp"
  fi
done

# Journal logs older than 3 days
journalctl --vacuum-time=3d 2>/dev/null || true
# ---------------------------------------------------------------------------
# Phase 6: Emergency cleanup (only if still critical)
# ---------------------------------------------------------------------------
# Re-measure after the regular phases; the steps below run only when the
# root filesystem is still more than 95% full.
usage=$(disk_usage_pct)
if [ "$usage" -gt 95 ]; then
  log "CRITICAL: Still at ${usage}% after cleanup"

  # Last resort for Docker: prune all unused data, volumes included.
  docker system prune --all --force --volumes 2>/dev/null || true
  log "Ran docker system prune -af --volumes"

  # Last resort for the action cache: remove its contents. The :? guard
  # aborts rather than expanding to "/*" if CACHE_DIR is ever empty.
  if [ -d "$CACHE_DIR" ]; then
    rm -rf "${CACHE_DIR:?}/"*
    log "Cleared entire action cache"
  fi

  usage=$(disk_usage_pct)
  log "After emergency cleanup: ${usage}%"
fi
# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
# Final measurement for the log.
usage=$(disk_usage_pct)
log "Cleanup complete. Disk usage: ${usage}%"

# Report top space consumers for diagnostics.
# This report is informational only. du exits non-zero when entries vanish or
# cannot be read mid-scan, and under pipefail that would make the script's
# last command fail, marking an otherwise successful run as failed. Both the
# du stage and the pipeline as a whole are therefore tolerated.
log "Top 10 directories under /var:"
{ du -sh /var/*/ 2>/dev/null || true; } | sort -rh | head -n 10 | while read -r line; do
  log " $line"
done || true

View File

@@ -0,0 +1,10 @@
# Timer that periodically triggers the CI runner cleanup service.
# NOTE(review): no Unit= is set, so systemd activates the service sharing this
# timer's base name -- confirm the installed file names match.
[Unit]
Description=Run CI runner cleanup every 30 minutes
[Timer]
# First run 5 minutes after boot.
OnBootSec=5min
# Subsequent runs 30 minutes after the triggered unit was last activated.
OnUnitActiveSec=30min
# NOTE(review): systemd.timer(5) documents Persistent= as only affecting
# OnCalendar= timers; it likely has no effect with the monotonic triggers
# used here -- confirm.
Persistent=true
[Install]
WantedBy=timers.target

View File

@@ -22,6 +22,17 @@ jobs:
DEBIAN_FRONTEND: noninteractive
steps:
- name: Free disk space
run: |
echo "=== Disk usage before cleanup ==="
df -h /
rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache 2>/dev/null || true
rm -rf /usr/local/share/boost /usr/share/swift 2>/dev/null || true
apt-get autoremove -y 2>/dev/null || true
apt-get clean 2>/dev/null || true
echo "=== Disk usage after cleanup ==="
df -h /
- name: Install system prerequisites
run: |
apt-get update -qq
@@ -46,12 +57,16 @@ jobs:
export PATH="$HOME/.pixi/bin:$PATH"
pixi --version
- name: Compute cache date key
id: cache-date
run: echo "date=$(date -u +%Y%m%d)" >> $GITHUB_OUTPUT
- name: Restore ccache
id: ccache-restore
uses: https://github.com/actions/cache/restore@v4
with:
path: /tmp/ccache-kindred-create
key: ccache-build-${{ github.ref_name }}-${{ github.run_id }}
key: ccache-build-${{ github.ref_name }}-${{ steps.cache-date.outputs.date }}
restore-keys: |
ccache-build-${{ github.ref_name }}-
ccache-build-main-
@@ -60,6 +75,7 @@ jobs:
run: |
mkdir -p $CCACHE_DIR
pixi run ccache -z
pixi run ccache -p
- name: Configure (CMake)
run: pixi run cmake --preset conda-linux-release
@@ -71,11 +87,11 @@ jobs:
run: pixi run ccache -s
- name: Save ccache
if: always()
if: always() && steps.ccache-restore.outputs.cache-hit != 'true'
uses: https://github.com/actions/cache/save@v4
with:
path: /tmp/ccache-kindred-create
key: ccache-build-${{ github.ref_name }}-${{ github.run_id }}
key: ccache-build-${{ github.ref_name }}-${{ steps.cache-date.outputs.date }}
- name: Run C++ unit tests
continue-on-error: true

View File

@@ -67,20 +67,26 @@ jobs:
export PATH="$HOME/.pixi/bin:$PATH"
pixi --version
- name: Compute cache date key
id: cache-date
run: echo "date=$(date -u +%Y%m%d)" >> $GITHUB_OUTPUT
- name: Restore ccache
id: ccache-restore
uses: https://github.com/actions/cache/restore@v4
with:
path: /tmp/ccache-kindred-create
key: ccache-release-linux-${{ github.run_id }}
key: ccache-release-linux-${{ steps.cache-date.outputs.date }}
restore-keys: |
ccache-release-linux-
ccache-build-main-
- name: Prepare ccache
run: |
mkdir -p $CCACHE_DIR
# Ensure ccache is accessible to rattler-build's subprocess
export PATH="$(pixi run bash -c 'echo $PATH')"
pixi run ccache -z
pixi run ccache -p
- name: Build release package (AppImage)
working-directory: package/rattler-build
@@ -92,11 +98,11 @@ jobs:
run: pixi run ccache -s
- name: Save ccache
if: always()
if: always() && steps.ccache-restore.outputs.cache-hit != 'true'
uses: https://github.com/actions/cache/save@v4
with:
path: /tmp/ccache-kindred-create
key: ccache-release-linux-${{ github.run_id }}
key: ccache-release-linux-${{ steps.cache-date.outputs.date }}
- name: Clean up intermediate build files
run: |

View File

@@ -1,3 +1,15 @@
# ccache setup: point ccache at the directory the CI workflow caches between
# builds (/tmp/ccache-kindred-create by default). Each setting keeps a value
# already present in the environment and otherwise takes the default below.
: "${CCACHE_DIR:=/tmp/ccache-kindred-create}"
: "${CCACHE_COMPRESS:=true}"
: "${CCACHE_COMPRESSLEVEL:=6}"
: "${CCACHE_MAXSIZE:=4G}"
: "${CCACHE_SLOPPINESS:=include_file_ctime,include_file_mtime,pch_defines,time_macros}"

# The base directory is always recomputed: SRC_DIR when set, otherwise the
# current working directory.
CCACHE_BASEDIR="${SRC_DIR:-$(pwd)}"

export CCACHE_DIR CCACHE_BASEDIR CCACHE_COMPRESS CCACHE_COMPRESSLEVEL \
  CCACHE_MAXSIZE CCACHE_SLOPPINESS

mkdir -p "$CCACHE_DIR"
echo "ccache config: CCACHE_DIR=$CCACHE_DIR CCACHE_BASEDIR=$CCACHE_BASEDIR"

# Reset the statistics counters; tolerate a missing or failing ccache.
ccache -z || true
if [[ ${HOST} =~ .*linux.* ]]; then
CMAKE_PRESET=conda-linux-release
fi
@@ -46,3 +58,6 @@ cmake --install build
mv ${PREFIX}/bin/FreeCAD ${PREFIX}/bin/freecad || true
mv ${PREFIX}/bin/FreeCADCmd ${PREFIX}/bin/freecadcmd || true
echo "=== ccache statistics ==="
ccache -s || true