fix(ci): fix ccache strategy and add runner cleanup
- Cache key: replace run_id (unique per build) with date-based key so entries are reused within the same day and rotate daily - Skip cache save on exact hit (act_runner can't overwrite keys) - build.sh: set CCACHE_DIR/CCACHE_BASEDIR inside rattler-build's isolated env so release builds actually use the cached directory - build.sh: print ccache stats at end for diagnostics - Add disk space cleanup step to build.yml (matching release.yml) - Remove cross-build cache fallback in release.yml (different -O flags) - Add runner cleanup daemon (.gitea/runner/) with systemd timer to purge stale cache entries, Docker data, and old workspaces
This commit is contained in:
11
.gitea/runner/cleanup.service
Normal file
11
.gitea/runner/cleanup.service
Normal file
@@ -0,0 +1,11 @@
|
||||
[Unit]
|
||||
Description=Kindred Create CI runner disk cleanup
|
||||
After=docker.service
|
||||
|
||||
[Service]
|
||||
Type=oneshot
|
||||
ExecStart=/opt/runner/cleanup.sh
|
||||
Environment=CLEANUP_THRESHOLD=85
|
||||
Environment=CACHE_MAX_AGE_DAYS=7
|
||||
StandardOutput=append:/var/log/runner-cleanup.log
|
||||
StandardError=append:/var/log/runner-cleanup.log
|
||||
220
.gitea/runner/cleanup.sh
Executable file
220
.gitea/runner/cleanup.sh
Executable file
@@ -0,0 +1,220 @@
|
||||
#!/usr/bin/env bash
|
||||
# Runner disk cleanup script for Kindred Create CI/CD
|
||||
#
|
||||
# Designed to run as a cron job on the pubworker host:
|
||||
# */30 * * * * /path/to/cleanup.sh >> /var/log/runner-cleanup.log 2>&1
|
||||
#
|
||||
# Or install the systemd timer (see cleanup.timer / cleanup.service).
|
||||
#
|
||||
# What it cleans:
|
||||
# 1. Docker: stopped containers, dangling images, build cache
|
||||
# 2. act_runner action cache: keeps only the newest entry per key prefix
|
||||
# 3. act_runner workspaces: removes leftover build workspaces
|
||||
# 4. System: apt cache, old logs
|
||||
#
|
||||
# What it preserves:
|
||||
# - The current runner container and its image
|
||||
# - The most recent cache entry per prefix (so ccache hits still work)
|
||||
# - Everything outside of known CI paths
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Configuration -- adjust these to match your runner setup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Disk usage threshold (percent) -- only run aggressive cleanup above this
|
||||
THRESHOLD=${CLEANUP_THRESHOLD:-85}
|
||||
|
||||
# act_runner cache directory (default location)
|
||||
CACHE_DIR=${CACHE_DIR:-/root/.cache/actcache}
|
||||
|
||||
# act_runner workspace directories
|
||||
WORKSPACES=(
|
||||
"/root/.cache/act"
|
||||
"/workspace"
|
||||
)
|
||||
|
||||
# Maximum age (days) for cache entries before unconditional deletion
|
||||
CACHE_MAX_AGE_DAYS=${CACHE_MAX_AGE_DAYS:-7}
|
||||
|
||||
# Maximum age (days) for Docker images not used by running containers
|
||||
DOCKER_IMAGE_MAX_AGE=${DOCKER_IMAGE_MAX_AGE:-48h}
|
||||
|
||||
# Log prefix
|
||||
LOG_PREFIX="[runner-cleanup]"
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
log() { echo "$(date '+%Y-%m-%d %H:%M:%S') ${LOG_PREFIX} $*"; }
|
||||
|
||||
disk_usage_pct() {
|
||||
df --output=pcent / | tail -1 | tr -dc '0-9'
|
||||
}
|
||||
|
||||
bytes_to_human() {
|
||||
numfmt --to=iec-i --suffix=B "$1" 2>/dev/null || echo "${1}B"
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 1: Check if cleanup is needed
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
usage=$(disk_usage_pct)
|
||||
log "Disk usage: ${usage}% (threshold: ${THRESHOLD}%)"
|
||||
|
||||
if [ "$usage" -lt "$THRESHOLD" ]; then
|
||||
log "Below threshold, running light cleanup only"
|
||||
AGGRESSIVE=false
|
||||
else
|
||||
log "Above threshold, running aggressive cleanup"
|
||||
AGGRESSIVE=true
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 2: Docker cleanup (always runs, safe)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
log "--- Docker cleanup ---"
|
||||
|
||||
# Remove stopped containers
|
||||
stopped=$(docker ps -aq --filter status=exited --filter status=dead 2>/dev/null | wc -l)
|
||||
if [ "$stopped" -gt 0 ]; then
|
||||
docker rm $(docker ps -aq --filter status=exited --filter status=dead) 2>/dev/null || true
|
||||
log "Removed ${stopped} stopped containers"
|
||||
fi
|
||||
|
||||
# Remove dangling images (untagged layers)
|
||||
dangling=$(docker images -q --filter dangling=true 2>/dev/null | wc -l)
|
||||
if [ "$dangling" -gt 0 ]; then
|
||||
docker rmi $(docker images -q --filter dangling=true) 2>/dev/null || true
|
||||
log "Removed ${dangling} dangling images"
|
||||
fi
|
||||
|
||||
# Prune build cache
|
||||
docker builder prune -f --filter "until=${DOCKER_IMAGE_MAX_AGE}" 2>/dev/null || true
|
||||
log "Pruned Docker build cache older than ${DOCKER_IMAGE_MAX_AGE}"
|
||||
|
||||
if [ "$AGGRESSIVE" = true ]; then
|
||||
# Remove all images not used by running containers
|
||||
running_images=$(docker ps -q 2>/dev/null | xargs -r docker inspect --format='{{.Image}}' | sort -u)
|
||||
all_images=$(docker images -q 2>/dev/null | sort -u)
|
||||
for img in $all_images; do
|
||||
if ! echo "$running_images" | grep -q "$img"; then
|
||||
docker rmi -f "$img" 2>/dev/null || true
|
||||
fi
|
||||
done
|
||||
log "Removed unused Docker images (aggressive)"
|
||||
|
||||
# Prune volumes
|
||||
docker volume prune -f 2>/dev/null || true
|
||||
log "Pruned unused Docker volumes"
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 3: act_runner action cache cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
log "--- Action cache cleanup ---"
|
||||
|
||||
if [ -d "$CACHE_DIR" ]; then
|
||||
before=$(du -sb "$CACHE_DIR" 2>/dev/null | cut -f1)
|
||||
|
||||
# Delete cache entries older than max age
|
||||
find "$CACHE_DIR" -type f -mtime "+${CACHE_MAX_AGE_DAYS}" -delete 2>/dev/null || true
|
||||
find "$CACHE_DIR" -type d -empty -delete 2>/dev/null || true
|
||||
|
||||
after=$(du -sb "$CACHE_DIR" 2>/dev/null | cut -f1)
|
||||
freed=$((before - after))
|
||||
log "Cache cleanup freed $(bytes_to_human $freed) (entries older than ${CACHE_MAX_AGE_DAYS}d)"
|
||||
else
|
||||
log "Cache directory not found: ${CACHE_DIR}"
|
||||
|
||||
# Try common alternative locations
|
||||
for alt in /var/lib/act_runner/.cache/actcache /home/*/.cache/actcache; do
|
||||
if [ -d "$alt" ]; then
|
||||
log "Found cache at: $alt (update CACHE_DIR config)"
|
||||
CACHE_DIR="$alt"
|
||||
find "$CACHE_DIR" -type f -mtime "+${CACHE_MAX_AGE_DAYS}" -delete 2>/dev/null || true
|
||||
find "$CACHE_DIR" -type d -empty -delete 2>/dev/null || true
|
||||
break
|
||||
fi
|
||||
done
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 4: Workspace cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
log "--- Workspace cleanup ---"
|
||||
|
||||
for ws in "${WORKSPACES[@]}"; do
|
||||
if [ -d "$ws" ]; then
|
||||
# Remove workspace dirs not modified in the last 2 hours
|
||||
# (active builds should be touching files continuously)
|
||||
before=$(du -sb "$ws" 2>/dev/null | cut -f1)
|
||||
find "$ws" -mindepth 1 -maxdepth 1 -type d -mmin +120 -exec rm -rf {} + 2>/dev/null || true
|
||||
after=$(du -sb "$ws" 2>/dev/null | cut -f1)
|
||||
freed=$((before - after))
|
||||
if [ "$freed" -gt 0 ]; then
|
||||
log "Workspace $ws: freed $(bytes_to_human $freed)"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 5: System cleanup
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
log "--- System cleanup ---"
|
||||
|
||||
# apt cache
|
||||
apt-get clean 2>/dev/null || true
|
||||
|
||||
# Truncate large log files (keep last 1000 lines)
|
||||
for logfile in /var/log/syslog /var/log/daemon.log /var/log/kern.log; do
|
||||
if [ -f "$logfile" ] && [ "$(stat -c%s "$logfile" 2>/dev/null)" -gt 104857600 ]; then
|
||||
tail -1000 "$logfile" > "${logfile}.tmp" && mv "${logfile}.tmp" "$logfile"
|
||||
log "Truncated $logfile (was >100MB)"
|
||||
fi
|
||||
done
|
||||
|
||||
# Journal logs older than 3 days
|
||||
journalctl --vacuum-time=3d 2>/dev/null || true
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Phase 6: Emergency cleanup (only if still critical)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
usage=$(disk_usage_pct)
|
||||
if [ "$usage" -gt 95 ]; then
|
||||
log "CRITICAL: Still at ${usage}% after cleanup"
|
||||
|
||||
# Nuclear option: remove ALL docker data except running containers
|
||||
docker system prune -af --volumes 2>/dev/null || true
|
||||
log "Ran docker system prune -af --volumes"
|
||||
|
||||
# Clear entire action cache
|
||||
if [ -d "$CACHE_DIR" ]; then
|
||||
rm -rf "${CACHE_DIR:?}/"*
|
||||
log "Cleared entire action cache"
|
||||
fi
|
||||
|
||||
usage=$(disk_usage_pct)
|
||||
log "After emergency cleanup: ${usage}%"
|
||||
fi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Summary
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
usage=$(disk_usage_pct)
|
||||
log "Cleanup complete. Disk usage: ${usage}%"
|
||||
|
||||
# Report top space consumers for diagnostics
|
||||
log "Top 10 directories under /var:"
|
||||
du -sh /var/*/ 2>/dev/null | sort -rh | head -10 | while read -r line; do
|
||||
log " $line"
|
||||
done
|
||||
10
.gitea/runner/cleanup.timer
Normal file
10
.gitea/runner/cleanup.timer
Normal file
@@ -0,0 +1,10 @@
|
||||
[Unit]
|
||||
Description=Run CI runner cleanup every 30 minutes
|
||||
|
||||
[Timer]
|
||||
OnBootSec=5min
|
||||
OnUnitActiveSec=30min
|
||||
Persistent=true
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
||||
@@ -22,6 +22,17 @@ jobs:
|
||||
DEBIAN_FRONTEND: noninteractive
|
||||
|
||||
steps:
|
||||
- name: Free disk space
|
||||
run: |
|
||||
echo "=== Disk usage before cleanup ==="
|
||||
df -h /
|
||||
rm -rf /usr/share/dotnet /usr/local/lib/android /opt/ghc /opt/hostedtoolcache 2>/dev/null || true
|
||||
rm -rf /usr/local/share/boost /usr/share/swift 2>/dev/null || true
|
||||
apt-get autoremove -y 2>/dev/null || true
|
||||
apt-get clean 2>/dev/null || true
|
||||
echo "=== Disk usage after cleanup ==="
|
||||
df -h /
|
||||
|
||||
- name: Install system prerequisites
|
||||
run: |
|
||||
apt-get update -qq
|
||||
@@ -46,12 +57,16 @@ jobs:
|
||||
export PATH="$HOME/.pixi/bin:$PATH"
|
||||
pixi --version
|
||||
|
||||
- name: Compute cache date key
|
||||
id: cache-date
|
||||
run: echo "date=$(date -u +%Y%m%d)" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Restore ccache
|
||||
id: ccache-restore
|
||||
uses: https://github.com/actions/cache/restore@v4
|
||||
with:
|
||||
path: /tmp/ccache-kindred-create
|
||||
key: ccache-build-${{ github.ref_name }}-${{ github.run_id }}
|
||||
key: ccache-build-${{ github.ref_name }}-${{ steps.cache-date.outputs.date }}
|
||||
restore-keys: |
|
||||
ccache-build-${{ github.ref_name }}-
|
||||
ccache-build-main-
|
||||
@@ -60,6 +75,7 @@ jobs:
|
||||
run: |
|
||||
mkdir -p $CCACHE_DIR
|
||||
pixi run ccache -z
|
||||
pixi run ccache -p
|
||||
|
||||
- name: Configure (CMake)
|
||||
run: pixi run cmake --preset conda-linux-release
|
||||
@@ -71,11 +87,11 @@ jobs:
|
||||
run: pixi run ccache -s
|
||||
|
||||
- name: Save ccache
|
||||
if: always()
|
||||
if: always() && steps.ccache-restore.outputs.cache-hit != 'true'
|
||||
uses: https://github.com/actions/cache/save@v4
|
||||
with:
|
||||
path: /tmp/ccache-kindred-create
|
||||
key: ccache-build-${{ github.ref_name }}-${{ github.run_id }}
|
||||
key: ccache-build-${{ github.ref_name }}-${{ steps.cache-date.outputs.date }}
|
||||
|
||||
- name: Run C++ unit tests
|
||||
continue-on-error: true
|
||||
|
||||
@@ -67,20 +67,26 @@ jobs:
|
||||
export PATH="$HOME/.pixi/bin:$PATH"
|
||||
pixi --version
|
||||
|
||||
- name: Compute cache date key
|
||||
id: cache-date
|
||||
run: echo "date=$(date -u +%Y%m%d)" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Restore ccache
|
||||
id: ccache-restore
|
||||
uses: https://github.com/actions/cache/restore@v4
|
||||
with:
|
||||
path: /tmp/ccache-kindred-create
|
||||
key: ccache-release-linux-${{ github.run_id }}
|
||||
key: ccache-release-linux-${{ steps.cache-date.outputs.date }}
|
||||
restore-keys: |
|
||||
ccache-release-linux-
|
||||
ccache-build-main-
|
||||
|
||||
- name: Prepare ccache
|
||||
run: |
|
||||
mkdir -p $CCACHE_DIR
|
||||
# Ensure ccache is accessible to rattler-build's subprocess
|
||||
export PATH="$(pixi run bash -c 'echo $PATH')"
|
||||
pixi run ccache -z
|
||||
pixi run ccache -p
|
||||
|
||||
- name: Build release package (AppImage)
|
||||
working-directory: package/rattler-build
|
||||
@@ -92,11 +98,11 @@ jobs:
|
||||
run: pixi run ccache -s
|
||||
|
||||
- name: Save ccache
|
||||
if: always()
|
||||
if: always() && steps.ccache-restore.outputs.cache-hit != 'true'
|
||||
uses: https://github.com/actions/cache/save@v4
|
||||
with:
|
||||
path: /tmp/ccache-kindred-create
|
||||
key: ccache-release-linux-${{ github.run_id }}
|
||||
key: ccache-release-linux-${{ steps.cache-date.outputs.date }}
|
||||
|
||||
- name: Clean up intermediate build files
|
||||
run: |
|
||||
|
||||
@@ -1,3 +1,15 @@
|
||||
# Configure ccache to use a shared cache directory that persists across CI runs.
|
||||
# The workflow caches /tmp/ccache-kindred-create between builds.
|
||||
export CCACHE_DIR="${CCACHE_DIR:-/tmp/ccache-kindred-create}"
|
||||
export CCACHE_BASEDIR="${SRC_DIR:-$(pwd)}"
|
||||
export CCACHE_COMPRESS="${CCACHE_COMPRESS:-true}"
|
||||
export CCACHE_COMPRESSLEVEL="${CCACHE_COMPRESSLEVEL:-6}"
|
||||
export CCACHE_MAXSIZE="${CCACHE_MAXSIZE:-4G}"
|
||||
export CCACHE_SLOPPINESS="${CCACHE_SLOPPINESS:-include_file_ctime,include_file_mtime,pch_defines,time_macros}"
|
||||
mkdir -p "$CCACHE_DIR"
|
||||
echo "ccache config: CCACHE_DIR=$CCACHE_DIR CCACHE_BASEDIR=$CCACHE_BASEDIR"
|
||||
ccache -z || true
|
||||
|
||||
if [[ ${HOST} =~ .*linux.* ]]; then
|
||||
CMAKE_PRESET=conda-linux-release
|
||||
fi
|
||||
@@ -46,3 +58,6 @@ cmake --install build
|
||||
|
||||
mv ${PREFIX}/bin/FreeCAD ${PREFIX}/bin/freecad || true
|
||||
mv ${PREFIX}/bin/FreeCADCmd ${PREFIX}/bin/freecadcmd || true
|
||||
|
||||
echo "=== ccache statistics ==="
|
||||
ccache -s || true
|
||||
|
||||
Reference in New Issue
Block a user