---
# SSD disk-usage alert for turing3 /mnt/ssd (shared by Postgres + Plex + media).
# A full SSD crashes Postgres and corrupts Plex's SQLite DB, so we warn early.
#
# NOTE: the Telegram bot token + chat id live in the `telegram-disk-alert` Secret,
# created out-of-band (NOT in git) so the token stays out of history:
#   kubectl -n default create secret generic telegram-disk-alert \
#     --from-literal=botToken='<BOT_TOKEN>' --from-literal=chatId='<CHAT_ID>'
#
# Mounts the existing RWX plex-data PVC to read `df` of the underlying SSD and
# to keep the alert-cooldown marker file (`.disk-alert-last`) between runs.
#
# Setting TEST=1 in the container env sends a one-off test message and exits.
# Any failure (unreadable mount, Telegram send error) exits non-zero so the
# Job is recorded as failed instead of silently reporting "ok".
apiVersion: batch/v1
kind: CronJob
metadata:
  name: disk-usage-alert
  namespace: default
spec:
  schedule: "*/15 * * * *"
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 3
  jobTemplate:
    spec:
      # No retries: the next scheduled run is the retry.
      backoffLimit: 0
      activeDeadlineSeconds: 120
      template:
        spec:
          restartPolicy: Never
          containers:
            - name: check
              image: curlimages/curl:8.11.1
              imagePullPolicy: IfNotPresent
              env:
                - name: THRESHOLD  # alert at/above this % used
                  value: "90"
                - name: COOLDOWN_SEC  # min seconds between alerts (3h)
                  value: "10800"
                - name: MOUNT
                  value: "/data"
                - name: BOT_TOKEN
                  valueFrom:
                    secretKeyRef:
                      name: telegram-disk-alert
                      key: botToken
                - name: CHAT_ID
                  valueFrom:
                    secretKeyRef:
                      name: telegram-disk-alert
                      key: chatId
              command: ["/bin/sh", "-c"]
              args:
                - |
                  set -u

                  # Use% and human-readable free space of the filesystem backing $MOUNT.
                  pct=$(df -P "$MOUNT" | awk 'END{gsub("%","",$5); print $5}')
                  avail=$(df -Ph "$MOUNT" | awk 'END{print $4}')

                  # If df failed, pct is empty/non-numeric; fail the job rather than
                  # letting the numeric test below error out and report a false "ok".
                  case "$pct" in
                    ''|*[!0-9]*)
                      echo "ERROR: could not read disk usage of ${MOUNT} (got '${pct}')" >&2
                      exit 1
                      ;;
                  esac

                  now=$(date +%s)
                  # Timestamp of the last delivered alert; lives on the PVC so it
                  # survives between Job pods.
                  # NOTE(review): requires the container user to have write access
                  # to $MOUNT - confirm.
                  marker="$MOUNT/.disk-alert-last"

                  # send TEXT: post TEXT to the Telegram chat.
                  # -f makes HTTP errors (bad token, bad chat id) return non-zero,
                  # -S still prints the error while -s hides the progress meter.
                  send() {
                    curl -fsS -m 15 "https://api.telegram.org/bot${BOT_TOKEN}/sendMessage" \
                      --data-urlencode "chat_id=${CHAT_ID}" \
                      --data-urlencode "text=$1" -d "parse_mode=HTML" >/dev/null
                  }

                  # Manual end-to-end check: bypasses threshold and cooldown.
                  if [ "${TEST:-0}" = "1" ]; then
                    if send "✅ turingpi disk-alert test — SSD at ${pct}% (free ${avail}). Alerting works."; then
                      echo "TEST sent (${pct}% used)"; exit 0
                    fi
                    echo "ERROR: Telegram test message failed" >&2
                    exit 1
                  fi

                  if [ "$pct" -ge "$THRESHOLD" ]; then
                    last=0; [ -f "$marker" ] && last=$(cat "$marker" 2>/dev/null || echo 0)
                    # An empty or corrupt marker must not break the arithmetic below.
                    case "$last" in
                      ''|*[!0-9]*) last=0 ;;
                    esac
                    if [ $((now - last)) -ge "$COOLDOWN_SEC" ]; then
                      # Start the cooldown only once the alert was actually delivered,
                      # so a failed send is retried on the next run.
                      if send "⚠️ turingpi SSD ${pct}% full — only ${avail} free on /mnt/ssd. Postgres + Plex crash at 100%. Prune content / check maintainerr."; then
                        echo "$now" > "$marker" \
                          || echo "WARN: could not write ${marker}; cooldown not recorded" >&2
                        echo "ALERT sent (${pct}% >= ${THRESHOLD}%)"
                      else
                        echo "ERROR: Telegram alert failed (${pct}% >= ${THRESHOLD}%); cooldown not started" >&2
                        exit 1
                      fi
                    else
                      echo "over threshold (${pct}%) but within cooldown; skipping"
                    fi
                  else
                    echo "ok: ${pct}% used, ${avail} free (threshold ${THRESHOLD}%)"
                  fi
              resources:
                requests:
                  cpu: 10m
                  memory: 16Mi
                limits:
                  memory: 64Mi
              volumeMounts:
                - name: data
                  mountPath: /data
          volumes:
            - name: data
              persistentVolumeClaim:
                claimName: plex-data