From ff3e6f723c18af9382ea359b8ac747ed94bb9ae3 Mon Sep 17 00:00:00 2001 From: gilgamezh Date: Mon, 28 Jul 2025 14:18:07 +0200 Subject: [PATCH] Add comprehensive documentation and automated update script MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add README.md: Complete repository overview, architecture, and usage guide - Add update.sh: Automated K3s cluster upgrade script for all nodes - Add CLAUDE.md: Claude Code integration documentation 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- CLAUDE.md | 112 +++++++++++++++++++++++++++++ README.md | 144 +++++++++++++++++++++++++++++++++++++ how_to_update.txt | 3 + update.sh | 180 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 439 insertions(+) create mode 100644 CLAUDE.md create mode 100644 README.md create mode 100644 how_to_update.txt create mode 100755 update.sh diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..643ebd0 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,112 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Repository Overview + +This repository contains Kubernetes configuration files for a K3s homelab cluster running on TuringPi hardware. It includes Helm charts, values files, and manifests for deploying various self-hosted applications. 
+ +## Cluster Architecture + +### Hardware Setup +- **turing1**: Control plane + worker (master node, IP: 192.168.222.237) +- **turing2**: Worker node (currently SchedulingDisabled) +- **turing3**: Worker node (also serves as NFS server at turing3.lan) +- **turing4**: Worker node +- **beelink**: Additional worker node + +### Core Infrastructure +- **K3s version**: v1.31.6+k3s1 +- **Storage**: NFS-backed persistent volumes from turing3.lan:/mnt/ssd +- **Load Balancer**: MetalLB for bare metal LoadBalancer services +- **SSL**: cert-manager with Let's Encrypt (staging/production cluster issuers) +- **Ingress**: Nginx with LAN-only restrictions + +## Application Stack + +### Media Services +- **Plex**: kube-plex (Kubernetes-native with dynamic transcoding pods) +- **Jellyfin**: Alternative media server +- **Sonarr/Radarr**: TV/Movie management (Bananaspliff charts) +- **Prowlarr**: Indexer management (custom chart) +- **Transmission**: BitTorrent client with OpenVPN +- **FlareSolverr**: Captcha solver service + +### Other Applications +- **Actual Budget**: Personal finance (custom chart: my-actual-server/) +- **Home Assistant Voice LLMs**: AI voice integration (custom chart) +- **Ollama**: Local LLM inference +- **Prometheus**: Monitoring stack +- **PostgreSQL**: Database backend + +## Common Helm Operations + +### Repository Management +```bash +# Key repositories used +helm repo add metallb https://metallb.github.io/metallb +helm repo add nfs-subdir-external-provisioner https://kubernetes-sigs.github.io/nfs-subdir-external-provisioner +helm repo add jetstack https://charts.jetstack.io +helm repo add bitnami https://charts.bitnami.com/bitnami +helm repo add bananaspliff https://bananaspliff.github.io/geek-charts +helm repo add k8s-at-home https://k8s-at-home.com/charts/ +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo add jellyfin https://jellyfin.github.io/jellyfin-helm +helm repo add ollama-helm 
https://otwld.github.io/ollama-helm/ +helm repo update +``` + +### Application Deployment Pattern +```bash +# Standard deployment with values file +helm upgrade <release> <chart> -f <app>_values.yaml -i + +# Examples from history: +helm upgrade actual my-actual-server -f actual_values.yaml -i +helm upgrade plex kube-plex/charts/kube-plex --values plex_values.yml +helm upgrade radarr bananaspliff/radarr -f radarr_values.yaml +helm upgrade sonarr bananaspliff/sonarr -f sonarr_values.yaml +helm upgrade prowlarr prowlarr -f prowlarr_values.yml +``` + +### Development Workflow +```bash +# Chart development +helm create <chart-name> +helm lint <chart-directory> +helm template <chart> -f <values-file> | vim - + +# Values inspection +helm show values <chart> > <app>_values.yaml +helm get values <release> +helm get manifest <release> +``` + +## File Structure Patterns + +- `<app>_values.yaml` - Helm values overrides for each application +- Custom charts in subdirectories (my-actual-server/, home-assistant-voice-llms/, prowlarr/) +- `*_persistent_volume.yml` - PV definitions for applications requiring storage +- Infrastructure manifests: metallb.yml, ingress.yaml, cluster-issuer-*.yaml + +## Storage Configuration + +- **NFS Server**: turing3.lan serving /mnt/ssd +- **StorageClass**: nfs-client (via nfs-subdir-external-provisioner) +- **Access Mode**: ReadWriteMany for shared media access +- **PVC Pattern**: Applications create their own PVCs or reference pre-existing ones + +## Network Setup + +- **Pod Network**: Cluster subnet requires allowlisting in Plex for transcoding +- **Ingress**: LAN-only access enforced via limit_ingress_to_lan.yaml +- **Load Balancer**: MetalLB provides external IPs for services +- **DNS**: .lan domain for internal services + +## Kube-Plex Specifics + +The kube-plex/ directory contains a Go application that replaces the standard Plex transcoder: +- Creates Kubernetes pods for each transcode job +- Requires AMD64 nodes (configured via nodeSelector) +- Mounts shared NFS volumes for media access +- Environment variables: DATA_PVC, CONFIG_PVC, TRANSCODE_PVC, 
PMS_IMAGE, PMS_INTERNAL_ADDRESS \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..796a89d --- /dev/null +++ b/README.md @@ -0,0 +1,144 @@ +# TuringPi K3s Homelab + +This repository contains Kubernetes configuration files for a K3s cluster running on TuringPi hardware. It includes Helm charts, values files, and manifests for deploying various self-hosted applications in a homelab environment. + +## 🏗️ Cluster Architecture + +### Hardware Setup +- **turing1**: Control plane + worker (192.168.222.237) +- **turing2**: Worker node +- **turing3**: Worker node (NFS server at turing3.lan) +- **turing4**: Worker node +- **beelink**: Additional x86_64 worker node + +### Infrastructure Stack +- **Kubernetes**: K3s lightweight distribution +- **Storage**: NFS-backed persistent volumes from turing3.lan:/mnt/ssd +- **Load Balancer**: MetalLB for bare metal LoadBalancer services +- **SSL**: cert-manager with Let's Encrypt certificates +- **Ingress**: Nginx with LAN-only access restrictions + +## 🚀 Applications + +### Media Services +- **Plex**: Via kube-plex (Kubernetes-native with dynamic transcoding) +- **Jellyfin**: Alternative media server +- **Sonarr/Radarr**: TV/Movie management +- **Prowlarr**: Indexer management +- **Transmission**: BitTorrent client with OpenVPN +- **FlareSolverr**: Captcha solver service + +### Other Applications +- **Actual Budget**: Personal finance management +- **Home Assistant Voice LLMs**: AI voice integration +- **Ollama**: Local LLM inference +- **Prometheus**: Monitoring and metrics +- **PostgreSQL**: Database backend + +## 📁 Repository Structure + +``` +├── *_values.yaml # Helm values overrides for applications +├── my-actual-server/ # Custom Helm chart for Actual Budget +├── home-assistant-voice-llms/ # Custom Helm chart for Voice AI +├── prowlarr/ # Custom Helm chart for Prowlarr +├── kube-plex/ # Kubernetes-native Plex implementation +├── *.yml # Infrastructure manifests (MetalLB, ingress, 
etc.) +└── persistent_volume*.yml # Storage definitions +``` + +## 🔧 Common Operations + +### Application Deployment +```bash +# Deploy with Helm using values files +helm upgrade <release> <chart> -f <app>_values.yaml -i + +# Examples: +helm upgrade actual my-actual-server -f actual_values.yaml -i +helm upgrade plex kube-plex/charts/kube-plex --values plex_values.yml +helm upgrade radarr bananaspliff/radarr -f radarr_values.yaml +``` + +### Infrastructure Management +```bash +# Apply Kubernetes manifests +kubectl apply -f metallb.yml +kubectl apply -f ingress.yaml + +# Check cluster status +kubectl get nodes +kubectl get pods --all-namespaces +``` + +## 🔄 K3s Cluster Updates + +### Automated Update +Run the provided script to update all nodes: +```bash +./update.sh +``` + +### Manual Update Process + +#### 1. Update Master Node (turing1) +```bash +ssh root@turing1 # password: turing +curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_DOWNLOAD=false sh -s - \ + --write-kubeconfig-mode 644 \ + --disable servicelb \ + --token torino \ + --node-ip 192.168.222.237 \ + --disable-cloud-controller \ + --disable local-storage +``` + +#### 2. Update Worker Nodes (turing2, turing3, turing4) +```bash +ssh root@<node> # password: turing +curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_DOWNLOAD=false \ + K3S_URL=https://192.168.222.237:6443 \ + K3S_TOKEN=torino sh - +``` + +#### 3. Update Beelink Node +```bash +ssh gilgamezh@beelink.lan # no password (SSH keys) +sudo curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_DOWNLOAD=false \ + K3S_URL=https://192.168.222.237:6443 \ + K3S_TOKEN=torino sh - +``` + +#### 4. 
Verify Update
+```bash
+kubectl get nodes  # Check all nodes show new version
+kubectl get pods --all-namespaces | grep -v Running  # Check for issues
+```
+
+## 🔑 Access Information
+
+- **Cluster Token**: `torino`
+- **Master Node**: `192.168.222.237:6443`
+- **SSH Access**:
+  - TuringPi nodes: `root@<node>` (password: `turing`)
+  - Beelink: `gilgamezh@beelink.lan` (SSH keys)
+
+## 📚 Additional Documentation
+
+- See `CLAUDE.md` for detailed Claude Code integration guide
+- Custom Helm charts include their own README files
+- Check application-specific `*_values.yaml` files for configuration options
+
+## 🛠️ Development
+
+### Helm Chart Development
+```bash
+helm create <chart-name>
+helm lint <chart-directory>
+helm template <chart> -f <values-file> | kubectl apply --dry-run=client -f -
+```
+
+### Storage Requirements
+- NFS server must be running on turing3.lan
+- Applications require ReadWriteMany access for shared media
+- Persistent volumes are dynamically provisioned via nfs-subdir-external-provisioner
\ No newline at end of file
diff --git a/how_to_update.txt b/how_to_update.txt
new file mode 100644
index 0000000..0b12620
--- /dev/null
+++ b/how_to_update.txt
@@ -0,0 +1,3 @@
+master: curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_DOWNLOAD=false sh -s - --write-kubeconfig-mode 644 --disable servicelb --token torino --node-ip 192.168.222.237 --disable-cloud-controller --disable local-storage
+
+nodes: curl -sfL https://get.k3s.io | INSTALL_K3S_SKIP_DOWNLOAD=false K3S_URL=https://192.168.222.237:6443 K3S_TOKEN=torino sh -
diff --git a/update.sh b/update.sh
new file mode 100755
index 0000000..4fb5431
--- /dev/null
+++ b/update.sh
@@ -0,0 +1,180 @@
+#!/bin/bash
+# Upgrade K3s on all cluster nodes: master, TuringPi workers, then beelink.
+# Secrets can be overridden: K3S_TOKEN=... SSHPASS=... ./update.sh
+
+set -euo pipefail
+
+# Colors for output
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+# Configuration
+MASTER_NODE="turing1"
+WORKER_NODES=("turing2" "turing3" "turing4")
+BEELINK_NODE="beelink.lan"
+MASTER_IP="192.168.222.237"
+TOKEN="${K3S_TOKEN:-torino}"
+SSH_PASSWORD="${SSHPASS:-turing}"
+INSTALL_URL="https://get.k3s.io"
+
+# Print helpers. $1 is the message; %s keeps its backslashes uninterpreted.
+print_status() {
+    printf '%b[INFO]%b %s\n' "$GREEN" "$NC" "$1"
+}
+
+print_warning() {
+    printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"
+}
+
+print_error() {
+    printf '%b[ERROR]%b %s\n' "$RED" "$NC" "$1" >&2
+}
+
+# Make sure sshpass is available, installing it via pacman or apt-get if not.
+check_dependencies() {
+    if command -v sshpass > /dev/null 2>&1; then
+        return 0
+    fi
+
+    print_warning "sshpass not found, installing..."
+    if command -v pacman > /dev/null 2>&1; then
+        sudo pacman -S sshpass --noconfirm
+    elif command -v apt-get > /dev/null 2>&1; then
+        sudo apt-get update && sudo apt-get install -y sshpass
+    else
+        print_error "Cannot install sshpass automatically. Please install it manually."
+        exit 1
+    fi
+}
+
+# Run a command as root on a TuringPi node. $1: host, $2: remote command.
+# The password is handed over in the SSHPASS environment variable (sshpass -e)
+# rather than as a '-p' argument, which would show up in the process list.
+run_as_root() {
+    SSHPASS="$SSH_PASSWORD" sshpass -e \
+        ssh -o StrictHostKeyChecking=no "root@$1" "$2"
+}
+
+# Print the command that installs the K3s agent and joins it to the master.
+agent_install_cmd() {
+    echo "curl -sfL $INSTALL_URL | INSTALL_K3S_SKIP_DOWNLOAD=false" \
+        "K3S_URL=https://$MASTER_IP:6443 K3S_TOKEN=$TOKEN sh -"
+}
+
+# Upgrade the master node. Exits the script on failure.
+upgrade_master() {
+    print_status "Upgrading master node: $MASTER_NODE"
+
+    # Uses $TOKEN and $MASTER_IP instead of repeating the literal values.
+    local install_cmd="curl -sfL $INSTALL_URL | INSTALL_K3S_SKIP_DOWNLOAD=false sh -s - \
+        --write-kubeconfig-mode 644 \
+        --disable servicelb \
+        --token $TOKEN \
+        --node-ip $MASTER_IP \
+        --disable-cloud-controller \
+        --disable local-storage"
+    # Test the command directly; under 'set -e' a later '$?' check never runs.
+    if ! run_as_root "$MASTER_NODE" "$install_cmd"; then
+        print_error "Failed to upgrade master node $MASTER_NODE"
+        exit 1
+    fi
+    print_status "Master node $MASTER_NODE upgraded successfully"
+}
+
+# Upgrade one TuringPi worker node. $1: hostname.
+# Returns 1 on failure so the caller can continue with the other nodes.
+upgrade_worker() {
+    local node=$1
+    print_status "Upgrading worker node: $node"
+    if ! run_as_root "$node" "$(agent_install_cmd)"; then
+        print_error "Failed to upgrade worker node $node"
+        return 1
+    fi
+    print_status "Worker node $node upgraded successfully"
+}
+
+# Upgrade the beelink node over key-based SSH. Returns 1 on failure.
+upgrade_beelink() {
+    print_status "Upgrading beelink node: $BEELINK_NODE"
+    # NOTE(review): 'sudo' only applies to curl here, not to the 'sh -' that
+    # runs the installer; kept unchanged - confirm this is intended.
+    if ! ssh -o PasswordAuthentication=no -o StrictHostKeyChecking=no \
+        "gilgamezh@$BEELINK_NODE" "sudo $(agent_install_cmd)"; then
+        print_error "Failed to upgrade beelink node $BEELINK_NODE"
+        return 1
+    fi
+    print_status "Beelink node $BEELINK_NODE upgraded successfully"
+}
+
+# Show the nodes and report pods that are neither Running nor Completed.
+verify_cluster() {
+    print_status "Verifying cluster health..."
+
+    # Wait a moment for nodes to register
+    sleep 10
+    print_status "Cluster nodes:"
+    kubectl get nodes
+
+    print_status "Checking for unhealthy pods..."
+    # --no-headers avoids counting the header line. grep exits 1 when no line
+    # is left, so '|| true' keeps 'set -e' from aborting in the healthy case.
+    local unhealthy_pods
+    unhealthy_pods=$(kubectl get pods --all-namespaces --no-headers |
+        grep -Ev 'Running|Completed' || true)
+
+    if [ -n "$unhealthy_pods" ]; then
+        print_warning "Found unhealthy pods:"
+        echo "$unhealthy_pods"
+    else
+        print_status "All pods are healthy"
+    fi
+}
+
+# Main execution: confirm, upgrade every node, verify, exit 1 on any failure.
+main() {
+    print_status "Starting K3s cluster upgrade..."
+    print_status "This will upgrade all nodes in the TuringPi cluster"
+
+    check_dependencies
+
+    print_status "Current cluster state:"
+    kubectl get nodes
+
+    read -p "Do you want to continue with the upgrade? (y/N): " -n 1 -r
+    echo
+    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+        print_status "Upgrade cancelled"
+        exit 0
+    fi
+
+    # Upgrade master node first, then give it time to stabilize
+    upgrade_master
+    sleep 15
+
+    local failed_nodes=0 worker
+    for worker in "${WORKER_NODES[@]}"; do
+        if ! upgrade_worker "$worker"; then
+            # ((failed_nodes++)) returns 1 at 0 and would trip 'set -e'.
+            failed_nodes=$((failed_nodes + 1))
+        fi
+        sleep 5 # Brief pause between worker upgrades
+    done
+
+    if ! upgrade_beelink; then
+        print_warning "Beelink node upgrade failed, but continuing..."
+        failed_nodes=$((failed_nodes + 1))
+    fi
+
+    verify_cluster
+
+    if [ "$failed_nodes" -gt 0 ]; then
+        print_warning "Upgrade completed with $failed_nodes failed worker node(s)"
+        exit 1
+    fi
+    print_status "All nodes upgraded successfully!"
+}
+
+# Run main function
+main "$@"
\ No newline at end of file