diff --git a/proxmox-snapshot-rotation.sh b/proxmox-snapshot-rotation.sh new file mode 100644 index 0000000..45ec8b1 --- /dev/null +++ b/proxmox-snapshot-rotation.sh @@ -0,0 +1,260 @@ +#!/bin/bash + +# Script Configuration +readonly SCRIPT_NAME=$(basename "$0") +readonly LOG_DIR="/var/log/proxmox-snapshots" +readonly LOG_FILE="${LOG_DIR}/${SCRIPT_NAME%.*}.log" +readonly LOCK_FILE="/var/run/${SCRIPT_NAME%.*}.lock" +readonly MAX_LOG_SIZE_MB=50 +readonly MAX_LOG_FILES=5 + +# Variables +DATE=$(date +"%Y%m%d%H") +DEFAULT_KEEP=24 # Default number of snapshots to keep +DEFAULT_RETAIN_DAYS=7 # Default days to keep LVM archive files +DRY_RUN=false +QUIET=false +KEEP=$DEFAULT_KEEP +RETAIN_DAYS=$DEFAULT_RETAIN_DAYS +VERBOSE=false + +# Error codes +readonly E_LOCK=200 +readonly E_PERMISSIONS=201 +readonly E_INVALID_ARG=202 + +# Logging functions +setup_logging() { + # Create log directory if it doesn't exist + if [[ ! -d "$LOG_DIR" ]]; then + mkdir -p "$LOG_DIR" || { + echo "ERROR: Unable to create log directory: $LOG_DIR" + exit $E_PERMISSIONS + } + fi + + # Rotate logs if main log file exceeds max size + if [[ -f "$LOG_FILE" ]]; then + local size_mb=$(du -m "$LOG_FILE" | cut -f1) + if (( size_mb >= MAX_LOG_SIZE_MB )); then + for ((i=MAX_LOG_FILES-1; i>=1; i--)); do + [[ -f "${LOG_FILE}.$i" ]] && mv "${LOG_FILE}.$i" "${LOG_FILE}.$((i+1))" + done + mv "$LOG_FILE" "${LOG_FILE}.1" + fi + fi + + # Ensure log file exists and is writable + touch "$LOG_FILE" 2>/dev/null || { + echo "ERROR: Unable to create/access log file: $LOG_FILE" + exit $E_PERMISSIONS + } +} + +log() { + local level=$1 + shift + local message="[$(date +'%Y-%m-%d %H:%M:%S')] [$level] $*" + + if [[ "$QUIET" == "false" || "$level" == "ERROR" ]]; then + echo "$message" + fi + + if [[ "$DRY_RUN" == "false" ]]; then + echo "$message" >> "$LOG_FILE" + fi +} + +# Help function +show_help() { + cat << EOF +Usage: $SCRIPT_NAME [OPTIONS] +Automates the creation and rotation of Proxmox VM snapshots. + +Options: + -n, --dry-run Show what would be done without making changes + -q, --quiet Suppress output (except errors) + -v, --verbose Enable verbose logging + -k, --keep NUM Number of snapshots to keep (default: $DEFAULT_KEEP) + --retain-days NUM Days to keep LVM archives (default: $DEFAULT_RETAIN_DAYS) + -h, --help Show this help message + +Example: + $SCRIPT_NAME --keep 48 --retain-days 14 +EOF + exit 0 +} + +# Parameter parsing with validation +parse_parameters() { + while [[ "$#" -gt 0 ]]; do + case $1 in + -n|--dry-run) DRY_RUN=true ;; + -q|--quiet) QUIET=true ;; + -v|--verbose) VERBOSE=true ;; + -k|--keep) + if [[ ! $2 =~ ^[0-9]+$ ]]; then + log "ERROR" "Invalid value for --keep: $2" + exit $E_INVALID_ARG + fi + KEEP="$2" + shift + ;; + --retain-days) + if [[ ! $2 =~ ^[0-9]+$ ]]; then + log "ERROR" "Invalid value for --retain-days: $2" + exit $E_INVALID_ARG + fi + RETAIN_DAYS="$2" + shift + ;; + -h|--help) show_help ;; + *) + log "ERROR" "Unknown option: $1" + show_help + ;; + esac + shift + done +} + +# Lock management +create_lock() { + if [[ -e "$LOCK_FILE" ]]; then + local pid=$(cat "$LOCK_FILE") + if kill -0 "$pid" 2>/dev/null; then + log "ERROR" "Script is already running with PID $pid" + exit $E_LOCK + else + log "WARN" "Removing stale lock file" + rm -f "$LOCK_FILE" + fi + fi + echo $$ > "$LOCK_FILE" + trap 'rm -f "$LOCK_FILE"' EXIT +} + +# Function to delete old LVM archive files +delete_old_lvm_archives() { + local archive_path="/etc/lvm/archive" + if [[ -d "$archive_path" ]]; then + log "INFO" "Cleaning LVM archive files older than $RETAIN_DAYS days..." + if [[ "$DRY_RUN" == true ]]; then + find "$archive_path" -type f -mtime +"$RETAIN_DAYS" -print + else + local count=$(find "$archive_path" -type f -mtime +"$RETAIN_DAYS" -exec rm {} + -print | wc -l) + log "INFO" "Removed $count old LVM archive files" + fi + else + log "WARN" "LVM archive directory not found: $archive_path" + fi +} + +# Function to handle snapshot creation +create_snapshot() { + local type=$1 + local vmid=$2 + local snap_cmd=$3 + local snapshot_name="auto_$DATE" + + if [[ "$DRY_RUN" == true ]]; then + log "INFO" "[Dry-Run] Would create snapshot: $snap_cmd snapshot $vmid $snapshot_name" + return 0 + fi + + log "INFO" "Creating snapshot for $type $vmid: $snapshot_name" + if ! $snap_cmd snapshot "$vmid" "$snapshot_name" 2>/tmp/snap_error.$$; then + local error=$(cat /tmp/snap_error.$$) + log "ERROR" "Failed to create snapshot for $type $vmid: $error" + rm -f /tmp/snap_error.$$ + return 1 + fi + rm -f /tmp/snap_error.$$ + return 0 +} + +# Function to handle snapshot deletion +delete_snapshot() { + local type=$1 + local vmid=$2 + local snap_cmd=$3 + local snapshot_name=$4 + + if [[ "$DRY_RUN" == true ]]; then + log "INFO" "[Dry-Run] Would delete snapshot: $snap_cmd delsnapshot $vmid $snapshot_name" + return 0 + fi + + log "INFO" "Deleting snapshot for $type $vmid: $snapshot_name" + if ! $snap_cmd delsnapshot "$vmid" "$snapshot_name" 2>/tmp/snap_error.$$; then + local error=$(cat /tmp/snap_error.$$) + log "ERROR" "Failed to delete snapshot for $type $vmid: $error" + rm -f /tmp/snap_error.$$ + return 1 + fi + rm -f /tmp/snap_error.$$ + return 0 +} + +# Main function +main() { + setup_logging + create_lock + parse_parameters "$@" + + log "INFO" "Starting snapshot management (Keep: $KEEP, Retain days: $RETAIN_DAYS)" + [[ "$DRY_RUN" == true ]] && log "INFO" "Running in DRY-RUN mode" + + # Clean old LVM archives first + delete_old_lvm_archives + + # Fetch and process VM list + log "INFO" "Fetching VM list from pvesh..." + local vm_list + if ! vm_list=$(pvesh get /cluster/resources --type vm --output-format text --human-readable 0 --noborder --noheader 2>/tmp/pvesh_error.$$); then + local error=$(cat /tmp/pvesh_error.$$) + log "ERROR" "Failed to fetch VM list: $error" + rm -f /tmp/pvesh_error.$$ + exit 1 + fi + rm -f /tmp/pvesh_error.$$ + + echo "$vm_list" | awk '{split($1, a, "/"); TYPE=a[1]; VMID=a[2]; STATUS=$(NF-2); print TYPE, VMID, STATUS}' | \ + while read -r TYPE VMID STATUS; do + if [[ "$STATUS" != "running" ]]; then + [[ "$VERBOSE" == true ]] && log "INFO" "Skipping $TYPE $VMID - Status: $STATUS" + continue + fi + + # Determine snapshot command + local SNAP_CMD + case "$TYPE" in + lxc) SNAP_CMD="pct" ;; + qemu) SNAP_CMD="qm" ;; + *) + log "WARN" "Unknown VM type: $TYPE for VMID $VMID" + continue + ;; + esac + + # Create new snapshot + create_snapshot "$TYPE" "$VMID" "$SNAP_CMD" || continue + + # Manage snapshot rotation + local SNAPSHOTS + SNAPSHOTS=($($SNAP_CMD listsnapshot "$VMID" | grep -oP 'auto_\d{10}' | sort -r)) + local SNAP_COUNT=${#SNAPSHOTS[@]} + + if (( SNAP_COUNT > KEEP )); then + log "INFO" "Found $SNAP_COUNT snapshots for $TYPE $VMID, keeping $KEEP" + for (( i=KEEP; i