#!/bin/bash # Script Configuration readonly SCRIPT_NAME=$(basename "$0") readonly LOG_DIR="/var/log/proxmox-snapshots" readonly LOG_FILE="${LOG_DIR}/${SCRIPT_NAME%.*}.log" readonly LOCK_FILE="/var/run/${SCRIPT_NAME%.*}.lock" readonly MAX_LOG_SIZE_MB=50 readonly MAX_LOG_FILES=5 # Variables DATE=$(date +"%Y%m%d%H") DEFAULT_KEEP=24 # Default number of snapshots to keep DEFAULT_RETAIN_DAYS=7 # Default days to keep LVM archive files DRY_RUN=false QUIET=false KEEP=$DEFAULT_KEEP RETAIN_DAYS=$DEFAULT_RETAIN_DAYS VERBOSE=false # Error codes readonly E_LOCK=200 readonly E_PERMISSIONS=201 readonly E_INVALID_ARG=202 # Logging functions setup_logging() { # Create log directory if it doesn't exist if [[ ! -d "$LOG_DIR" ]]; then mkdir -p "$LOG_DIR" || { echo "ERROR: Unable to create log directory: $LOG_DIR" exit $E_PERMISSIONS } fi # Rotate logs if main log file exceeds max size if [[ -f "$LOG_FILE" ]]; then local size_mb=$(du -m "$LOG_FILE" | cut -f1) if (( size_mb >= MAX_LOG_SIZE_MB )); then for ((i=MAX_LOG_FILES-1; i>=1; i--)); do [[ -f "${LOG_FILE}.$i" ]] && mv "${LOG_FILE}.$i" "${LOG_FILE}.$((i+1))" done mv "$LOG_FILE" "${LOG_FILE}.1" fi fi # Ensure log file exists and is writable touch "$LOG_FILE" 2>/dev/null || { echo "ERROR: Unable to create/access log file: $LOG_FILE" exit $E_PERMISSIONS } } log() { local level=$1 shift local message="[$(date +'%Y-%m-%d %H:%M:%S')] [$level] $*" if [[ "$QUIET" == "false" || "$level" == "ERROR" ]]; then echo "$message" fi if [[ "$DRY_RUN" == "false" ]]; then echo "$message" >> "$LOG_FILE" fi } # Help function show_help() { cat << EOF Usage: $SCRIPT_NAME [OPTIONS] Automates the creation and rotation of Proxmox VM snapshots. Options: -n, --dry-run Show what would be done without making changes -q, --quiet Suppress output (except errors) -v, --verbose Enable verbose logging -k, --keep NUM Number of snapshots to keep (default: $DEFAULT_KEEP) --retain-days NUM Days to keep LVM archives (default: $DEFAULT_RETAIN_DAYS) -h, --help Show this help message Example: $SCRIPT_NAME --keep 48 --retain-days 14 EOF exit 0 } # Parameter parsing with validation parse_parameters() { while [[ "$#" -gt 0 ]]; do case $1 in -n|--dry-run) DRY_RUN=true ;; -q|--quiet) QUIET=true ;; -v|--verbose) VERBOSE=true ;; -k|--keep) if [[ ! $2 =~ ^[0-9]+$ ]]; then log "ERROR" "Invalid value for --keep: $2" exit $E_INVALID_ARG fi KEEP="$2" shift ;; --retain-days) if [[ ! $2 =~ ^[0-9]+$ ]]; then log "ERROR" "Invalid value for --retain-days: $2" exit $E_INVALID_ARG fi RETAIN_DAYS="$2" shift ;; -h|--help) show_help ;; *) log "ERROR" "Unknown option: $1" show_help ;; esac shift done } # Lock management create_lock() { if [[ -e "$LOCK_FILE" ]]; then local pid=$(cat "$LOCK_FILE") if kill -0 "$pid" 2>/dev/null; then log "ERROR" "Script is already running with PID $pid" exit $E_LOCK else log "WARN" "Removing stale lock file" rm -f "$LOCK_FILE" fi fi echo $$ > "$LOCK_FILE" trap 'rm -f "$LOCK_FILE"' EXIT } # Function to delete old LVM archive files delete_old_lvm_archives() { local archive_path="/etc/lvm/archive" if [[ -d "$archive_path" ]]; then log "INFO" "Cleaning LVM archive files older than $RETAIN_DAYS days..." if [[ "$DRY_RUN" == true ]]; then find "$archive_path" -type f -mtime +"$RETAIN_DAYS" -print else local count=$(find "$archive_path" -type f -mtime +"$RETAIN_DAYS" -exec rm {} + -print | wc -l) log "INFO" "Removed $count old LVM archive files" fi else log "WARN" "LVM archive directory not found: $archive_path" fi } # Function to handle snapshot creation create_snapshot() { local type=$1 local vmid=$2 local snap_cmd=$3 local snapshot_name="auto_$DATE" if [[ "$DRY_RUN" == true ]]; then log "INFO" "[Dry-Run] Would create snapshot: $snap_cmd snapshot $vmid $snapshot_name" return 0 fi log "INFO" "Creating snapshot for $type $vmid: $snapshot_name" if ! $snap_cmd snapshot "$vmid" "$snapshot_name" 2>/tmp/snap_error.$$; then local error=$(cat /tmp/snap_error.$$) log "ERROR" "Failed to create snapshot for $type $vmid: $error" rm -f /tmp/snap_error.$$ return 1 fi rm -f /tmp/snap_error.$$ return 0 } # Function to handle snapshot deletion delete_snapshot() { local type=$1 local vmid=$2 local snap_cmd=$3 local snapshot_name=$4 if [[ "$DRY_RUN" == true ]]; then log "INFO" "[Dry-Run] Would delete snapshot: $snap_cmd delsnapshot $vmid $snapshot_name" return 0 fi log "INFO" "Deleting snapshot for $type $vmid: $snapshot_name" if ! $snap_cmd delsnapshot "$vmid" "$snapshot_name" 2>/tmp/snap_error.$$; then local error=$(cat /tmp/snap_error.$$) log "ERROR" "Failed to delete snapshot for $type $vmid: $error" rm -f /tmp/snap_error.$$ return 1 fi rm -f /tmp/snap_error.$$ return 0 } # Main function main() { setup_logging create_lock parse_parameters "$@" log "INFO" "Starting snapshot management (Keep: $KEEP, Retain days: $RETAIN_DAYS)" [[ "$DRY_RUN" == true ]] && log "INFO" "Running in DRY-RUN mode" # Clean old LVM archives first delete_old_lvm_archives # Fetch and process VM list log "INFO" "Fetching VM list from pvesh..." local vm_list if ! vm_list=$(pvesh get /cluster/resources --type vm --output-format text --human-readable 0 --noborder --noheader 2>/tmp/pvesh_error.$$); then local error=$(cat /tmp/pvesh_error.$$) log "ERROR" "Failed to fetch VM list: $error" rm -f /tmp/pvesh_error.$$ exit 1 fi rm -f /tmp/pvesh_error.$$ echo "$vm_list" | awk '{split($1, a, "/"); TYPE=a[1]; VMID=a[2]; STATUS=$(NF-2); print TYPE, VMID, STATUS}' | \ while read -r TYPE VMID STATUS; do if [[ "$STATUS" != "running" ]]; then [[ "$VERBOSE" == true ]] && log "INFO" "Skipping $TYPE $VMID - Status: $STATUS" continue fi # Determine snapshot command local SNAP_CMD case "$TYPE" in lxc) SNAP_CMD="pct" ;; qemu) SNAP_CMD="qm" ;; *) log "WARN" "Unknown VM type: $TYPE for VMID $VMID" continue ;; esac # Create new snapshot create_snapshot "$TYPE" "$VMID" "$SNAP_CMD" || continue # Manage snapshot rotation local SNAPSHOTS SNAPSHOTS=($($SNAP_CMD listsnapshot "$VMID" | grep -oP 'auto_\d{10}' | sort -r)) local SNAP_COUNT=${#SNAPSHOTS[@]} if (( SNAP_COUNT > KEEP )); then log "INFO" "Found $SNAP_COUNT snapshots for $TYPE $VMID, keeping $KEEP" for (( i=KEEP; i