proxmox-snapshot-rotation/proxmox-snapshot-rotation.sh

260 lines
No EOL
7.6 KiB
Bash

#!/bin/bash
# ---------------------------------------------------------------------------
# Script configuration
# ---------------------------------------------------------------------------

# Fixed settings: script identity, log/lock file locations, log rotation limits.
SCRIPT_NAME=$(basename "$0")
readonly SCRIPT_NAME
declare -r LOG_DIR="/var/log/proxmox-snapshots"
declare -r LOG_FILE="${LOG_DIR}/${SCRIPT_NAME%.*}.log"
declare -r LOCK_FILE="/var/run/${SCRIPT_NAME%.*}.lock"
declare -r MAX_LOG_SIZE_MB=50
declare -r MAX_LOG_FILES=5

# Timestamp with hour resolution (YYYYMMDDHH); create_snapshot uses it to
# build the "auto_<DATE>" snapshot name.
DATE=$(date +"%Y%m%d%H")

# Defaults shown in the help text and used when no option overrides them.
DEFAULT_KEEP=24          # snapshots to keep
DEFAULT_RETAIN_DAYS=7    # days to keep LVM archive files

# Effective settings; parse_parameters may overwrite these.
KEEP=$DEFAULT_KEEP
RETAIN_DAYS=$DEFAULT_RETAIN_DAYS
DRY_RUN=false
QUIET=false
VERBOSE=false

# Exit codes used by the script.
declare -r E_LOCK=200
declare -r E_PERMISSIONS=201
declare -r E_INVALID_ARG=202
# Logging functions
#######################################
# Prepare the log directory and log file, rotating the log when it is too big.
# Globals:   LOG_DIR, LOG_FILE, MAX_LOG_SIZE_MB, MAX_LOG_FILES,
#            E_PERMISSIONS (all read)
# Outputs:   an "ERROR: ..." line on stdout when the directory or file
#            cannot be created
# Returns:   0 on success; exits with E_PERMISSIONS on failure
#######################################
setup_logging() {
  local current_mb idx

  # The log directory must exist before anything can be written.
  if ! [[ -d "$LOG_DIR" ]] && ! mkdir -p "$LOG_DIR"; then
    echo "ERROR: Unable to create log directory: $LOG_DIR"
    exit $E_PERMISSIONS
  fi

  # Once the active log reaches the size limit, shift the numbered copies up
  # by one (highest index first, so nothing is overwritten early) and move
  # the active log to ".1".
  if [[ -f "$LOG_FILE" ]]; then
    current_mb=$(du -m "$LOG_FILE" | cut -f1)
    if (( current_mb >= MAX_LOG_SIZE_MB )); then
      idx=$(( MAX_LOG_FILES - 1 ))
      while (( idx >= 1 )); do
        if [[ -f "${LOG_FILE}.${idx}" ]]; then
          mv "${LOG_FILE}.${idx}" "${LOG_FILE}.$(( idx + 1 ))"
        fi
        idx=$(( idx - 1 ))
      done
      mv "$LOG_FILE" "${LOG_FILE}.1"
    fi
  fi

  # Create the log file if needed and confirm it can be touched.
  if ! touch "$LOG_FILE" 2>/dev/null; then
    echo "ERROR: Unable to create/access log file: $LOG_FILE"
    exit $E_PERMISSIONS
  fi
}
#######################################
# Emit one timestamped log entry.
# Globals:   QUIET, DRY_RUN, LOG_FILE (read)
# Arguments: $1 - level tag (e.g. INFO, WARN, ERROR)
#            $2.. - message words, joined into one line
# Outputs:   the entry on stdout unless QUIET is set (ERROR entries are
#            always printed); the entry is appended to LOG_FILE unless
#            DRY_RUN is set
# Returns:   0 in dry-run mode, otherwise the status of the file append
#######################################
log() {
  local severity=$1
  local stamp entry

  stamp=$(date +'%Y-%m-%d %H:%M:%S')
  entry="[${stamp}] [${severity}] ${*:2}"

  # Console: errors are shown even in quiet mode.
  if [[ "$severity" == "ERROR" || "$QUIET" == "false" ]]; then
    printf '%s\n' "$entry"
  fi

  # File: nothing is written during a dry run.
  if [[ "$DRY_RUN" != "false" ]]; then
    return 0
  fi
  printf '%s\n' "$entry" >> "$LOG_FILE"
}
#######################################
# Print the usage text and terminate the script.
# Globals:   SCRIPT_NAME, DEFAULT_KEEP, DEFAULT_RETAIN_DAYS (read)
# Outputs:   usage text on stdout
# Returns:   never returns; always exits with status 0
#######################################
show_help() {
  local -a usage_lines=(
    "Usage: $SCRIPT_NAME [OPTIONS]"
    "Automates the creation and rotation of Proxmox VM snapshots."
    "Options:"
    "-n, --dry-run Show what would be done without making changes"
    "-q, --quiet Suppress output (except errors)"
    "-v, --verbose Enable verbose logging"
    "-k, --keep NUM Number of snapshots to keep (default: $DEFAULT_KEEP)"
    "--retain-days NUM Days to keep LVM archives (default: $DEFAULT_RETAIN_DAYS)"
    "-h, --help Show this help message"
    "Example:"
    "$SCRIPT_NAME --keep 48 --retain-days 14"
  )

  printf '%s\n' "${usage_lines[@]}"
  exit 0
}
#######################################
# Parse command-line options and store them in the global settings.
# Globals:   DRY_RUN, QUIET, VERBOSE, KEEP, RETAIN_DAYS (written)
#            E_INVALID_ARG (read)
# Arguments: the script's command-line arguments
# Outputs:   error messages through log; usage text through show_help
# Returns:   0 when every option was accepted. Exits with E_INVALID_ARG on
#            an invalid value or an unknown option; -h/--help exits through
#            show_help.
#######################################
parse_parameters() {
  while (( $# > 0 )); do
    case "$1" in
      -n | --dry-run)
        DRY_RUN=true
        ;;
      -q | --quiet)
        QUIET=true
        ;;
      -v | --verbose)
        VERBOSE=true
        ;;
      -k | --keep)
        if [[ ! "${2-}" =~ ^[0-9]+$ ]]; then
          log "ERROR" "Invalid value for --keep: ${2-}"
          exit "$E_INVALID_ARG"
        fi
        # Force base 10: a value such as "08" passes the digit check but
        # would later be rejected as invalid octal inside (( )) arithmetic.
        KEEP=$(( 10#$2 ))
        shift
        ;;
      --retain-days)
        if [[ ! "${2-}" =~ ^[0-9]+$ ]]; then
          log "ERROR" "Invalid value for --retain-days: ${2-}"
          exit "$E_INVALID_ARG"
        fi
        # Normalized the same way as --keep (strips leading zeros).
        RETAIN_DAYS=$(( 10#$2 ))
        shift
        ;;
      -h | --help)
        show_help
        ;;
      *)
        log "ERROR" "Unknown option: $1"
        # show_help terminates with status 0, so run it in a subshell and
        # report the usage error with a non-zero exit code ourselves.
        ( show_help )
        exit "$E_INVALID_ARG"
        ;;
    esac
    shift
  done
}
#######################################
# Helper for create_lock: write this script's PID to LOCK_FILE, failing if
# the file already exists. noclobber makes the existence check and the
# creation a single step, so two instances cannot both succeed.
# Globals:   LOCK_FILE (read)
# Returns:   0 when the lock file was created, non-zero otherwise
#######################################
_write_lock_exclusive() {
  # $$ still expands to the main script's PID inside the subshell; the
  # subshell only keeps the noclobber setting from leaking to the caller.
  ( set -o noclobber; echo "$$" > "$LOCK_FILE" ) 2>/dev/null
}

#######################################
# Take the single-instance lock and register its removal on exit.
# Globals:   LOCK_FILE, E_LOCK (read)
# Outputs:   messages through log
# Returns:   0 when the lock is held. Exits with E_LOCK when the PID stored
#            in the lock file is still alive, or when the lock file cannot
#            be created.
#######################################
create_lock() {
  local pid=""

  if ! _write_lock_exclusive; then
    pid=$(cat "$LOCK_FILE" 2>/dev/null) || pid=""

    # Only a purely numeric PID is handed to kill; anything else (empty or
    # garbled lock file) is treated as stale.
    if [[ "$pid" =~ ^[0-9]+$ ]] && kill -0 "$pid" 2>/dev/null; then
      log "ERROR" "Script is already running with PID $pid"
      exit "$E_LOCK"
    fi

    if [[ -e "$LOCK_FILE" ]]; then
      log "WARN" "Removing stale lock file"
      rm -f "$LOCK_FILE"
    fi

    # A second failure means another instance took the lock in between, or
    # the lock file location is not writable.
    if ! _write_lock_exclusive; then
      log "ERROR" "Unable to create lock file: $LOCK_FILE"
      exit "$E_LOCK"
    fi
  fi

  # Installed only after the lock is ours, so a refused start never removes
  # the running instance's lock.
  trap 'rm -f "$LOCK_FILE"' EXIT
}
#######################################
# Remove LVM archive files older than RETAIN_DAYS days.
# Globals:   RETAIN_DAYS, DRY_RUN (read)
# Arguments: $1 - archive directory (optional, default: /etc/lvm/archive)
# Outputs:   progress through log; in dry-run mode the matching file paths
#            are printed on stdout and nothing is removed
# Returns:   status of the last command run (log or find)
#######################################
delete_old_lvm_archives() {
  local archive_path=${1:-/etc/lvm/archive}
  local count

  if [[ -d "$archive_path" ]]; then
    log "INFO" "Cleaning LVM archive files older than $RETAIN_DAYS days..."
    if [[ "$DRY_RUN" == true ]]; then
      find "$archive_path" -type f -mtime +"$RETAIN_DAYS" -print
    else
      # -print follows -delete, so a path is only counted when its removal
      # actually succeeded.
      count=$(find "$archive_path" -type f -mtime +"$RETAIN_DAYS" -delete -print | wc -l)
      log "INFO" "Removed $count old LVM archive files"
    fi
  else
    log "WARN" "LVM archive directory not found: $archive_path"
  fi
}
#######################################
# Create the snapshot "auto_<DATE>" for one guest.
# Globals:   DATE, DRY_RUN (read)
# Arguments: $1 - guest type, used in log messages
#            $2 - guest ID passed to the snapshot command
#            $3 - snapshot command to run (main passes "pct" or "qm")
# Outputs:   progress and errors through log; the command's stdout is
#            passed through unchanged
# Returns:   0 on success or in dry-run mode, 1 when the command fails
#######################################
create_snapshot() {
  local type=$1
  local vmid=$2
  local snap_cmd=$3
  local snapshot_name="auto_$DATE"
  local error

  if [[ "$DRY_RUN" == true ]]; then
    log "INFO" "[Dry-Run] Would create snapshot: $snap_cmd snapshot $vmid $snapshot_name"
    return 0
  fi

  log "INFO" "Creating snapshot for $type $vmid: $snapshot_name"

  # Capture stderr straight into a variable instead of a predictable file
  # in /tmp: fd 3 carries the command's stdout around the command
  # substitution, so only stderr ends up in $error.
  if ! { error=$("$snap_cmd" snapshot "$vmid" "$snapshot_name" 2>&1 1>&3 3>&-); } 3>&1; then
    log "ERROR" "Failed to create snapshot for $type $vmid: $error"
    return 1
  fi
  return 0
}
#######################################
# Delete one named snapshot of a guest.
# Globals:   DRY_RUN (read)
# Arguments: $1 - guest type, used in log messages
#            $2 - guest ID passed to the snapshot command
#            $3 - snapshot command to run (main passes "pct" or "qm")
#            $4 - name of the snapshot to delete
# Outputs:   progress and errors through log; the command's stdout is
#            passed through unchanged
# Returns:   0 on success or in dry-run mode, 1 when the command fails
#######################################
delete_snapshot() {
  local type=$1
  local vmid=$2
  local snap_cmd=$3
  local snapshot_name=$4
  local error

  if [[ "$DRY_RUN" == true ]]; then
    log "INFO" "[Dry-Run] Would delete snapshot: $snap_cmd delsnapshot $vmid $snapshot_name"
    return 0
  fi

  log "INFO" "Deleting snapshot for $type $vmid: $snapshot_name"

  # Capture stderr straight into a variable instead of a predictable file
  # in /tmp: fd 3 carries the command's stdout around the command
  # substitution, so only stderr ends up in $error.
  if ! { error=$("$snap_cmd" delsnapshot "$vmid" "$snapshot_name" 2>&1 1>&3 3>&-); } 3>&1; then
    log "ERROR" "Failed to delete snapshot for $type $vmid: $error"
    return 1
  fi
  return 0
}
#######################################
# Entry point: snapshot every running guest and rotate old "auto_*"
# snapshots.
# Steps: set up logging, parse options, take the lock, clean LVM archives,
# fetch the guest list from pvesh, then for each running guest create a
# snapshot and delete the oldest ones beyond KEEP.
# Globals:   KEEP, RETAIN_DAYS, DRY_RUN, VERBOSE (read)
# Arguments: the script's command-line arguments
# Outputs:   progress through log
# Returns:   0 when every snapshot operation succeeded, 1 when at least one
#            create/delete failed; exits 1 when the guest list cannot be
#            fetched
#######################################
main() {
  local vm_list error err_file row
  local vm_type vmid status snap_cmd
  local snap_count i
  local failures=0
  local -a vm_rows=()
  local -a snapshots=()

  setup_logging
  # Options are parsed before the lock is taken, so --help and argument
  # errors do not depend on (or disturb) another running instance.
  parse_parameters "$@"
  create_lock

  log "INFO" "Starting snapshot management (Keep: $KEEP, Retain days: $RETAIN_DAYS)"
  if [[ "$DRY_RUN" == true ]]; then
    log "INFO" "Running in DRY-RUN mode"
  fi

  # Clean old LVM archives first
  delete_old_lvm_archives

  # Fetch the guest list; stderr goes to an unpredictable temp file so it
  # can be reported without mixing into the captured list.
  log "INFO" "Fetching VM list from pvesh..."
  err_file=$(mktemp) || {
    log "ERROR" "Failed to fetch VM list: unable to create temporary file"
    exit 1
  }
  if ! vm_list=$(pvesh get /cluster/resources --type vm --output-format text --human-readable 0 --noborder --noheader 2>"$err_file"); then
    error=$(cat "$err_file")
    rm -f -- "$err_file"
    log "ERROR" "Failed to fetch VM list: $error"
    exit 1
  fi
  rm -f -- "$err_file"

  # Reduce each row to "<type> <vmid> <status>". The first column is split
  # on "/" and the status is taken from the third-to-last column. Rows with
  # fewer than three fields (e.g. blank lines) are skipped because $(NF-2)
  # would otherwise abort awk.
  mapfile -t vm_rows < <(
    awk 'NF >= 3 { split($1, a, "/"); print a[1], a[2], $(NF-2) }' <<<"$vm_list"
  )

  for row in "${vm_rows[@]}"; do
    read -r vm_type vmid status <<<"$row"

    if [[ "$status" != "running" ]]; then
      if [[ "$VERBOSE" == true ]]; then
        log "INFO" "Skipping $vm_type $vmid - Status: $status"
      fi
      continue
    fi

    # Determine snapshot command
    case "$vm_type" in
      lxc) snap_cmd="pct" ;;
      qemu) snap_cmd="qm" ;;
      *)
        log "WARN" "Unknown VM type: $vm_type for VMID $vmid"
        continue
        ;;
    esac

    # Create new snapshot; rotation is skipped for a guest whose snapshot
    # could not be created.
    if ! create_snapshot "$vm_type" "$vmid" "$snap_cmd"; then
      failures=$(( failures + 1 ))
      continue
    fi

    # Newest first: the names embed YYYYMMDDHH, so a reverse text sort is a
    # reverse chronological sort. -u drops repeated matches of the same name.
    mapfile -t snapshots < <(
      "$snap_cmd" listsnapshot "$vmid" | grep -oP 'auto_\d{10}' | sort -ru
    )
    snap_count=${#snapshots[@]}

    if (( snap_count > KEEP )); then
      log "INFO" "Found $snap_count snapshots for $vm_type $vmid, keeping $KEEP"
      for (( i = KEEP; i < snap_count; i++ )); do
        if ! delete_snapshot "$vm_type" "$vmid" "$snap_cmd" "${snapshots[i]}"; then
          failures=$(( failures + 1 ))
        fi
      done
    fi
  done

  if (( failures > 0 )); then
    log "WARN" "Snapshot management completed with $failures failure(s)"
    return 1
  fi
  log "INFO" "Snapshot management completed successfully"
}
# Script entry point: run main, forwarding every command-line argument
# unchanged ("$@" keeps each argument as a separate word).
main "$@"