├── README.md
├── ceph_backup.sh
└── ceph_restore.sh

/README.md:
--------------------------------------------------------------------------------
# proxmox_ceph_backups
BASH scripts to back up Ceph images to an NFS mount in a ProxMox 4.x environment.

ProxMox v4.x using a Ceph storage cluster is slow to back up disk images due to a compatibility issue between Ceph and QEMU. Additionally, the ProxMox vzdump utility does not offer a differential backup capability, only full backups.

# ceph_backup.sh
The ceph_backup.sh script provides a differential backup capability built on ceph export, which is a much faster backup method. On Saturday it takes a full export of each disk image in your Ceph cluster. Every other day of the week it takes a differential export against the most recent Saturday's snapshot.

The script also performs cleanup, keeping roughly the last 14 days' worth of backups (the three most recent weekly full exports plus the diffs between them, and the last 14 copies of each VM config file). Retention is counted by number of backups rather than by date, so if your backup job does not complete for a few days, you don't delete good backups that could still be useful.

The script also captures the VM conf files from the /etc/pve/qemu-server directory nightly to ensure there is a good copy if needed.

# ceph_restore.sh
The ceph_restore.sh script walks a user through restoring a full backup image, or a differential on top of one, back into the Ceph cluster using a menu-based system.
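For reference, the export/restore cycle the two scripts implement boils down to a handful of rbd commands. A minimal sketch (pool, image, and snapshot names here are illustrative):

```bash
# Saturday: snapshot the image and take a full export
rbd snap create ceph/vm-100-disk-1@01-06-2018-01-00-00
rbd snap protect ceph/vm-100-disk-1@01-06-2018-01-00-00
rbd export ceph/vm-100-disk-1@01-06-2018-01-00-00 /mnt/backup/01-06-2018-01-00-00.img

# Other days: export only the blocks that changed since that snapshot
rbd export-diff ceph/vm-100-disk-1 --from-snap 01-06-2018-01-00-00 /mnt/backup/01-07-2018-01-00-00.diff

# Restore: import the full image, recreate the base snapshot, then apply the diff
rbd import /mnt/backup/01-06-2018-01-00-00.img ceph/vm-100-disk-1-restored
rbd snap create ceph/vm-100-disk-1-restored@01-06-2018-01-00-00
rbd import-diff /mnt/backup/01-07-2018-01-00-00.diff ceph/vm-100-disk-1-restored
```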
--------------------------------------------------------------------------------
/ceph_backup.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Daily rbd differential backup via snapshot in the "ceph" pool
#
# Usage: ceph_backup.sh <nfs_mount_dir>

convertsecs() {
    ((h=${1}/3600))
    ((m=(${1}%3600)/60))
    ((s=${1}%60))
    printf "%02d:%02d:%02d\n" $h $m $s
}

LOG_FILE=/var/log/ceph_backup.log
SOURCEPOOL="ceph"

NFS_DIR="$1"
BACKUP_DIR="$NFS_DIR/ceph_backups"
CONFIG_DIR="$NFS_DIR/vm_configs"

if [[ -z "$NFS_DIR" ]]; then
    echo "Usage: ceph_backup.sh <nfs_mount_dir>"
    exit 1
fi

touch $LOG_FILE

PIDFILE=/var/run/ceph_backup.pid
if [[ -e "$PIDFILE" ]]; then
    PID=$(cat "$PIDFILE")
    if ps -p "$PID" > /dev/null 2>&1; then
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Process already running with pid ${PID}" >>$LOG_FILE
        exit 1
    fi
fi
#No PID file, or the process it names is gone: claim the PID file
if ! echo $$ > "$PIDFILE"; then
    echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Could not create PID file $PIDFILE" >>$LOG_FILE
    exit 1
fi

START=$(date +%s)
SAT_BACKUP=false

if [[ $(date '+%a') == "Sat" ]]; then
    SAT_BACKUP=true
fi
echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: backup started" >>$LOG_FILE

#List all volumes in the pool
IMAGES=$(rbd ls $SOURCEPOOL)
#IMAGES="vm-105-disk-2"

#Build inactive image list - images that are unused or have backup=0
echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Building inactive image list" >>$LOG_FILE

declare -A isinactive

#For each node in the ProxMox cluster
for node in $(pvecm nodes | grep pve | awk '{print $3}' | awk -F. '{print $1}')
do
    #Get the list of all inactive disks from that node's conf files
    echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: getting node:${node} inactive image list" >>$LOG_FILE
    while read image; do
        isinactive[$image]=1
    done < <(ssh root@${node} "grep \"\-disk-\" /etc/pve/qemu-server/*" | grep ceph | awk -F "\"*:\"*" '{print $2 ":" $4}' | grep "unused\|backup=0" | awk -F "\"*:\"*" '{print $2}' | awk -F "\"*,\"*" '{print $1}')
done
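
#For reference, a matching conf line from the grep above looks like this
#(VM 105 here is hypothetical):
#  /etc/pve/qemu-server/105.conf:scsi1: ceph:vm-105-disk-2,backup=0,size=32G
#Splitting on ":" and printing fields 2 and 4 yields
#"scsi1:vm-105-disk-2,backup=0,size=32G"; the grep keeps only unused/backup=0
#entries, and the last two awk calls strip the key and the trailing options,
#leaving just the image name "vm-105-disk-2".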
for LOCAL_IMAGE in $IMAGES; do

    #Track per image whether we took a full export this run
    SNAPBACKUP=false

    #Check if image is in the inactive images array
    if [[ ${isinactive[$LOCAL_IMAGE]-X} == "${isinactive[$LOCAL_IMAGE]}" ]]; then
        #The image is unused or has backup=0; skip it and move on to the next image
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: ${LOCAL_IMAGE} found on inactive image list" >>$LOG_FILE
        continue
    fi

    TODAY=$(date '+%m-%d-%Y-%H-%M-%S')
    echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Beginning backup for ${LOCAL_IMAGE}" >>$LOG_FILE
    LOCAL_START=$(date +%s)

    #Get the newest snapshot for the image
    LATEST_SNAP=$(rbd snap ls "${SOURCEPOOL}/${LOCAL_IMAGE}" | grep -v "SNAPID" | sort -r | head -n 1 | awk '{print $2}')

    IMAGE_DIR="${BACKUP_DIR}/${LOCAL_IMAGE}"
    if [[ ! -e "$IMAGE_DIR" ]]; then
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: First run for ceph volume. Making backup directory $IMAGE_DIR" >>$LOG_FILE
        mkdir -p "$IMAGE_DIR"
    fi

    #Every Saturday grab a new snapshot and clean up old backups
    if [[ "$SAT_BACKUP" == true ]]; then
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Creating weekly snap for $SOURCEPOOL/$LOCAL_IMAGE to backup" >>$LOG_FILE
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: rbd snap create ${SOURCEPOOL}/${LOCAL_IMAGE}@${TODAY}" >>$LOG_FILE
        rbd snap create "${SOURCEPOOL}/${LOCAL_IMAGE}@${TODAY}" >>$LOG_FILE 2>&1
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: rbd snap protect ${SOURCEPOOL}/${LOCAL_IMAGE}@${TODAY}" >>$LOG_FILE
        rbd snap protect "${SOURCEPOOL}/${LOCAL_IMAGE}@${TODAY}" >>$LOG_FILE 2>&1
        SNAPBACKUP=true
        LATEST_SNAP=$(rbd snap ls "${SOURCEPOOL}/${LOCAL_IMAGE}" | grep -v "SNAPID" | sort -r | head -n 1 | awk '{print $2}')
        OLDEST_SNAP=$(rbd snap ls "${SOURCEPOOL}/${LOCAL_IMAGE}" | grep -v "SNAPID" | sort | head -n 1 | awk '{print $2}')

        #Cleanup backups, retaining 3 full snaps and the diffs in between, and remove old snaps from ceph
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Cleanup old image backups" >>$LOG_FILE
        REFERENCEIMG=$(find "${IMAGE_DIR}" -name '*.img' -type f -printf '%T+ %f\n' | sort -r | awk '{print $2}' | sed '3q;d')
        #If we found files old enough to delete
        if [[ $REFERENCEIMG ]]; then
            find "${IMAGE_DIR}" -type f ! -newer "${IMAGE_DIR}/${REFERENCEIMG}" ! -name "${REFERENCEIMG}" -delete
        fi
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: rbd snap unprotect ${SOURCEPOOL}/${LOCAL_IMAGE}@${OLDEST_SNAP}" >>$LOG_FILE
        rbd snap unprotect "${SOURCEPOOL}/${LOCAL_IMAGE}@${OLDEST_SNAP}"
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: rbd snap rm ${SOURCEPOOL}/${LOCAL_IMAGE}@${OLDEST_SNAP}" >>$LOG_FILE
        rbd snap rm "${SOURCEPOOL}/${LOCAL_IMAGE}@${OLDEST_SNAP}" >>$LOG_FILE
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Cleanup finished" >>$LOG_FILE
    fi

    #Check if there is a snapshot to back up; if not, create one and take a full export
    if [[ -z "$LATEST_SNAP" ]]; then
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: no snap for $SOURCEPOOL/$LOCAL_IMAGE to backup" >>$LOG_FILE
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: rbd snap create ${SOURCEPOOL}/${LOCAL_IMAGE}@${TODAY}" >>$LOG_FILE
        rbd snap create "${SOURCEPOOL}/${LOCAL_IMAGE}@${TODAY}" >>$LOG_FILE 2>&1
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: rbd snap protect ${SOURCEPOOL}/${LOCAL_IMAGE}@${TODAY}" >>$LOG_FILE
        rbd snap protect "${SOURCEPOOL}/${LOCAL_IMAGE}@${TODAY}"
        SNAPBACKUP=true
        LATEST_SNAP=$(rbd snap ls "${SOURCEPOOL}/${LOCAL_IMAGE}" | grep -v "SNAPID" | sort -r | head -n 1 | awk '{print $2}')
    fi

    if [[ "$SNAPBACKUP" == true ]]; then
        #Full export of the image from the new snapshot
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: rbd export --rbd-concurrent-management-ops 20 ${SOURCEPOOL}/${LOCAL_IMAGE}@${LATEST_SNAP} ${IMAGE_DIR}/${LATEST_SNAP}.img" >>$LOG_FILE
        rbd export --rbd-concurrent-management-ops 20 "${SOURCEPOOL}/${LOCAL_IMAGE}@${LATEST_SNAP}" "${IMAGE_DIR}/${LATEST_SNAP}.img" >>$LOG_FILE 2>&1

        LOCAL_END=$(date +%s)
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Finished backup for ${LOCAL_IMAGE} ($(convertsecs $((LOCAL_END - LOCAL_START))))" >>$LOG_FILE

        continue
    fi
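
    #If we fall through to here, a weekly snapshot already exists and today's
    #backup is just a diff against it. The per-image directory ends up looking
    #like this (dates are illustrative):
    #  ${IMAGE_DIR}/01-06-2018-01-00-05.img    weekly full export
    #  ${IMAGE_DIR}/01-07-2018-01-00-05.diff   Sunday's changes since that snap
    #  ${IMAGE_DIR}/01-08-2018-01-00-05.diff   Monday's changes since that snap
    #Restoring a .diff therefore also requires the newest .img that precedes it,
    #which ceph_restore.sh handles automatically.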
    #Export-diff the current image state from the weekly snapshot
    echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: rbd export-diff ${SOURCEPOOL}/${LOCAL_IMAGE} --from-snap ${LATEST_SNAP} ${IMAGE_DIR}/${TODAY}.diff" >>$LOG_FILE
    rbd export-diff "${SOURCEPOOL}/${LOCAL_IMAGE}" --from-snap "${LATEST_SNAP}" "${IMAGE_DIR}/${TODAY}.diff" >>$LOG_FILE 2>&1

    LOCAL_END=$(date +%s)
    echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Finished backup for ${LOCAL_IMAGE} ($(convertsecs $((LOCAL_END - LOCAL_START))))" >>$LOG_FILE

done

echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Copying ProxMox VM/CT config files" >>$LOG_FILE
#For each node in the ProxMox cluster
for node in $(pvecm nodes | grep pve | awk '{print $3}' | awk -F. '{print $1}')
do
    #Get the list of conf files on each node
    for filename in $(ssh root@"${node}" ls /etc/pve/qemu-server)
    do
        TODAY=$(date '+%m-%d-%Y-%H-%M-%S')
        VM_DIR="${filename%.*}"
        #If the per-VM config directory doesn't exist, create it
        if [ ! -d "${CONFIG_DIR}/${VM_DIR}" ]; then
            echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: creating vm config directory ${CONFIG_DIR}/${VM_DIR}" >>$LOG_FILE
            mkdir "${CONFIG_DIR}/${VM_DIR}"
        fi
        #Copy each conf file to the backup server
        scp root@"${node}":/etc/pve/qemu-server/"$filename" "${CONFIG_DIR}/${VM_DIR}/$filename-${TODAY}" >>$LOG_FILE

        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Cleanup old vm config backups" >>$LOG_FILE
        #Keep the 14 newest copies of each conf file
        REFERENCECONF="$(find "${CONFIG_DIR}/${VM_DIR}" -type f -printf '%T+ %f\n' | sort -r | awk '{print $2}' | sed '14q;d')"
        #If we found files old enough to delete
        if [[ -n "$REFERENCECONF" ]]; then
            find "${CONFIG_DIR}/${VM_DIR}" -type f ! -newer "${CONFIG_DIR}/${VM_DIR}/${REFERENCECONF}" ! -name "${REFERENCECONF}" -delete >>$LOG_FILE
        fi
        echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Finished cleanup of old vm config backups" >>$LOG_FILE

    done
done
echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: ProxMox VM/CT config files copied" >>$LOG_FILE

END=$(date +%s)

echo "[$(date '+%m/%d/%Y:%H:%M:%S')] ceph_backup: Overall backup completed and took $(convertsecs $((END - START)))" >>$LOG_FILE

rm $PIDFILE
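
#The README describes this as a nightly job; one way to schedule it from a
#single cluster node is a crontab entry like the following (the script path
#here is hypothetical, and the mount point is site-specific):
#  0 1 * * * /root/ceph_backup.sh /mnt/pve/pve-backup1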
--------------------------------------------------------------------------------
/ceph_restore.sh:
--------------------------------------------------------------------------------
#!/bin/bash

BACKUPPATH="/mnt/pve/pve-backup1/ceph_backups"
CEPHPOOL="ceph"

#Build the volume menu; each entry is printed twice because whiptail menus
#take tag/item pairs
read -ra array <<<$(ls -l ${BACKUPPATH} | egrep '^d' | awk '{print $9}' | awk '!/^ / && NF {print $1; print $1}')
RESTOREVOL=$(whiptail --title "eResources Ceph Restore" --menu "Choose from available volumes: " --notags 25 60 14 "${array[@]}" 3>&1 1>&2 2>&3)

exitstatus=$?

case $exitstatus in
    1|255)
        exit 1
        ;;
esac

#Build the backup menu for the chosen volume
array=( $(for file in $(ls ${BACKUPPATH}/${RESTOREVOL}/); do echo ${file##*/}; done | awk '!/^ / && NF {print $1; print $1}') )

NUMIMGS=${#array[@]}

if [[ "$NUMIMGS" -eq "0" ]]; then

    whiptail --title "eResources Ceph Restore" --msgbox "There are no backups available for $RESTOREVOL" 8 60 3>&1 1>&2 2>&3
    exit 1

fi

RESTOREIMG=$(whiptail --title "eResources Ceph Restore" --menu "Choose from available backups: " --notags 25 60 14 "${array[@]}" 3>&1 1>&2 2>&3)

exitstatus=$?

case $exitstatus in
    1|255)
        exit 1
        ;;
esac


if [[ "${RESTOREIMG##*.}" != "img" && "${RESTOREIMG##*.}" != "diff" ]]
then
    whiptail --title "eResources Ceph Restore" --msgbox "${RESTOREIMG} is not in the proper format." 8 60 3>&1 1>&2 2>&3
    exit 1

elif [[ ${RESTOREIMG: -4} != ".img" ]]
then
    #A diff was chosen; find the newest full image that is not newer than it
    RESTOREDIFF=$RESTOREIMG
    RESTOREIMG=$(find ${BACKUPPATH}/${RESTOREVOL}/*.img -type f ! -newer "${BACKUPPATH}/${RESTOREVOL}/$RESTOREDIFF" -printf '%f\n' | sort -r | head -n 1)

    whiptail --title "eResources Ceph Restore" --yesno "In order to restore ${RESTOREDIFF}, ${RESTOREIMG} must also be restored. Proceed?" 8 60 3>&1 1>&2 2>&3

else
    whiptail --title "eResources Ceph Restore" --yesno "Are you sure you want to restore ${RESTOREIMG}?" 8 60 3>&1 1>&2 2>&3

fi

exitstatus=$?

case $exitstatus in
    0)
        #Proceed with restore; keep asking until a target volume name is given
        while [ -z "$RESTORETO" ]
        do
            RESTORETO=$(whiptail --inputbox "Enter volume name to restore to:" 8 78 --title "eResources Ceph Restore" 3>&1 1>&2 2>&3)
        done
        ;;
    1|255)
        #Cancel
        exit 1
        ;;
esac

if [[ $(rbd ls -p $CEPHPOOL | grep "\<${RESTORETO}\>") ]]; then

    whiptail --title "eResources Ceph Restore" --msgbox "Image ${RESTORETO} already exists in pool ${CEPHPOOL}." 8 60 3>&1 1>&2 2>&3

else

    if [ -z "$RESTOREDIFF" ]; then

        rbd import "${BACKUPPATH}/${RESTOREVOL}/${RESTOREIMG}" "${CEPHPOOL}/${RESTORETO}"

    else

        #Import the full image, recreate the snapshot the diff was taken from
        #(import-diff requires it to exist on the target), apply the diff,
        #then remove the snapshot
        rbd import "${BACKUPPATH}/${RESTOREVOL}/${RESTOREIMG}" "${CEPHPOOL}/${RESTORETO}"
        rbd snap create "${CEPHPOOL}/${RESTORETO}@${RESTOREIMG%.*}"
        rbd import-diff "${BACKUPPATH}/${RESTOREVOL}/${RESTOREDIFF}" "${CEPHPOOL}/${RESTORETO}"
        rbd snap rm "${CEPHPOOL}/${RESTORETO}@${RESTOREIMG%.*}"
    fi

fi

--------------------------------------------------------------------------------