please find below a
1. A script that takes remote base backups of a postgresql database.
it assumes a password less communication between the backup
server and the database server. After taking a base backup it removes
that un neccesary WAL log files from the archive folder. The script has been
running in my server for past few weeks without any problems. It takes
apprx 40 mins to backup around 40 GB data.
sample execution on the backup server
$ ./pg_rsync.sh 216.247.238.130 /mnt/disk2/base_backups /mnt/disk2/wal_archive
or in crontab as
30 20 * * * /usr/bin/time /home/postgres/pg_rsync.sh 216.247.238.130 /mnt/disk2/base_backups /mnt/disk2/wal_archive
Note: script also assumes that you have setup remote wal archiving using a suitable
archive_command in postgresql.conf , these scripts are *not* posted here.
any suggestions welcome.
------------------------ script begins (pg_rsync.sh)------------------------------------------------
#!/bin/bash
##################################################
# it does following
# 1. checks existance and permission of imp folders.
# 2. takes base backup to a destined folder by rsync
# 3. removes unwanted archived log files.
# Contributions: Grega Bremec (gregab@xxxxxxx) ,
# Alvaro Herrera (alvherre@xxxxxxxxxxxxxxxxx)
# Preliminary Version: Mallah (mallah.rajesh@xxxxxxxxx)
##################################################
then
echo "Usage: $0 <HOSTNAME> <BACKUP DIRECTORY> <WAL ARCHIVE DIRECTORY>"
exit 1
fi
HOSTNAME=$1
BACKUPFOLDER=$2
WAL_ARCHIVE=$3
echo "HOSTNAME=$HOSTNAME"
echo "BACKUPFOLDER=$BACKUPFOLDER"
echo "WAL_ARCHIVE=$WAL_ARCHIVE"
if [ -z $BACKUPFOLDER ] || [ ! -d $BACKUPFOLDER ] || [ ! -w $BACKUPFOLDER ]
then
echo "Sorry base backup folder $BACKUPFOLDER does not exists or is not writable or is not specified!"
exit 1
fi
if [ -z $WAL_ARCHIVE ] || [ ! -d $WAL_ARCHIVE ] || [ ! -w $WAL_ARCHIVE ]
then
echo "Sorry WAL archive folder $WAL_ARCHIVE does not exists or is not writable or is not specified!"
exit 1
fi
PSQL_BIN=`which psql` || /usr/local/pgsql/bin/psql
RSYNC_BIN=`which rsync` || /usr/bin/rsync
SSH_BIN=`which ssh` || /usr/bin/ssh
for PROG in $PSQL_BIN $RSYNC_BIN $SSH_BIN ; do
if [ ! -f $PROG ] || [ ! -x $PROG ]
then
echo "Sorry $PROG does not exists or is not executable by you"
echo "Please set env variable PATH to include psql and rsync"
exit 1
else
echo "Using $PROG"
fi
done
RSYNC_OPTS="--delete-after -a -e $SSH_BIN --exclude pg_xlog"
RSYNC="$RSYNC_BIN $RSYNC_OPTS"
PSQL=$PSQL_BIN
today=`date +%d-%m-%Y-%H-%M-%S`
label=base_backup_${today}
echo "Querying data_directory and tablespace folders from server $HOSTNAME"
DATA_DIR=`$PSQL -q -Upostgres -d template1 -h $HOSTNAME -c "show data_directory;" -P tuples_only -P format=unaligned 2>&1`
RVAL=$?
if [ $RVAL -ne 0 ]
then
echo "Some error in getting data_directory:$DATA_DIR"
exit 1;
fi
echo "DATA_DIR:$DATA_DIR"
TBL_SPCS=(`$PSQL -q -Upostgres -d template1 -h $HOSTNAME -c "SELECT spclocation from pg_catalog.pg_tablespace where length(spclocation)>0;" -P tuples_only -P format=unaligned`)
RVAL=$?
if [ $RVAL -ne 0 ]
then
echo "There is some problem in getting table spaces"
exit 1;
fi
DIRS=( "${TBL_SPCS[@]}" $DATA_DIR)
echo "Folders for Backup:"
CTR=0
while [ -n "${DIRS[${CTR}]}" ]; do
echo -n "${DIRS[${CTR}]} --> "
DIRS[${CTR}]=`ssh $HOSTNAME readlink -f ${DIRS[${CTR}]}`
echo "${DIRS[${CTR}]} (after symlink resolution)"
CTR=$((CTR + 1))
done
unset CTR
CP=`$PSQL -q -Upostgres -d template1 -h$HOSTNAME -c "SELECT pg_start_backup('$label');" -P tuples_only -P format=unaligned 2>&1`
RVAL=$?
if [ $RVAL -ne 0 ]
then
echo "PSQL pg_start_backup failed:$CP"
exit 1;
fi
echo "pg_start_backup executed successfully: $CP"
echo "RSYNC begins.."
# rsync each of the folders to the backup folder.
CTR=0
while [ -n "${DIRS[${CTR}]}" ]; do
echo "Syncing ${DIRS[${CTR}]}..."
time ${RSYNC} $HOSTNAME:${DIRS[${CTR}]} ${BACKUPFOLDER}
RVAL=$?
echo "Sync finished with exit status ${RVAL}"
if [[ ${RVAL} -eq 0 || ${RVAL} -eq 23 ]]; then
echo "Rsync success"
else
echo "Rsync failed"
$PSQL -Upostgres -h$HOSTNAME template1 -c "SELECT pg_stop_backup();"
exit 1
fi
CTR=$((CTR + 1))
done
unset CTR
echo "Executing pg_stop_backup in server ... "
$PSQL -Upostgres -h$HOSTNAME template1 -c "SELECT pg_stop_backup();"
if [ $? -ne 0 ]
then
echo "PSQL pg_stop_backup failed"
exit 1;
fi
echo "pg_stop_backup done successfully"
# read the backup_label file in pgdatadir and get the name of start wal file
# below is example content.
#START WAL LOCATION: E/A9145E4 (file 000000010000000E0000000A)
#CHECKPOINT LOCATION: E/A92939C
#START TIME: 2006-04-01 14:36:48 IST
#LABEL: base_backup_01-04-2006-14-36-45
DATA_DIR_NAME=`basename $DATA_DIR`
BACKUP_LABEL=$BACKUPFOLDER/$DATA_DIR_NAME/backup_label
echo "BACKUP_LABEL: $BACKUP_LABEL"
START_LINE=`grep -i "START WAL LOCATION" $BACKUP_LABEL` # get the like containing START WAL LOCATION
START_LINE=${START_LINE/#START*file /} # strip something like 'START WAL LOCATION: E/A9145E4 (file ' from begin.
START_LINE=${START_LINE/%)/} # strip ')' from end.
# REF_FILE_NUM is something like 000000010000000A00000068
REF_FILE_NUM=$START_LINE
echo "Content of $BACKUP_LABEL"
echo "------------- begin -----------"
cat $BACKUP_LABEL
echo "------------- end -----------"
echo "Read Start Wal as : $REF_FILE_NUM"
echo "REF_FILE_NUM=$REF_FILE_NUM"
# iterate list of files in the WAL_ARCHIVE folder
for i in `ls -1 $WAL_ARCHIVE` ;
do
# $i is :000000010000000A0000005D.bz2 eg
# get first 24 chars in filename
FILE_NUM=${i:0:24}
# compare if the number is less than the reference
# here string comparison is being used.
if [[ $FILE_NUM < $REF_FILE_NUM ]]
then
echo "$FILE_NUM [ $i ] removed"
rm -f $WAL_ARCHIVE/$i
else
echo "$FILE_NUM [ $i ] not removed"
fi
done
#------------------------- script ends -----------------------------------------------------------------