Please skip the previous link. The correct link is https://github.com/kubedb/postgres/tree/pg-db/hack/docker/postgres/9.6.7/scripts.
I am also going to copy-paste the code here, in case that is more helpful.
primary/run.sh
```
#!/usr/bin/env bash
set -e

echo "Running as Primary"

# set password ENV
export PGPASSWORD=${POSTGRES_PASSWORD:-postgres}
export ARCHIVE=${ARCHIVE:-}

if [ ! -e "$PGDATA/PG_VERSION" ]; then
  if [ "$RESTORE" = true ]; then
    echo "Restoring Postgres from base_backup using wal-g"
    /scripts/primary/restore.sh
  else
    /scripts/primary/start.sh
  fi
fi

# On an already running primary server from previous releases, postgresql.conf may not contain 'wal_log_hints = on'.
# Set it using 'sed'. ref: https://stackoverflow.com/a/11245501/4628962
sed -i '/wal_log_hints/c\wal_log_hints = on' $PGDATA/postgresql.conf

# This node can become the new leader while unable to create the trigger file, so a leftover recovery.conf from
# the last bootup (when this node was a standby) may exist. That would force this node to become a STANDBY.
# So, delete recovery.conf.
if [[ -e $PGDATA/recovery.conf ]] && [[ $(cat $PGDATA/recovery.conf | grep -c "standby_mode = on") -gt 0 ]]; then
  # recovery.conf exists and contains standby_mode = on, so it is left over from the previous standby state.
  rm $PGDATA/recovery.conf
fi

# push base-backup
if [ "$ARCHIVE" == "wal-g" ]; then
  # set wal-g ENV
  CRED_PATH="/srv/wal-g/archive/secrets"
  export WALE_S3_PREFIX=$(echo "$ARCHIVE_S3_PREFIX")
  export AWS_ACCESS_KEY_ID=$(cat "$CRED_PATH/AWS_ACCESS_KEY_ID")
  export AWS_SECRET_ACCESS_KEY=$(cat "$CRED_PATH/AWS_SECRET_ACCESS_KEY")

  sed -i "/archive_command/c\archive_command = 'wal-g wal-push %p'" $PGDATA/postgresql.conf
  sed -i "/archive_mode/c\archive_mode = on" $PGDATA/postgresql.conf # todo: always
  sed -i "/archive_timeout/c\archive_timeout = 60" $PGDATA/postgresql.conf

  pg_ctl -D "$PGDATA" -w start
  PGUSER="postgres" wal-g backup-push "$PGDATA" >/dev/null
  pg_ctl -D "$PGDATA" -m smart -w stop # todo: fast
fi

exec postgres
```
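As a side note (this is not part of the script above): to check whether archive_command is actually succeeding on the primary, a minimal sketch from inside the primary pod would be something like the following, assuming the postgres superuser and local trust authentication set up by these scripts.

```
# minimal check from inside the primary pod (assumes local trust auth from start.sh);
# pg_stat_archiver reports how many WAL segments were archived and whether archive_command has been failing
psql --username=postgres --no-password \
  --command="SELECT archived_count, last_archived_wal, failed_count, last_failed_wal FROM pg_stat_archiver;"
```

If failed_count keeps growing while archived_count stays flat, the `wal-g wal-push` command itself is what needs attention.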
primary/start.sh
```
#!/bin/bash
set -e

mkdir -p "$PGDATA"
rm -rf "$PGDATA"/*
chmod 0700 "$PGDATA"

export POSTGRES_INITDB_ARGS=${POSTGRES_INITDB_ARGS:-}
export POSTGRES_INITDB_XLOGDIR=${POSTGRES_INITDB_XLOGDIR:-}

# Create the transaction log directory before initdb is run
if [ "$POSTGRES_INITDB_XLOGDIR" ]; then
  mkdir -p "$POSTGRES_INITDB_XLOGDIR"
  chown -R postgres "$POSTGRES_INITDB_XLOGDIR"
  chmod 700 "$POSTGRES_INITDB_XLOGDIR"
  export POSTGRES_INITDB_ARGS="$POSTGRES_INITDB_ARGS --xlogdir $POSTGRES_INITDB_XLOGDIR"
fi

initdb $POSTGRES_INITDB_ARGS --pgdata="$PGDATA" >/dev/null

# setup postgresql.conf
cp /scripts/primary/postgresql.conf /tmp
echo "wal_level = replica" >>/tmp/postgresql.conf
echo "max_wal_senders = 99" >>/tmp/postgresql.conf
echo "wal_keep_segments = 32" >>/tmp/postgresql.conf
echo "wal_log_hints = on" >>/tmp/postgresql.conf
mv /tmp/postgresql.conf "$PGDATA/postgresql.conf"

# setup pg_hba.conf
{ echo; echo 'local all all trust'; } >>"$PGDATA/pg_hba.conf"
{ echo 'host all all 127.0.0.1/32 trust'; } >>"$PGDATA/pg_hba.conf"
{ echo 'host all all 0.0.0.0/0 md5'; } >>"$PGDATA/pg_hba.conf"
{ echo 'host replication postgres 0.0.0.0/0 md5'; } >>"$PGDATA/pg_hba.conf"

# start postgres
pg_ctl -D "$PGDATA" -w start >/dev/null

export POSTGRES_USER=${POSTGRES_USER:-postgres}
export POSTGRES_DB=${POSTGRES_DB:-$POSTGRES_USER}
export POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}

psql=(psql -v ON_ERROR_STOP=1)

# create database with the specified name
if [ "$POSTGRES_DB" != "postgres" ]; then
  "${psql[@]}" --username postgres <<-EOSQL
CREATE DATABASE "$POSTGRES_DB" ;
EOSQL
  echo
fi

if [ "$POSTGRES_USER" = "postgres" ]; then
  op="ALTER"
else
  op="CREATE"
fi

# alter/create the postgres superuser
"${psql[@]}" --username postgres <<-EOSQL
$op USER "$POSTGRES_USER" WITH SUPERUSER PASSWORD '$POSTGRES_PASSWORD';
EOSQL
echo

psql+=(--username "$POSTGRES_USER" --dbname "$POSTGRES_DB")
echo

# initialize database
for f in "$INITDB"/*; do
  case "$f" in
    *.sh) echo "$0: running $f"; . "$f" ;;
    *.sql) echo "$0: running $f"; "${psql[@]}" -f "$f"; echo ;;
    *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${psql[@]}"; echo ;;
    *) echo "$0: ignoring $f" ;;
  esac
  echo
done

# stop server
pg_ctl -D "$PGDATA" -m fast -w stop >/dev/null

if [ "$STREAMING" == "synchronous" ]; then
  # setup synchronous streaming replication
  echo "synchronous_commit = remote_write" >>"$PGDATA/postgresql.conf"
  echo "synchronous_standby_names = '*'" >>"$PGDATA/postgresql.conf"
fi

if [ "$ARCHIVE" == "wal-g" ]; then
  # setup postgresql.conf
  echo "archive_command = 'wal-g wal-push %p'" >>"$PGDATA/postgresql.conf"
  echo "archive_timeout = 60" >>"$PGDATA/postgresql.conf"
  echo "archive_mode = on" >>"$PGDATA/postgresql.conf" # todo: always
fi
```
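Once the primary is running again (primary/run.sh execs postgres at the end), this is a minimal sketch, not part of the scripts themselves, for confirming that the settings written above took effect and that standbys are attached, assuming the same postgres superuser:

```
# minimal sanity checks on the running primary
psql --username=postgres --command="SHOW wal_level;"       # should report 'replica'
psql --username=postgres --command="SHOW wal_log_hints;"   # should report 'on' (pg_rewind depends on this)
psql --username=postgres \
  --command="SELECT application_name, client_addr, state, sync_state FROM pg_stat_replication;"
```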
replica/run.sh
```
#!/usr/bin/env bash
set -e

echo "Running as Replica"

# set password ENV
export PGPASSWORD=${POSTGRES_PASSWORD:-postgres}
export ARCHIVE=${ARCHIVE:-}

gracefully_shutdown_host() {
  echo "Gracefully shutting down database"
  # start postgres server in background
  postgres >/dev/null &
  # wait until Postgres is running
  while true; do
    echo "Attempting pg_ctl status on localhost"
    pg_ctl status && break
    sleep 2
  done
  # stop postgres server
  pg_ctl -D "$PGDATA" stop >/dev/null 2>&1
}

take_pg_basebackup() {
  mkdir -p "$PGDATA"
  rm -rf "$PGDATA"/*
  chmod 0700 "$PGDATA"
  echo "Taking base backup."
  pg_basebackup -c fast -X fetch --no-password --pgdata "$PGDATA" --username=postgres --host="$PRIMARY_HOST"
}

setup_postgresql_config() {
  # setup recovery.conf
  cp /scripts/replica/recovery.conf /tmp
  echo "recovery_target_timeline = 'latest'" >>/tmp/recovery.conf
  echo "archive_cleanup_command = 'pg_archivecleanup $PGWAL %r'" >>/tmp/recovery.conf
  # primary_conninfo is used for streaming replication
  echo "primary_conninfo = 'application_name=$HOSTNAME host=$PRIMARY_HOST'" >>/tmp/recovery.conf
  mv /tmp/recovery.conf "$PGDATA/recovery.conf"

  # setup postgresql.conf
  cp /scripts/primary/postgresql.conf /tmp
  echo "wal_level = replica" >>/tmp/postgresql.conf
  echo "max_wal_senders = 99" >>/tmp/postgresql.conf
  echo "wal_keep_segments = 32" >>/tmp/postgresql.conf
  echo "wal_log_hints = on" >>/tmp/postgresql.conf
  if [ "$STANDBY" == "hot" ]; then
    echo "hot_standby = on" >>/tmp/postgresql.conf
  fi
  if [ "$STREAMING" == "synchronous" ]; then
    # setup synchronous streaming replication
    echo "synchronous_commit = remote_write" >>/tmp/postgresql.conf
    echo "synchronous_standby_names = '*'" >>/tmp/postgresql.conf
  fi
  mv /tmp/postgresql.conf "$PGDATA/postgresql.conf"
}

# wait for the primary to accept connections
TIMER=0
while [[ ${TIMER} -le 30 ]]; do
  pg_isready --host="$PRIMARY_HOST" --timeout=2 &>/dev/null && break
  echo "Attempting pg_isready on primary"
  TIMER=$((TIMER + 1))
  sleep 2
done
pg_isready --host="$PRIMARY_HOST" --timeout=2

while true; do
  psql -h "$PRIMARY_HOST" --no-password --username=postgres --command="select now();" &>/dev/null && break
  echo "Attempting query on primary"
  sleep 2
done

if [ ! -e "$PGDATA/PG_VERSION" ]; then
  take_pg_basebackup
else
  # pg_rewind. https://www.postgresql.org/docs/9.6/app-pgrewind.html
  # Why pg_rewind? refs:
  # - It resolves conflicts between different timelines. ref:
  #   1. http://hlinnaka.iki.fi/presentations/NordicPGDay2015-pg_rewind.pdf
  #   2. part (1 of 3) https://blog.2ndquadrant.com/introduction-to-pgrewind/
  #   3. part (2 of 3) https://blog.2ndquadrant.com/pgrewind-and-pg95/
  #   4. part (3 of 3) https://blog.2ndquadrant.com/back-to-the-future-part-3-pg_rewind-with-postgresql-9-6/
  # Why not just pull all the WAL files?
  # - That doesn't resolve conflicts between different timelines (mostly in failover scenarios, where a standby node becomes primary).
  #   So, after pulling WAL files, it is still required to run pg_rewind.
  # > pg_rewind. Possible errors:
  #   1. target server must be shut down cleanly
  #   2. could not find previous WAL record at 0/30000F8
  #   3. could not find common ancestor of the source and target cluster's timelines
  #   4. target server needs to use either data checksums or "wal_log_hints = on"
  #   5. None of the above. Example: "source and target clusters are from different systems"
  EXIT_CODE=0
  PG_REWIND_OUTPUT=$(pg_rewind --source-server="host=$PRIMARY_HOST user=postgres port=5432 dbname=postgres" --target-pgdata=$PGDATA) || EXIT_CODE=$?
  echo "${PG_REWIND_OUTPUT}"

  # The target database (localhost) must be shut down cleanly to perform pg_rewind.
  # So, check and, if necessary, re-stop the database gracefully.
  if [[ "$EXIT_CODE" != "0" ]] && [[ $(echo $PG_REWIND_OUTPUT | grep -c "target server must be shut down cleanly") -gt 0 ]]; then
    setup_postgresql_config
    gracefully_shutdown_host
    EXIT_CODE=0
    PG_REWIND_OUTPUT=$(pg_rewind --source-server="host=$PRIMARY_HOST user=postgres port=5432 dbname=postgres" --target-pgdata=$PGDATA) || EXIT_CODE=$?
    echo ${PG_REWIND_OUTPUT}
  fi

  if
    ([[ "$EXIT_CODE" != "0" ]] && [[ $(echo $PG_REWIND_OUTPUT | grep -c "could not find previous WAL record") -gt 0 ]]) ||
    # If the server diverged from the primary and the diverged WAL doesn't exist anymore,
    # pg_rewind will throw an error similar to "could not find previous WAL record at 0/30000F8".
    # We would have to manually fetch WALs starting from the missing point.
    # At this point, we take a pg_basebackup instead.
    # TODO: for wal-g or other kinds of WAL archiving, fetch missing WALs from archive storage (maybe), then run pg_rewind again.
    ([[ "$EXIT_CODE" != "0" ]] && [[ $(echo $PG_REWIND_OUTPUT | grep -c "could not find common ancestor") -gt 0 ]]) ||
    # Since 9.6, pg_rewind is much better at finding a common ancestor when run against a non-promoted master.
    # ref: https://www.enterprisedb.com/blog/pgrewind-improvements-postgresql-96
    # Still, if this error shows up, taking a pg_basebackup is a must.
    ([[ "$EXIT_CODE" != "0" ]] && [[ $(echo $PG_REWIND_OUTPUT | grep -c "server needs to use either data checksums") -gt 0 ]])
    # In case of an upgrade from a previous database version where 'wal_log_hints' was not turned on, this error may occur.
    # It will not occur again after adding 'wal_log_hints = on' to the config file.
    # We could have skipped here and manually pulled WAL files so that this node can redo them,
    # but, again, that would not resolve conflicts between timelines.
    # So, take a base_backup and continue.
  then
    take_pg_basebackup
  elif [[ "$EXIT_CODE" != "0" ]]; then
    # In other scenarios, pg_rewind fails and the reason is neither 'non-existing WAL' nor 'no common ancestor'.
    # One example is "source and target clusters are from different systems". This error happens when
    # the primary starts from scratch (when the user forces it to, e.g. by deleting the PVC manually). It is safe to exit
    # in these cases without processing further, because such accidental user behavior can result in data loss.
    echo "pg_rewind is failing and the reason is: $PG_REWIND_OUTPUT"
    exit 1
  fi
fi

setup_postgresql_config

# push base-backup
if [ "$ARCHIVE" == "wal-g" ]; then
  # set wal-g ENV
  CRED_PATH="/srv/wal-g/archive/secrets"
  export WALE_S3_PREFIX=$(echo "$ARCHIVE_S3_PREFIX")
  export AWS_ACCESS_KEY_ID=$(cat "$CRED_PATH/AWS_ACCESS_KEY_ID")
  export AWS_SECRET_ACCESS_KEY=$(cat "$CRED_PATH/AWS_SECRET_ACCESS_KEY")

  # setup postgresql.conf
  echo "archive_command = 'wal-g wal-push %p'" >>"$PGDATA/postgresql.conf"
  echo "archive_timeout = 60" >>"$PGDATA/postgresql.conf"
  echo "archive_mode = on" >>"$PGDATA/postgresql.conf" # todo: always
fi

exec postgres
```
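When pg_rewind fails in the block above, a minimal debugging sketch (not part of replica/run.sh itself) is to look at the control data of the target data directory while the server is stopped, assuming pg_controldata is on the PATH as in the stock postgres image:

```
# run inside the replica pod with the server stopped;
# shows whether the target was shut down cleanly and which timeline it last checkpointed on
pg_controldata "$PGDATA" | grep -E "Database cluster state|TimeLineID|Latest checkpoint location"

# timeline history files (PostgreSQL 9.6 still keeps WAL under pg_xlog)
ls "$PGDATA/pg_xlog"/*.history 2>/dev/null || echo "no timeline history files"
```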
On Tue, Jan 29, 2019 at 6:31 PM Abdullah Al Maruf <maruf.2hin@xxxxxxxxx> wrote:
Hello,

I am trying to build an automated system in Docker/Kubernetes where a container/pod will automatically schedule itself as a master or standby.

In short, I have 1 master node (the 0th node) and three standby nodes (the 1st, 2nd & 3rd nodes). When I make the 3rd node the master (by trigger file) and restart the 0th node as a replica, there is no problem. But when both nodes are offline and our leader selection chooses the 0th node as master and tries to reattach the 3rd node as a replica, it throws an error similar to:

```
LOG: invalid record length at 0/B000098: wanted 24, got 0
LOG: started streaming WAL from primary at 0/B000000 on timeline 2
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
FATAL: terminating walreceiver process due to administrator command
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
```

If I disable archive_mode, I have never faced this error with the same scripts. It only appears when archiving is on, and it does not happen every time, but it does most of the time. The error message appears every 5 seconds.

------------------------------------------------------------------------------------------------------------

Scenario in detail:

I have two folders for scripts:

```
├── primary
│   ├── postgresql.conf
│   ├── restore.sh
│   ├── run.sh
│   └── start.sh
└── replica
    ├── recovery.conf
    └── run.sh
```

I have a system that will choose the leader. If the current pod is the leader, it will run `primary/run.sh`, and if it is a replica, it will run `replica/run.sh`. The problem is not related to restore.sh at this moment, so I am skipping restore.sh.

The files can be found here: https://github.com/kubedb/postgres/tree/master/hack/docker/postgres/9.6.7/scripts
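To add a little more context to the scenario quoted above: when the "record with incorrect prev-link" messages start, this is roughly how the two nodes can be compared, a minimal sketch assuming shell access to both pods and the 9.6 function names:

```
# run in both the acting primary (0th) and the failing replica (3rd):
# compare the timeline each node last checkpointed on
pg_controldata "$PGDATA" | grep "TimeLineID"

# on the replica, see how far streaming and replay got before the errors started
psql --username=postgres --no-password \
  --command="SELECT pg_last_xlog_receive_location(), pg_last_xlog_replay_location();"
```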