Please skip the previous link. The correct link is https://github.com/kubedb/postgres/tree/pg-db/hack/docker/postgres/9.6.7/scripts.
I am also going to copy-paste the code here, in case that is more helpful.
primary/run.sh
```
#!/usr/bin/env bash
set -e

echo "Running as Primary"

# set password ENV
export PGPASSWORD=${POSTGRES_PASSWORD:-postgres}
export ARCHIVE=${ARCHIVE:-}

if [ ! -e "$PGDATA/PG_VERSION" ]; then
  if [ "$RESTORE" = true ]; then
    echo "Restoring Postgres from base_backup using wal-g"
    /scripts/primary/restore.sh
  else
    /scripts/primary/start.sh
  fi
fi

# On an already running primary server from previous releases, postgresql.conf may not contain 'wal_log_hints = on'.
# Set it using 'sed'. ref: https://stackoverflow.com/a/11245501/4628962
sed -i '/wal_log_hints/c\wal_log_hints = on' $PGDATA/postgresql.conf

# This node can become the new leader while unable to create the trigger file, so a leftover recovery.conf from
# the last bootup (when this node was a standby) may exist. That would force this node to become a STANDBY.
# So, delete recovery.conf.
if [[ -e $PGDATA/recovery.conf ]] && [[ $(cat $PGDATA/recovery.conf | grep -c "standby_mode = on") -gt 0 ]]; then
  # recovery.conf exists and contains standby_mode = on, so it is left over from the previous standby state.
  rm $PGDATA/recovery.conf
fi

# push base-backup
if [ "$ARCHIVE" == "wal-g" ]; then
  # set wal-g ENV
  CRED_PATH="/srv/wal-g/archive/secrets"
  export WALE_S3_PREFIX=$(echo "$ARCHIVE_S3_PREFIX")
  export AWS_ACCESS_KEY_ID=$(cat "$CRED_PATH/AWS_ACCESS_KEY_ID")
  export AWS_SECRET_ACCESS_KEY=$(cat "$CRED_PATH/AWS_SECRET_ACCESS_KEY")

  sed -i "/archive_command/c\archive_command = 'wal-g wal-push %p'" $PGDATA/postgresql.conf
  sed -i "/archive_mode/c\archive_mode = on" $PGDATA/postgresql.conf # todo: always
  sed -i "/archive_timeout/c\archive_timeout = 60" $PGDATA/postgresql.conf

  pg_ctl -D "$PGDATA" -w start
  PGUSER="postgres" wal-g backup-push "$PGDATA" >/dev/null
  pg_ctl -D "$PGDATA" -m smart -w stop # todo: fast
fi

exec postgres
```
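As a side note (this is not part of the script above): to check whether archive_command is actually succeeding on the primary, a minimal sketch from inside the primary pod would be something like the following, assuming the postgres superuser and local trust authentication set up by these scripts.

```
# minimal check from inside the primary pod (assumes local trust auth from start.sh);
# pg_stat_archiver reports how many WAL segments were archived and whether archive_command has been failing
psql --username=postgres --no-password \
  --command="SELECT archived_count, last_archived_wal, failed_count, last_failed_wal FROM pg_stat_archiver;"
```

If failed_count keeps growing while archived_count stays flat, the `wal-g wal-push` command itself is what needs attention.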
primary/start.sh
```
#!/bin/bash
set -e

mkdir -p "$PGDATA"
rm -rf "$PGDATA"/*
chmod 0700 "$PGDATA"

export POSTGRES_INITDB_ARGS=${POSTGRES_INITDB_ARGS:-}
export POSTGRES_INITDB_XLOGDIR=${POSTGRES_INITDB_XLOGDIR:-}

# Create the transaction log directory before initdb is run
if [ "$POSTGRES_INITDB_XLOGDIR" ]; then
  mkdir -p "$POSTGRES_INITDB_XLOGDIR"
  chown -R postgres "$POSTGRES_INITDB_XLOGDIR"
  chmod 700 "$POSTGRES_INITDB_XLOGDIR"
  export POSTGRES_INITDB_ARGS="$POSTGRES_INITDB_ARGS --xlogdir $POSTGRES_INITDB_XLOGDIR"
fi

initdb $POSTGRES_INITDB_ARGS --pgdata="$PGDATA" >/dev/null

# setup postgresql.conf
cp /scripts/primary/postgresql.conf /tmp
echo "wal_level = replica" >>/tmp/postgresql.conf
echo "max_wal_senders = 99" >>/tmp/postgresql.conf
echo "wal_keep_segments = 32" >>/tmp/postgresql.conf
echo "wal_log_hints = on" >>/tmp/postgresql.conf
mv /tmp/postgresql.conf "$PGDATA/postgresql.conf"

# setup pg_hba.conf
{ echo; echo 'local all all trust'; } >>"$PGDATA/pg_hba.conf"
{ echo 'host all all 127.0.0.1/32 trust'; } >>"$PGDATA/pg_hba.conf"
{ echo 'host all all 0.0.0.0/0 md5'; } >>"$PGDATA/pg_hba.conf"
{ echo 'host replication postgres 0.0.0.0/0 md5'; } >>"$PGDATA/pg_hba.conf"

# start postgres
pg_ctl -D "$PGDATA" -w start >/dev/null

export POSTGRES_USER=${POSTGRES_USER:-postgres}
export POSTGRES_DB=${POSTGRES_DB:-$POSTGRES_USER}
export POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-postgres}

psql=(psql -v ON_ERROR_STOP=1)

# create database with the specified name
if [ "$POSTGRES_DB" != "postgres" ]; then
  "${psql[@]}" --username postgres <<-EOSQL
CREATE DATABASE "$POSTGRES_DB" ;
EOSQL
  echo
fi

if [ "$POSTGRES_USER" = "postgres" ]; then
  op="ALTER"
else
  op="CREATE"
fi

# alter/create the postgres superuser
"${psql[@]}" --username postgres <<-EOSQL
$op USER "$POSTGRES_USER" WITH SUPERUSER PASSWORD '$POSTGRES_PASSWORD';
EOSQL
echo

psql+=(--username "$POSTGRES_USER" --dbname "$POSTGRES_DB")
echo

# initialize database
for f in "$INITDB"/*; do
  case "$f" in
    *.sh) echo "$0: running $f"; . "$f" ;;
    *.sql) echo "$0: running $f"; "${psql[@]}" -f "$f"; echo ;;
    *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${psql[@]}"; echo ;;
    *) echo "$0: ignoring $f" ;;
  esac
  echo
done

# stop server
pg_ctl -D "$PGDATA" -m fast -w stop >/dev/null

if [ "$STREAMING" == "synchronous" ]; then
  # setup synchronous streaming replication
  echo "synchronous_commit = remote_write" >>"$PGDATA/postgresql.conf"
  echo "synchronous_standby_names = '*'" >>"$PGDATA/postgresql.conf"
fi

if [ "$ARCHIVE" == "wal-g" ]; then
  # setup postgresql.conf
  echo "archive_command = 'wal-g wal-push %p'" >>"$PGDATA/postgresql.conf"
  echo "archive_timeout = 60" >>"$PGDATA/postgresql.conf"
  echo "archive_mode = on" >>"$PGDATA/postgresql.conf" # todo: always
fi
```
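Once the primary is running again (primary/run.sh execs postgres at the end), this is a minimal sketch, not part of the scripts themselves, for confirming that the settings written above took effect and that standbys are attached, assuming the same postgres superuser:

```
# minimal sanity checks on the running primary
psql --username=postgres --command="SHOW wal_level;"       # should report 'replica'
psql --username=postgres --command="SHOW wal_log_hints;"   # should report 'on' (pg_rewind depends on this)
psql --username=postgres \
  --command="SELECT application_name, client_addr, state, sync_state FROM pg_stat_replication;"
```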
replica/run.sh
```
#!/usr/bin/env bash
set -e

echo "Running as Replica"

# set password ENV
export PGPASSWORD=${POSTGRES_PASSWORD:-postgres}
export ARCHIVE=${ARCHIVE:-}

gracefully_shutdown_host() {
  echo "Gracefully shutting down database"
  # start postgres server in background
  postgres >/dev/null &
  # wait until Postgres is running
  while true; do
    echo "Attempting pg_ctl status on localhost"
    pg_ctl status && break
    sleep 2
  done
  # stop postgres server
  pg_ctl -D "$PGDATA" stop >/dev/null 2>&1
}

take_pg_basebackup() {
  mkdir -p "$PGDATA"
  rm -rf "$PGDATA"/*
  chmod 0700 "$PGDATA"
  echo "Taking base backup."
  pg_basebackup -c fast -X fetch --no-password --pgdata "$PGDATA" --username=postgres --host="$PRIMARY_HOST"
}

setup_postgresql_config() {
  # setup recovery.conf
  cp /scripts/replica/recovery.conf /tmp
  echo "recovery_target_timeline = 'latest'" >>/tmp/recovery.conf
  echo "archive_cleanup_command = 'pg_archivecleanup $PGWAL %r'" >>/tmp/recovery.conf
  # primary_conninfo is used for streaming replication
  echo "primary_conninfo = 'application_name=$HOSTNAME host=$PRIMARY_HOST'" >>/tmp/recovery.conf
  mv /tmp/recovery.conf "$PGDATA/recovery.conf"

  # setup postgresql.conf
  cp /scripts/primary/postgresql.conf /tmp
  echo "wal_level = replica" >>/tmp/postgresql.conf
  echo "max_wal_senders = 99" >>/tmp/postgresql.conf
  echo "wal_keep_segments = 32" >>/tmp/postgresql.conf
  echo "wal_log_hints = on" >>/tmp/postgresql.conf
  if [ "$STANDBY" == "hot" ]; then
    echo "hot_standby = on" >>/tmp/postgresql.conf
  fi
  if [ "$STREAMING" == "synchronous" ]; then
    # setup synchronous streaming replication
    echo "synchronous_commit = remote_write" >>/tmp/postgresql.conf
    echo "synchronous_standby_names = '*'" >>/tmp/postgresql.conf
  fi
  mv /tmp/postgresql.conf "$PGDATA/postgresql.conf"
}

# wait for the primary to accept connections
TIMER=0
while [[ ${TIMER} -le 30 ]]; do
  pg_isready --host="$PRIMARY_HOST" --timeout=2 &>/dev/null && break
  echo "Attempting pg_isready on primary"
  TIMER=$((TIMER + 1))
  sleep 2
done
pg_isready --host="$PRIMARY_HOST" --timeout=2

while true; do
  psql -h "$PRIMARY_HOST" --no-password --username=postgres --command="select now();" &>/dev/null && break
  echo "Attempting query on primary"
  sleep 2
done

if [ ! -e "$PGDATA/PG_VERSION" ]; then
  take_pg_basebackup
else
  # pg_rewind. https://www.postgresql.org/docs/9.6/app-pgrewind.html
  # Why pg_rewind? refs:
  # - It resolves conflicts between different timelines. ref:
  #   1. http://hlinnaka.iki.fi/presentations/NordicPGDay2015-pg_rewind.pdf
  #   2. part (1 of 3) https://blog.2ndquadrant.com/introduction-to-pgrewind/
  #   3. part (2 of 3) https://blog.2ndquadrant.com/pgrewind-and-pg95/
  #   4. part (3 of 3) https://blog.2ndquadrant.com/back-to-the-future-part-3-pg_rewind-with-postgresql-9-6/
  # Why not just pull all the WAL files?
  # - That doesn't resolve conflicts between different timelines (mostly in failover scenarios, where a standby node becomes primary).
  #   So, after pulling WAL files, it is still required to run pg_rewind.
  # > pg_rewind. Possible errors:
  #   1. target server must be shut down cleanly
  #   2. could not find previous WAL record at 0/30000F8
  #   3. could not find common ancestor of the source and target cluster's timelines
  #   4. target server needs to use either data checksums or "wal_log_hints = on"
  #   5. None of the above. Example: "source and target clusters are from different systems"
  EXIT_CODE=0
  PG_REWIND_OUTPUT=$(pg_rewind --source-server="host=$PRIMARY_HOST user=postgres port=5432 dbname=postgres" --target-pgdata=$PGDATA) || EXIT_CODE=$?
  echo "${PG_REWIND_OUTPUT}"

  # The target database (localhost) must be shut down cleanly to perform pg_rewind.
  # So, check and, if necessary, re-stop the database gracefully.
  if [[ "$EXIT_CODE" != "0" ]] && [[ $(echo $PG_REWIND_OUTPUT | grep -c "target server must be shut down cleanly") -gt 0 ]]; then
    setup_postgresql_config
    gracefully_shutdown_host
    EXIT_CODE=0
    PG_REWIND_OUTPUT=$(pg_rewind --source-server="host=$PRIMARY_HOST user=postgres port=5432 dbname=postgres" --target-pgdata=$PGDATA) || EXIT_CODE=$?
    echo ${PG_REWIND_OUTPUT}
  fi

  if
    ([[ "$EXIT_CODE" != "0" ]] && [[ $(echo $PG_REWIND_OUTPUT | grep -c "could not find previous WAL record") -gt 0 ]]) ||
    # If the server diverged from the primary and the diverged WAL doesn't exist anymore,
    # pg_rewind will throw an error similar to "could not find previous WAL record at 0/30000F8".
    # We would have to manually fetch WALs starting from the missing point.
    # At this point, we take a pg_basebackup instead.
    # TODO: for wal-g or other kinds of WAL archiving, fetch missing WALs from archive storage (maybe), then run pg_rewind again.
    ([[ "$EXIT_CODE" != "0" ]] && [[ $(echo $PG_REWIND_OUTPUT | grep -c "could not find common ancestor") -gt 0 ]]) ||
    # Since 9.6, pg_rewind is much better at finding a common ancestor when run against a non-promoted master.
    # ref: https://www.enterprisedb.com/blog/pgrewind-improvements-postgresql-96
    # Still, if this error shows up, taking a pg_basebackup is a must.
    ([[ "$EXIT_CODE" != "0" ]] && [[ $(echo $PG_REWIND_OUTPUT | grep -c "server needs to use either data checksums") -gt 0 ]])
    # In case of an upgrade from a previous database version where 'wal_log_hints' was not turned on, this error may occur.
    # It will not occur again after adding 'wal_log_hints = on' to the config file.
    # We could have skipped here and manually pulled WAL files so that this node can redo them,
    # but, again, that would not resolve conflicts between timelines.
    # So, take a base_backup and continue.
  then
    take_pg_basebackup
  elif [[ "$EXIT_CODE" != "0" ]]; then
    # In other scenarios, pg_rewind fails and the reason is neither 'non-existing WAL' nor 'no common ancestor'.
    # One example is "source and target clusters are from different systems". This error happens when
    # the primary starts from scratch (when the user forces it to, e.g. by deleting the PVC manually). It is safe to exit
    # in these cases without processing further, because such accidental user behavior can result in data loss.
    echo "pg_rewind is failing and the reason is: $PG_REWIND_OUTPUT"
    exit 1
  fi
fi

setup_postgresql_config

# push base-backup
if [ "$ARCHIVE" == "wal-g" ]; then
  # set wal-g ENV
  CRED_PATH="/srv/wal-g/archive/secrets"
  export WALE_S3_PREFIX=$(echo "$ARCHIVE_S3_PREFIX")
  export AWS_ACCESS_KEY_ID=$(cat "$CRED_PATH/AWS_ACCESS_KEY_ID")
  export AWS_SECRET_ACCESS_KEY=$(cat "$CRED_PATH/AWS_SECRET_ACCESS_KEY")

  # setup postgresql.conf
  echo "archive_command = 'wal-g wal-push %p'" >>"$PGDATA/postgresql.conf"
  echo "archive_timeout = 60" >>"$PGDATA/postgresql.conf"
  echo "archive_mode = on" >>"$PGDATA/postgresql.conf" # todo: always
fi

exec postgres
```
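When pg_rewind fails in the block above, a minimal debugging sketch (not part of replica/run.sh itself) is to look at the control data of the target data directory while the server is stopped, assuming pg_controldata is on the PATH as in the stock postgres image:

```
# run inside the replica pod with the server stopped;
# shows whether the target was shut down cleanly and which timeline it last checkpointed on
pg_controldata "$PGDATA" | grep -E "Database cluster state|TimeLineID|Latest checkpoint location"

# timeline history files (PostgreSQL 9.6 still keeps WAL under pg_xlog)
ls "$PGDATA/pg_xlog"/*.history 2>/dev/null || echo "no timeline history files"
```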
On Tue, Jan 29, 2019 at 6:31 PM Abdullah Al Maruf <maruf.2hin@xxxxxxxxx> wrote:
Hello,

I am trying to build an automated system in Docker/Kubernetes where a container/pod will automatically schedule itself as a master or standby.

In short, I have 1 master node (the 0th node) and three standby nodes (the 1st, 2nd & 3rd nodes). When I make the 3rd node the master (by trigger file) and restart the 0th node as a replica, there is no problem. But when both nodes are offline and our leader selection chooses the 0th node as master and tries to reattach the 3rd node as a replica, it throws an error similar to:

```
LOG: invalid record length at 0/B000098: wanted 24, got 0
LOG: started streaming WAL from primary at 0/B000000 on timeline 2
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
FATAL: terminating walreceiver process due to administrator command
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
LOG: record with incorrect prev-link 10000/21B0000 at 0/B000098
```

If I disable archive_mode, I have never faced this error with the same scripts. It only appears when archiving is on, and it does not happen every time, but it does most of the time. The error message appears every 5 seconds.

------------------------------------------------------------------------------------------------------------

Scenario in detail:

I have two folders for scripts:

```
├── primary
│   ├── postgresql.conf
│   ├── restore.sh
│   ├── run.sh
│   └── start.sh
└── replica
    ├── recovery.conf
    └── run.sh
```

I have a system that will choose the leader. If the current pod is the leader, it will run `primary/run.sh`, and if it is a replica, it will run `replica/run.sh`. The problem is not related to restore.sh at this moment, so I am skipping restore.sh.

The files can be found here: https://github.com/kubedb/postgres/tree/master/hack/docker/postgres/9.6.7/scripts
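To add a little more context to the scenario quoted above: when the "record with incorrect prev-link" messages start, this is roughly how the two nodes can be compared, a minimal sketch assuming shell access to both pods and the 9.6 function names:

```
# run in both the acting primary (0th) and the failing replica (3rd):
# compare the timeline each node last checkpointed on
pg_controldata "$PGDATA" | grep "TimeLineID"

# on the replica, see how far streaming and replay got before the errors started
psql --username=postgres --no-password \
  --command="SELECT pg_last_xlog_receive_location(), pg_last_xlog_replay_location();"
```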