pgpool-II + keepalived achieves high availability

Installation

Download

https://pgpool.net/mediawiki/index.php/Downloads

Compile and install


tar -zxvf pgpool-II-4.4.4.tar.gz
cd pgpool-II-4.4.4
./configure --prefix=/usr/local/pgpool --with-pgsql=/usr/local/postgres/
make && make install

#pgpool-regclass plug-in installation
cd /root/pgpool-II-4.4.4/src/sql/pgpool-regclass
make && make install
psql -h /data/pgdata/tmp -p 4567 -U postgres -f pgpool-regclass.sql template1
# The following test is not executed
ln -s /root/pgpool-II-4.4.4//src/sql/pgpool-regclass/* /usr/local/postgres/lib/

# pgpool-recovery plug-in installation
cd /root/pgpool-II-4.4.4/src/sql/pgpool-recovery
make && make install
psql -h /data/pgdata/tmp -p 4567 -U postgres -f pgpool-recovery.sql template1
cp /usr/local/pgpool/etc/recovery_1st_stage.sample /data/pgdata/recovery_1st_stage

chown -R postgres /usr/local/pgpool
# Link
ln -s /usr/local/pgpool/bin/* /usr/local/bin

Configuration

Create account

# Create super user, pcp management command user, stream replication check user
CREATE ROLE pgpool WITH LOGIN;
CREATE ROLE u_standby WITH REPLICATION LOGIN;
ALTER USER postgres ENCRYPTED PASSWORD 'Pg_2023zys!';
alter user pgpool encrypted password 'Pg_2023zys!';
alter user u_standby encrypted password 'Pg_2023zys!';
GRANT pg_monitor TO pgpool;

# On all postgresql nodes, create this file in the postgres user directory
cat << EOF > ~/.pgpass
10.10.10.11:4567:*:postgres:Pg_2023zys!
10.10.10.11:4567:*:repuser:Pg_2023zys!
10.10.10.11:4567:*:pgpool:Pg_2023zys!
10.10.10.12:4567:*:postgres:Pg_2023zys!
10.10.10.12:4567:*:pgpool:Pg_2023zys!
10.10.10.12:4567:*:repuser:Pg_2023zys!
localhost:4567:*:postgres:Pg_2023zys!
localhost:4567:*:pgpool:Pg_2023zys!
localhost:4567:*:repuser:Pg_2023zys!
EOF


Configuration.pcppass

su - postgres
vi .pcppass

10.10.10.12:5432:replication:u_standby:Pg_2023zys!
10.10.10.11:9999:postgres:pgpool:Pg_2023zys!
10.10.10.11:9999:postgres:postgres:Pg_2023zys!
10.10.10.11:5432:postgres:postgres:Pg_2023zys!
10.10.10.12:5432:postgres:postgres:Pg_2023zys!
localhost:9898:pgpool:Pg_2023zys!


echo "pgpool:$(pg_md5 Pg_2023zys\!)" >> /usr/local/pgpool/etc/pcp.conf

Configure pool_hba.conf file


cat << EOF > /usr/local/pgpool/etc/pool_hba.conf
host all all 0.0.0.0/0 md5
host all pgpool 0.0.0.0/0 trust
local all all trust
EOF

Configure node number

#10.10.10.11
echo "0" > /usr/local/pgpool/etc/pgpool_node_id

#10.10.10.12
echo "1" > /usr/local/pgpool/etc/pgpool_node_id

Configure pgpool.conf

# Backend cluster mode: using streaming replication
backend_clustering_mode = 'streaming_replication'

# - pgpool connection settings -
# Listening address: listen to all addresses
listen_addresses = '*'
# Listening port: 9999
port=9999
# socket directory
#socket_dir = '/usr/local/pgpool/tmp'

# - pcp connection settings -
# pcp listening address: listen to all addresses
pcp_listen_addresses = '*'
# pcp listening port: 9898
pcp_port = 9898
# pcp socket directory
pcp_socket_dir = '/usr/local/pgpool/tmp'

# Stream replication check
sr_check_user = 'pgpool'
sr_check_password = 'Pg_2023zys!'
sr_check_database = 'postgres'


# - Health check settings -
health_check_period = 5 # Frequency of checking health status (seconds)
health_check_timeout = 30 # Health check timeout (seconds)
health_check_user = 'pgpool'
health_check_password = 'Pg_2023zys!'
health_check_max_retries = 3 # Maximum number of retries

search_primary_node_timeout = 10min # Timeout for finding the primary node

# - Backend connection settings -
backend_hostname0 = '10.10.10.11' # The host name or IP address of the first backend database
backend_port0 = 4567 # The port number of the first backend database
backend_weight0 = 1 #The weight of the first backend database
backend_data_directory0 = '/data/pgdata' # The data directory of the first backend database
backend_flag0 = 'ALLOW_TO_FAILOVER' #Indicates that failover is allowed
#backend_application_name0 = 'pg1' # (optional) backend application name

backend_hostname1 = '10.10.10.12' # The host name or IP address of the second backend database
backend_port1 = 4567 # The port number of the second backend database
backend_weight1 = 1 # The weight of the second backend database
backend_data_directory1 = '/data/pgdata' # The data directory of the second backend database
backend_flag1 = 'ALLOW_TO_FAILOVER' #Indicates that failover is allowed
#backend_application_name1 = 'pg2' # (optional) backend application name

# - Failover configuration -
failover_command = '/usr/local/pgpool/etc/failover.sh %d %h %p %D %m %H %M %P %r %R %N %S' # Custom failover command
follow_primary_command = '/usr/local/pgpool/etc/follow_primary.sh %d %h %p %D %m %H %M %P %r %R' # Customize the command to switch to the primary server

# - Online recovery configuration -
recovery_user = 'postgres' # PostgreSQL user for online recovery
recovery_password = 'Pg_2023zys!'
recovery_1st_stage_command = 'recovery_1st_stage' # First stage recovery command

# - Client authentication configuration -
enable_pool_hba = on # Enable client authentication
pool_passwd = '/usr/local/pgpool/etc/pool_passwd' # File to store user passwords

# - Watchdog configuration -
use_watchdog = on # Enable watchdog
#delegate_IP = '10.221.232.185' # Delegate IP address
#if_up_cmd = '/usr/bin/sudo /sbin/ip addr add $_IP_$/24 dev bond0 label bond0:1' # Network interface startup command
#if_down_cmd = '/usr/bin/sudo /sbin/ip addr del $_IP_$/24 dev bond0' # Network interface shutdown command
#arping_cmd = '/usr/bin/sudo /usr/sbin/arping -U $_IP_$ -w 1 -I bond0' # Command for sending ARP request
hostname0 = '10.10.10.11' # Hostname or IP address of watchdog node 0
wd_port0 = 9000 #Port number of watchdog node 0
pgpool_port0 = 9999 # pgpool port number on watchdog node 0
hostname1 = '10.10.10.12' # Hostname or IP address of watchdog node 1
wd_port1 = 9000 #Port number of watchdog node 1
pgpool_port1 = 9999 # pgpool port number on watchdog node 1

wd_lifecheck_method = 'heartbeat' # Use heartbeat detection for survival check
wd_interval = 10 # Heartbeat detection interval between watchdog nodes (seconds)
heartbeat_hostname0 = '10.10.10.11' # Host name or IP address of heartbeat detection node 0
heartbeat_port0 = 9694 #Port number of heartbeat detection node 0
heartbeat_device0 = '' # Heartbeat detection node 0 network device (if needed)
heartbeat_hostname1 = '10.10.10.12' # Host name or IP address of heartbeat detection node 1
heartbeat_port1 = 9694 #Port number of heartbeat detection node 1
heartbeat_device1 = '' # Network device for heartbeat detection node 1 (if needed)
wd_heartbeat_keepalive = 2 # Keep alive parameters for heartbeat detection
wd_heartbeat_deadtime = 30 #Death time parameter for heartbeat detection
#wd_escalation_command = '/usr/local/pgpool/etc/escalation.sh' # Failover upgrade command


other_pgpool_hostname0 = '10.10.10.12'
other_pgpool_port0 = 9999
other_wd_port0 = 9000

# - Log settings -
log_destination = 'stderr'
logging_collector = on
log_directory = '/usr/local/pgpool/'
log_filename = 'pgpool-%Y-%m-%d_%H%M%S.log'
log_truncate_on_rotation = on
log_rotation_age = 1d # Log rotation interval
log_rotation_size = 10MB #Maximum file size for log rotation

pid_file_name = '/usr/local/pgpool/pgp_9999.pid' # File name to store PID

# Load balancing mode: off
load_balance_mode = off

Configuration script

cp -p /usr/local/pgpool/etc/failover.sh{.sample,}
cp -p /usr/local/pgpool/etc/follow_primary.sh{.sample,}
chown postgres:postgres /usr/local/pgpool/etc/{failover.sh,follow_primary.sh}
chmod +x /usr/local/pgpool/etc/{failover.sh,follow_primary.sh}


## **Place the recovery_1st_stage and pgpool_remote_start scripts in the $PG_DATA directory**
cp -p /db/pgpool/etc/recovery_1st_stage.sample $PG_5432/recovery_1st_stage
cp -p /db/pgpool/etc/pgpool_remote_start.sample $PG_5432/pgpool_remote_start
chown postgres:postgres $PG_5432/{recovery_1st_stage,pgpool_remote_start}
chmod +x $PG_5432/{recovery_1st_stage,pgpool_remote_start}

Script description

  • failover.sh

This script is run by the failover command (failover_command). The main function of this script is to perform failover when a PostgreSQL failure occurs. It connects to the new primary node via SSH, detects whether a failover needs to be skipped, tests SSH connections with invalid passwords, handles the situation where the standby node is down, and finally promotes the standby node to the primary node.

  • follow_primary.sh

This script is run after the failover_command. The main function of this script is to synchronize the standby node with the new primary node after a PostgreSQL failover. The script attempts to use pg_rewind, and if that fails, pg_basebackup. During the synchronization process, replication slots are also created and managed, and different configuration files are used depending on the version of PostgreSQL. Finally, the script attaches the spare node to Pgpool-II.

  • recovery_1st_stage

This script is executed by “recovery_1st_stage” to recover a standby node. For example, when using pcp_recovery_node to recover a node online. The main function of this script is to restore a standby node via pg_basebackup. During execution, the script creates and manages replication slots, generates recovery configuration files relevant to the PostgreSQL version, and performs necessary cleanup.

  • pgpool_remote_start

This script is executed after recovery_1st_stage and is mainly used to remotely start the recovered node through pg_ctl.

:::info
Note:
The parameters in each script (paths, ports, users) must be adjusted to match your installation environment before use.
:::

Configure pool_passwd

pg_md5 --md5auth --username=pgpool "Pg_2023zys!"
pg_md5 --md5auth --username=postgres "Pg_2023zys!"
pg_md5 --md5auth --username=u_standby "Pg_2023zys!"

cat /usr/local/pgpool/etc/pool_passwd


Online recovery

After a postgresql crashes, online recovery can be performed through pcp_recovery_node. There are 2 scripts for the recovery steps.

  • recovery_1st_stage: Complete node redeployment and replication
  • pgpool_remote_start: Start a new node
## **Place the recovery_1st_stage and pgpool_remote_start scripts in the $PG_DATA directory**
cp -p /usr/local/pgpool/etc/recovery_1st_stage.sample /data/pgdata/recovery_1st_stage
cp -p /usr/local/pgpool/etc/pgpool_remote_start.sample /data/pgdata/pgpool_remote_start
chown postgres:postgres /data/pgdata/{recovery_1st_stage,pgpool_remote_start}
chmod +x /data/pgdata/{recovery_1st_stage,pgpool_remote_start}

Where the script needs to be modified

  1. recovery_1st_stage:
  • PGHOME=/usr/local/postgres
  • ARCHIVEDIR=/var/lib/pgsql/archivedir
  • REPLUSER=repuser
  • passfile='/home/postgres/.pgpass'
    :::danger
    Note:
    The /home/postgres/.pgpass file must be created so that you can execute psql in the script without entering a password.
    :::
  1. pgpool_remote_start
  • PGHOME=/usr/local/postgres
  • ARCHIVEDIR=/var/lib/pgsql/archivedir
  • REPLUSER=repuser
  • passfile='/home/postgres/.pgpass'

:::info
Pay attention to whether PGHOME in the scripts recovery_1st_stage and pgpool_remote_start is configured correctly. PCP_USER in the script follow_primary.sh is pgpool.
Where the script needs to be modified
:::

# In order to use the online recovery function, pgpool_recovery, pgpool_remote_start, pgpool_switch_xlog and other functions are required, so we need to install pgpool_recovery on template1 of PostgreSQL server server1.
#Create pg plug-in
su - postgres
psql template1 -c "CREATE EXTENSION pgpool_recovery"
psql postgres -c "CREATE EXTENSION pgpool_recovery"

#Perform online recovery

pcp_recovery_node -h 10.10.10.11 -p 9898 -U pgpool -n 0

:::info
[root@node1 pgpool]# pcp_recovery_node --help
pcp_recovery_node - recovery node
usage:
pcp_recovery_node [options...] [node ID]
Options:
-U, --username=NAME Username for PCP authentication
-h, --host=HOSTNAME pgpool-II host
-p, --port=PORT PCP port number
-w, --no-password never prompt for password
-W, --password force password prompt (should happen automatically)
-n, --node-id=NODEID ID of the backend node
-d, --debug enable debug messages (optional)
-v, --verbose output verbose messages
-?, --help print this help message
:::

Start and stop pgpool

Start

pg_conf
# First check if the parameters are correct

pgpool

# -n: Start Pgpool-II in foreground mode and display console output.

If you need to specify other parameters such as configuration files, you can use the following command:

pgpool -c /path/to/pgpool.conf
# -c <configuration file>: Specify the configuration file path of Pgpool-II

RELOAD

pgpool reload

Stop

 pgpool -m smart stop

-m : Specify the shutdown mode, you can choose “smart”, “fast” or “immediate”, their meanings are as follows:

  • "smart": wait until all clients have disconnected, then exit.
  • "fast": do not wait for clients; disconnect them and shut down cleanly.
  • "immediate": same as "fast" mode, exit immediately.

Other common parameter descriptions:

  • -a : Specifies the path to the pool_hba.conf configuration file.
  • -f : Specifies the path to the pgpool.conf configuration file.
  • -F : Specifies the path to the pcp.conf configuration file.
  • -n: Start in foreground mode and display console output.
  • -d: Start in debug mode.
  • -C: Clear query cache oidmaps, used when memqcache_method is set to memcached.
  • -x: Enable various assertion checks for debugging.
  • -D: Discard the pgpool_status file and do not restore the previous state.

Connect to PGPOOL

# socket connection

psql -p9999 -U pgpool -d postgres

# Internet connection
psql -h10.10.10.11 -p9999 -U pgpool -d postgres

keepalived

Monitor pgpool through keepalived, replace watchdog, and if a Pgpool node fails, migrate the IP to another surviving pgpool
Keepalived is easier to use and more reasonable than watchdog when using two nodes. If there are more than 2 nodes, it is recommended to use watchdog, because watchdog elects the master node, while keepalived is implemented through weight matching.

Main (10.10.10.11)

keepalived.conf

# keepalived.conf — master node (10.10.10.11)

vrrp_script chk_pgpool {
    # Run the pgpool health-check script every 2 seconds
    script "/usr/local/bin/chk_pgpool.sh"
    interval 2
}

vrrp_instance VI_1 {
    # state BACKUP + nopreempt: whichever node currently holds the VIP
    # keeps it until it fails, avoiding a needless failback when this
    # (higher-priority) node comes back up
    state BACKUP
    interface bond1
    virtual_router_id 84
    nopreempt
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass Nmwg_2023
    }
    virtual_ipaddress {
        # NOTE(review): the backup node's config advertises 10.10.10.100/24
        # as the VIP — both nodes of one VRRP instance must use the SAME
        # virtual address; confirm which one is intended.
        10.221.232.184/24 dev bond1
    }
    virtual_ipaddress_excluded {
        2409:8010:5af0:1101:10:845:0:79/96 dev bond1
    }
    track_script {
        chk_pgpool
    }
}


chk_pgpool.sh

#!/bin/bash
# Keepalived health check for Pgpool-II on this node (master, 10.10.10.11).
# Exits 0 when pgpool accepts a connection AND the pgpool process is alive;
# exits 1 otherwise, which makes keepalived move the VIP to the other node.

# Database connection parameters
PGPOOL_HOST="10.10.10.11"      # pgpool host name or IP address
PGPOOL_PORT="9999"             # pgpool listening port
PGPOOL_USER="pgpool"           # user for the test connection
PGPOOL_PASSWORD="Pg_2023zys!"  # password for the test connection

# Log file path
LOG_FILE="/usr/local/pgpool/chk_pgpool.log"

# Create the log file on first run
if [ ! -f "$LOG_FILE" ]; then
  touch "$LOG_FILE"
fi

# Timestamp each check so the log is traceable
# (original had "date + '...'" — the space after '+' is a syntax error)
LOG_TIMESTAMP=$(date +'%Y-%m-%d %H:%M:%S')
echo "[$LOG_TIMESTAMP] - Starting PgPool health check..." >> "$LOG_FILE"

# Try a trivial query through pgpool; send stdout and stderr to the log
# (original had a garbled "2> & amp;1" redirection)
PGPASSWORD=$PGPOOL_PASSWORD psql -h "$PGPOOL_HOST" -p "$PGPOOL_PORT" -U "$PGPOOL_USER" -d postgres -c "SELECT 1;" >> "$LOG_FILE" 2>&1

if [ $? -eq 0 ]; then
  # Connection succeeded; also confirm the pgpool process exists
  if pgrep "pgpool" > /dev/null; then
    echo "[$LOG_TIMESTAMP] - PgPool health check successful." >> "$LOG_FILE"
    exit 0
  else
    echo "[$LOG_TIMESTAMP] - PgPool process not found." >> "$LOG_FILE"
    exit 1
  fi
else
  echo "[$LOG_TIMESTAMP] - PgPool health check failed. Connection error." >> "$LOG_FILE"
  exit 1
fi

Preparation (10.10.10.12)

keepalived.conf

# keepalived.conf — backup node (10.10.10.12)

vrrp_script chk_pgpool {
    # Run the pgpool health-check script every 2 seconds
    script "/usr/local/bin/chk_pgpool.sh"
    interval 2
}

vrrp_instance VI_1 {
    state BACKUP
    interface bond1
    virtual_router_id 84
    # Lower priority than the master (100), so the master wins the
    # initial election
    priority 99
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass Nmwg_2023
    }
    virtual_ipaddress {
        # NOTE(review): the master node's config advertises
        # 10.221.232.184/24 — both nodes of one VRRP instance must use the
        # SAME virtual address; confirm which one is intended.
        10.10.10.100/24 dev bond1
    }
    virtual_ipaddress_excluded {
        2409:8010:5af0:1101:10:845:0:79/96 dev bond1
    }
    track_script {
        chk_pgpool
    }
}


chk_pgpool.sh

#!/bin/bash
# Keepalived health check for Pgpool-II on this node (backup, 10.10.10.12).
# Exits 0 when pgpool accepts a connection AND the pgpool process is alive;
# exits 1 otherwise, which makes keepalived move the VIP to the other node.

# Database connection parameters
PGPOOL_HOST="10.10.10.12"      # pgpool host name or IP address
PGPOOL_PORT="9999"             # pgpool listening port
PGPOOL_USER="pgpool"           # user for the test connection
PGPOOL_PASSWORD="Pg_2023zys!"  # password for the test connection

# Log file path
LOG_FILE="/usr/local/pgpool/chk_pgpool.log"

# Create the log file on first run
if [ ! -f "$LOG_FILE" ]; then
  touch "$LOG_FILE"
fi

# Timestamp each check so the log is traceable
# (original had "date + '...'" — the space after '+' is a syntax error)
LOG_TIMESTAMP=$(date +'%Y-%m-%d %H:%M:%S')
echo "[$LOG_TIMESTAMP] - Starting PgPool health check..." >> "$LOG_FILE"

# Try a trivial query through pgpool; send stdout and stderr to the log
# (original had a garbled "2> & amp;1" redirection)
PGPASSWORD=$PGPOOL_PASSWORD psql -h "$PGPOOL_HOST" -p "$PGPOOL_PORT" -U "$PGPOOL_USER" -d postgres -c "SELECT 1;" >> "$LOG_FILE" 2>&1

if [ $? -eq 0 ]; then
  # Connection succeeded; also confirm the pgpool process exists
  if pgrep "pgpool" > /dev/null; then
    echo "[$LOG_TIMESTAMP] - PgPool health check successful." >> "$LOG_FILE"
    exit 0
  else
    echo "[$LOG_TIMESTAMP] - PgPool process not found." >> "$LOG_FILE"
    exit 1
  fi
else
  echo "[$LOG_TIMESTAMP] - PgPool health check failed. Connection error." >> "$LOG_FILE"
  exit 1
fi