PostgreSQL repmgr高可用叢集+keepalived高可用

sync_tb發表於2020-09-02

最近在學習PostgreSQL資料庫高可用架構repmgr高可用叢集,發現網上完整的參考資料不是很多,比較零碎,於是經過在虛擬機器嘗試測試,分享下個人完整的repmgr叢集的搭建過程,步驟包含簡單的系統引數配置,pg資料庫安裝,repmgr叢集安裝配置以及keepalived安裝配置,因為搭建步驟個人理解和查閱資料,可能存在些許錯誤,請指正。


搭建過程主要分為主要以下幾步:

1. 系統引數配置/環境準備

2. PG原始碼安裝配置

3. repmgr原始碼安裝配置

    repmgr基本叢集配置

    repmgrd守護程式配置

    witness監控配置

4. keepalived原始碼安裝配置

    keepalived軟體安裝

5. 自動切換指令碼部署

環境準備

作業系統:

  • Red Hat Enterprise Linux Server release 7.8 (Maipo)

地址分配:

  • 192.168.56.201 pg1
    192.168.56.202 pg2
    192.168.56.203 pg3
    192.168.56.200 pg4

軟體版本:

  • postgresql 10.14
    repmgr 5.1.0
    keepalived 2.0.20


1. 系統環境準備

--所有節點配置

1.1 新增/etc/hosts

192.168.56.201 pg1
192.168.56.202 pg2
192.168.56.203 pg3
192.168.56.200 pg4

1.2 關閉並禁用防火牆

systemctl stop firewalld
systemctl disabled firewalld

1.3 禁用/etc/selinux/config

SELINUX=disabled
set enforce 0

1.4 建立postgres使用者

useradd postgres
echo postgres|passwrd postgres --stdin

1.5 建立postgres互信

--sshUserSetup.sh指令碼為Oracle資料庫軟體提供,可以網上下載,或者採用傳統方式建立互信
./sshUserSetup.sh -user postgres -hosts "pg1 pg2 pg3 pg4" -confirmno -noPromptPassphrase -advanced

1.6 軟體依賴包安裝

--需要配置yum,系統ISO映象配置本地yum源即可
yum install -y gcc systemd-devel readline-devel zlib-devel flex autoconfautomake libtool libnl3-devel openssl-devel

1.7 建立軟體安裝目錄

pvcreate /dev/sdb
vgcreate vg_db /dev/sdb
lvcreate -L +10g -n lv_pg vg_db
mkfs.xfs /dev/mapper/vg_db-lv_pg
mkdir /PostgreSQL
mount /dev/mapper/vg_db-lv_pg /PostgreSQL --新增/etc/fstab
mkdir /PostgreSQL/data
chmod 700 /PostgreSQL/data
chown -R postgres:postgres /PostgreSQL


2. postgresql資料庫配置

--所有節點配置

2.1 資料庫軟體安裝

su - root
tar -zxvf postgresql-10.14.tar.gz
cd postgresql-10.14
./configure --prefix=/PostgreSQL --with-systemd
make world -j8
make install-world -j8

2.2 配置postgres使用者環境變數~/.bash_profile

su - postgres
export PGUSER=postgres
export PGPORT=5432
export PGDATA=/PostgreSQL/data
export PGHOME=/PostgreSQL
export PATH=$PGHOME/bin:$PATH
export PGPASSFILE=~postgres/.pgpass # repmgr password file

 2.3 配置訪問控制pg_hba.conf

host    all         all             192.168.56.1/24        trust
local    replication     repmgr                          trust
host    replication     repmgr          127.0.0.1/32          trust
host    replication     repmgr          192.168.56.0/24        trust
local    repmgr        repmgr                          trust
host    repmgr        repmgr          127.0.0.1/32          trust
host    repmgr        repmgr          192.168.56.0/24         trust

2.4 修改配置檔案postgresql.conf

listen_addresses = '*'   
port = 5432 
max_connections = 100        #最大連結數
wal_level = replica         #wal日誌級別決定了有多少資訊寫入wal,或者logical
wal_log_hints = on         #預寫日誌命中,日誌修改的時候預寫日誌進行記錄,同步必須開on 維護系統的命令也記錄
archive_mode = on          #開啟歸檔模式 流複製必須開
archive_command = '/bin/true'   #預設填寫 9.5以後淘汰了
max_wal_senders = 10        #傳送流複製的程式數
max_replication_slots = 10
hot_standby = on          #開啟熱備
shared_preload_libraries =  'repmgr'   #安裝的外掛
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
log_truncate_on_rotation = 'on'
log_rotation_age = '1d'
log_rotation_size = 0

2.5 啟動資料庫

su - postgres
pg_ctl start -D $PGDATA -l /tmp/logfile

2.6 建立repmgr使用者和資料庫

su - postgres
createuser -s repmgr 
createdb -O repmgr repmgr 
psql -c 'ALTER USER repmgr SET search_path TO repmgr,"$user",public;'

2.7使用者連線測試

su - postgres
psql 'host=pg1 user=repmgr dbname=repmgr connect_timeout=2'
psql 'host=pg2 user=repmgr dbname=repmgr connect_timeout=2'
psql 'host=pg3 user=repmgr dbname=repmgr connect_timeout=2'
psql 'host=pg4 user=repmgr dbname=repmgr connect_timeout=2'


3. repmgr叢集配置

--所有節點配置

3.1 repmgr軟體安裝

su - root
tar -zxvf repmgr-5.1.0.tar.gz
export PATH=/PostgreSQL/bin:$PATH
cd repmgr-5.1.0
./configure
make && make install

3.2 修改repmgr.conf

cp repmgr-5.1.0/repmgr.conf.sample /etc/repmgr.conf
vi /etc/repmgr.conf
# node_id / node_name / conninfo 每個節點不一樣
# 本地節點連線配置
node_id=1                        
node_name='pg1'                  
conninfo='host=pg1 dbname=repmgr user=repmgr connect_timeout=2'                  
data_directory='/PostgreSQL/data'                
replication_user='repmgr'        
# 使用複製槽,防止日誌覆寫或丟失
use_replication_slots=yes
# 啟用witness使用        
witness_sync_interval=15
primary_visibility_consensus=true
# repmgr日誌設定         
log_level='INFO'                 
log_file='/PostgreSQL/data/repmgrd.log'                  
log_status_interval=30   
# 執行檔案路徑
pg_bindir='/PostgreSQL/bin'
repmgr_bindir='/PostgreSQL/bin'
# 密碼檔案位置,前面配置了PGPASSFILE環境變數
passfile='/home/postgres/.pgpass'
ssh_options='-q -o ConnectTimeout=10'
# repmgrd自動監控配置
monitoring_history=yes
monitor_interval_secs=2
 
# 自動切換配置
repmgrd_service_start_command = 'repmgrd --daemonize=true'
repmgrd_service_stop_command = 'kill `cat /PostgreSQL/repmgrd.pid`'
repmgrd_pid_file='/PostgreSQL/repmgrd.pid'
 
failover='automatic'
reconnect_attempts=4
reconnect_interval=10
promote_command='/PostgreSQL/data/bin/repmgr standby promote -f /etc/repmgr.conf --log-to-file'
follow_command='/PostgreSQL/data/bin/repmgr standby follow -f /etc/repmgr.conf --upstream-node-id=%n --log-to-file'
service_start_command = '/PostgreSQL/bin/pg_ctl  -w -D /PostgreSQL/data start'
service_stop_command = '/PostgreSQL/bin/pg_ctl  -D /PostgreSQL/data -W -m fast stop'
service_restart_command = '/PostgreSQL/bin/pg_ctl  -w -D /PostgreSQL/data restart'
service_reload_command = '/PostgreSQL/bin/pg_ctl  -w -D /PostgreSQL/data reload'
service_promote_command = '/PostgreSQL/bin/pg_ctl  -w -D /PostgreSQL/data promote

3.3 註冊主節點

--節點一操作,本處定義pg1為primary
[postgres@pg1 ~]$ repmgr primary register
[postgres@pg1 ~]$ repmgr cluster show
 ID | Name | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                   
----+------+---------+-----------+----------+----------+----------+----------+------------------------------------------------------
 1  | pg1  | primary | * running |          | default  | 100      | 1        | host=pg1 user=repmgr dbname=repmgr connect_timeout=2

3.4 克隆並註冊備用節點

--預註冊
[postgres@pg2 ~]$ repmgr -h pg1 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run
..........
INFO:all prerequisites for "standby clone" are met
--註冊
[postgres@pg2 ~]$ repmgr -h pg1 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone
NOTICE:you can now start your PostgreSQL server
HINT:for example: pg_ctl -D /PostgreSQL/data start
HINT:after starting the server, you need to register this standby with "repmgr standby register"
[postgres@pg2 ~]$ pg_ctl -D /PostgreSQL/data start -l /tmp/logfile
[postgres@pg2 ~]$ repmgr standby register
-- pg3 重複以上操作
--檢視叢集狀態
[postgres@pg1 ~]$ repmgr cluster show
 ID | Name | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                   
----+------+---------+-----------+----------+----------+----------+----------+------------------------------------------------------
 1  | pg1  | primary | * running |          | default  | 100      | 1        | host=pg1 user=repmgr dbname=repmgr connect_timeout=2
 2  | pg2  | standby |   running | pg1      | default  | 100      | 1        | host=pg2 user=repmgr dbname=repmgr connect_timeout=2
 3  | pg3  | standby |   running | pg1      | default  | 100      | 1        | host=pg3 user=repmgr dbname=repmgr connect_timeout=2

3.5 克隆並註冊witness節點

--postgresql.conf,pg_hba.conf,repmgr.conf檔案已經配置好,資料庫前面已經啟動,直接註冊witness節點即可
[postgres@pg4 ~]$ repmgr witness register -h pg1 -U repmgr
INFO:connecting to witness node "pg4" (ID: 4)
INFO:connecting to primary node
NOTICE:attempting to install extension "repmgr"
NOTICE:"repmgr" extension successfully installed
INFO: witness registration complete
NOTICE:witness node "pg4" (ID: 4) successfully registered
--檢視叢集狀態
[postgres@pg1  ~]$ repmgr service status --detail
 ID | Name | Role    | Status    | Upstream | Location | Priority |   repmgrd   | PID   | Paused? | Upstream last seen
----+------+---------+-----------+----------+----------+----------+-------------+-------+---------+--------------------
 1  | pg1  | primary | * running |          | default  | 100      | not running | 11451 | no      | n/a                
 2  | pg2  | standby |   running | pg1      | default  | 100      | not running | 2546  | no      | 1 second(s) ago    
 3  | pg3  | standby |   running | pg1      | default  | 100      | not running | 16018 | no      | 0 second(s) ago    
 4  | pg4  | witness | * running | pg1      | default  | 0        | not running | 26061 | no      | 0 second(s) ago

3.6 啟動repmgrd守護節點

--因為前面已經配置了repmgrd_service_start_command / repmgrd_service_stop_command引數
[postgres@pg1  ~]$ repmgr daemon start
[postgres@pg2  ~]$ repmgr daemon start
[postgres@pg3  ~]$ repmgr daemon start
[postgres@pg4  ~]$ repmgr daemon start
--檢視叢集狀態
[postgres@pg1 ~]$repmgr service status --detail
 ID | Name | Role    | Status    | Upstream | Location | Priority | repmgrd | PID   | Paused? | Upstream last seen
----+------+---------+-----------+----------+----------+----------+---------+-------+---------+--------------------
 1  | pg1  | primary | * running |          | default  | 100      | running | 11451 | no      | n/a                
 2  | pg2  | standby |    running | pg1      | default  | 100      | running | 2546  | no      | 1 second(s) ago    
 3  | pg3  | standby |    running | pg1      | default  | 100      | running | 16018 | no      | 0 second(s) ago    
 4  | pg4  | witness | * running | pg1      | default  | 0        | running | 26061 | no      | 0 second(s) ago  
 
 --節點間連通性檢測
[postgres@pg1  ~]$ repmgr cluster crosscheck
INFO: connecting to database
Name | Id | 1  | 2  | 3  | 4 
-----+----+----+----+----+----
pg1 | 1 | * | * | * | * 
pg2 | 2 | * | * | * | * 
pg3 | 3 | * | * | * | * 
pg4 | 4 | * | * | * | *


4 keepalived配置

4.1 keepalived軟體安裝(pg1/pg2/pg3)

su - root
tar -zxvf keepalived-2.0.20.tar.gz
cd keepalived-2.0.20
./autogen.sh
./configure
make -j4
make install

4.2 修改keepalived.conf(pg1/pg2/pg3)

su - root
mkdir /etc/keepalived
cp /usr/local/etc/keepalived/keepalived.conf  /etc/keepalived/keepalived.conf
vi /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
   notification_email {
     acassen@firewall.loc
     failover@firewall.loc
     sysadmin@firewall.loc
   }
   notification_email_from Alexandre.Cassen@firewall.loc
   smtp_server 127.0.0.1  --根據實際修改
   smtp_connect_timeout 30
   router_id LVS_DEVEL
   vrrp_skip_check_adv_addr
   vrrp_strict  --註釋掉,可能導致ip不能ping通
   vrrp_garp_interval 0.1  --防止日誌告警,隨便修改
   vrrp_gna_interval 0.1  --防止日誌告警,隨便修改
}
 
vrrp_instance VI_1 {
    state MASTER
    interface enp0s3  --繫結虛擬ip的網路卡
    virtual_router_id 51
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    virtual_ipaddress {
        192.168.56.204  --需要繫結的ip地址
        192.168.56.205  --需要繫結的ip地址
        192.168.56.206  --需要繫結的ip地址
    }
}

4.3 啟動keepalived服務(pg1/pg2/pg3)

su - root
systemctl start keepalived
systemctl enable keepalived
systemctl is-enabled keepalived
--檢視ip掛載狀態
[postgres@pg1 tmp]$ifconfig -a
......
enp0s3:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST>  mtu 1500
        inet 192.168.56.204  netmask 255.255.255.0  broadcast 0.0.0.0
        ether 08:00:27:4f:a5:a9  txqueuelen 1000  (Ethernet)

       

5 配置keepalived切換指令碼

PostgreSQL repmgr高可用叢集+keepalived高可用 SwitchoverKeepAlived.zip

5.1 systemctl呼叫許可權設定(pg1/pg2/pg3)

[postgres@pg1 ~]$visudo
## Allow root to run any commands anywhere
root          ALL=(ALL)       ALL
postgres        ALL=(ALL)       NOPASSWD: /usr/bin/systemctl

5.2 建立切換指令碼守護程式(pg1/pg2/pg3)

--透過計劃任務(後面新增)每分鐘執行一次,確認keepalived_repmgr.sh切換指令碼在執行
--指令碼前面定義了1個變數,定義了指令碼存放位置了日誌存放位置
cd ~postgres
vi keepalived_daemon.sh
#!/bin/sh
#######################################################################################
#   Author: sync_tb
#  Created: 2020/09/01
#  ToDo:
#    Daemon for monitoring keepalived failover scripts
#######################################################################################
 
SCR_DIR='/home/postgres'
KEEP_MGR=`ps -ef|grep 'keepalived_repmgr.sh'|grep -v grep|wc -l`
if [[ ${KEEP_MGR} -lt 1 ]]; then
    nohup /bin/sh ${SCR_DIR}/keepalived_repmgr.sh >> ${SCR_DIR}/keepalived_repmgr.log &
fi

5.3 建立切換指令碼(pg1/pg2/pg3)

--透過掃描repmgr叢集節點確認primary節點,然後透過修改keepalived權重(priority)引數控制,利用IP搶佔模式
把IP漂移到repmgr primary節點上面,整個切換過程需要一定時間,大概10s左右,可以根據實際進行調整,不宜太短,
時間太短可能導致IP漂移沒有完成的情況下,開始下一次檢測然後就會使切換指令碼處於一個迴圈修改priority的情況中,
從而導致VIP在三個節點間來回切換。
--指令碼前面定義了6個變數,可以透過NODES引數控制keepalived只在某幾個節點上面漂移,此時需要確報repmgr primary
也要在這幾個節點上面。
cd ~postgres
vi keepalived_repmgr.sh
#!/bin/sh
#######################################################################################
#   Author: sync_tb
#  Created: 2020/09/01
#  ToDo:
#    Created for monitoring keepalived for PostgreSQL repmgr cluster
#    When production shutdown with errors,it needs about 10s to failover.
#######################################################################################
 
NODES='pg1 pg2 pg3'  # 叢集節點
VIP='192.168.56.204'   # VIP,多個請用管道隔開(|)
PGHOME=/PostgreSQL     # $PGHOME
REPMGR_CONF='/etc/repmgr.conf'  # repmgr配置檔案
KEEP_CONF='/etc/keepalived/keepalived.conf' # keepalived配置檔案
 
# state time function to write logfiles
function log_time(){
    date '+%Y-%m-%d %H:%M:%S '
}
 
echo "$(log_time) Repmgr and KeepAlived auto-failover monitoring start..."
while true
do
    for NODE in ${NODES}
    do
        KEEP_STA=`ssh $NODE "/usr/sbin/ip a|grep -w ${VIP}|wc -l"`
        if [[ ${KEEP_STA} -eq 1 ]]; then
            KEEP_MASTER=$NODE
            echo "$(log_time) KeepAlived Master is ${KEEP_MASTER}."
        fi
    done
 
    for NODE in ${NODES}
    do
        MGR_STA=`ssh $NODE "$PGHOME/bin/pg_controldata|grep 'in production'|wc -l"`
        if [[ ${MGR_STA} -ne 1 ]]; then
            continue
        else
            MGR_PRIMARY=$NODE
            echo "$(log_time) Repmgr Cluster Primary is ${MGR_PRIMARY}."
            break
        fi
    done
 
    sleep 1s
    if [[ ${KEEP_MASTER} == ${MGR_PRIMARY} ]]; then
        echo "$(log_time) High Available Architecture running normally."
    else
        echo "$(log_time) KeepAlived Master is ${KEEP_MASTER}, but Repmgr Primary is ${MGR_PRIMARY}."
        for NODE in ${NODES}
        do
 
            case $NODE in
                ${MGR_PRIMARY} )
                    ssh ${NODE} "sed -i 's/priority .*/priority 100/g' ${KEEP_CONF}"
                    ssh ${NODE} "sudo systemctl restart keepalived"
                    echo "$(log_time) Modify KeepAlived priority for ${NODE} to 100."
                    ;;
                ${KEEP_MASTER} )
                    ssh ${NODE} "sed -i 's/priority .*/priority 60/g' ${KEEP_CONF}"
                    ssh ${NODE} "sudo systemctl restart keepalived"
                    echo "$(log_time) Modify KeepAlived priority for ${NODE} to 60."
                    ;;
                * )
                    ssh ${NODE} "sed -i 's/priority .*/priority 80/g' ${KEEP_CONF}"
                    ssh ${NODE} "sudo systemctl restart keepalived"
                    echo "$(log_time) Modify KeepAlived priority for ${NODE} to 80."
                    ;;
            esac
        done
        SECS=10  # 切換等待時長,單位秒
        while [[ ${SECS} -ge 0 ]]
        do
            echo "$(log_time) Waiting for failover to target: ${SECS}s"
            let SECS--
            if [[ ${SECS} -eq 0 ]]; then
                break
            fi
            sleep 1s
        done
        echo "$(log_time) Failover KeepAlived to ${MGR_PRIMARY} completed."
    fi
    sleep 2s
done

5.4 新增計劃任務(pg1/pg2/pg3)

[postgres@pg1 ~]$crontab -l
* * * * * sh /home/postgres/keepalived_daemon.sh
--日誌預覽
[postgres@pg1 ~]$tail -f ~/keepalived_repmgr.log 
2020-09-02 04:34:50  High Available Architecture running normally.
2020-09-02 04:34:52  KeepAlived Master is pg1.
2020-09-02 04:34:52  Repmgr Cluster Primary is pg1.
2020-09-02 04:34:53  High Available Architecture running normally.
2020-09-01 22:34:36  KeepAlived Master is pg3.
2020-09-01 22:34:36  Repmgr Cluster Primary is pg2.
2020-09-01 22:34:37  KeepAlived Master is pg3, but Repmgr Primary is pg2.
2020-09-01 22:34:38  Modify KeepAlived priority for pg1 to 80.
2020-09-01 22:34:40  Modify KeepAlived priority for pg2 to 100.
2020-09-01 22:34:41  Modify KeepAlived priority for pg3 to 60.
2020-09-01 22:34:41  Waiting for failover to target: 5s
2020-09-01 22:34:42  Waiting for failover to target: 4s
2020-09-01 22:34:43  Waiting for failover to target: 3s
2020-09-01 22:34:44  Waiting for failover to target: 2s
2020-09-01 22:34:45  Waiting for failover to target: 1s
2020-09-01 22:34:45  Failover KeepAlived to pg2 completed.
2020-09-01 22:34:48  KeepAlived Master is pg2.
2020-09-01 22:34:48  Repmgr Cluster Primary is pg2.
2020-09-01 22:34:49  High Available Architecture running normally.


結束


來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/30496307/viewspace-2716459/,如需轉載,請註明出處,否則將追究法律責任。

相關文章