## 环境:
PostgreSQL版:9.6
角色 OS IP
master CentOS7 10.100.12.73
slave CentOS7 10.100.12.74
vIP 10.100.12.63
## 主从安装postgresql
postgresql官网安装文档:https://www.postgresql.org/download/linux/redhat/
* Install the repository RPM:
yum -y install https://download.postgresql.org/pub/repos/yum/9.6/redhat/rhel-7-x86_64/pgdg-redhat96-9.6-3.noarch.rpm
* Install the client packages:
yum -y install postgresql96
* Optionally install the server packages:
yum -y install postgresql96-server postgresql96-devel
* Optionally initialize the database and enable automatic start:
/usr/pgsql-9.6/bin/postgresql96-setup initdb mv /usr/lib/systemd/system/postgresql-9.6.service /usr/lib/systemd/system/postgresql.service systemctl enable postgresql 暂时先不启动服务
把/usr/pgsql-9.6/bin 加入系统环境变量
tail /etc/profile
## PATH export PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin:/usr/pgsql-9.6/bin
重载环境变量
. /etc/profile
## master服务器配置
启动postgresql服务
systemctl start postgresql
创建同步用户repluser
su - postgres psql create role repluser login replication encrypted password 'yHJ7TXda9q9zzIsv';
postgres=# du # 查看用户
q
编辑 /var/lib/pgsql/9.6/data/pg_hba.conf 新增下面两行
host replication repluser 10.100.12.74/32 md5
host all all 0.0.0.0/0 md5
mkdir -p /data/pgsql/archivedir; chown -R postgres:postgres /data/pgsql/archivedir
编辑 /var/lib/pgsql/9.6/data/postgresql.conf
listen_addresses = '*' # what IP address(es) to listen on; port = 5432 # (change requires restart) max_connections = 512 # (change requires restart) #从库的 max_connections要大于主库 shared_buffers = 128MB # min 128kB dynamic_shared_memory_type = posix # the default is the first option wal_level = hot_standby # minimal, replica, or logical #热备模式
archive_mode = on # enables archiving; off, on, or always #允许归档
archive_command = 'test ! -f /data/pgsql/archivedir/%f && cp %p /data/pgsql/archivedir/%f' # command to use to archive a logfile segment max_wal_senders = 8 # max number of walsender processes #可以设置最多几个流复制链接,差不多有几个从,就设置多少 wal_keep_segments = 1024 # in logfile segments, 16MB each; 0 disables log_destination = 'stderr' # Valid values are combinations of logging_collector = on # Enable capturing of stderr and csvlog log_directory = 'pg_log' # directory where log files are written, log_filename = 'postgresql-%a.log' # log file name pattern, log_truncate_on_rotation = on # If on, an existing log file with the log_rotation_age = 1d # Automatic rotation of logfiles will log_rotation_size = 0 # Automatic rotation of logfiles will log_line_prefix = '< %m > ' # special values: log_timezone = 'PRC' datestyle = 'iso, mdy' timezone = 'PRC' lc_messages = 'en_US.UTF-8' # locale for system error message lc_monetary = 'en_US.UTF-8' # locale for monetary formatting lc_numeric = 'en_US.UTF-8' # locale for number formatting lc_time = 'en_US.UTF-8' # locale for time formatting default_text_search_config = 'pg_catalog.english'
重启postgresql服务
systemctl restart postgresql
## slave服务器配置
mkdir -p /data/pgsql/archivedir; chown -R postgres:postgres /data/pgsql/archivedir
su - postgres rm -rf /var/lib/pgsql/9.6/data/* #开始没有启动从库服务,这一步可以省略
pg_basebackup -h 10.100.12.73 -U repluser -D /var/lib/pgsql/9.6/data -X stream -P
cp /usr/pgsql-9.6/share/recovery.conf.sample /var/lib/pgsql/9.6/data/recovery.conf
修改配置文件 /var/lib/pgsql/9
.6
/data/recovery
.conf
grep -v "^#" /var/lib/pgsql/9.6/data/recovery.conf recovery_target_timeline = 'latest' standby_mode = on primary_conninfo = 'host=10.100.12.73 port=5432 user=repluser password=yHJ7TXda9q9zzIsv' # e.g. 'host=localhost port=5432' trigger_file = '/var/lib/pgsql/9.6/data/trigger.kenyon' #主从切换时后的触发文件,即 touch /var/lib/pgsql/9.6/data/trigger.kenyon 就可切换主从,也可以使用命令 /usr/pgsql-9.6/bin/pg_ctl promote
配置postgresql.conf文件
listen_addresses = '*' # what IP address(es) to listen on; port = 5432 # (change requires restart) max_connections = 1024 # (change requires restart) 一般从的最大链接要大于主的 shared_buffers = 128MB # min 128kB dynamic_shared_memory_type = posix # the default is the first option wal_level = hot_standby # minimal, replica, or logical archive_mode = on # enables archiving; off, on, or always archive_command = 'test ! -f /data/pgsql/archivedir/%f && cp %p /data/pgsql/archivedir/%f' # command to use to archive a logfile segment max_wal_senders = 8 # max number of walsender processes wal_keep_segments = 1024 # in logfile segments, 16MB each; 0 disables hot_standby = on # "on" allows queries during recovery #说明这台机器不仅仅用于数据归档,也用于查询 max_standby_streaming_delay = 30s # max delay before canceling queries wal_receiver_status_interval = 10s # send replies at least this often #多久向主报告一次从的状态 hot_standby_feedback = on # send info from standby to prevent #如果有错误的数据复制,是否向主进行反馈 log_destination = 'stderr' # Valid values are combinations of logging_collector = on # Enable capturing of stderr and csvlog log_directory = 'pg_log' # directory where log files are written, log_filename = 'postgresql-%a.log' # log file name pattern, log_truncate_on_rotation = on # If on, an existing log file with the log_rotation_age = 1d # Automatic rotation of logfiles will log_rotation_size = 0 # Automatic rotation of logfiles will log_line_prefix = '< %m > ' # special values: log_timezone = 'PRC' datestyle = 'iso, mdy' timezone = 'PRC' lc_messages = 'en_US.UTF-8' # locale for system error message lc_monetary = 'en_US.UTF-8' # locale for monetary formatting lc_numeric = 'en_US.UTF-8' # locale for number formatting lc_time = 'en_US.UTF-8' # locale for time formatting default_text_search_config = 'pg_catalog.english'
启动 postgresql服务
## 查看postgresql主从状态
在 master上执行
su - postgres psql select client_addr,sync_state from pg_stat_replication; select * from pg_stat_replication;
pg_controldata /var/lib/pgsql/9.6/data # 这种方法对于直接kill进程的情况下是不适用的,查看结果不准确,Database cluster state:信息
主库状态为:in production
备机状态为: in archive recovery
## keepalived配置
主从安装keepalived
yum -y install keepalived
master keepalived 配置
cat /etc/keepalived/keepalived.conf
global_defs { notification_email { admin@xx.com } notification_email_from keepalived@xx.com smtp_server 127.0.0.1 smtp_connect_timeout 30 router_id pg_ha } vrrp_script chk_postgresql { script "/etc/keepalived/script/script/check_postgresql.sh |grep 'postgresql_success' " interval 2 weight -10 } vrrp_instance VI_1 { state BACKUP ############ 辅机为 BACKUP interface eth0 virtual_router_id 62 mcast_src_ip 10.100.12.73 priority 100 ########### 权值要比 back 高 advert_int 2 nopreempt authentication { auth_type PASS auth_pass SNKQusp4kFpUKz } track_script { chk_postgresql ### 执行监控的服务 } virtual_ipaddress { 10.100.12.63 } notify_master "/bin/python /etc/keepalived/script/keepalived_notify.py 'PostgreSQL-1 [10.100.12.73] change to master, vip:10.100.12.63' " notify_backup "/bin/python /etc/keepalived/script/keepalived_notify.py 'PostgreSQL-1 [10.100.12.73] postgresql check faild, change to slave, vip:10.100.12.63' " }
sh脚本:
cd /etc/keepalived/script #sh脚本赋予可执行权限
cat check_postgresql.sh
#!/bin/bash # songyanlin pguser="postgres" BIN="/usr/pgsql-9.6/bin" datef=`date +%Y-%M-%d" "%H:%m` data_dir="/var/lib/pgsql/9.6/data" log_dir="/var/log/postgresql.log" service_name="postgresql" pid="postmaster" status="postgresql_failed" status_success="postgresql_success" function CheckService(){ local ret=`$BIN/pg_controldata $data_dir |grep -E "in production|in archive recovery" |wc -l` echo $ret } function CheckPs(){ local ret=`pidof $pid |wc -l` echo $ret } if [ $(CheckService) == 0 -o $(CheckPs) == 0 ]; then echo "$datef postgresql master status is erro!" >> $log_dir service $service_name restart if [ $(CheckService) != 0 -a $(CheckPs) != 0 ]; then status=$status_success fi else status=$status_success fi echo $status
cat keepalived_notify.py
#!/usr/bin/env python # -*- coding:utf-8 -*- import smtplib from email.mime.text import MIMEText from email.header import Header import sys, time, subprocess, random # 第三方 SMTP 服务 mail_host="smtp.exmail.qq.com" #设置服务器 userinfo_list = [{'user':'rp1@qq.com','pass':'pwd'}, {'user':'rp2@qq.com','pass':'pwd'}, {'user':'rp3@tuandai.com','pass':'pwd'}] user_inst = userinfo_list[random.randint(0, len(userinfo_list)-1)] mail_user=user_inst['user'] #用户名 mail_pass=user_inst['pass'] #口令 sender = mail_user # 邮件发送者 receivers = ['mymail@163.com', 'gogo@qq.com'] # 接收邮件,可设置为你的QQ邮箱或者其他邮箱 p = subprocess.Popen('hostname', shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) hostname = p.stdout.readline().split(' ')[0] message_to = '' for i in receivers: message_to += i + ';' def print_help(): note = '''python script.py message ''' print(note) exit(1) time_stamp = time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())) message_content = '' if len(sys.argv) == 2: message_content = '%s [%s] %s' %(time_stamp, hostname ,sys.argv[1]) subject = '%s [%s] postgresql status is error' %(time_stamp, hostname) else: print_help() message = MIMEText(message_content, 'plain', 'utf-8') message['From'] = Header(sender, 'utf-8') message['To'] = Header(message_to, 'utf-8') message['Subject'] = Header(subject, 'utf-8') try: smtpObj = smtplib.SMTP() smtpObj.connect(mail_host, 25) # 25 为 SMTP 端口号 smtpObj.login(mail_user,mail_pass) smtpObj.sendmail(sender, receivers, message.as_string()) print("邮件发送成功") except smtplib.SMTPException as e: print("Error: 无法发送邮件") print(e)
slave keepalived配置
cat /etc/keepalived/keepalived.conf
global_defs { notification_email { admin@xx.com } notification_email_from keepalived@xx.com smtp_server 127.0.0.1 smtp_connect_timeout 30 router_id pg_ha } vrrp_script chk_postgresql { script "/etc/keepalived/script/check_postgresql.sh |grep 'postgresql_success' " interval 2 weight -10 } vrrp_instance VI_1 { state BACKUP ############ 辅机为 BACKUP interface eth0 virtual_router_id 62 mcast_src_ip 10.100.12.74 priority 99 ########### 权值要比 back 高 advert_int 2 #nopreempt authentication { auth_type PASS auth_pass SNKQusp4kFpUKz } track_script { chk_postgresql ### 执行监控的服务 } virtual_ipaddress { 10.100.12.63 } notify_master "/etc/keepalived/script/postgresql_slave_to_master.sh" }
sh脚本:
check_postgresql.sh keepalived_notify.py与master相同
cat postgresql_slave_to_master.sh
#!/bin/bash # pguser="postgres" BIN="/usr/pgsql-9.6/bin" datef=`date +%Y-%M-%d" "%H:%m` data_dir="/var/lib/pgsql/9.6/data" log_dir="/var/log/postgresql.log" service_name="postgresql" pid="postmaster" status="postgresql_failed" status_success="postgresql_success" function CheckService(){ local ret=`$BIN/pg_controldata $data_dir |grep "in production" |wc -l` echo $tet } function CheckStatus(){ local ret=`$BIN/pg_controldata $data_dir |grep "shut down in recovery" |wc -l` echo $ret } function CheckStatus2(){ local ret=`$BIN/pg_controldata $data_dir |grep "in archive recovery" |wc -l` echo $ret } if [ $(CheckStatus) != 0 ];then service $service_name restart fi if [ $(CheckStatus2) != 0 ]; then su - $pguser -c "$BIN/pg_ctl promote" fi /bin/python /etc/keepalived/script/keepalived_notify.py "PostgreSQL[10.100.12.74] change to master, vip:10.100.12.63"
附:
若主从已经切换后,把原来的master设置为从,可按上面从机设置方法设置
postgresql扩展组件
报错:
Running handlers:
There was an error running gitlab-ctl reconfigure:
bash[migrate gitlab-rails database] (gitlab::database_migrations line 51) had an error: Mixlib::ShellOut::ShellCommandFailed: Expected process to exit with [0], but received '1'
---- Begin output of "bash" "/tmp/chef-script20180125-31534-ul2ug1" ----
STDOUT: rake aborted!
ActiveRecord::StatementInvalid: PG::UndefinedFile: ERROR: could not open extension control file "/usr/pgsql-9.6/share/extension/pg_trgm.control": No such file or directory
: CREATE EXTENSION IF NOT EXISTS "pg_trgm"
/opt/gitlab/embedded/service/gitlab-rails/db/schema.rb:18:in `block in <top (required)>'
/opt/gitlab/embedded/service/gitlab-rails/db/schema.rb:14:in `<top (required)>'
/opt/gitlab/embedded/service/gitlab-rails/lib/tasks/gitlab/db.rake:52:in `block (3 levels) in <top (required)>'
/opt/gitlab/embedded/bin/bundle:23:in `load'
/opt/gitlab/embedded/bin/bundle:23:in `<main>'
Caused by:
PG::UndefinedFile: ERROR: could not open extension control file "/usr/pgsql-9.6/share/extension/pg_trgm.control": No such file or directory
/opt/gitlab/embedded/service/gitlab-rails/db/schema.rb:18:in `block in <top (required)>'
/opt/gitlab/embedded/service/gitlab-rails/db/schema.rb:14:in `<top (required)>'
/opt/gitlab/embedded/service/gitlab-rails/lib/tasks/gitlab/db.rake:52:in `block (3 levels) in <top (required)>'
/opt/gitlab/embedded/bin/bundle:23:in `load'
/opt/gitlab/embedded/bin/bundle:23:in `<main>'
Tasks: TOP => db:schema:load
(See full trace by running task with --trace)
-- enable_extension("plpgsql")
-> 0.0224s
-- enable_extension("pg_trgm")
STDERR:
---- End output of "bash" "/tmp/chef-script20180125-31534-ul2ug1" ----
Ran "bash" "/tmp/chef-script20180125-31534-ul2ug1" returned 1
yum -y install postgresql96-contrib-9.6.6 # 默认的 yum -y install postgresql-contrib
su - postgres -bash-4.2$ psql gitlabhq_production psql (9.6.6) Type "help" for help. postgres=# CREATE EXTENSION pg_trgm;