本文共 12135 字,大约阅读时间需要 40 分钟。
1)小型监控:
1.在pg库主机上部署,每5分钟执行一次,插入到我的测试pg库内
[root@mysqltest tina_shell]# cat jk_pg.sh
#!/bin/bash
# Intended for the relay databases 192.168.12.8 and 12.2.
# Samples a few indicators on the local PostgreSQL host and inserts them as
# one row into table jk_pg of the monitoring database.

# 5432 when some socket is listening on that port, empty otherwise.
running_port=$(netstat -nat | awk '$6 == "LISTEN" && $4 ~ /:5432$/ {print 5432; exit}')
jk_host=$(ifconfig | grep "inet addr:192.168" | awk '{print $2}' | awk -F : '{print $2}')
record_time=$(date +"%Y-%m-%d %H:%M:%S")
waiting_count=$(ps -ef | grep postgres | grep -v startup | grep -c waiting)
# Field 15 of the streaming wal process line; the sample rows below show
# values such as F2B/CE5349B0 in this column.
streaming=$(ps -ef | grep wal | grep streaming | awk '{print $15}')
#tbjk=`ps -ef|grep postgres|grep startup|grep waiting|wc -l`
cipan=$(df -ah | grep % | grep -v tmpfs | grep -v boot)
usersum=$(ps -ef | grep postgres | grep -cE "engine|fenxi|sqluser")
#echo $jk_host $record_time $waiting_count $streaming $tbjk >>/tmp/pg_check_state.log

# running_port is an integer column and rejects '', so store NULL when the
# port was not found; otherwise no row would be recorded while pg is down.
port_sql=${running_port:-NULL}
# Double any single quote so the df output cannot terminate the SQL literal.
sq="'"
cipan_sql=${cipan//$sq/$sq$sq}
psql -h 192.168.12.31 -U postgres -p 1922 -d tina -c "insert into jk_pg(jk_host,record_time,waiting_count,streaming,running_port,cipan,usersum) values('$jk_host','$record_time','$waiting_count','$streaming',$port_sql,'$cipan_sql','$usersum');"
2.部署crontab
cat /etc/crontab
0 20 * * * root sh /tina_shell/backup.sh
4 * * * * root sh /tina_shell/pg_delete_archivelog.sh
*/5 * * * * root sh /tina_shell/jk_pg.sh
3.建表
CREATE TABLE jk_pg (
  id serial NOT NULL,
  jk_host character varying, -- IP address of the monitored host
  record_time timestamp without time zone, -- time the sample was taken
  waiting_count integer, -- number of processes in waiting state: ps -ef|grep postgres|grep -v startup |grep waiting|wc -l
  streaming character varying, -- WAL position currently being streamed: ps -ef|grep wal|grep streaming |awk '{print $15}'
  usersum integer, -- total number of connected users (sqluser, engine, fenxi)
  tbjk integer, -- ps -ef|grep postgres|grep startup|grep waiting|wc -l
  running_port integer, -- whether pg is up: NULL (port 5432 not listening) means pg is down
  cipan character varying, -- disk usage
  locks character varying, -- table locks
  beizhu character varying -- free-form notes about anomalies
) WITH ( OIDS=FALSE );
COMMENT ON TABLE jk_pg IS '自制监控表-tina';
查看监控数据
tina=# select * from jk_pg order by record_time desc,jk_host desc limit 4;
id | jk_host | record_time | waiting_count | streaming | usersum | tbjk | running_port | cipan | locks | 
beizhu
------+----------------+---------------------+---------------+--------------+---------+------+--------------+------------------------------------------------------+-------+--------
7654 | 192.168.12.2 | 2016-01-13 11:00:01 | 0 | F2B/CE5349B0 | 161 | | 5432 | Filesystem Size Used Avail Use% Mounted on +| |
| | | | | | | | /dev/sda2 104G 21G 78G 22% / +| |
| | | | | | | | /dev/sdc1 917G 540G 331G 63% /opt/db_backup+| |
| | | | | | | | /dev/sdb 939G 370G 522G 42% /home/pgsql | |
7655 | 192.168.12.1 | 2016-01-13 11:00:01 | 0 | F2B/CEE173E8 | 26 | 0 | 5432 | Filesystem Size Used Avail Use% Mounted on +| |
| | | | | | | | /dev/sda3 103G 6.1G 92G 7% / +| |
| | | | | | | | /dev/sdb1 939G 285G 606G 32% /home/pgsql | |
7653 | 192.168.12.8 | 2016-01-13 11:00:01 | 0 | | 30 | | 5432 | Filesystem Size Used Avail Use% Mounted on +| |
| | | | | | | | /dev/sda3 27G 1.9G 24G 8% / +| |
| | | | | | | | /dev/sda2 29G 4.1G 24G 15% /var +| |
| | | | | | | | /dev/sdb1 252G 118G 122G 50% /home | |
2)pg统计库所有表的行数
[root@pg-ro tmp]# cat tinadb.sh
#!/bin/bash
#2015-11-3 tina
# Appends the row count of every table in schema public of tinadb to
# /tmp/tongji.log.
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "begin time is: $date" >>/tmp/tongji.log
# -A -t make psql print bare values (no header, ruler or "(n rows)" footer),
# so no grep filtering is needed and table names containing "-", "rows" or
# "tablename" are no longer dropped.
psql -U postgres -d tinadb -At -c "select tablename from pg_tables where schemaname='public' order by tablename;" |
  while IFS= read -r table; do
    [ -n "$table" ] || continue
    # Double-quote the identifier so mixed-case or unusual names resolve.
    count=$(psql -U postgres -d tinadb -At -c "select count(*) from \"$table\";" </dev/null)
    # One record = name line, count line, blank line; the awk one-liner below
    # relies on the blank line to end each output row.
    printf '%s\n%s\n\n' "$table" "$count" >>/tmp/tongji.log
  done
#echo "ok!" >>/tmp/tongji.log
查看--并直接粘贴到excel表格中
[root@pg-ro tmp]# cat /tmp/tongji.log |awk 'NF==1{printf "%s ", $1;next}1'
begin time is: 2015-11-03 14:12:12
t1 11024
t2 8267537
t3 1684
t4 2
统计其他库,直接用vi替换功能替换db名即可: 替换 :%s/tinadb/dbname/g
3)pg 定期vacuum和reindex脚本
[root@pg tina_shell]# cat pg_tinadb_vacuum.sh
#!/bin/bash
#2014-10-22 tina
# Runs VACUUM FULL on every table and REINDEX on every non-primary-key index
# in schema public of tinadb, logging progress to /tmp/pg_tinadb_vacuum.log.
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "begin time is: $date" >>/tmp/pg_tinadb_vacuum.log
# -A -t: bare names only, one per line (no header/footer to filter away).
tables=$(psql -U postgres -d tinadb -At -c "select tablename from pg_tables where schemaname='public';")
# Unquoted on purpose: writes all table names on a single log line.
echo $tables >>/tmp/pg_tinadb_vacuum.log
indexes=$(psql -U postgres -d tinadb -At -c "select indexname from pg_indexes where schemaname='public' and indexname not like '%pkey';")
while IFS= read -r table; do
  [ -n "$table" ] || continue
  # Log success only when psql succeeded; errors go to the log as well.
  if psql -U postgres -d tinadb -c "vacuum full \"$table\";" >>/tmp/pg_tinadb_vacuum.log 2>&1 </dev/null; then
    echo "table $table has finished vacuum." >>/tmp/pg_tinadb_vacuum.log
  else
    echo "table $table vacuum failed." >>/tmp/pg_tinadb_vacuum.log
  fi
done <<<"$tables"
while IFS= read -r index; do
  [ -n "$index" ] || continue
  if psql -U postgres -d tinadb -c "reindex index \"$index\";" >>/tmp/pg_tinadb_vacuum.log 2>&1 </dev/null; then
    echo "index $index has finished reindex." >>/tmp/pg_tinadb_vacuum.log
  else
    echo "index $index reindex failed." >>/tmp/pg_tinadb_vacuum.log
  fi
done <<<"$indexes"
查看后台日志:
[root@pg tmp]# tail -f pg_tinadb_vacuum.log
begin time is: 2016-01-13 11:38:26
VACUUM
table t1 has finished vacuum.
VACUUM
table t2 has finished vacuum.
VACUUM
table t3 has finished vacuum.
VACUUM
table t4 has finished vacuum.
REINDEX
index t1_rin_idx has finished reindex.
建议:如果库中存在大表,就单独手动操作,不然可能会导致执行时长时间锁表,影响其他业务。
4)pg日常备份脚本
[root@mysqltest tina_shell]# cat backup.sh
#!/bin/bash
# Local directory where the backups are kept.
bkdir=/home/bk_pg
day=$(date +"%Y%m%d")
# Databases to back up, listed explicitly; alternatively query pg_database
# for all non-template, non-system databases and back those up automatically.
DB="tinadb testdb"
# Stop here if the backup directory is missing instead of working elsewhere.
cd "$bkdir" || exit 1
#result=0
# Start every run with a fresh checksum file (rm -f is silent if it is absent).
rm -f "$bkdir/pg.md5"
for db in $DB
do
pg_dump --host localhost --port 5432 --username "postgres" --format custom --blobs --encoding UTF8 --verbose "$db" --file "$bkdir/$db.$day.backup" &> "$bkdir/bk.log"
pgret=$?
if [ "$pgret" -ne "0" ] then echo "$pgtime $db backup fail" >> $bkdir/pg.md5 exit 1 else md5sum $bkdir/$db.$day.backup >> $bkdir/pg.md5 fi done #上传ftp,异地保存一份备份 lftp backup.work <<END user username userpasswd lcd $bkdir cd 12.8_pg put tinadb.$day.backup put testdb.$day.backup put pg.md5 exit END #删除两天前的备份 find $bkdir/ -type f -mtime +2 -exec rm -f {} \; 5)简易的pg主从同步检测脚本1 [root@mysqltest tina_shell]# cat pg_check_sync.sh #!/bin/bash #check pg database whether is running pg_port=`netstat -nat|grep "LISTEN"|grep "5432"|sed -n 2p|awk -F : '{print $4}'|awk '{gsub(/ /,"")}1'` host_ip=`ifconfig |grep "inet addr:192.168"|awk '{print $2}'|awk -F : '{print $2}'` date=`date +"%Y-%m-%d %H:%M:%S"` echo $date >>/tmp/pg_check_state.log if [ "$pg_port" = "5432" ] then echo "$host_ip postgresql is running" >> /tmp/pg_check_state.log else echo "Warnning -$host_ip postgresql is not running!" >>/tmp/pg_check_state.log fi #check the role of the host pg_role1=`ps -ef |grep wal| awk '{print $10}'|grep "sender"` pg_role2=`ps -ef |grep wal| awk '{print $10}'|grep "receiver"` pg_slave_ip=`ps -ef|grep wal|grep sender|awk '{print $13}'|awk -F "(" '{print $1}'` if [ "$pg_role1" == "sender" -a "$pg_role2" == "" ] then echo "$host_ip is master host and $pg_slave_ip is slave host" >>/tmp/pg_check_state.log else if [ "$pg_role1" == "" -a "$pg_role2" == "receiver" ] then echo "$host_ip is postgresql slave host.Please execute the shell in the master host!" 
>>/tmp/pg_check_state.log else echo "check whether the database has slave host" >>/tmp/pg_check_state.log fi fi #check whether the slave is synchronous pg_sync_status=$(su - postgres -c "psql -c 'select state from pg_stat_replication;'|sed -n 3p") if [ "$pg_sync_status" = " streaming" ] then echo "the slave is synchronous" >>/tmp/pg_check_state.log else echo "warnning - please check the sync status of slave database " >>/tmp/pg_check_state.log fi 执行结果: 1.单节点 [root@mysqltest tina_shell]# cat /tmp/pg_check_state.log 2016-01-13 15:04:53 192.168.12.8 postgresql is running check whether the database has slave host ----请检查该pg库是否有从库 2.主节点 [root@pg tina_shell]# cat /tmp/pg_check_state.log 2016-01-13 15:03:31 192.168.12.2 postgresql is running 192.168.12.2 is master host and 192.168.12.1 is slave host the slave is synchronous ----主从同步 3.从节点 [root@pg tina_shell]# cat /tmp/pg_check_state.log 2016-01-13 15:00:44 192.168.12.1 postgresql is running 192.168.12.1 is postgresql slave host.Please execute the shell in the master host! ---此ip上pg是从库,请在主库上执行脚本 6)简易的pg主从同步检测脚本2 root@pg /usr/lib64/nagios/plugins]#cat check_pgsync.sh #!/bin/bash # nrpe command: check pg sql and sync state. # customer config pgport= pgdbname= pgdbuser= # default value. pgport=${pgport:-5432} pgdbname=${pgdbname:-postgres} pgdbuser=${pgdbuser:-postgres} if [ -z "$pgport" ]; then echo "error: pgport no defined" exit 4 fi msg_ok="OK - pg is running and slave is synchronous." msg_warn="WARNING - pg is running but slave synchronous fail." msg_crit="CRITIAL - pg is not running on port: $pgport" # check pg running if netstat -ntple | grep -q "[:]$pgport"; then # check slave db host. if ps -ef | grep -q "[w]al receiver process"; then echo "error: it seems you are running me in slave db host." 
fi # end of the "am I running on the slave?" check
  # check slave synchronous
  if psql -d "$pgdbname" -U "$pgdbuser" \
    -c 'select state from pg_stat_replication;' \
    | grep -q "[s]treaming"
  then
    echo "$msg_ok"
    exit 0
  else
    echo "$msg_warn"
    exit 1
  fi
else
  echo "$msg_crit"
  exit 2
fi
# Not reached: every branch above exits.
exit 5
1.单节点
[root@mysqltest tina_shell]# ./check_pgsync.sh
WARNING - pg is running but slave synchronous fail.
2.主节点
[root@pg tina_shell]# ./check_pgsync.sh
OK - pg is running and slave is synchronous.
3.从节点
[root@pg-ro tina_shell]# ./check_pgsync.sh
error: it seems you are running me in slave db host.
WARNING - pg is running but slave synchronous fail.
7)pg主从切换shell脚本(闲来无事写的,不建议部署生产)
主库:192.168.10.232
从库:192.168.10.233
环境:主从同步,主库突然挂掉
脚本都部署好之后,只需要在主从执行第一个脚本,就会触发后面脚本的操作,一步到位。 (部分参数需要提前设置好)
1、检测主库是否正常启动,如果不是正常启动,就去执行从库的切换脚本
[postgres@localhost tmp]$ cat pg_check_master.sh
#!/bin/bash
#check the master pg whether is running
# 5432 when some socket is listening on that port, empty otherwise; does not
# depend on which netstat line (IPv4 or IPv6) happens to come second.
pg_port=$(netstat -nat | awk '$6 == "LISTEN" && $4 ~ /:5432$/ {print 5432; exit}')
host_ip=$(ifconfig | grep "inet addr:192.168" | awk '{print $2}' | awk -F : '{print $2}')
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "$date" >>/tmp/pg_check_master.log
if [ "$pg_port" = "5432" ]; then
  echo "$host_ip postgresql is running" >> /tmp/pg_check_master.log
else
  echo "Warnning -$host_ip postgresql is not running!" >>/tmp/pg_check_master.log
  echo "the slave is switching to the master ...please waiting" >>/tmp/pg_check_master.log
  ssh 192.168.10.233 "sh /tmp/pg_switch.sh"
fi
2、创建从库的触发文件,将从库启动成主库(触发文件,主库和从库的名字最好不要设置成一样的,以免不好区分)
[postgres@localhost tmp]$ cat pg_switch.sh
#!/bin/bash
#swtch slave to master
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "$date" >>/tmp/pg_switch.log
# Remove a stale marker by absolute path, so rm can never act on another
# directory; the test below treats the file's presence after the trigger
# file was created as proof that the switch happened.
rm -f /pg/data/recovery.done
touch /tmp/pg.trigger.456
sleep 20s
if [ -f '/pg/data/recovery.done' ]; then
  echo "the slave has switched to the master successful!" >> /tmp/pg_switch.log
  echo "the old master is going to switch to the new slave!">>/tmp/pg_switch.log
  # Newest timeline history file; plain ls -t avoids depending on the column
  # layout of ls -l.
  his_file=$(ls -t /pg/data/pg_xlog/0000000*.history | head -n 1)
  scp "$his_file" root@192.168.10.232:/pg/data/pg_xlog
  # start_new_slave.sh runs chown and su, so log in as root like the scp above.
  # NOTE(review): confirm root ssh access to 192.168.10.232 is configured.
  ssh root@192.168.10.232 "sh /tmp/start_new_slave.sh"
else
  echo "warnning:the slave has switched fail!">>/tmp/pg_switch.log
fi
3、注意recovery.conf会随着主从的变化而消失,因此我们可以先将内容写好的文件备份到上一级目录 内容包含如下:
vi /pg/recovery.conf.bak
recovery_target_timeline = 'latest'
standby_mode = 'on'
primary_conninfo = 'host=192.168.10.233 port=5432 user=postgres password=tina'
trigger_file = '/tmp/pg.trigger.456'
4、有了时间线文件、有了recovery.conf,检查一下pg_hba.conf,就可以直接启动pg新从库了,并做一个主从同步的检查。
[root@localhost tmp]# cat start_new_slave.sh
#!/bin/bash
# Starts the old master as the new slave and logs whether a wal receiver
# process appears.
date=$(date +"%Y-%m-%d %H:%M:%S")
echo "$date" >>/tmp/start_new_slave.log
chown postgres:postgres /pg/data/pg_xlog/*.history
cp /pg/recovery.conf.bak /pg/data/recovery.conf
# Absolute path: the script never changes into /pg/data.
chown postgres:postgres /pg/data/recovery.conf
# 2>&1 (not "2&>1", which passes a stray argument 2 to su and writes the
# output to a file named 1) sends stderr to the log as well.
su - postgres -c "pg_ctl -D /pg/data start" >>/tmp/start_new_slave.log 2>&1
# Poll for up to 30 seconds instead of checking once right after the start.
pg_slave_status=
for _ in 1 2 3 4 5 6; do
  pg_slave_status=$(ps -ef | grep wal | awk '$10 == "receiver" {print $10; exit}')
  if [ -n "$pg_slave_status" ]; then
    break
  fi
  sleep 5
done
if [ "$pg_slave_status" = "receiver" ]; then
  echo "the slave sync is ok!" >>/tmp/start_new_slave.log
else
  echo "error:please check the slave whether is running or not!" >>/tmp/start_new_slave.log
fi
8)pg删除归档日志
[root@pg tina_shell]# cat pg_delete_archivedlog.sh
#!/bin/bash
# Delete archived WAL files last modified more than two days ago.
find /home/pgsql/backup_new/archived_log/ -type f -mtime +2 -exec rm -f -- {} +
9)常用拼接sql
select 'select count(*) from '||tablename||';' from pg_tables where schemaname='public';
select 'alter table '||tablename||' add constraint u_'||tablename||' unique(sample_h);' from pg_tables where tablename like 't_wh20%';
转载地址:http://qdpfa.baihongyu.com/