os: centos 7.4
mongodb: 3.2
postgresql: 9.6
mosql:
MoSQL is no longer being actively maintained, which is a pity.
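It still installs as a Ruby gem. A minimal install sketch, assuming the RVM-managed Ruby 1.9.3 shown in the help output below and the postgresql-devel headers the pg gem needs to build its native extension:
# yum install -y postgresql-devel
# gem install mosql
# mosql --help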
# mosql --help
Usage: /usr/local/rvm/rubies/ruby-1.9.3-p551/bin/mosql [options]
    -h, --help                       Display this message
    -v                               Increase verbosity
    -c, --collections [collections.yml]
                                     Collection map YAML file
        --sql [sqluri]               SQL server to connect to
        --mongo [mongouri]           Mongo connection string
        --schema [schema]            PostgreSQL 'schema' to namespace tables
        --ignore-delete              Ignore delete operations when tailing
        --only-db [dbname]           Don't scan for mongo dbs, just use the one specified
        --tail-from [timestamp]      Start tailing from the specified UNIX timestamp
        --service [service]          Service name to use when storing tailing state
        --skip-tail                  Don't tail the oplog, just do the initial import
        --reimport                   Force a data re-import
        --no-drop-tables             Don't drop the table if it exists during the initial import
        --unsafe                     Ignore rows that cause errors on insert
        --oplog-filter [filter]      An additional JSON filter for the oplog query
yml
mosql_to_edw_test_t0.yml
# cat mosql_to_edw_test_t0.yml
fund:
  test_t0:
    :columns:
    - id:
      :source: _id
      :type: TEXT
    - c0:
      :source: c0
      :type: TEXT
    - w_insert_time: timestamp not null default now()
    - w_update_time: timestamp not null default now()
    :meta:
      :table: test_t0
      :extra_props: false
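For reference, the map above corresponds roughly to a PostgreSQL table like the one below. MoSQL creates the table during the initial import if it does not already exist; this is only a sketch, not the exact DDL MoSQL generates, and the connection details are the ones used in the scripts further down.
# psql postgres://peiyb:peiybpeiyb@127.0.0.1/edw <<'SQL'
CREATE TABLE IF NOT EXISTS public.test_t0 (
    id            TEXT,
    c0            TEXT,
    w_insert_time timestamp not null default now(),
    w_update_time timestamp not null default now()
);
SQL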
reimport (initial import)
mosql_to_edw_test_t0_init.sh
# cat mosql_to_edw_test_t0_init.sh
#!/bin/bash
source /etc/profile
date
echo "`date` mosql start!"
mosql -c /root/mosql/mosql_to_edw_test_t0.yml --service test_t0 --only-db test \
      --mongo "mongodb://peiyb:peiybpeiyb@127.0.0.1:27017/test?readPreference=secondary" \
      --sql postgres://peiyb:peiybpeiyb@127.0.0.1/edw --schema public \
      --reimport --no-drop-tables
date
echo "mosql done..."
tail (incremental sync)
Incremental sync is handled by Mongoriver, which tails the MongoDB oplog.
Compared with the init script, simply drop --reimport and --no-drop-tables.
mosql_to_edw_test_t0_tail.sh
# cat mosql_to_edw_test_t0_tail.sh
#!/bin/bash
source /etc/profile
date
echo "`date` mosql start!"
mosql -c /root/mosql/mosql_to_edw_test_t0.yml --service test_t0 --only-db test \
      --mongo "mongodb://peiyb:peiybpeiyb@127.0.0.1:27017/test?readPreference=secondary" \
      --sql postgres://peiyb:peiybpeiyb@127.0.0.1/edw --schema public
date
echo "mosql done..."
monitor
Once the initial import has finished, the incremental (tail) script can run day to day. The tail script must be monitored, though, because the oplog is a capped collection and old entries will eventually be overwritten.
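Before looking at the monitor script, a quick way to check how much oplog headroom the replica set has (a sketch; it assumes the mongo shell is available locally and the user is allowed to read the local database):
# mongo 127.0.0.1:27017/test -u peiyb -p peiybpeiyb --eval 'rs.printReplicationInfo()'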
monitor_mosql_to_edw_tail.sh
# cat monitor_mosql_to_edw_tail.sh
#!/bin/bash
#20180626 peiyb add for monitor mosql disconnect from mongodb
source /etc/profile
echo "#########################################################"
echo "`date` monitor mosql to edw start!"
#monitor mosql to edw
CPID=$$
TABNAMELIST="test_t0"
TABNAMEARRAY=(${TABNAMELIST//,/ })
for TABNAME in ${TABNAMEARRAY[@]}
do
    echo "`date` $TABNAME"
    ISALIVE=`ps -ef|grep -i "/root/mosql/mosql_to_edw_${TABNAME}_tail.sh"|grep -v $CPID|grep -v "grep"|awk '{print $2}'`
    if [ -n "$ISALIVE" ];then
        echo "`date` $TABNAME mosql process isalive ok"
        #check the last line of the log file
        FILETXT=`sed -n '$p' /root/mosql/log/mosql_to_edw_${TABNAME}_tail.log`
        if [[ $FILETXT =~ "INFO Mongoriver: Saved state" ]];then
            echo "`date` $TABNAME mosql tail process is ok,[$FILETXT]"
        else
            echo "`date` $TABNAME mosql tail process is not ok,now restart it,[$FILETXT]"
            #last log line does not contain "INFO Mongoriver: Saved state", so kill this mosql
            for KPID in `ps -ef|grep -i "/root/mosql/mosql_to_edw_${TABNAME}_tail.sh"|grep -v $CPID|grep -v "grep"|awk '{print $2}'`
            do
                echo "`date` $TABNAME kill -9 $KPID"
                #ps -ef|grep -i $KPID|grep -v "grep"
                ps -ef|grep -i $KPID|grep -v "grep"|awk '{print $2}'|xargs kill -9
            done
            sleep 2
            #start mosql
            echo "`date` $TABNAME mosql tail process start"
            /bin/bash /root/mosql/mosql_to_edw_${TABNAME}_tail.sh >> /root/mosql/log/mosql_to_edw_${TABNAME}_tail.log 2>&1 &
            sleep 3
        fi
    else
        echo "`date` $TABNAME mosql process isalive is not ok"
        echo "`date` $TABNAME mosql tail process start"
        #this mosql process does not exist, so start it
        /bin/bash /root/mosql/mosql_to_edw_${TABNAME}_tail.sh >> /root/mosql/log/mosql_to_edw_${TABNAME}_tail.log 2>&1 &
        sleep 3
    fi
    echo "##################"
done
crontab
# crontab -l
*/5 * * * * bash /root/mosql/monitor_mosql_to_edw_tail.sh >> /root/mosql/log/monitor_mosql_to_edw_tail.log 2>&1 &
The reason for using one script per table is that when the monitor finds an abnormal process it can restart just that table's script on its own; if there are a large number of tables to sync, group the tables across scripts.
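As an end-to-end sanity check (a sketch using the credentials assumed above), insert a test document on the MongoDB side and confirm it arrives in PostgreSQL a few seconds later:
# mongo 127.0.0.1:27017/test -u peiyb -p peiybpeiyb --eval 'db.test_t0.insert({c0: "sync_check"})'
# sleep 5
# psql postgres://peiyb:peiybpeiyb@127.0.0.1/edw -c "select id, c0, w_insert_time from public.test_t0 where c0 = 'sync_check';"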