• mongo 同步数据到 postgrsql


    os: centos 7.4
    mongodb:3.2
    postgresql: 9.6
    mosql:

    MoSQL is no longer being actively maintained。可惜了,mosql现在已经不再维护。

    # mosql --help
    Usage: /usr/local/rvm/rubies/ruby-1.9.3-p551/bin/mosql [options] 
        -h, --help                       Display this message
        -v                               Increase verbosity
        -c [collections.yml],            Collection map YAML file
            --collections
            --sql [sqluri]               SQL server to connect to
            --mongo [mongouri]           Mongo connection string
            --schema [schema]            PostgreSQL 'schema' to namespace tables
            --ignore-delete              Ignore delete operations when tailing
            --only-db [dbname]           Don't scan for mongo dbs, just use the one specified
            --tail-from [timestamp]      Start tailing from the specified UNIX timestamp
            --service [service]          Service name to use when storing tailing state
            --skip-tail                  Don't tail the oplog, just do the initial import
            --reimport                   Force a data re-import
            --no-drop-tables             Don't drop the table if it exists during the initial import
            --unsafe                     Ignore rows that cause errors on insert
            --oplog-filter [filter]      An additional JSON filter for the oplog query

    yml

    mosql_to_edw_test_t0.yml

    # cat mosql_to_edw_test_t0.yml 
    fund:
      test_t0:
        :columns:
        - id:
          :source: _id
          :type: TEXT
        - c0:
          :source: c0
          :type: TEXT
        - w_insert_time: timestamp not null default now()
        - w_update_time: timestamp not null default now()
        :meta:
          :table: test_t0
          :extra_props: false
    

    reimport 初始化

    mosql_to_edw_test_t0_init.sh

    # cat mosql_to_edw_test_t0_init.sh
    
    #/bin/bash
    source /etc/profile
    date
    echo "`date` mosql start!"
    
    mosql -c /root/mosql/mosql_to_edw_test_t0.yml --service test_t0 --only-db test --mongo mongodb://peiyb:peiybpeiyb@127.0.0.1:27017/test?readPreference=secondary --sql postgres://peiyb:peiybpeiyb@127.0.0.1/edw --schema public  --reimport --no-drop-tables
    date
    echo "mosql done..."

    tail 增量更新

    增量更新使用就是MongoRive,对 oplog 做tail操作
    去掉 –reimport –no-drop-tables
    mosql_to_edw_test_t0_tail.sh

    # cat mosql_to_edw_test_t0_tail.sh
    
    #/bin/bash
    source /etc/profile
    date
    echo "`date` mosql start!"
    
    mosql -c /root/mosql/mosql_to_edw_test_t0.yml --service test_t0 --only-db test --mongo mongodb://peiyb:peiybpeiyb@127.0.0.1:27017/test?readPreference=secondary --sql postgres://peiyb:peiybpeiyb@127.0.0.1/edw --schema public 
    date
    echo "mosql done..."

    monitor

    结束初始化后,就可用增量脚本日常运行。但是要监控增量脚本,因为 oplog 会被覆盖的。
    monitor_mosql_to_edw_tail.sh

    # cat monitor_mosql_to_edw_tail.sh
    
    #/bin/bash
    #20180626  peiyb add for monitor mosql disconnect from mongodb
    
    source /etc/profile
    
    echo "#########################################################"
    echo "`date` monitor mosql to edw start!"
    
    #monitor mosql to edw
    CPID=$$
    
    TABNAMELIST="test_t0"
    TABNAMEARRAY=(${TABNAMELIST//,/ })
    
    for TABNAME in ${TABNAMEARRAY[@]}
    do
      echo "`date` $TABNAME"
    
      ISALIVE=`ps -ef|grep -i "/root/mosql/mosql_to_edw_${TABNAME}_tail.sh"|grep -v $CPID |grep -v "grep"|awk '{print $2}' `
    
      if [ -n "$ISALIVE" ];then
        echo "`date` $TABNAME mosql process isalive ok"
    
        #check logfile last line
        FILETXT=`sed -n '$p' /root/mosql/log/mosql_to_edw_${TABNAME}_tail.log`
    
        if [[ $FILETXT =~ "INFO Mongoriver: Saved state" ]];then
          echo "`date` $TABNAME mosql tail process is ok,[$FILETXT]"
        else
          echo "`date` $TABNAME mosql tail process is not ok,now restart it,[$FILETXT]"
          #logfile not start with " INFO Mongoriver: Saved state:",then kill this mosql 
          for KPID in `ps -ef|grep -i "/root/mosql/mosql_to_edw_${TABNAME}_tail.sh"|grep -v $CPID |grep -v "grep"|awk '{print $2}' `
          do
             echo "`date` $TABNAME kill -9 $KPID"
             #ps -ef|grep -i $KPID|grep -v "grep"
             ps -ef|grep -i $KPID|grep -v "grep"|awk '{print $2}'|xargs  kill -9 
          done
          sleep 2
          #start mosql  
          echo "`date` $TABNAME mosql tail process start"
          /bin/bash /root/mosql/mosql_to_edw_${TABNAME}_tail.sh >> /root/mosql/log/mosql_to_edw_${TABNAME}_tail.log 2>&1 &
          sleep 3
        fi 
      else
        echo "`date` $TABNAME mosql process isalive is not ok"
        echo "`date` $TABNAME mosql tail process start"
        #if not exist this mosql process,then start it
        /bin/bash /root/mosql/mosql_to_edw_${TABNAME}_tail.sh >> /root/mosql/log/mosql_to_edw_${TABNAME}_tail.log 2>&1 &
        sleep 3
      fi
    
      echo "##################"
    done
    

    crontab

    # crontab -l
    
    */5 * * * * bash /root/mosql/monitor_mosql_to_edw_tail.sh >> /root/mosql/log/monitor_mosql_to_edw_tail.log 2>&1 &
    

    之所以考虑每个表对应一个脚本,是因为监控时发现进程异常,单独启动某个脚本,如果需要同步的表特别多时,就对表进行分组。

    参考:
    https://github.com/stripe/mosql

  • 相关阅读:
    PHP实现栈(Stack)数据结构
    为什么推荐std::string而不是char*
    PHP实现插入排序算法
    OpenCms Application dev-ref
    OpenCMS integration with Spring MVC--reference
    安装opencms时遇到问题及解决方法
    Java ZIP File Example---refernce
    Top 10 Algorithms for Coding Interview--reference
    JVMInternals--reference
    java code to byte code--partone--reference
  • 原文地址:https://www.cnblogs.com/ctypyb2002/p/9792956.html
Copyright © 2020-2023  润新知