• 使用dataX将数据从Mysql数据库同步到Oracle数据


    最近有需求将数据从mysql同步到oracle,之前有使用kettle将表从oracle同步到mysql,这里使用的插件依然是阿里的dataX

    详细见:Githup地址:https://github.com/alibaba/DataX

    这里也是根据阿里云的一篇帖子来进行验证

    https://yq.aliyun.com/articles/715393?spm=a2c4e.11155472.0.0.3aef4dc1IWo28F

    1 环境需要

        JDK(1.8以上,推荐1.8)
        Python(推荐Python2.6.X)
        Apache Maven 3.x (Compile DataX)

    2 #服务器配置

    [root@yhq ~]# yum search java|grep jdk
    [root@yhq ~]# yum install java-1.8.0-openjdk.x86_64 -y
    [root@yhq ~]# java -version
    openjdk version "1.8.0_242"
    OpenJDK Runtime Environment (build 1.8.0_242-b07)
    OpenJDK 64-Bit Server VM (build 25.242-b07, mixed mode)
    [root@yhq ~]# python --version
    Python 2.6.6
    下载地址:https://maven.apache.org/download.cgi
    [root@yhq soft]# ls apache-maven-3.6.3-bin.tar.gz 
    apache-maven-3.6.3-bin.tar.gz
    [root@yhq soft]# tar -zxvf apache-maven-3.6.3-bin.tar.gz 
    [root@yhq soft]#  mkdir /opt/maven
    [root@yhq soft]# mv apache-maven-3.6.3/* /opt/maven/
    [root@yhq soft]# ln -s /opt/maven/b
    bin/  boot/ 
    [root@yhq soft]# ln -s /opt/maven/bin/mvn /usr/bin/mvn
    [root@yhq soft]# vim /etc/profile.d/maven.sh
    export M2_HOME=/opt/maven
    export PATH=${M2_HOME}/bin:${PATH}
    [root@yhq soft]# mvn -v
    Apache Maven 3.6.3 (cecedd343002696d0abb50b32b541b8a6ba2883f)
    Maven home: /opt/maven
    Java version: 1.8.0_242, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.242.b07-1.el6_10.x86_64/jre
    Default locale: en_US, platform encoding: UTF-8
    OS name: "linux", version: "2.6.32-754.el6.x86_64", arch: "amd64", family: "unix"
    #安装datax
    [root@yhq soft]# tar -zxvf datax.tar.gz
    [root@yhq soft]# ls datax
    bin  conf  job  lib  plugin  script  tmp
    #自检测试脚本
    [root@yhq soft]# python datax/bin/datax.py datax/job/job.json 
    
    DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
    Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
    
    
    2020-02-09 23:27:35.717 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
    2020-02-09 23:27:35.763 [main] INFO  Engine - the machine info  => 
    
        osInfo:    Oracle Corporation 1.8 25.242-b07
        jvmInfo:    Linux amd64 2.6.32-754.el6.x86_64
        cpu num:    2
    2020-02-09 23:27:46.974 [job-0] INFO  JobContainer - PerfTrace not enable!
    2020-02-09 23:27:46.975 [job-0] INFO  StandAloneJobContainerCommunicator - Total 100000 records, 2600000 bytes | Speed 253.91KB/s, 10000 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 0.020s |  All Task WaitReaderTime 0.121s | Percentage 100.00%
    2020-02-09 23:27:46.976 [job-0] INFO  JobContainer - 
    任务启动时刻                    : 2020-02-09 23:27:35
    任务结束时刻                    : 2020-02-09 23:27:46
    任务总计耗时                    :                 10s
    任务平均流量                    :          253.91KB/s
    记录写入速度                    :          10000rec/s
    读出记录总数                    :              100000
    读写失败总数                    :                   0

    #测试使用

    mysql 表
     CREATE TABLE `datax_test` (
      `id` int(11) NOT NULL,
      `name` varchar(20) DEFAULT NULL,
      `birthday` date DEFAULT NULL,
      `memo` varchar(32) DEFAULT NULL,
      PRIMARY KEY (`id`)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
    #插入10w数据

    #在oracle里面可以先创建好表结构,或者用kettle同步表

    create table datax_test
    (id number,
    name varchar2(20),
    birthday date,
    memo varchar2(32));

    #创建配置文件
    #可以通过命令查看配置模板: python datax.py -r {YOUR_READER} -w {YOUR_WRITER}

    #  python datax.py -r streamreader -w streamwriter
    [root@yhq job]# vim job1.json
    {
        "job": {
            "setting": {
                "speed": {
                    "channel": 5
                }
            },
            "content": [
                {
                    "reader": {
                        "name": "mysqlreader",
                        "parameter": {
                            "username": "system",
                            "password": "****",
                            "column": ["*"],
                            "connection": [
                               {
                                  "table": ["datax_test"],
                                  "jdbcUrl": ["jdbc:mysql://ip:3306/vision"]
                               }
                            ]
                        }
                    },
                    "writer": {
                        "name": "oraclewriter",
                        "parameter": {
                            "username": "vision",
                            "password": "***",
                            "column": ["*"],
                            "connection": [
                                {
                                   "jdbcUrl": "jdbc:oracle:thin:@ip:1521:bol",
                                   "table": ["datax_test"]
                                }
                            ]
                          }
                        }
                    }
            ]
        }
    }

    #启动数据同步

    [root@yhq job]# python /data/soft/datax/bin/datax.py job1.json 
    
    DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
    Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
    
    
    2020-02-10 00:12:03.796 [main] INFO  VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
    2020-02-10 00:12:03.807 [main] INFO  Engine - the machine info  => 
    
        osInfo:    Oracle Corporation 1.8 25.242-b07
        jvmInfo:    Linux amd64 2.6.32-754.el6.x86_64
        cpu num:    2
    
        totalPhysicalMemory:    -0.00G
        freePhysicalMemory:    -0.00G
        maxFileDescriptorCount:    -1
        currentOpenFileDescriptorCount:    -1
    
        GC Names    [PS MarkSweep, PS Scavenge]
    
        MEMORY_NAME                    | allocation_size                | init_size                      
        PS Eden Space                  | 256.00MB                       | 256.00MB                       
        Code Cache                     | 240.00MB                       | 2.44MB                         
        Compressed Class Space         | 1,024.00MB                     | 0.00MB                         
        PS Survivor Space              | 42.50MB                        | 42.50MB                        
        PS Old Gen                     | 683.00MB                       | 683.00MB                       
        Metaspace                      | -0.00MB                        | 0.00MB                         
    
    
    2020-02-10 00:12:03.869 [main] INFO  Engine - 
    2020-02-10 00:13:48.061 [job-0] INFO  JobContainer - PerfTrace not enable!
    2020-02-10 00:13:48.061 [job-0] INFO  StandAloneJobContainerCommunicator - Total 99999 records, 1888875 bytes | Speed 21.96KB/s, 1190 records/s | Error 0 records, 0 bytes |  All Task WaitWriterTime 42.323s |  All Task WaitReaderTime 29.424s | Percentage 100.00%
    2020-02-10 00:13:48.069 [job-0] INFO  JobContainer - 
    任务启动时刻                    : 2020-02-10 00:12:05
    任务结束时刻                    : 2020-02-10 00:13:48
    任务总计耗时                    :                102s
    任务平均流量                    :           21.96KB/s
    记录写入速度                    :           1190rec/s
    读出记录总数                    :               99999
    读写失败总数                    :                   0

    #在oracle查看数据
    select count(*) from datax_test; #同步正常
    #剩下的增量同步,在crontab中定时任务就行。
    $ crontab -e
    #会进入已有crontab文件编辑界面,继续增加定时任务即可,本示例增加以下内容,并保存
    0,10,20,30,40,50 * * * *  python /data/soft/datax/bin/datax.py /data/soft/datax/job/mysql2odps.json  >>/tmp/log.`date +\%Y\%m\%d\%H\%M\%S`  2>&1

    测试环境

    #测试环境
    ==mysql db 5.7.27 centos 6.10
    create database vision;==oracle db 11.2.0.4  centos 6.10
    select file#,name from v$datafile;
    
    create tablespace vision
      logging
      datafile '/u01/app/oracle/oradata/bol/vision01.dbf'
      size 100m
      autoextend on
      next 500m;
      
      create user vision identified by ****  default tablespace vision ;
      grant connect,resource,dba to vision;==
    delimiter //
    create procedure insrt_t ()
    begin
    declare i int default 1;
    declare j int default 1;
    while i < 100000 do
    #insert into test values (i,'a');
    insert into datax_test
    (id,name,birthday,memo)
    VALUES
    (i,'yhq',now(),'123');
    set i=i+1;
    set j=j+1;
    if j=5000 then
    set j=1;
    commit;
    end if;
    end while;
    end
    //
    delimiter ;
  • 相关阅读:
    Mysql数据库分库备份,分表备份
    KICKSTART无人值守安装
    git的使用
    iowait 过高问题的查找及解决linux
    Redis配置文件redis.conf详解
    安装和使用ZFS
    GlusterFS配置及使用
    Ceph与Gluster之开源存储的对比
    去掉Tomcat的管理页面
    Tomcat不加项目名称访问设置
  • 原文地址:https://www.cnblogs.com/yhq1314/p/12294205.html
Copyright © 2020-2023  润新知