Today a business requirement called for extracting data from an Oracle database into MySQL, so I wrote a small concurrent extractor to play with.
Service layer: the data is split by id into chunks of 500,000 rows each.
@Service
public class DemoService {

    @Autowired
    private JdbcTemplate jdbcTemplate;
    @Autowired
    private DoInsert doInsert;

    public void demoInsert() throws SQLException {
        int min = 1;
        int max = 2222789;
        int step = 500000;
        List<String> sqls = new ArrayList<>();
        // Split the id range [1, 2222789] into chunks of 500,000 ids each.
        while (min <= max) {
            int upper = Math.min(min + step - 1, max);
            String sql = "select db_id,owner,sgement_name,segment_type,partition_name,segment_size,id"
                    + " from DB_SEGMENTS where id>=" + min + " and id<=" + upper;
            sqls.add(sql);
            min = upper + 1;
        }
        // Each chunk is handed to the async inserter; these calls return immediately.
        for (String s : sqls) {
            doInsert.insert(s, jdbcTemplate);
        }
    }
}
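For reference, a minimal way to kick the extraction off (this runner is hypothetical, not part of the original code). Because DoInsert.insert() is @Async, demoInsert() returns as soon as all the split queries have been handed to the thread pool:

package com.example.demo;

import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;

// Hypothetical entry point: triggers the extraction once the application has started.
@Component
public class ExtractRunner implements CommandLineRunner {

    private final DemoService demoService;

    public ExtractRunner(DemoService demoService) {
        this.demoService = demoService;
    }

    @Override
    public void run(String... args) throws Exception {
        demoService.demoInsert();
    }
}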
Every 10,000 rows read from Oracle, one batched insert is issued against MySQL:
@Slf4j
@Component
public class DoInsert {

    @Async("testExecutor")
    public void insert(String sql, JdbcTemplate jdbcTemplate) throws SQLException {
        // Each async task opens its own Oracle connection for its id range.
        JdbcConnection jdbcConnection = new JdbcConnection(
                "jdbc:oracle:thin:@192.168.0.210:1521/exam", "issue", "issue", "ORACLE");
        jdbcConnection.createConnection();
        try {
            ResultSet rs = jdbcConnection.executeQuery(sql);
            List<Object[]> result = new ArrayList<>();
            String insert_sql = "insert into db_segments values (?,?,?,?,?,?,?)";
            while (rs.next()) {
                result.add(new Object[]{
                        rs.getLong(1), rs.getString(2), rs.getString(3), rs.getString(4),
                        rs.getString(5), rs.getLong(6), rs.getInt(7)});
                // Flush every 10,000 rows as one batch into MySQL.
                if (result.size() == 10000) {
                    jdbcTemplate.batchUpdate(insert_sql, result);
                    result = new ArrayList<>();
                }
            }
            // Flush whatever is left at the end of the chunk.
            if (!result.isEmpty()) {
                jdbcTemplate.batchUpdate(insert_sql, result);
            }
        } finally {
            jdbcConnection.closeConnection();
        }
    }
}
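Note that insert() returns void, so the service has no way to tell when all chunks have landed. If that matters, Spring allows an @Async method to return a CompletableFuture; a hedged sketch of that variant (same extraction logic, only the signature and the caller change):

// In DoInsert: return a future the caller can wait on.
@Async("testExecutor")
public CompletableFuture<Void> insert(String sql, JdbcTemplate jdbcTemplate) throws SQLException {
    // ... same read-and-batch logic as above ...
    return CompletableFuture.completedFuture(null);
}

// In DemoService: collect the futures and block until every chunk has been copied.
List<CompletableFuture<Void>> futures = new ArrayList<>();
for (String s : sqls) {
    futures.add(doInsert.insert(s, jdbcTemplate));
}
CompletableFuture.allOf(futures.toArray(new CompletableFuture[0])).join();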
Thread pool configuration:
package com.example.demo;

import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;

@Configuration
@EnableAsync
public class TestThreadPoolConfig {

    private static final int CORE_POOL_SIZE = 4;
    private static final int MAX_POOL_SIZE = 12;
    private static final int QUEUE_CAPACITY = 50;
    private static final String THREAD_NAME_PREFIX = "InstMin-";

    @Bean
    public Executor testExecutor() {
        ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
        taskExecutor.setCorePoolSize(CORE_POOL_SIZE);
        taskExecutor.setMaxPoolSize(MAX_POOL_SIZE);
        taskExecutor.setQueueCapacity(QUEUE_CAPACITY);
        taskExecutor.setKeepAliveSeconds(60);
        taskExecutor.setThreadNamePrefix(THREAD_NAME_PREFIX);
        taskExecutor.initialize();
        return taskExecutor;
    }
}
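The class already imports java.util.concurrent.ThreadPoolExecutor but never uses it. With a bounded queue, submissions beyond maxPoolSize plus the queue capacity would be rejected; an optional one-liner (my addition, not in the original) makes the submitting thread run the overflow task instead:

// Optional: when the pool and its queue are both full, run the task on the caller's
// thread instead of throwing a rejection exception.
taskExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());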
The JdbcConnection helper class:
package com.example.demo;

import lombok.extern.slf4j.Slf4j;

import java.sql.*;
import java.util.Properties;

@Slf4j
public class JdbcConnection {

    private String URL;
    private String USERNAME;
    private String PASSWORD;
    private String type;
    private Connection conn = null;

    public String getURL() {
        return this.URL;
    }

    public void setURL(String uRL) {
        this.URL = uRL;
    }

    public String getPASSWORD() {
        return this.PASSWORD;
    }

    public void setPASSWORD(String pASSWORD) {
        this.PASSWORD = pASSWORD;
    }

    public JdbcConnection(String URL, String PASSWORD, String USERNAME, String type) {
        this.URL = URL;
        this.type = type;
        this.PASSWORD = PASSWORD;
        this.USERNAME = USERNAME;
        String driver = null;
        if (type.equals("ORACLE")) {
            driver = "oracle.jdbc.driver.OracleDriver";
        } else if (type.equals("MYSQL")) {
            driver = "com.mysql.cj.jdbc.Driver";
        } else if (type.equals("PGSQL")) {
            driver = "org.postgresql.Driver";
        }
        try {
            Class.forName(driver);
        } catch (Exception e) {
            log.error("Failed to load the JDBC driver!", e);
        }
    }

    public boolean createConnection() {
        boolean isCreated = true;
        Properties props = new Properties();
        props.put("user", USERNAME);
        props.put("password", PASSWORD);
        try {
            // The connect-timeout property name differs per driver.
            if (type.equals("ORACLE")) {
                props.put("oracle.net.CONNECT_TIMEOUT", "5000");
            } else if (type.equals("MYSQL")) {
                props.put("connectTimeout", "5000");
            } else if (type.equals("PGSQL")) {
                props.put("loginTimeout", "5");
            }
            this.conn = DriverManager.getConnection(URL, props);
        } catch (Exception e) {
            isCreated = false;
            log.error("Failed to create the connection!", e);
        }
        return isCreated;
    }

    public boolean executeUpdate(String sql) {
        if (this.conn == null) {
            this.createConnection();
        }
        try {
            Statement stmt = this.conn.createStatement();
            stmt.executeUpdate(sql);
        } catch (SQLException e) {
            log.error("SQL update failed!", e);
            return false;
        }
        return true;
    }

    public ResultSet executeQuery(String sql) {
        ResultSet rs = null;
        if (this.conn == null) {
            this.createConnection();
        }
        try {
            // Forward-only, read-only cursor with a large fetch size for the extract query.
            Statement stmt = this.conn.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY);
            stmt.setFetchSize(5000);
            stmt.setQueryTimeout(3600);
            rs = stmt.executeQuery(sql);
        } catch (SQLException e) {
            log.error("SQL query failed!", e);
        }
        return rs;
    }

    public void closeConnection() {
        if (this.conn != null) {
            try {
                this.conn.close();
            } catch (SQLException e) {
                log.error("Failed to close the connection!", e);
            } finally {
                this.conn = null;
            }
        }
    }
}
The application configuration file:
server.port=8091
spring.datasource.url=jdbc:mysql://10.50.16.201:3306/monitor?useUnicode=true&characterEncoding=utf8&useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true&allowMultiQueries=true&rewriteBatchedStatements=true
#spring.datasource.url=jdbc:oracle:thin:@192.168.0.210:1521/exam
spring.datasource.username=issue
spring.datasource.password=issue
#spring.datasource.driver-class-name=oracle.jdbc.driver.OracleDriver
spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver
The first time I ran the extraction, the spring.datasource URL in application.properties did not yet include rewriteBatchedStatements=true.
Extracting the 2 million rows took a little over ten minutes, which felt wrong, so I watched MySQL's processlist and saw 4 sessions executing insert into xx values(xx,xx,xx,xx) in parallel.
Their State was "waiting for handler commit"; the MySQL docs describe it as "The thread is waiting for a transaction to commit versus other parts of query processing."
About 2 GB of redo had been generated and I/O looked normal, so the conclusion was: too many transactions, committing far too often.
That reminded me of how mysqldump loads data: each INSERT carries many value tuples (one statement inserts many rows, fewer round trips, much better throughput), i.e. insert into xx values (xx,xx,xx,xx),(xx,xx,xx,xx)...
So I suspected jdbcTemplate.batchUpdate itself. After some digging: against MySQL, batching only pays off when the JDBC URL has rewriteBatchedStatements=true; only then does Connector/J rewrite the batch into a single multi-value INSERT, i.e. the mysqldump-style statement above.
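To make the difference concrete, here is a minimal plain-JDBC sketch (reusing the connection details from the config above; the rows argument stands for one 10,000-row chunk). With rewriteBatchedStatements=true the driver sends the whole batch as one multi-value INSERT, so the server commits once per chunk instead of once per row:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

public class BatchRewriteSketch {

    // Sends one chunk of rows as a JDBC batch. With rewriteBatchedStatements=true the
    // driver rewrites the batch into a single multi-value INSERT (one round trip, one
    // commit under auto-commit); without it, every addBatch() row goes over the wire as
    // its own INSERT and waits for its own commit.
    static void insertChunk(List<Object[]> rows) throws SQLException {
        String url = "jdbc:mysql://10.50.16.201:3306/monitor"
                + "?useSSL=false&serverTimezone=UTC&rewriteBatchedStatements=true";
        try (Connection conn = DriverManager.getConnection(url, "issue", "issue");
             PreparedStatement ps = conn.prepareStatement(
                     "insert into db_segments values (?,?,?,?,?,?,?)")) {
            for (Object[] row : rows) {
                for (int i = 0; i < row.length; i++) {
                    ps.setObject(i + 1, row[i]);
                }
                ps.addBatch();
            }
            ps.executeBatch();
        }
    }
}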
After adding the parameter and re-running: the 2 million rows were extracted in just a few seconds.
What a pitfall!!!
I later tested the opposite direction, MySQL -> Oracle, and the Oracle JDBC driver has no such performance problem with batchUpdate; that extraction also finished in a few seconds.
Still, I find a MyBatis-style batch insert nicer (controllable, explicit): you build the multi-value INSERT yourself, as in the mapper below.
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
        "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.example.demo.DemoMapper">
    <insert id="batchInsert" parameterType="java.util.List">
        insert into db_segments(db_id,owner,sgement_name,segment_type,partition_name,segment_size,id) values
        <foreach collection="list" separator="," item="item">
            (#{item.db_id},#{item.owner},#{item.sgement_name},#{item.segment_type},#{item.partition_name},#{item.segment_size},#{item.id})
        </foreach>
    </insert>
</mapper>
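For completeness, a sketch of the mapper interface matching the namespace and statement id above (this interface and the Map-based row type are my assumptions, not from the original project):

package com.example.demo;

import org.apache.ibatis.annotations.Mapper;

import java.util.List;
import java.util.Map;

// Rows are passed as Maps keyed by column name so the #{item.db_id}-style references in
// the XML resolve directly; a POJO with matching property names would work the same way.
@Mapper
public interface DemoMapper {
    int batchInsert(List<Map<String, Object>> list);
}

One thing to watch with the foreach approach: the generated statement grows with the list, so the caller should feed it in chunks (say a few thousand rows per call) to stay below max_allowed_packet.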