• 将大量数据批量插入Oracle表的类,支持停止续传


    之前用create table select * from XXTable无疑是创建庞大表的最快方案之一,但是数据重复率是个问题,且数据难以操控。

    于是我在之前批量插数据的基础上更新了一个类,让它具有了Resume的能力,这样可以利用碎片时间能插一点是一点。

    以后此类还可能改进,先留一个版本在这里。

    数据库连接参数类:

    class DBParam {
        public final static String Driver = "oracle.jdbc.driver.OracleDriver";
        public final static String DbUrl = "jdbc:oracle:thin:@127.0.0.1:1521:orcl";
        public final static String User = "ufo";
        public final static String Pswd = "1234";
    }

    HugeTbBatchInserter类:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.text.SimpleDateFormat;
    import java.util.ArrayList;
    import java.util.Calendar;
    import java.util.Date;
    import java.util.List;
    import java.util.Random;
    
    class TypeField{
        String type;
        String field;
    }
    
    // Insert huge records to a table
    public class HugeTbBatchInserter {
        private final int BatchSize=250;// Batch insert size,可以根据机器性能提高
        private final int Total_Record_Count=100000000;// 最好是BatchSize的整倍数
        
        // 如果是多个表,扩充数组即可
        // PK:主键 CH:文字 DT:Datetime,RND:百以内随机数 还可以根据需要扩充代号,在getInsertSql函数中则根据代号来设置值
        private final String[][] tableArray= {
            {"score:"+Total_Record_Count,"PK:ID","RND:stuid","RND:courseid","RND:score"},
        };
        
        /**
         * 批量插值
         */
        public void batchInsert() {
            Connection conn = null;
            Statement stmt = null;
            
            try{
                Class.forName(DBParam.Driver).newInstance();
                conn = DriverManager.getConnection(DBParam.DbUrl, DBParam.User, DBParam.Pswd);
                stmt = conn.createStatement();
                System.out.println("Begin to access "+DBParam.DbUrl+" as "+DBParam.User+"...");
                
                for(String[] innerArr:tableArray) {
                    String tableName=innerArr[0].split(":")[0];
                    System.out.println("Table:"+tableName);
                    
                    int existCount=fetchExistCount(tableName,stmt);
                    System.out.println("Exist record count:"+existCount);
                    
                    int maxId=fetchMaxId(tableName,stmt);
                    System.out.println("Max id:"+maxId);
                    
                    int count=Integer.parseInt(innerArr[0].split(":")[1])-existCount;
                    System.out.println("准备向表"+tableName+"插入"+count+"条记录.");
                    
                    // 是否需要插值前先清空,自行判断再放开
                    //truncateTable(tableName,stmt);
                    
                    // 真正插入数据
                    insertTestDataTo(tableName,maxId+1,count,innerArr,stmt);
                }
            } catch (Exception e) {
                System.out.print(e.getMessage());
            } finally {
                try {
                    stmt.close();
                    conn.close();
                } catch (SQLException e) {
                    System.out.print("Can't close stmt/conn because of " + e.getMessage());
                }
            }
        }
        
        /**
         * 以当前时间为基准减去数十秒
         * @param n
         * @return
         */
        private static String getDatetimeBefore(int n) {
            try {
                Calendar now = Calendar.getInstance();
                now.add(Calendar.SECOND,-n*10);//日期减去n*10秒
                
                Date newDate=now.getTime();
                
                SimpleDateFormat sdf=new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                String retval = sdf.format(newDate);
                return retval;
            }
            catch(Exception ex) {
                ex.printStackTrace();
                return null;
            }
        }
        
        /**
         * 清空一个表的数据,注意此功能有破坏性,不可恢复,注意备份好数据
         * @param tableName
         * @param conn
         * @param stmt
         * @throws SQLException
         */
        private void truncateTable(String tableName,Statement stmt) throws SQLException{
            String sql="truncate table "+tableName;
            stmt.execute(sql);
            System.out.println("truncated table:"+tableName);
        }
        
        /**
         * 得到表中已有的最大ID值
         * @param tableName
         * @param conn
         * @param stmt
         * @return
         * @throws SQLException
         */
        private int fetchMaxId(String tableName,Statement stmt)  throws SQLException{
            String sql="select max(id) as max from "+tableName+"";
            
            ResultSet rs = stmt.executeQuery(sql);
            
            while (rs.next()) {
                int max = rs.getInt("max");
                return max;
            }
            
            return 0;
        }
        
        /**
         * 得到表中现存数量
         * @param tableName
         * @param conn
         * @param stmt
         * @return
         * @throws SQLException
         */
        private int fetchExistCount(String tableName,Statement stmt)  throws SQLException{
            String sql="select count(*) as cnt from "+tableName+"";
            
            ResultSet rs = stmt.executeQuery(sql);
            
            while (rs.next()) {
                int cnt = rs.getInt("cnt");
                return cnt;
            }
            
            return 0;
        }
        
        /**
         * 向一个表插入数据
         * @param tableName
         * @param count
         * @param innerArr
         * @param conn
         * @param stmt
         * @throws SQLException
         */
        private void insertTestDataTo(String tableName,int startId,int count,String[] innerArr,Statement stmt) throws SQLException{
            // 得到字段名和字段类型
            List<TypeField> typefields=new ArrayList<TypeField>();
            for(int i=1;i<innerArr.length;i++) {
                String temp=innerArr[i];
                String[] arrTmp=temp.split(":");
                
                TypeField tf=new TypeField();
                tf.type=arrTmp[0];
                tf.field=arrTmp[1];
                typefields.add(tf);
            }
            
            List<String> fields=new ArrayList<String>();
            List<String> values=new ArrayList<String>();
            int index=0;
            for(TypeField tf:typefields) {
                fields.add(tf.field);
                values.add("''{"+index+"}''");
                index++;
            }
            
            index=0;
            int times=count/BatchSize;
            for(int i=0;i<times;i++) {
                long startTime = System.currentTimeMillis();
                StringBuilder sb=new StringBuilder();
                sb.append("INSERT ALL ");
                
                for(int j=0;j<BatchSize;j++) {
                    index=i*BatchSize+j+startId;
                    sb.append(getInsertSql(tableName,typefields,index));
                }
                
                sb.append(" select * from dual");
                String sql = sb.toString();
                stmt.executeUpdate(sql);
                
                long endTime = System.currentTimeMillis();
                System.out.println("#"+i+"/"+times+" "+BatchSize+" records inserted to Table:'"+tableName+"',time elapsed:"+(endTime-startTime)+"ms.");
            }
        }
        
        /**
         * 得到批量插入语句
         * @param tableName
         * @param typefields
         * @param index
         * @return
         */
        private String getInsertSql(String tableName,List<TypeField> typefields,int index) {
            String currTime=getDatetimeBefore(index);
            
            StringBuilder sb=new StringBuilder();
            sb.append(" INTO "+tableName+"(");
            List<String> fields=new ArrayList<String>();
            for(TypeField tf:typefields) {
                fields.add(tf.field);
            }
            sb.append(String.join(",",fields));
            
            sb.append(") values(");
            List<String> values=new ArrayList<String>();
            for(TypeField tf:typefields) {
                if(tf.type.equals("PK")) {
                    values.add("'"+String.valueOf(index)+"'");
                }else if(tf.type.equals("CH")) {
                    values.add("'0'");
                }else if(tf.type.equals("RND")) {
                    values.add("'"+getRND()+"'");
                }else if(tf.type.equals("DT")) {
                    values.add("to_date('"+currTime+"','yyyy-MM-dd HH24:mi:ss')");
                }
            }
            sb.append(String.join(",",values));
            sb.append(")");
            
            String insertSql=sb.toString();
            return insertSql;
        }
        
        private static String getRND() {
            return getRandom(0,100);
        }
        
        private static String getRandom(int min, int max){
            Random random = new Random();
            int s = random.nextInt(max) % (max - min + 1) + min;
            return String.valueOf(s);
        }
        
    
        
        /**
         * 将秒转化为日时分秒
         * @param secondCount
         * @return
         */
        private static String sec2DHMS(long secondCount) {
            String retval = null;
        
            long days = secondCount / (60 * 60 * 24);
            long hours = (secondCount % (60 * 60 * 24)) / (60 * 60);
            long minutes = (secondCount % (60 * 60)) / 60;
            long seconds = secondCount % 60;
            
            String strSeconds="";
            if(seconds!=0) {
                strSeconds=seconds + "s";
            }
        
            if (days > 0) {
                retval = days + "d" + hours + "h" + minutes + "m" + strSeconds;
            } else if (hours > 0) {
                retval = hours + "h" + minutes + "m" + strSeconds;
            } else if (minutes > 0) {
                retval = minutes + "m" + strSeconds;
            } else {
                retval = strSeconds;
            }
        
            return retval;
        }
        
        public static void main(String[] args) {
            HugeTbBatchInserter mi=new HugeTbBatchInserter();
            long startTime = System.currentTimeMillis();
            mi.batchInsert();
            long endTime = System.currentTimeMillis();
            
            System.out.println("Time elapsed:" + sec2DHMS((endTime - startTime)/1000) );
        }
    }

    这个类运行起来是这样的: 

    Begin to access jdbc:oracle:thin:@127.0.0.1:1521:orcl as ufo...
    Table:score
    Exist record count:3351500
    Max id:3351499
    准备向表score插入96648500条记录.
    #0/386594 250 records inserted to Table:'score',time elapsed:284ms.
    #1/386594 250 records inserted to Table:'score',time elapsed:282ms.
    #2/386594 250 records inserted to Table:'score',time elapsed:324ms.
    #3/386594 250 records inserted to Table:'score',time elapsed:284ms.
    #4/386594 250 records inserted to Table:'score',time elapsed:302ms.
    #5/386594 250 records inserted to Table:'score',time elapsed:330ms.
    #6/386594 250 records inserted to Table:'score',time elapsed:291ms.
    #7/386594 250 records inserted to Table:'score',time elapsed:335ms.
    #8/386594 250 records inserted to Table:'score',time elapsed:372ms.
    #9/386594 250 records inserted to Table:'score',time elapsed:374ms.

    下面这个类虽然更快些,但插入总量有限,需要改进,也留一个版本在这里吧:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.text.DecimalFormat;
    
    class DBParam {
        public final static String Driver = "oracle.jdbc.driver.OracleDriver";
        public final static String DbUrl = "jdbc:oracle:thin:@127.0.0.1:1521:orcl";
        public final static String User = "ufo";
        public final static String Pswd = "1234";
    }
    // Insert records to srcore table
    public class ScoreInserter {
        private final String Table="score";
        private final int Total=1000000;
        
        public boolean fillTable() {
            Connection conn = null;
            Statement stmt = null;
            
            try{
                Class.forName(DBParam.Driver).newInstance();
                conn = DriverManager.getConnection(DBParam.DbUrl, DBParam.User, DBParam.Pswd);
                conn.setAutoCommit(false);
                stmt = conn.createStatement();
                
                long startMs = System.currentTimeMillis();
                clearTable(stmt,conn);
                fillDataInTable(stmt,conn);
                
                
                long endMs = System.currentTimeMillis();
                System.out.println("It takes "+ms2DHMS(startMs,endMs)+" to fill "+toEastNumFormat(Total)+" records to table:'"+Table+"'.");
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                try {
                    stmt.close();
                    conn.close();
                } catch (SQLException e) {
                    System.out.print("Can't close stmt/conn because of " + e.getMessage());
                }
            }
            
            return false;
        }
        
        private void clearTable(Statement stmt,Connection conn) throws SQLException {
            stmt.executeUpdate("truncate table "+Table);
            conn.commit();
            System.out.println("Cleared table:'"+Table+"'.");
        }
        
        private void fillDataInTable(Statement stmt,Connection conn) throws SQLException {
            StringBuilder sb=new StringBuilder();
            sb.append(" Insert into "+Table);
            sb.append(" select dbms_random.value(0,200),dbms_random.value(1,10),dbms_random.value(0,101) from dual ");
            sb.append(" connect by level<="+Total);
            sb.append(" order by dbms_random.random");
            
            String sql=sb.toString();
            stmt.executeUpdate(sql);
            conn.commit();
            
        }
        
        // 将整数在万分位以逗号分隔表示
        public static String toEastNumFormat(long number) {
            DecimalFormat df = new DecimalFormat("#,####");
            return df.format(number);
        }
        
        // change seconds to DayHourMinuteSecond format
        private static String ms2DHMS(long startMs, long endMs) {
            String retval = null;
            long secondCount = (endMs - startMs) / 1000;
            String ms = (endMs - startMs) % 1000 + "ms";
    
            long days = secondCount / (60 * 60 * 24);
            long hours = (secondCount % (60 * 60 * 24)) / (60 * 60);
            long minutes = (secondCount % (60 * 60)) / 60;
            long seconds = secondCount % 60;
    
            if (days > 0) {
                retval = days + "d" + hours + "h" + minutes + "m" + seconds + "s";
            } else if (hours > 0) {
                retval = hours + "h" + minutes + "m" + seconds + "s";
            } else if (minutes > 0) {
                retval = minutes + "m" + seconds + "s";
            } else {
                retval = seconds + "s";
            }
    
            return retval + ms;
        }
        
        public static void main(String[] args) {
            ScoreInserter si=new ScoreInserter();
            si.fillTable();
        }
    }

    --END-- 2020年1月4日16点57分

  • 相关阅读:
    Golang之字符串格式化
    BZOJ 4513: [Sdoi2016]储能表 [数位DP !]
    BZOJ 3329: Xorequ [数位DP 矩阵乘法]
    BZOJ 1833: [ZJOI2010]count 数字计数 [数位DP]
    HDU2089 不要62 BZOJ1026: [SCOI2009]windy数 [数位DP]
    未完
    [Miller-Rabin & Pollard-rho]【学习笔记】
    BZOJ 3551: [ONTAK2010]Peaks加强版 [Kruskal重构树 dfs序 主席树]
    BZOJ 3123: [Sdoi2013]森林 [主席树启发式合并]
    BZOJ 3545: [ONTAK2010]Peaks [Splay启发式合并]
  • 原文地址:https://www.cnblogs.com/heyang78/p/12149638.html
Copyright © 2020-2023  润新知