• java


    读取 ORC 文件

        /**
         * Reads the ORC file at {@code /tmp/Orc.orc} batch by batch and prints every
         * batch to standard output.
         *
         * @throws IOException if the file cannot be opened or a batch cannot be read
         */
        @Test
        public void readOrc() throws IOException {
            Configuration conf = new Configuration();
            Reader reader = OrcFile.createReader(new Path("/tmp/Orc.orc"),
                    OrcFile.readerOptions(conf));
            // A single batch, shaped by the file's own schema, is refilled on each nextBatch call.
            VectorizedRowBatch batch = reader.getSchema().createRowBatch();
            // try-with-resources guarantees the RecordReader is closed even when
            // nextBatch or the printing below throws.
            try (RecordReader rows = reader.rows()) {
                while (rows.nextBatch(batch)) {
                    System.out.println(batch.toString());
                }
            }
        }

    写 ORC 文件——单行

        /**
         * Writes a single row {@code (x=2, y=6)} with schema {@code struct<x:int,y:int>}
         * to {@code /tmp/Orc.orc}.
         *
         * <p>The file is opened with {@code overwrite(true)}, matching {@code writeLine2},
         * so the test can be re-run when the target file already exists.
         *
         * @throws IOException if the file cannot be created or written
         */
        @Test
        public void writeLine3() throws IOException {
            Configuration conf = new Configuration();
            TypeDescription schema = TypeDescription.fromString("struct<x:int,y:int>");
            Writer writer = OrcFile.createWriter(new Path("/tmp/Orc.orc"),
                    OrcFile.writerOptions(conf)
                            .setSchema(schema)
                            .overwrite(true));
            try {
                VectorizedRowBatch batch = schema.createRowBatch();
                LongColumnVector x = (LongColumnVector) batch.cols[0];
                LongColumnVector y = (LongColumnVector) batch.cols[1];
                // Claim the next free row slot in the batch and fill both columns.
                int row = batch.size++;
                x.vector[row] = 2;
                y.vector[row] = 2 * 3;
                // The batch holds exactly one row here, so it is flushed unconditionally.
                writer.addRowBatch(batch);
                batch.reset();
            } finally {
                // Close the writer even when adding the batch fails.
                writer.close();
            }
        }

    写 ORC 文件——多行

        /**
         * Writes several comma-separated text lines as rows of an ORC file at
         * {@code /tmp/Orc.orc}, overwriting any existing file.
         *
         * <p>Each line is split on {@code ','} and the i-th field is stored in the i-th
         * column of the schema. To exercise the numeric path instead of the string path,
         * switch the schema to {@code struct<field1:int,field2:int,field3:int>} and use
         * numeric input lines such as {@code "1,2,3"}.
         *
         * @throws IOException if the file cannot be created or written
         * @throws IllegalArgumentException if a line does not have one field per schema column
         */
        @Test
        public void writeLine2() throws IOException {
            String[] lines = new String[]{"1,a,aa", "2,b,bb", "3,c,cc", "4,d,dd", "1,a,aa", "2,b,bb", "3,c,cc", "4,d,dd", "1,a,aa", "2,b,bb", "3,c,cc", "4,d,dd", "1,a,aa", "2,b,bb", "3,c,cc", "4,d,dd"};

            Configuration conf = new Configuration();
            TypeDescription schema = TypeDescription.fromString("struct<field1:String,field2:String,field3:String>");
            Writer writer = OrcFile.createWriter(new Path("/tmp/Orc.orc"),
                    OrcFile.writerOptions(conf)
                            .setSchema(schema).overwrite(true));
            try {
                VectorizedRowBatch batch = schema.createRowBatch();

                for (String line : lines) {
                    // limit -1 keeps trailing empty fields so "1,a," still yields three columns.
                    String[] columns = line.split(",", -1);
                    if (columns.length != batch.numCols) {
                        throw new IllegalArgumentException("Expected " + batch.numCols
                                + " columns but got " + columns.length + " in line: " + line);
                    }
                    System.out.println(batch.size);
                    int row = batch.size++;
                    for (int i = 0; i < columns.length; i++) {
                        setColumnValue(batch.cols[i], row, columns[i]);
                    }
                    // Flush only after the whole row is populated; flushing inside the
                    // column loop would write and reset a full batch whose last row has
                    // only its first column set.
                    if (batch.size == batch.getMaxSize()) {
                        writer.addRowBatch(batch);
                        batch.reset();
                    }
                }
                // Write whatever remains in the final, partially filled batch.
                if (batch.size != 0) {
                    writer.addRowBatch(batch);
                    batch.reset();
                }
            } finally {
                // Close the writer even when a row fails to parse or write.
                writer.close();
            }
        }

        /**
         * Parses {@code text} and stores it at {@code row} of {@code vector}, choosing the
         * conversion by the vector's exact runtime class.
         *
         * @param vector the column vector to write into
         * @param row    the row index inside the current batch
         * @param text   the textual field value
         * @throws UnsupportedOperationException if the vector class is not one of the handled types
         */
        private static void setColumnValue(ColumnVector vector, int row, String text) {
            // Exact class comparison (not instanceof) so subclasses are not treated as
            // their parent type, e.g. Decimal64ColumnVector is never handled as a plain long column.
            Class<?> type = vector.getClass();
            if (type == BytesColumnVector.class) {
                // Encode once with an explicit charset instead of the platform default.
                byte[] bytes = text.getBytes(StandardCharsets.UTF_8);
                ((BytesColumnVector) vector).setVal(row, bytes, 0, bytes.length);
            } else if (type == LongColumnVector.class) {
                ((LongColumnVector) vector).vector[row] = Long.parseLong(text);
            } else if (type == Decimal64ColumnVector.class) {
                ((Decimal64ColumnVector) vector).set(row, HiveDecimal.create(text));
            } else if (type == DecimalColumnVector.class) {
                ((DecimalColumnVector) vector).set(row, HiveDecimal.create(text));
            } else if (type == DoubleColumnVector.class) {
                ((DoubleColumnVector) vector).vector[row] = Double.parseDouble(text);
            } else if (type == TimestampColumnVector.class) {
                ((TimestampColumnVector) vector).set(row, java.sql.Timestamp.valueOf(text));
            } else {
                // Fail loudly instead of leaving the cell unset.
                throw new UnsupportedOperationException(
                        "Unsupported column vector type: " + type.getName());
            }
        }

    所需的 import 语句

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.common.type.HiveDecimal;
    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.Decimal64ColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
    import org.apache.orc.*;
    import org.junit.Test;
    
    import java.io.File;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;
    import java.util.ArrayList;
    import java.util.List;
  • 相关阅读:
    前端学PHP之错误处理
    mysql数据库学习目录
    前端学数据库之存储
    前端学数据库之函数
    用shell脚本监控进程是否存在 不存在则启动的实例
    在notepad++里面使用正则表达式替换掉所有行逗号前面内容
    mysql合并 两个count语句一次性输出结果的方法
    硬件中断和DPC一直占40-52%左右 解决方法
    解决secureCRT 数据库里没有找到防火墙 '无' 此会话降尝试不通过防火墙进行连接。
    Java eclipse下 Ant build.xml实例详解 附完整项目源码
  • 原文地址:https://www.cnblogs.com/BigWrite/p/13651478.html
Copyright © 2020-2023  润新知