• 多线程读大文件


    1. 确定每个线程读取的开始位置和结束位置。因为要按行读取文件,如果结束位置不在行末,就把结束位置逐字节向后移动,直到遇到换行符(行末)或文件末尾为止。

    2. 使用通道内存映射技术(FileChannel.map / MappedByteBuffer)读取每个分片,参见 https://blog.csdn.net/qq_41969879/article/details/81629469

    package com.util.file;
    
    import lombok.EqualsAndHashCode;
    import org.apache.commons.lang3.StringUtils;
    
    import java.io.ByteArrayOutputStream;
    import java.io.File;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.RandomAccessFile;
    import java.io.UncheckedIOException;
    import java.nio.MappedByteBuffer;
    import java.nio.channels.FileChannel;
    import java.nio.charset.Charset;
    import java.util.HashSet;
    import java.util.Objects;
    import java.util.Set;
    import java.util.concurrent.CyclicBarrier;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import java.util.concurrent.atomic.AtomicLong;
    import java.util.function.Consumer;
    
    /**
     * @description: 单文件读取
     * @author: AlbertXe
     * @create: 2020-10-20 12:50
     */
    public class BigFileReader {
        private int threadSize;
        private int buffSize;
        private String charset;
        private Consumer consumer;
    
        private long fileLength;
        private ExecutorService executorService;
        private RandomAccessFile rAccessFile;
        private CyclicBarrier cyclicBarrier;
        private AtomicLong count;
        private Set<StartEndPair> pairs = new HashSet<>();
    
    
        public BigFileReader(File file, int threadSize, int buffSize, String charset, Consumer consumer) {
            this.threadSize = threadSize;
            this.buffSize = buffSize;
            this.charset = charset;
            this.consumer = consumer;
            fileLength = file.length();
            executorService = Executors.newFixedThreadPool(threadSize);
            try {
                rAccessFile = new RandomAccessFile(file, "r");
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }
            count = new AtomicLong(0L);
        }
    
        public void start() {
            long eachSize = fileLength / threadSize;
            try {
                calStartEnd(0, eachSize);
            } catch (IOException e) {
                e.printStackTrace();
            }
            long s = System.currentTimeMillis();
            cyclicBarrier = new CyclicBarrier(pairs.size(), () -> {
                System.out.println("all time :" + (System.currentTimeMillis() - s));
                System.out.println("sum: " + count.get());
            });
    
            for (StartEndPair pair : pairs) {
                System.out.println("分片:" + pair);
                executorService.submit(new ReadTask(pair));
            }
            executorService.shutdown();
        }
    
        private class ReadTask implements Runnable {
            private long start;
            private long size;
            private byte[] readBuff;
    
            public ReadTask(StartEndPair pair) {
                start = pair.start;
                size = pair.end - 1 - start;
                readBuff = new byte[buffSize];
            }
    
            @Override
            public void run() {
                try {
                    // 快速读写技术 内存镜像 文件不能超过1.5G
                    MappedByteBuffer map = rAccessFile.getChannel().map(FileChannel.MapMode.READ_ONLY, start, size);
                    ByteArrayOutputStream os = new ByteArrayOutputStream();
                    for (int i = 0; i < size; i += buffSize) {
                        int readLength;
                        if (i + buffSize <= size) {
                            readLength = buffSize;
                        } else {
                            readLength = (int) (size - i);
                        }
                        map.get(readBuff, 0, readLength);
                        for (int j = 0; j < readLength; j++) {
                            byte b = readBuff[j];
                            if (b == '
    ' || b == '
    ') {
                                handler(os.toByteArray());
                                os.reset();
                            } else {
                                os.write(b);
                            }
                        }
                    }
                    if (os.size() > 0) {
                        handler(os.toByteArray());
                    }
    
                    cyclicBarrier.await(); // 性能测试
    
    
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
    
            private void handler(byte[] bs) {
                String line;
                if (charset == null) {
                    line = new String(bs);
                } else {
                    line = new String(bs, Charset.forName(charset));
                }
                if (StringUtils.isNotBlank(line)) {
                    consumer.accept(line);
                    count.incrementAndGet();
                }
            }
        }
    
        /**
         * 计算起始位置
         *
         * @param start
         * @param size
         */
        private void calStartEnd(long start, long size) throws IOException {
            if (start > fileLength - 1) {
                return;
            }
            StartEndPair pair = new StartEndPair();
            pair.start = start;
            long endPosition = start + size - 1;
            if (endPosition > fileLength - 1) {
                endPosition = fileLength - 1;
                pair.end = endPosition;
                pairs.add(pair);
                return;
            }
            rAccessFile.seek(endPosition);
            byte b = (byte) rAccessFile.read();
            while (b != '
    ' && b != '
    ') {
                endPosition++;
                if (endPosition > fileLength - 1) {
                    endPosition = fileLength - 1;
                    break;
                }
                rAccessFile.seek(endPosition);
                b = (byte) rAccessFile.read();
            }
            pair.end = endPosition;
            pairs.add(pair);
            calStartEnd(endPosition + 1, size);
        }
    
        @EqualsAndHashCode
        private class StartEndPair {
            public long start;
            public long end;
        }
    
        public static class Builder {
            private File file;
            private int threadSize = 10;
            private int buffSize = 1 << 20;
            private String charset = "gbk";
            private Consumer consumer;
    
            public Builder(String file) {
                this.file = new File(file);
            }
    
            public Builder withThreadSize(int threadSize) {
                this.threadSize = threadSize;
                return this;
            }
    
            public Builder withBuffSize(int buffSize) {
                this.buffSize = buffSize;
                return this;
            }
    
            public Builder withCharset(String charset) {
                this.charset = charset;
                return this;
            }
    
            public Builder withConsumer(Consumer consumer) {
                this.consumer = consumer;
                return this;
            }
    
    
            public BigFileReader build() {
                BigFileReader bigFileReader = new BigFileReader(file, threadSize, buffSize, charset, consumer);
                return bigFileReader;
            }
        }
    
    
    }

     测试类

    package com.util.file;
    
    /**
     * Demo entry point: reads {@code d:/a.txt} with a {@link BigFileReader} configured for
     * 10 threads, the {@code gbk} charset and a 1024-byte buffer, printing every line it receives
     * to standard output.
     */
    public class Main {

        public static void main(String[] args) {
            // Configure and build the reader in a single fluent chain, then kick it off.
            BigFileReader reader = new BigFileReader.Builder("d:/a.txt")
                    .withThreadSize(10)
                    .withCharset("gbk")
                    .withBuffSize(1024)
                    .withConsumer(line -> System.out.println(line))
                    .build();
            reader.start();
        }

    }
     

     

  • 相关阅读:
    软件开发目录规范
    编译Python文件
    python文件的两种用途
    函数高级实战之ATM和购物车系统升级
    函数高级小结
    SpringCloud---(4)Eureka与Zookeeper的区别
    Maven父工程(统一管理依赖)
    SpringCloud-----(1)微服务基础概念
    传统项目架构图
    Session
  • 原文地址:https://www.cnblogs.com/albertXe/p/13850780.html
Copyright © 2020-2023  润新知