• Java快速读取大文件


    Java快速读取大文件

    最近公司服务器监控系统需要做一个东西来分析Java应用程序的日志。

    第一步探索:

        首先我想到的是使用RandomAccessFile,因为它可以很方便地获取和设置文件指针,下面是我的代码。

    package cn.mucang.exception.analyzer;
    
    import cn.mucang.exception.analyzer.analyze.LogAnalyzer;
    import cn.mucang.exception.analyzer.config.AnalyseConfig;
    import cn.mucang.exception.analyzer.support.DefaultLogLineBuilder;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import java.io.BufferedReader;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.io.InputStreamReader;
    import java.io.RandomAccessFile;
    
    /**
     * Log analysis entry point that reads the log file through {@link RandomAccessFile}.
     *
     * @author Gao Youbo
     * @since 2015/3/16.
     */
    public class LogUtils {

        private static final Logger LOG = LoggerFactory.getLogger(LogUtils.class);

        /**
         * Reads the log file named by the analyzer's configuration line by line, groups lines into
         * log entries and passes each completed entry to {@code analyzer.analyse(...)}.
         *
         * <p>Reading starts at the configured file pointer (negative values are reset to 0) and
         * line numbering continues from the configured line number. A new entry starts whenever
         * {@code ParseUtils.isNewLine} returns true; lines seen before the first such line are
         * ignored. Progress timing is printed to standard output every 10000 lines.
         *
         * @param analyzer the analyzer supplying the configuration and receiving the entries
         * @throws IOException if the log file does not exist or cannot be read
         * @throws IllegalArgumentException if the configured start pointer lies beyond the end of
         *     the file
         */
        public static void analyse(LogAnalyzer analyzer) throws IOException {
            AnalyseConfig analyseConfig = analyzer.getAnalyseConfig();
            File file = new File(analyseConfig.getPath());
            LOG.info("开始分析日志文件...{}", file.getAbsolutePath());
            if (!file.exists()) {
                throw new IOException("日志文件不存在:" + analyseConfig);
            }
            if (analyseConfig.getFilePointer() < 0) {
                analyseConfig.setFilePointer(0);
            }

            // The RandomAccessFile is the only handle on the log file; try-with-resources closes it
            // on every exit path.
            try (RandomAccessFile logFile = new RandomAccessFile(file, "r")) {
                long length = logFile.length();
                analyzer.getAnalyseConfig().setFileLenght(length); // record the file size in bytes
                if (analyseConfig.getFilePointer() > length) {
                    throw new IllegalArgumentException("开始指针位置越界");
                } else {
                    logFile.seek(analyseConfig.getFilePointer());
                }
                String line; // current physical line
                int lineNumber = analyseConfig.getLineNumber(); // running line number
                DefaultLogLineBuilder lb = null; // entry being assembled; null until the first entry starts
                long start = System.currentTimeMillis();
                while ((line = logFile.readLine()) != null) {
                    lineNumber++;
                    long filePointer = logFile.getFilePointer();
                    if (ParseUtils.isNewLine(lineNumber, line)) {
                        // A new entry begins: hand over the previous one before starting afresh.
                        if (lb != null) {
                            analyzer.analyse(lb.getLogLine());
                        }
                        lb = new DefaultLogLineBuilder();
                    }
                    if (lb != null) {
                        lb.append(lineNumber, filePointer, line);
                        if (length == logFile.getFilePointer()) {
                            // Reached the byte length recorded above: flush the last entry.
                            analyzer.analyse(lb.getLogLine());
                        }
                    }
                    if (lineNumber % 10000 == 0) {
                        long end = System.currentTimeMillis();
                        System.out.println(String.format("line=%s, used=%sms", lineNumber, end - start));
                        start = System.currentTimeMillis();
                    }
                }
            }
        }

    }

    下面看一下性能:分析一万行日志平均需要1500毫秒。因为我的日志分析使用到了正则,一开始我以为速度慢是大量的正则运算造成的;实际上瓶颈在于RandomAccessFile.readLine()没有缓冲,是逐字节读取文件的。

    第二步探索:

    我自己写了一个LogReader,自己控制指针位置。下面看一下代码:

    package cn.mucang.exception.analyzer;
    
    import java.io.BufferedReader;
    import java.io.Closeable;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.IOException;
    import java.io.InputStreamReader;
    
    /**
     * Buffered, line-oriented reader over a log file that keeps track of its own read position.
     *
     * <p>The position returned by {@link #getFilePointer()} counts characters produced by an
     * {@link InputStreamReader} using the default charset, whereas {@link #length()} is the file
     * size in bytes. The two values are only directly comparable when every character in the file
     * is encoded as a single byte.
     *
     * @author Gao Youbo
     * @since 2015-03-25 09:02
     */
    public class LogReader implements Closeable {
        /**
         * File size in bytes, captured when the reader was created.
         */
        private final long length;
        /**
         * Number of characters consumed so far, whether read or skipped.
         */
        private long filePointer;
        private final BufferedReader bufferedReader;

        /**
         * Opens the given log file for buffered reading.
         *
         * @param logFile the file to read
         * @throws FileNotFoundException if the file cannot be opened for reading
         */
        public LogReader(File logFile) throws FileNotFoundException {
            this.bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(logFile)));
            this.length = logFile.length();
        }

        /**
         * Reads a single character and advances the file pointer by one.
         *
         * @return the character read, or -1 at end of stream (the pointer is not advanced then)
         * @throws IOException if the underlying read fails
         */
        public int read() throws IOException {
            int c = bufferedReader.read();
            if (c != -1) {
                filePointer++;
            }
            return c;
        }

        /**
         * Reads the next line. A line ends at {@code '\n'}, {@code '\r'}, {@code "\r\n"} or the end
         * of the stream; the terminator is consumed but not included in the result.
         *
         * @return the line without its terminator, or {@code null} if the end of the stream was
         *     reached before any character could be read
         * @throws IOException if the underlying read fails
         */
        public String readLine() throws IOException {
            StringBuilder input = new StringBuilder();
            int c = -1;
            boolean eol = false; // end of line
            while (!eol) {
                switch (c = read()) {
                    case -1:
                    case '\n':
                        eol = true;
                        break;
                    case '\r': {
                        eol = true;
                        // Look one character ahead so that "\r\n" counts as a single terminator;
                        // anything else is pushed back for the next read.
                        bufferedReader.mark(1);
                        int next = bufferedReader.read();
                        if (next == '\n') {
                            filePointer++;
                        } else if (next != -1) {
                            bufferedReader.reset();
                        }
                        break;
                    }
                    default:
                        input.append((char) c);
                        break;
                }
            }
            if ((c == -1) && (input.length() == 0)) {
                return null;
            }
            return input.toString();
        }

        /**
         * Returns the current read position.
         *
         * @return the number of characters read or skipped so far
         * @throws IOException never thrown by this implementation; kept for interface compatibility
         */
        public long getFilePointer() throws IOException {
            return filePointer;
        }

        /**
         * Skips up to {@code n} characters from the current position and advances the file pointer
         * by the number actually skipped.
         *
         * @param n the number of characters to skip
         * @return the number of characters actually skipped
         * @throws IOException if the underlying skip fails
         */
        public long skip(long n) throws IOException {
            // Skip through the buffered reader so buffered characters are accounted for.
            long skipped = bufferedReader.skip(n);
            filePointer += skipped;
            return skipped;
        }

        /**
         * Returns the size of the log file.
         *
         * @return the file size in bytes as captured at construction time
         */
        public long length() {
            return length;
        }

        /**
         * Closes the reader; closing the outermost reader also closes the wrapped streams.
         *
         * @throws IOException if closing fails
         */
        @Override
        public void close() throws IOException {
            bufferedReader.close();
        }
    }
    
    package cn.mucang.exception.analyzer;
    
    import cn.mucang.exception.analyzer.analyze.LogAnalyzer;
    import cn.mucang.exception.analyzer.config.AnalyseConfig;
    import cn.mucang.exception.analyzer.support.DefaultLogLineBuilder;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import java.io.File;
    import java.io.IOException;
    
    /**
     * Log analysis entry point that reads the log file through {@code LogReader}.
     *
     * @author Gao Youbo
     * @since 2015/3/16.
     */
    public class LogUtils {

        private static final Logger LOG = LoggerFactory.getLogger(LogUtils.class);


        /**
         * Reads the log file named by the analyzer's configuration line by line, groups lines into
         * log entries and passes each completed entry to {@code analyzer.analyse(...)}.
         *
         * <p>Reading starts after skipping the configured file pointer (negative values are reset
         * to 0) and line numbering continues from the configured line number. A new entry starts
         * whenever {@code ParseUtils.isNewLine} returns true; lines seen before the first such line
         * are ignored. The entry still pending when the reader is exhausted is handed over once,
         * after the loop. Progress timing is printed to standard output every 10000 lines.
         *
         * @param analyzer the analyzer supplying the configuration and receiving the entries
         * @throws java.io.IOException if the log file does not exist or cannot be read
         * @throws IllegalArgumentException if the configured start pointer is larger than the file
         *     length
         */
        public static void analyse(LogAnalyzer analyzer) throws IOException {
            AnalyseConfig analyseConfig = analyzer.getAnalyseConfig();
            File file = new File(analyseConfig.getPath());
            System.out.println(file.getAbsolutePath());
            LOG.info("开始分析日志文件...{}", file.getAbsolutePath());
            if (!file.exists()) {
                throw new IOException("日志文件不存在:" + analyseConfig);
            }
            if (analyseConfig.getFilePointer() < 0) {
                analyseConfig.setFilePointer(0);
            }
            try (LogReader logReader = new LogReader(file)) {
                long length = logReader.length();
                analyzer.getAnalyseConfig().setFileLenght(length); // record the file size in bytes
                if (analyseConfig.getFilePointer() > length) {
                    throw new IllegalArgumentException("开始指针位置越界");
                } else {
                    logReader.skip(analyseConfig.getFilePointer());
                }
                String line; // current physical line
                int lineNumber = analyseConfig.getLineNumber(); // running line number
                DefaultLogLineBuilder lb = null; // entry being assembled; null until the first entry starts
                long start = System.currentTimeMillis();
                while ((line = logReader.readLine()) != null) {
                    lineNumber++;
                    long filePointer = logReader.getFilePointer();
                    if (ParseUtils.isNewLine(lineNumber, line)) {
                        // A new entry begins: hand over the previous one before starting afresh.
                        if (lb != null) {
                            analyzer.analyse(lb.getLogLine());
                        }
                        lb = new DefaultLogLineBuilder();
                    }
                    if (lb != null) {
                        lb.append(lineNumber, filePointer, line);
                    }
                    if (lineNumber % 10000 == 0) {
                        long end = System.currentTimeMillis();
                        System.out.println(String.format("line=%s, used=%s", lineNumber, end - start));
                        start = System.currentTimeMillis();
                    }
                }
                // Flush the last entry once the reader is exhausted. Comparing the reader's
                // position with the byte length is unreliable here because the reader's position
                // is not guaranteed to equal the byte offset, so the final entry could be lost.
                if (lb != null) {
                    analyzer.analyse(lb.getLogLine());
                }
            }
        }
    }
    接下来是测试的性能:

    日志解析速度提高了10倍。

  • 相关阅读:
    美团DSP
    pid稳态控制
    Tensorflow的gRPC编程(一)
    信息增益,信息增益率
    tensorflow dnn 参考
    java tfserving grpc 通信调用代码解析 【重点参考】
    Saltstack module dnsmasq 详解
    Saltstack module django 详解
    Saltstack module disk 详解
    Saltstack module dig 详解
  • 原文地址:https://www.cnblogs.com/firstdream/p/5585280.html
Copyright © 2020-2023  润新知