Java快速读取大文件
最近公司服务器监控系统需要做一个东西来分析Java应用程序的日志。
第一步探索:
首先我想到的是使用RandomAccessFile,因为它可以很方便地获取和设置文件指针,下面是我的代码。
package cn.mucang.exception.analyzer;

import cn.mucang.exception.analyzer.analyze.LogAnalyzer;
import cn.mucang.exception.analyzer.config.AnalyseConfig;
import cn.mucang.exception.analyzer.support.DefaultLogLineBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;

/**
 * Log analysis entry point (first attempt, based on {@link RandomAccessFile}).
 *
 * @author Gao Youbo
 * @since 2015/3/16.
 */
public class LogUtils {
    private static final Logger LOG = LoggerFactory.getLogger(LogUtils.class);

    /**
     * Reads the log file named by the analyzer's configuration, starting at the configured
     * file pointer, groups physical lines into log entries and hands each finished entry to
     * the analyzer.
     *
     * @param analyzer the analyzer; supplies the configuration and receives the log entries
     * @throws IOException              if the log file does not exist or cannot be read
     * @throws IllegalArgumentException if the configured file pointer lies beyond the end of the file
     */
    public static void analyse(LogAnalyzer analyzer) throws IOException {
        AnalyseConfig analyseConfig = analyzer.getAnalyseConfig();
        File file = new File(analyseConfig.getPath());
        LOG.info("开始分析日志文件...{}", file.getAbsolutePath());
        if (!file.exists()) {
            throw new IOException("日志文件不存在:" + analyseConfig);
        }
        // A negative start offset is treated as "start from the beginning".
        if (analyseConfig.getFilePointer() < 0) {
            analyseConfig.setFilePointer(0);
        }
        // The RandomAccessFile is the only handle opened on the file; try-with-resources
        // guarantees it is closed on every exit path.
        try (RandomAccessFile logFile = new RandomAccessFile(file, "r")) {
            long length = logFile.length();
            analyzer.getAnalyseConfig().setFileLenght(length); // record the file length in bytes
            if (analyseConfig.getFilePointer() > length) {
                throw new IllegalArgumentException("开始指针位置越界");
            }
            logFile.seek(analyseConfig.getFilePointer());

            String line; // current physical line
            int lineNumber = analyseConfig.getLineNumber(); // running line number
            DefaultLogLineBuilder lb = null; // entry currently being assembled
            long start = System.currentTimeMillis();
            while ((line = logFile.readLine()) != null) {
                lineNumber++;
                long filePointer = logFile.getFilePointer();
                if (ParseUtils.isNewLine(lineNumber, line)) {
                    // A new entry begins: flush the previous one first.
                    if (lb != null) {
                        analyzer.analyse(lb.getLogLine());
                    }
                    lb = new DefaultLogLineBuilder();
                }
                if (lb != null) {
                    lb.append(lineNumber, filePointer, line);
                    if (length == filePointer) {
                        // Reached the end of the file: flush the entry in progress.
                        analyzer.analyse(lb.getLogLine());
                    }
                }
                // Print the elapsed time for every 10000 lines.
                if (lineNumber % 10000 == 0) {
                    long end = System.currentTimeMillis();
                    System.out.println(String.format("line=%s, used=%sms", lineNumber, end - start));
                    start = System.currentTimeMillis();
                }
            }
        }
    }
}
下面看一下性能,分析一万行日志平均需要1500毫秒,因为我的日志分析使用到了正则,开始速度慢我以为是大量的正则运算造成的。
第二步探索:
我自己写了一个LogReader,自己控制指针位置。下面看一下代码:
package cn.mucang.exception.analyzer;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;

/**
 * Buffered, line-oriented reader for a log file that keeps track of how many characters
 * have been consumed so far.
 *
 * <p>Note that {@link #length()} is the file size in bytes while {@link #getFilePointer()}
 * counts decoded characters; the two are only directly comparable when every character of
 * the file is encoded as exactly one byte. The file is decoded with the platform default
 * charset.
 *
 * @author Gao Youbo
 * @since 2015-03-25 09:02
 */
public class LogReader implements Closeable {
    /**
     * File size in bytes, captured when the reader is created.
     */
    private final long length;
    /**
     * Number of characters consumed so far (by reading or skipping).
     */
    private long filePointer;
    private final FileInputStream inputStream;
    private final InputStreamReader inputStreamReader;
    private final BufferedReader bufferedReader;

    /**
     * Opens the given log file for reading.
     *
     * @param logFile the file to read
     * @throws FileNotFoundException if the file cannot be opened
     */
    public LogReader(File logFile) throws FileNotFoundException {
        this.inputStream = new FileInputStream(logFile);
        this.inputStreamReader = new InputStreamReader(inputStream);
        this.bufferedReader = new BufferedReader(inputStreamReader);
        this.length = logFile.length();
    }

    /**
     * Reads a single character and advances the file pointer.
     *
     * @return the character read, or -1 at end of file (the pointer is not advanced then)
     * @throws IOException if reading fails
     */
    public int read() throws IOException {
        int c = bufferedReader.read();
        if (c != -1) {
            filePointer++;
        }
        return c;
    }

    /**
     * Reads the next line. A line ends with {@code '\n'}, {@code '\r'}, {@code "\r\n"} or the
     * end of the file; the terminator is consumed but not included in the result.
     *
     * @return the line without its terminator, or {@code null} if the end of the file was
     *         reached before any character could be read
     * @throws IOException if reading fails
     */
    public String readLine() throws IOException {
        StringBuilder input = new StringBuilder();
        int c = -1;
        boolean eol = false; // end of line
        while (!eol) {
            switch (c = read()) {
                case -1:
                case '\n':
                    eol = true;
                    break;
                case '\r':
                    eol = true;
                    // Look one character ahead: swallow the '\n' of a "\r\n" pair, otherwise
                    // rewind so the character belongs to the next line.
                    bufferedReader.mark(1);
                    if (bufferedReader.read() == '\n') {
                        filePointer++;
                    } else {
                        bufferedReader.reset();
                    }
                    break;
                default:
                    input.append((char) c);
                    break;
            }
        }
        if ((c == -1) && (input.length() == 0)) {
            return null;
        }
        return input.toString();
    }

    /**
     * Returns the current read position.
     *
     * @return the number of characters consumed so far
     * @throws IOException never thrown by this implementation; declared for API compatibility
     */
    public long getFilePointer() throws IOException {
        return filePointer;
    }

    /**
     * Skips up to {@code n} characters from the current position and advances the file
     * pointer by the number actually skipped.
     *
     * @param n the number of characters to skip
     * @return the number of characters actually skipped
     * @throws IOException if skipping fails
     */
    public long skip(long n) throws IOException {
        // Skip through the buffered reader so that characters already buffered are accounted for.
        long skipped = bufferedReader.skip(n);
        filePointer += skipped;
        return skipped;
    }

    /**
     * Returns the size of the log file.
     *
     * @return the file size in bytes, as captured at construction time
     */
    public long length() {
        return length;
    }

    /**
     * Closes the reader and the underlying file stream.
     *
     * @throws IOException if closing fails
     */
    @Override
    public void close() throws IOException {
        // Closing the outermost reader closes the wrapped reader and stream as well.
        bufferedReader.close();
    }
}

package cn.mucang.exception.analyzer;

import cn.mucang.exception.analyzer.analyze.LogAnalyzer;
import cn.mucang.exception.analyzer.config.AnalyseConfig; import cn.mucang.exception.analyzer.support.DefaultLogLineBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; /** * @author Gao Youbo * @since 2015/3/16. */ public class LogUtils { private static final Logger LOG = LoggerFactory.getLogger(LogUtils.class); /** * 分析日志 * * @param analyzer 分析器 * @throws java.io.IOException */ public static void analyse(LogAnalyzer analyzer) throws IOException { AnalyseConfig analyseConfig = analyzer.getAnalyseConfig(); File file = new File(analyseConfig.getPath()); System.out.println(file.getAbsolutePath()); LOG.info("开始分析日志文件...{}", file.getAbsolutePath()); if (!file.exists()) { throw new IOException("日志文件不存在:" + analyseConfig); } if (analyseConfig.getFilePointer() < 0) { analyseConfig.setFilePointer(0); } try (LogReader logReader = new LogReader(file)) { long length = logReader.length(); analyzer.getAnalyseConfig().setFileLenght(length); //设置文件字节长度 if (analyseConfig.getFilePointer() > length) { throw new IllegalArgumentException("开始指针位置越界"); } else { logReader.skip(analyseConfig.getFilePointer()); } String line; //行数据 int lineNumber = analyseConfig.getLineNumber(); //行号 DefaultLogLineBuilder lb = null; long start = System.currentTimeMillis(); while ((line = logReader.readLine()) != null) { lineNumber++; long filePointer = logReader.getFilePointer(); if (ParseUtils.isNewLine(lineNumber, line)) { if (lb != null) { analyzer.analyse(lb.getLogLine()); } lb = new DefaultLogLineBuilder(); } if (lb != null) { lb.append(lineNumber, filePointer, line); if (length == filePointer) { //文档读取完了,调用一下分析 analyzer.analyse(lb.getLogLine()); } } if (lineNumber % 10000 == 0) { long end = System.currentTimeMillis(); System.out.println(String.format("line=%s, used=%s", lineNumber, end - start)); start = System.currentTimeMillis(); } } } } }接下来是测试的性能:
日志解析速度提高了10倍。