• Java读取大文件


    原文地址:http://wgslucky.blog.163.com/blog/static/97562532201332324639689/ 

    Java 读取巨大的文本文件时,如何既保证内存不溢出,又保证性能:

     1 import java.io.BufferedReader;
     2 import java.io.File;
     3 import java.io.FileReader;
     4 import java.io.RandomAccessFile;
     5 import java.nio.ByteBuffer;
     6 import java.nio.MappedByteBuffer;
     7 import java.nio.channels.FileChannel;
     8 
     /**
      * Demonstrates three ways of reading a very large text file without loading
      * the whole file into memory: a memory-mapped buffer ({@link #main1}), a
      * {@link FileChannel} with a small {@link ByteBuffer} ({@link #main2},
      * {@link #main3}) and a line-oriented {@link BufferedReader} ({@link #main}).
      *
      * <p>NOTE(review): the byte-oriented demos decode every chunk on its own with
      * the platform default charset, so a multi-byte character that straddles a
      * chunk boundary is printed incorrectly. Use the {@link BufferedReader}
      * variant when the text is not single-byte encoded.
      */
     public class ReadBig {
         /** Path of the file read by the demos; non-final so callers can redirect it. */
         public static String fff = "C:\\mq\\read\\from.xml";

         /**
          * Prints the second half of {@link #fff} using memory-mapped I/O and then
          * prints the elapsed time in milliseconds.
          *
          * <p>{@code FileChannel.map(mode, position, size)} takes:
          * <ul>
          *   <li>{@code mode} - one of {@code READ_ONLY}, {@code READ_WRITE} or
          *       {@code PRIVATE} (copy-on-write) from {@code FileChannel.MapMode};</li>
          *   <li>{@code position} - where the mapped region starts; non-negative;</li>
          *   <li>{@code size} - size of the mapped region; non-negative and no
          *       larger than {@code Integer.MAX_VALUE}.</li>
          * </ul>
          * To read only the last eighth of the file, start at {@code length * 7 / 8};
          * to read the whole file, start at {@code 0}.
          *
          * @param args ignored
          * @throws Exception if the file cannot be opened, mapped or read
          */
         public static void main1(String[] args) throws Exception {
             final int BUFFER_SIZE = 0x300000; // 3 MiB handed to the consumer per chunk
             // A single mapping may not exceed Integer.MAX_VALUE bytes, so the region is
             // mapped window by window. The window is a multiple of BUFFER_SIZE so the
             // chunk boundaries are the same as with one big mapping.
             final long MAX_WINDOW = 256L * BUFFER_SIZE;

             byte[] dst = new byte[BUFFER_SIZE];

             try (RandomAccessFile raf = new RandomAccessFile(new File(fff), "r");
                  FileChannel channel = raf.getChannel()) {
                 long fileSize = channel.size();
                 long position = fileSize / 2;
                 // "size - position" instead of "size / 2": keeps the last byte of
                 // odd-length files.
                 long remaining = fileSize - position;

                 long start = System.currentTimeMillis();

                 while (remaining > 0) {
                     long window = Math.min(remaining, MAX_WINDOW);
                     MappedByteBuffer inputBuffer =
                             channel.map(FileChannel.MapMode.READ_ONLY, position, window);

                     while (inputBuffer.hasRemaining()) {
                         // Number of bytes actually copied for THIS chunk; only the
                         // final chunk may be shorter than BUFFER_SIZE.
                         int length = Math.min(inputBuffer.remaining(), BUFFER_SIZE);
                         inputBuffer.get(dst, 0, length);
                         // new String(dst, 0, length) is the text of the chunk and can
                         // be processed here instead of being printed.
                         System.out.println(new String(dst, 0, length));
                     }

                     position += window;
                     remaining -= window;
                 }

                 long end = System.currentTimeMillis();

                 System.out.println("读取文件文件一半内容花费:" + (end - start) + "毫秒");
             }
         }

         /**
          * Prints the whole of {@link #fff}, reading it through a {@link FileChannel}
          * in 1 KiB chunks.
          *
          * @param args ignored
          * @throws Exception if the file cannot be opened or read
          */
         public static void main2(String[] args) throws Exception {
             printWithChannel(fff);
         }

         /**
          * Prints {@link #fff} line by line using a {@link BufferedReader}.
          *
          * @param args ignored
          * @throws Exception if the file cannot be opened or read
          */
         public static void main(String[] args) throws Exception {
             try (BufferedReader br = new BufferedReader(new FileReader(fff))) {
                 String line;
                 while ((line = br.readLine()) != null) {
                     System.out.println(line);
                 }
             }
         }

         /**
          * Same as {@link #main2} but for the fixed path {@code d:\filename}.
          * The article declared this as a second {@code main(String[])}, which cannot
          * compile next to the other one, hence the distinct name.
          *
          * @param args ignored
          * @throws Exception if the file cannot be opened or read
          */
         public static void main3(String[] args) throws Exception {
             printWithChannel("d:\\filename");
         }

         /** Copies the file at {@code path} to standard output in 1 KiB chunks. */
         private static void printWithChannel(String path) throws Exception {
             ByteBuffer byteBuf = ByteBuffer.allocate(1024);
             try (RandomAccessFile raf = new RandomAccessFile(path, "r");
                  FileChannel channel = raf.getChannel()) {
                 while (channel.read(byteBuf) != -1) {
                     byteBuf.flip(); // limit = number of bytes just read
                     // Treat the bytes as text and print them, purely as an example.
                     System.out.print(new String(byteBuf.array(), 0, byteBuf.limit()));
                     byteBuf.clear();
                 }
             }
         }
     }

    Java 读取大容量文件时内存溢出怎么办?可以每次只读取若干行,分多次读完:

     1 import java.io.BufferedReader;
     2 import java.io.FileNotFoundException;
     3 import java.io.FileReader;
     4 import java.io.IOException;
     5 import java.io.RandomAccessFile;
     6 import java.util.Scanner;
     7 
     /**
      * Reads a large text file a fixed number of lines at a time so that only one
      * batch is held in memory at once.
      */
     public class TestPrint {
         /** Number of lines gathered before a batch is handed to the processing hook. */
         private static final int BATCH_LINES = 100;

         /**
          * Reads the file in batches of {@value #BATCH_LINES} lines.
          *
          * @param args optional; {@code args[0]} is the file to read, otherwise the
          *             placeholder path from the original example is used
          * @throws IOException if the file does not exist or cannot be read
          */
         public static void main(String[] args) throws IOException {
             String path = (args != null && args.length > 0) ? args[0] : "你要读的文件的路径";
             readInBatches(path, BATCH_LINES);
         }

         /**
          * Reads {@code path} line by line and hands every {@code batchLines} lines
          * (concatenated without separators, as in the original example) to
          * {@link #processBatch}. A final, shorter batch is processed as well.
          *
          * <p>The file is opened read-only through a buffered reader:
          * {@code RandomAccessFile.readLine()} is unbuffered, widens each byte to a
          * char (breaking non-ASCII text), and opening with {@code "rw"} would
          * create the file when it is missing.
          *
          * @param path       file to read
          * @param batchLines number of lines per batch; must be positive
          * @return the number of batches processed
          * @throws IOException              if the file cannot be opened or read
          * @throws IllegalArgumentException if {@code batchLines} is not positive
          */
         static int readInBatches(String path, int batchLines) throws IOException {
             if (batchLines <= 0) {
                 throw new IllegalArgumentException("batchLines must be positive: " + batchLines);
             }
             int batches = 0;
             int linesInBatch = 0;
             // StringBuilder instead of "app = app + str": avoids the literal "null"
             // prefix and the quadratic copying of repeated String concatenation.
             StringBuilder batch = new StringBuilder();
             try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
                 String line;
                 while ((line = reader.readLine()) != null) {
                     batch.append(line);
                     linesInBatch++;
                     if (linesInBatch >= batchLines) {
                         processBatch(batch);
                         batches++;
                         batch.setLength(0);
                         linesInBatch = 0;
                     }
                 }
             }
             if (linesInBatch > 0) { // trailing lines that did not fill a whole batch
                 processBatch(batch);
                 batches++;
             }
             return batches;
         }

         /** Hook for the per-batch work; the example leaves it empty. */
         private static void processBatch(CharSequence batch) {
             // Operate on the collected lines here, then reading continues.
         }
     }
     

    当逐行读写大于2G的文本文件时推荐使用以下代码

     

     1 void largeFileIO(String inputFile, String outputFile) {
     2         try {
     3             BufferedInputStream bis = new BufferedInputStream(new FileInputStream(new File(inputFile)));
     4             BufferedReader in = new BufferedReader(new InputStreamReader(bis, "utf-8"), 10 * 1024 * 1024);//10M缓存
     5             FileWriter fw = new FileWriter(outputFile);
     6             while (in.ready()) {
     7                 String line = in.readLine();
     8                 fw.append(line + " ");
     9             }
    10             in.close();
    11             fw.flush();
    12             fw.close();
    13         } catch (IOException ex) {
    14             ex.printStackTrace();
    15         }

    jdk本身就支持超大文件的读写

      网上的文章基本分为两大类:一类使用 BufferedReader 类读取超大文件;另一类使用 RandomAccessFile 类读取。经过比较,最后采用了前一种方式读取超大文件。下面是相关代码,其实很简单:

    1 File file = new File(filepath);   
    2 BufferedInputStream fis = new BufferedInputStream(new FileInputStream(file));    
    3 BufferedReader reader = new BufferedReader(new InputStreamReader(fis,"utf-8"),5*1024*1024);// 用5M的缓冲读取文本文件  
    4   
    5 String line = "";
    6 while((line = reader.readLine()) != null){
    7 //TODO: write your business
    8 }
  • 相关阅读:
    A New Approach to Line Simplification Based on Image Processing: A Case Study of Water Area Boundaries
    3D模型
    数码相机控制点的自动定位检校
    道路网匹配
    多线程操作数据拷贝要加线程锁
    编程琐事
    C++ 指定路径文件夹存在与否查询及文件夹创建
    C++ 网络编程之正确使用UDP广播及多播
    C++ 获得系统时间
    C++ 数据写入文件与读回
  • 原文地址:https://www.cnblogs.com/linksky1018/p/4140749.html
Copyright © 2020-2023  润新知