package com.util; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.HashSet; import java.util.Set; public class LoadUserWords { public static Set<String> loadUserWords(InputStream input) { String line; Set<String> myWords = new HashSet<String>(); try { BufferedReader br = new BufferedReader(new InputStreamReader(input, "UTF-8"), 1024); while ((line = br.readLine()) != null) { line = line.trim().toLowerCase(); myWords.add(line); } br.close(); } catch (IOException e) { System.err.println("WARNING: cannot open user words list!"); } return myWords; } }
这是 Java 读取中文文件的代码,一般不会出现乱码;不过如果数据量很大,部分内容会出现乱码(个人测试),原因未知。
测试例子:将一个 txt 文件的内容读取出来,放入一个 list 中,并对其进行排序。
package com.dict;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import com.util.LoadUserWords;

/**
 * Lazily created holder for the user word set loaded from the {@code a.txt} class-path
 * resource. The word set itself is kept in a static field shared by all instances.
 */
public class WordDictionary extends LoadUserWords {

    private static WordDictionary singleInstance;

    private static Set<String> userWords = new HashSet<String>();

    /**
     * @return the word set currently held by this class (the live set, not a copy)
     */
    public static Set<String> getUserWords() {
        return userWords;
    }

    /**
     * Replaces the shared word set.
     *
     * @param userWords the set to hold from now on
     */
    public static void setUserWords(Set<String> userWords) {
        WordDictionary.userWords = userWords;
    }

    /**
     * Returns the shared instance, creating it and loading the word list on first use.
     * Loading is best-effort: a failure is reported on {@code System.err} and the instance is
     * still returned. No synchronization is performed here.
     *
     * @return the shared {@code WordDictionary}
     */
    public static WordDictionary getInstance() {
        if (singleInstance == null) {
            singleInstance = new WordDictionary();
            try {
                singleInstance.read();
            } catch (RuntimeException e) {
                // Keep the best-effort contract, but do not hide the failure.
                System.err.println("WARNING: failed to load user words: " + e);
            }
        }
        return singleInstance;
    }

    /**
     * Reloads the shared word set from the {@code a.txt} resource, looked up through this
     * object's runtime class. If the resource cannot be found, a warning is printed and the
     * current word set is left untouched.
     */
    public void read() {
        InputStream stream = this.getClass().getResourceAsStream("a.txt");
        if (stream == null) {
            System.err.println("WARNING: user words resource a.txt not found");
            return;
        }
        userWords = loadUserWords(stream);
    }

    /**
     * Reloads the word list and prints every word on its own line to {@code System.out},
     * sorted in natural {@code String} order.
     */
    public void console() {
        read();
        List<String> sortedWords = new ArrayList<String>(getUserWords());
        Collections.sort(sortedWords);
        for (String word : sortedWords) {
            System.out.println(word);
        }
    }

    /**
     * Prints the sorted word list.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        new WordDictionary().console();
    }
}