• How to Write a Spelling Corrector:用 Java 写拼写检查器(Java 实现,以备查验)


    import java.io.*;
    import java.util.*;
    import java.util.regex.*;

    /**
     * A small frequency-based spelling corrector.
     *
     * <p>The constructor counts how often each lower-cased word occurs in a training
     * text file. {@link #correct(String)} then returns the known word with the highest
     * count that is reachable from the input by one edit, or failing that by two edits.
     */
    class Spelling {

        /** Matches a run of word characters; compiled once and reused for every line. */
        private static final Pattern WORD = Pattern.compile("\\w+");

        /** Lower-cased word mapped to the number of times it occurs in the training text. */
        private final HashMap<String, Integer> nWords = new HashMap<String, Integer>();

        /**
         * Builds the word-frequency table from a text file.
         *
         * @param file path of the training text to read
         * @throws IOException if the file cannot be opened or read
         */
        public Spelling(String file) throws IOException {
            BufferedReader in = new BufferedReader(new FileReader(file));
            try {
                for (String line = in.readLine(); line != null; line = in.readLine()) {
                    Matcher m = WORD.matcher(line.toLowerCase());
                    while (m.find()) {
                        String token = m.group();
                        Integer seen = nWords.get(token);
                        nWords.put(token, seen == null ? 1 : seen + 1);
                    }
                }
            } finally {
                // Close the reader even when reading fails part-way through.
                in.close();
            }
        }

        /**
         * Generates every string that is one edit away from {@code word}.
         *
         * @param word the word to mutate
         * @return all deletions, adjacent transpositions, single-letter replacements and
         *         single-letter insertions (letters 'a'..'z'), in that order; may contain
         *         duplicates and the word itself
         */
        private final ArrayList<String> edits(String word) {
            ArrayList<String> result = new ArrayList<String>();
            // Deletions: drop the character at position i.
            for (int i = 0; i < word.length(); ++i) {
                result.add(word.substring(0, i) + word.substring(i + 1));
            }
            // Transpositions: swap the characters at positions i and i+1.
            for (int i = 0; i < word.length() - 1; ++i) {
                result.add(word.substring(0, i) + word.substring(i + 1, i + 2)
                        + word.substring(i, i + 1) + word.substring(i + 2));
            }
            // Replacements: overwrite the character at position i with each letter.
            for (int i = 0; i < word.length(); ++i) {
                for (char c = 'a'; c <= 'z'; ++c) {
                    result.add(word.substring(0, i) + String.valueOf(c) + word.substring(i + 1));
                }
            }
            // Insertions: insert each letter before position i (including at the end).
            for (int i = 0; i <= word.length(); ++i) {
                for (char c = 'a'; c <= 'z'; ++c) {
                    result.add(word.substring(0, i) + String.valueOf(c) + word.substring(i));
                }
            }
            return result;
        }

        /**
         * Scans {@code words} and returns the most frequent known word, starting from
         * an already-found best candidate.
         *
         * <p>A candidate replaces the current best when its count is greater than or
         * equal to the best count, so among equally frequent candidates the one seen
         * last wins.
         *
         * @param words candidate spellings to look up in the frequency table
         * @param best  best known word found so far, or {@code null} if none
         * @return the best known word after scanning, or {@code null} if still none
         */
        private String pickBest(Iterable<String> words, String best) {
            // Every stored count is at least 1, so 0 means "nothing found yet".
            int bestCount = best == null ? 0 : nWords.get(best);
            for (String w : words) {
                Integer count = nWords.get(w);
                if (count != null && count >= bestCount) {
                    best = w;
                    bestCount = count;
                }
            }
            return best;
        }

        /**
         * Suggests a correction for {@code word}.
         *
         * @param word the word to check; it is looked up as given (no case folding)
         * @return {@code word} itself if it is in the frequency table; otherwise the most
         *         frequent known word one edit away; otherwise the most frequent known
         *         word two edits away; otherwise {@code word} unchanged
         */
        public final String correct(String word) {
            // A word that is already known is returned as-is.
            if (nWords.containsKey(word)) {
                return word;
            }
            // Otherwise treat it as misspelled and try every string one edit away.
            ArrayList<String> list = edits(word);
            String best = pickBest(list, null);
            if (best != null) {
                return best;
            }
            // Nothing at distance one: edit each guess once more (distance two).
            for (String s : list) {
                best = pickBest(edits(s), best);
            }
            // Still nothing known: give the original word back.
            return best != null ? best : word;
        }

        /**
         * Prints the correction of the first command-line argument, using
         * {@code big.txt} in the working directory as training text. Does nothing
         * when no argument is given.
         *
         * @param args command-line arguments; {@code args[0]} is the word to correct
         * @throws IOException if {@code big.txt} cannot be read
         */
        public static void main(String[] args) throws IOException {
            if (args.length > 0) {
                System.out.println((new Spelling("big.txt")).correct(args[0]));
            }
        }

    }

    代码来源:http://raelcunha.com/spell-correct.php

  • 相关阅读:
    PHP木马免杀的一些总结
    regsvr32 bypass windows defender 新思路
    要点4:C的文件操作
    【Go语言探险】线上奇怪日志问题的排查
    Eclipse部署虚拟项目目录
    osgEarth使用笔记4——加载矢量数据
    three.js中的矩阵计算
    GDAL数据集写入空间坐标参考
    matlab 基础
    nginx 配置文件解读
  • 原文地址:https://www.cnblogs.com/mrcharles/p/4744842.html
Copyright © 2020-2023  润新知