• TRIE树


    Trie 树的 Java 实现(Trie 树又称单词查找树、前缀树,查询效率比哈希树高)

    import org.apache.commons.lang3.Validate;
    
    import java.util.HashMap;
    import java.util.Map;
    
    public class TrieTree<T> {
        private TreeNode<T> root = new TreeNode<>();
        private int size = 0;
    
        public int getSize() {
            return size;
        }
    
        public T put(String key, T value) {
            Validate.notEmpty(key);
            Validate.isTrue(key.length() <= 64);
    
            TreeNode<T> current = this.root;
            for (int offset = 0; offset < key.length(); offset++) {
                char head = key.charAt(offset);
                TreeNode<T> child = current.children.get(head);
                if (child == null) {
                    child = new TreeNode<>();
                    current.children.put(head, child);
                }
                current = child;
            }
            T oldValue = current.value;
            current.value = value;
            if (oldValue == null) {
                this.size++;
            }
            return oldValue;
        }
    
        public T find(String text, int offset) {
            TreeNode<T> current = this.root;
            for (; offset < text.length(); offset++) {
                char ch = text.charAt(offset);
                TreeNode<T> child = current.children.get(ch);
                if (child == null) return current.value;
                current = child;
            }
            if (current.children.isEmpty()) {
                return current.value;
            } else {
                return null;
            }
        }
    
        private static class TreeNode<T> {
            Map<Character, TreeNode<T>> children = new HashMap<>();
            T value;
        }
    }

     

    使用示例:基于 TrieTree 的热词匹配(HotWords)

    import com.google.common.base.Strings;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    import port.mllib.SparseVectorJ;
    
    import java.io.BufferedReader;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.util.ArrayList;
    import java.util.List;
    
    public class HotWords {
        private static final Logger LOG = LoggerFactory.getLogger(HotWords.class);
    
        private TrieTree<Integer> tree;
    
        public HotWords(InputStream inStream) {
            try {
                BufferedReader reader = new BufferedReader(new InputStreamReader(inStream, "UTF-8"));
                this.tree = new TrieTree<>();
                int i = 0;
                String line;
                while (((line = reader.readLine()) != null)) {
                    this.tree.put(line, i++);
                }
                inStream.close();
                reader.close();
            } catch (Exception e) {
                LOG.error("fail load hot words", e);
                throw new RuntimeException(e);
            }
        }
    
        public SparseVectorJ findAll(String text) {
            List<Integer> ret = new ArrayList<>();
            if (Strings.isNullOrEmpty(text)) {
                return new SparseVectorJ(tree.getSize(), new int[0], new double[0]);
            }
    
            for (int i = 0; i < text.length(); ++i) {
                Integer id = tree.find(text, i);
                if (id != null) ret.add(id);
            }
    
            int[] ii = new int[ret.size()];
            double[] vv = new double[ret.size()];
            for (Integer i = 0; i < ret.size(); i++) {
                ii[i] = ret.get(i);
                vv[i] = 1;
            }
            return new SparseVectorJ(tree.getSize(), ii, vv);
        }
    }
  • 相关阅读:
    Google是如何赚钱的?
    网站数据连接
    表单验证
    Web Proxy Autodiscovery Protocol
    把SQL2000的数据库迁移至SQL2005
    WFE与Index服务器之前的通讯
    SOS 的帮助输出
    WinDbg.exe中使用的SOS.dll的命令列表
    HTTPS 简介
    Error: A web configuration modification operation is already running
  • 原文地址:https://www.cnblogs.com/tengpan-cn/p/8640668.html
Copyright © 2020-2023  润新知