package com.app;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * Loads a hierarchical classification rule file (XML) into an insertion-ordered
 * map from category path to the keyword groups collected along that path.
 *
 * <p>A category path is built from the {@code name} attributes of the nested
 * elements, joined with {@code #} (for example {@code major#middle#minor}).
 */
public class Rule {

    /** Regular expression that splits a keyword string on any run of whitespace. */
    private static final String WHITESPACE = "\\s+";

    /**
     * Classification rules: the key is the category path
     * (major#middle#minor), the value is the list of keyword groups gathered
     * from the root down to the first leaf of that category.
     */
    private final LinkedHashMap<String, ArrayList<KeyWords>> rule =
            new LinkedHashMap<String, ArrayList<KeyWords>>();

    /**
     * Loads the classification rule file.
     *
     * <p>A {@link DocumentException} raised while reading the file is not
     * propagated: its stack trace is printed and the rule map stays empty.
     *
     * @param path location of the rule XML, passed unchanged to
     *             {@link #loadXml(String)}
     */
    public Rule(String path) {
        // The map is always empty on a fresh instance, so no emptiness guard is needed.
        try {
            loadXml(path);
        } catch (DocumentException e) {
            // Best effort by design: callers receive an instance with no rules.
            e.printStackTrace();
        }
    }

    /**
     * Reads the XML document with dom4j's {@link SAXReader} and walks it from
     * the root element, filling the rule map.
     *
     * @param rulePath location of the rule XML handed to {@code SAXReader.read}
     * @throws DocumentException if dom4j fails to read or parse the document
     */
    public void loadXml(String rulePath) throws DocumentException {
        SAXReader reader = new SAXReader();
        Document doc = reader.read(rulePath);
        Element root = doc.getRootElement();
        iteElement(root, "", new ArrayList<KeyWords>());
    }

    /**
     * Recursively visits the child elements of {@code element}.
     *
     * <p>For a child that has child elements of its own, its {@code feature}
     * and {@code weight} attributes become a {@link KeyWords} entry appended to
     * a copy of {@code keyWords}, and the walk descends with the child's
     * {@code name} appended to {@code className}. For a child without child
     * elements (a leaf), its text and {@code weight} attribute become the final
     * entry, the list is stored under {@code className}, and the method returns.
     *
     * @param element   element whose children are visited
     * @param className category path accumulated so far; empty at the root
     * @param keyWords  keyword groups accumulated so far; never modified, each
     *                  child works on its own copy
     * @throws NullPointerException  if a required {@code feature} or
     *                               {@code weight} attribute is missing
     * @throws NumberFormatException if a {@code weight} attribute is not a
     *                               parsable double
     */
    public void iteElement(Element element, String className,
            ArrayList<KeyWords> keyWords) {
        // Iterator<?> plus an explicit cast works whether dom4j declares the
        // iterator raw or generic.
        Iterator<?> children = element.elementIterator();
        while (children.hasNext()) {
            // Each child gets its own copy so siblings do not see each other's entries.
            ArrayList<KeyWords> path = new ArrayList<KeyWords>(keyWords);
            Element child = (Element) children.next();

            if (child.elements().isEmpty()) {
                // Leaf node: its text holds the keywords for the current category.
                path.add(toKeyWords(child.getText(),
                        child.attributeValue("weight")));
                rule.put(className, path);
                // NOTE(review): returning here skips any remaining siblings of
                // the first leaf; kept as in the original - confirm that each
                // category is meant to contain a single leaf element.
                return;
            }

            String name = child.attributeValue("name");
            path.add(toKeyWords(child.attributeValue("feature"),
                    child.attributeValue("weight")));

            // The top-level category has no "#" prefix; deeper levels are joined with "#".
            String childClass = className.length() < 1
                    ? className + name
                    : className + "#" + name;
            iteElement(child, childClass, path);
        }
    }

    /**
     * Builds a {@link KeyWords} entry from a whitespace-separated keyword
     * string and a textual weight.
     *
     * @param text   keywords separated by whitespace
     * @param weight textual form of the weight
     * @return the keyword group
     */
    private KeyWords toKeyWords(String text, String weight) {
        // NOTE(review): text that starts with whitespace yields an empty first
        // token, which ends up in the set - confirm whether it should be trimmed.
        HashSet<String> words =
                new HashSet<String>(Arrays.asList(text.split(WHITESPACE)));
        return new KeyWords(words, Double.parseDouble(weight));
    }

    /**
     * The rule keywords of one category level.
     */
    class KeyWords {
        /**
         * Keyword set.
         */
        HashSet<String> value;
        /**
         * Weight.
         */
        double weight;

        public KeyWords(HashSet<String> value, double weight) {
            this.value = value;
            this.weight = weight;
        }

        /**
         * @return the value
         */
        public HashSet<String> getValue() {
            return value;
        }

        /**
         * @param value
         *            the value to set
         */
        public void setValue(HashSet<String> value) {
            this.value = value;
        }

        /**
         * @return the weight
         */
        public double getWeight() {
            return weight;
        }

        /**
         * @param weight
         *            the weight to set
         */
        public void setWeight(double weight) {
            this.weight = weight;
        }
    }

    /**
     * @return the rule map (the internal instance, not a copy)
     */
    public LinkedHashMap<String, ArrayList<KeyWords>> getRule() {
        return rule;
    }

    /**
     * Demo entry point: loads {@code rule2.xml} and prints every category path
     * followed by the keyword sets stored for it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Rule r = new Rule("rule2.xml");
        LinkedHashMap<String, ArrayList<KeyWords>> rule = r.getRule();
        for (String className : rule.keySet()) {
            System.out.println(className + "---------------");
            for (KeyWords words : rule.get(className)) {
                System.out.println(words.getValue().toString());
            }
        }
    }
}