【引言】
目前在Java中用于解析XML的技术很多,主流的有DOM、SAX、JDOM、DOM4j,下文主要介绍这4种解析XML文档技术的使用、优缺点及性能测试。
一、【基础知识——扫盲】
SAX、DOM是两种对XML文档进行解析的方法(没有具体实现,只是接口),所以只有它们是无法解析XML文档的;JAXP只是API,它进一步封装了SAX、DOM两种接口,并且提供了DocumentBuilderFactory/DocumentBuilder和SAXParserFactory/SAXParser(默认使用Xerces解析器)。
二、【DOM、SAX、JDOM、DOM4j简单使用介绍】
1、【DOM(Document Object Model) 】
由W3C提供的接口,它将整个XML文档读入内存,构建一个DOM树来对各个节点(Node)进行操作。
示例代码:
[html] view plaincopyprint?
<?xml version="1.0"
encoding="UTF-8"?>
<!--
  Sample document used by the parsing examples.
  Structure: university > college > class > student.
  - college c1 holds class1 and class2, with three students each
  - college c2 holds class3, with a single student
  - college c3 is empty
  Every student carries the attributes name, sex and age.
-->
<university name="pku">
<college name="c1">
<class name="class1">
<student name="stu1" sex='male' age="21"
/>
<student name="stu2" sex='female' age="20"
/>
<student name="stu3" sex='female' age="20"
/>
</class>
<class name="class2">
<student name="stu4" sex='male' age="19"
/>
<student name="stu5" sex='female' age="20"
/>
<student name="stu6" sex='female' age="21"
/>
</class>
</college>
<college name="c2">
<class name="class3">
<student name="stu7" sex='male' age="20"
/>
</class>
</college>
<college name="c3">
</college>
</university>
<?xml version="1.0"
encoding="UTF-8"?>
<university name="pku">
</university>
后文代码中使用到的test.xml(该文档放在src路径下,即编译后在classes路径下),都是指该xml文档。
[java] view plaincopyprint?
package test.xml;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;
/**
 * Demonstrates reading and modifying an XML document with the W3C DOM API
 * as exposed through JAXP.
 *
 * <p>The no-argument {@link #read()} and {@link #write()} operate on the
 * classpath resource {@code test.xml}; the overloads taking explicit
 * arguments run the same logic on any input and output.
 *
 * <p>Failures while parsing or saving are reported with
 * {@code printStackTrace()} and are not propagated to the caller.
 */
public class TestDom {

    /** Classpath resource that holds the sample document. */
    private static final String SOURCE_RESOURCE = "test.xml";

    /** File that {@link #write()} saves the modified document to. */
    private static final String OUTPUT_PATH = "src/dom-modify.xml";

    public static void main(String[] args) {
        read();
        // Swap in the call below to run the modification demo instead.
        //write();
    }

    /**
     * Parses {@code test.xml} from the classpath and prints the name of the
     * university, of every college, of every class, and the name, sex and age
     * of every student to {@code System.err}.
     */
    public static void read() {
        InputStream in = openSource();
        if (in == null) {
            // parse(null) would fail with an unexplained IllegalArgumentException.
            System.err.println("Resource not found on classpath: " + SOURCE_RESOURCE);
            return;
        }
        try {
            read(in);
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Parses the given XML stream and prints its university / college /
     * class / student hierarchy to {@code System.err}.
     *
     * @param in the XML document to read; must not be {@code null}. The
     *           stream is not closed by this method.
     */
    public static void read(InputStream in) {
        try {
            Document doc = parse(in);
            // root <university>
            Element root = doc.getDocumentElement();
            if (root == null) {
                return;
            }
            System.err.println(root.getAttribute("name"));

            // Child lists also contain whitespace text nodes, hence the
            // element-type check at every level.
            for (Node college = root.getFirstChild(); college != null;
                    college = college.getNextSibling()) {
                if (college.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                System.err.println(" " + ((Element) college).getAttribute("name"));

                for (Node clazz = college.getFirstChild(); clazz != null;
                        clazz = clazz.getNextSibling()) {
                    if (clazz.getNodeType() != Node.ELEMENT_NODE) {
                        continue;
                    }
                    System.err.println(" " + ((Element) clazz).getAttribute("name"));

                    for (Node student = clazz.getFirstChild(); student != null;
                            student = student.getNextSibling()) {
                        if (student.getNodeType() != Node.ELEMENT_NODE) {
                            continue;
                        }
                        // Element.getAttribute yields "" for a missing attribute
                        // instead of failing with a NullPointerException.
                        Element s = (Element) student;
                        System.err.println(" " + s.getAttribute("name")
                                + " " + s.getAttribute("sex")
                                + " " + s.getAttribute("age"));
                    }
                }
            }
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code test.xml} from the classpath, applies the demo
     * modifications and saves the result to {@code src/dom-modify.xml},
     * printing the absolute path of that file to {@code System.out}.
     */
    public static void write() {
        InputStream in = openSource();
        if (in == null) {
            System.err.println("Resource not found on classpath: " + SOURCE_RESOURCE);
            return;
        }
        try {
            write(in, new File(OUTPUT_PATH));
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Parses the given XML stream, applies the demo modifications and saves
     * the result to {@code target}, overwriting any existing file. On success
     * the absolute path of {@code target} is printed to {@code System.out}.
     *
     * <p>The modifications are: rename the root to {@code tsu}, remove the
     * colleges {@code c1} and {@code c2}, add a class {@code c4} to college
     * {@code c3}, and append a new college {@code c5} containing the text
     * {@code text}.
     *
     * @param in     the XML document to modify; must not be {@code null}. The
     *               stream is not closed by this method.
     * @param target the file to write the modified document to
     */
    public static void write(InputStream in, File target) {
        try {
            Document doc = parse(in);
            // root <university>
            Element root = doc.getDocumentElement();
            if (root == null) {
                return;
            }
            modify(doc, root);
            save(doc, target);
            System.out.println(target.getAbsolutePath());
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        }
    }

    /** Opens the sample document from the classpath, or returns null if it is absent. */
    private static InputStream openSource() {
        return TestDom.class.getClassLoader().getResourceAsStream(SOURCE_RESOURCE);
    }

    /** Builds a DOM tree from the given stream using the default JAXP parser. */
    private static Document parse(InputStream in)
            throws ParserConfigurationException, SAXException, IOException {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        return builder.parse(in);
    }

    /** Applies the attribute change, node removals and node additions of the demo. */
    private static void modify(Document doc, Element root) {
        // change an attribute
        root.setAttribute("name", "tsu");

        // The child list is live: removing a node shifts every later index.
        // Walking the sibling chain and fetching the successor before a
        // possible removal visits every child exactly once.
        Node child = root.getFirstChild();
        while (child != null) {
            Node next = child.getNextSibling();
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                Element college = (Element) child;
                String collegeName = college.getAttribute("name");
                if ("c1".equals(collegeName) || "c2".equals(collegeName)) {
                    // remove a node
                    root.removeChild(college);
                } else if ("c3".equals(collegeName)) {
                    Element newChild = doc.createElement("class");
                    newChild.setAttribute("name", "c4");
                    college.appendChild(newChild);
                }
            }
            child = next;
        }

        // add a node
        Element addCollege = doc.createElement("college");
        addCollege.setAttribute("name", "c5");
        root.appendChild(addCollege);
        Text text = doc.createTextNode("text");
        addCollege.appendChild(text);
    }

    /** Serializes the document to the given file, replacing previous content. */
    private static void save(Document doc, File target)
            throws IOException, TransformerException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        // FileOutputStream creates or truncates the file itself, so no
        // exists/delete/createNewFile sequence is needed beforehand.
        FileOutputStream out = new FileOutputStream(target);
        try {
            transformer.transform(new DOMSource(doc), new StreamResult(out));
        } finally {
            out.close();
        }
    }

    /** Closes the stream; a failure to close a fully read input is not actionable. */
    private static void closeQuietly(InputStream in) {
        try {
            in.close();
        } catch (IOException ignored) {
            // Nothing useful can be done here; the data was already consumed.
        }
    }
}
package test.xml;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationExcep
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfiguration
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;
public class TestDom {