Java 解析 XML文件

个人博客网：https://wushaopei.github.io/ (你想要的这里都有)

xml 文件：

<?xml version="1.0" encoding="UTF-8"?>
<invoiceOrderStore>
    <InvoiceOrder id="1">
        <invoiceOrder>001</invoiceOrder>
        <companyName>冰与火之歌1</companyName>
        <taxNumber>冰与火之歌1</taxNumber>
        <accountBank>冰与火之歌1</accountBank>
        <companyAddress>乔治马丁1</companyAddress>
        <bankNumber>20141</bankNumber>
        <companyTelephone>891</companyTelephone>
        <accountName>891</accountName>
    </InvoiceOrder>
    <InvoiceOrder id="2">
        <invoiceOrder>002</invoiceOrder>
        <companyName>冰与火之歌2</companyName>
        <taxNumber>冰与火之歌2</taxNumber>
        <accountBank>冰与火之歌2</accountBank>
        <companyAddress>乔治马丁2</companyAddress>
        <bankNumber>20142</bankNumber>
        <companyTelephone>892</companyTelephone>
        <accountName>892</accountName>
    </InvoiceOrder>
    <InvoiceOrder id="3">
        <invoiceOrder>003</invoiceOrder>
        <companyName>冰与火之歌3</companyName>
        <taxNumber>冰与火之歌3</taxNumber>
        <accountBank>冰与火之歌3</accountBank>
        <companyAddress>乔治马丁3</companyAddress>
        <bankNumber>20143</bankNumber>
        <companyTelephone>893</companyTelephone>
        <accountName>893</accountName>
    </InvoiceOrder>
</invoiceOrderStore>

1、基于DOM解析器转化XML文档并解析：

说明：

DOM解析器把XML文档转化为一个包含其内容的树，并可以对树进行遍历。用DOM解析模型的优点是编程容易，开发人员只需要调用建树的指令，然后利用navigation APIs访问所需的树节点来完成任务。可以很容易的添加和修改树中的元素。然而由于使用DOM解析器的时候需要处理整个XML文档，所以对性能和内存的要求比较高，尤其是遇到很大的XML文件的时候。由于它的遍历能力，DOM解析器常用于XML文档需要频繁的改变的服务中。

案例代码：

package com.example.poiutis.xml;

import com.example.poiutis.model.InvoiceOrder;
import org.w3c.dom.*;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.util.ArrayList;
import java.util.List;


/**
 * @ClassName  用DOM方式读取xml文件
 * @Description TODO
 * @Author shaopei
 * @Date 2019/8/1 10:57
 * @Version 1.0
 */
public class ReadxmlByDom {

    /**
     * Parses a classpath XML resource with the JAXP DOM parser and converts every
     * {@code <InvoiceOrder>} element found under the document root into an
     * {@link InvoiceOrder}.
     *
     * <p>The child elements of each {@code <InvoiceOrder>} are read positionally, in
     * this order: invoiceOrder, companyName, taxNumber, accountBank, companyAddress,
     * bankNumber, companyTelephone, accountName. A record with fewer than eight child
     * elements is skipped.
     *
     * @param fileName resource name, resolved as {@code "/" + fileName} on the classpath
     * @return the parsed records, in document order (never {@code null})
     * @throws IllegalArgumentException if the resource cannot be found on the classpath
     * @throws Exception if the parser cannot be created or the document cannot be parsed
     */
    public List<InvoiceOrder> getDoXml(String fileName) throws Exception {
        // Factory API that hands out parsers producing DOM object trees.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();

        // Document is the root of the DOM tree. The stream is closed as soon as the
        // tree has been built, and a missing resource is reported with its name.
        Document document;
        try (java.io.InputStream in = this.getClass().getResourceAsStream("/" + fileName)) {
            if (in == null) {
                throw new IllegalArgumentException(
                        "XML resource not found on classpath: /" + fileName);
            }
            document = builder.parse(in);
        }

        // Root element of the document.
        Element root = document.getDocumentElement();
        System.out.println(root.getNodeName());

        // Per-call result list (kept local so that calls do not share state).
        List<InvoiceOrder> invoiceOrders = new ArrayList<InvoiceOrder>();

        NodeList nodeList = root.getElementsByTagName("InvoiceOrder");
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node node = nodeList.item(i);
            InvoiceOrder invoiceOrder = new InvoiceOrder();

            // Print every attribute, but only the "id" attribute is numeric.
            NamedNodeMap attributes = node.getAttributes();
            for (int j = 0; j < attributes.getLength(); j++) {
                Node attribute = attributes.item(j);
                System.out.println(attribute.getNodeName() + ":"
                        + attribute.getNodeValue());
                if ("id".equals(attribute.getNodeName())) {
                    invoiceOrder.setId(Integer.parseInt(attribute.getNodeValue()));
                }
            }

            // Collect the text of the child ELEMENTS only. Whitespace between tags
            // shows up as text nodes, so filtering by node type works for both
            // pretty-printed and compact XML; getTextContent() on the element itself
            // also copes with empty elements.
            NodeList childNodes = node.getChildNodes();
            System.out.println(childNodes.getLength());
            List<String> contents = new ArrayList<>();
            for (int j = 0; j < childNodes.getLength(); j++) {
                Node item = childNodes.item(j);
                if (item.getNodeType() == Node.ELEMENT_NODE) {
                    contents.add(item.getTextContent());
                }
            }

            if (contents.size() >= 8) {
                invoiceOrder.setInvoiceOrder(contents.get(0));
                invoiceOrder.setCompanyName(contents.get(1));
                invoiceOrder.setTaxNumber(contents.get(2));
                invoiceOrder.setAccountBank(contents.get(3));
                invoiceOrder.setCompanyAddress(contents.get(4));
                invoiceOrder.setBankNumber(contents.get(5));
                invoiceOrder.setCompanyTelephone(contents.get(6));
                invoiceOrder.setAccountName(contents.get(7));
                invoiceOrders.add(invoiceOrder);
            }
        }
        return invoiceOrders;
    }

    /** Demo entry point: parses {@code invoiceOrder.xml} from the classpath and prints each record. */
    public static void main(String[] args) {
        try {
            List<InvoiceOrder> doXml = new ReadxmlByDom().getDoXml("invoiceOrder.xml");
            for (InvoiceOrder invoiceOrder : doXml) {
                System.out.println(invoiceOrder.toString());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

2、使用SAX方式解析XML

说明：

SAX解析器采用了基于事件的模型，它在解析XML文档的时候可以触发一系列的事件，当发现给定的tag的时候，它可以激活一个回调方法，告诉该方法指定的标签已经找到。SAX对内存的要求通常会比较低，因为它让开发人员自己来决定所要处理的tag。特别是当开发人员只需要处理文档中所包含的部分数据时，SAX这种扩展能力得到了更好的体现。但用SAX解析器的时候编码工作会比较困难，而且很难同时访问同一个文档中的多处不同数据。

案例代码：

package com.example.poiutis.xml;


import com.example.poiutis.model.InvoiceOrder;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import java.util.ArrayList;
import java.util.List;

/**
 * @ClassName 用SAX解析xml文件时需要的handler
 * @Description TODO
 * @Author shaopei
 * @Date 2019/8/1 14:36
 * @Version 1.0
 */
public class SAXParseHandler extends DefaultHandler {

    /** Records completed so far; re-created at the start of every document. */
    private List<InvoiceOrder> list = new ArrayList<InvoiceOrder>();

    /** Record currently being filled, or {@code null} when outside an {@code <InvoiceOrder>}. */
    private InvoiceOrder invoiceOrder;

    /** Character data accumulated since the most recent start tag. */
    private final StringBuilder content = new StringBuilder();

    /** Number of {@code <InvoiceOrder>} start tags seen by this handler. */
    private int count = 0;

    /**
     * Called once when parsing of a document begins; starts a fresh result list.
     */
    @Override
    public void startDocument() throws SAXException {
        super.startDocument();
        System.out.println("开始解析xml文件");
        list = new ArrayList<InvoiceOrder>();
    }

    /**
     * Called once when the whole document has been parsed.
     */
    @Override
    public void endDocument() throws SAXException {
        super.endDocument();
        System.out.println("xml文件解析完毕");
    }

    /**
     * Called for every start tag. An {@code <InvoiceOrder>} tag opens a new record
     * and reads its {@code id} attribute.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        super.startElement(uri, localName, qName, attributes);

        // Text belongs to the element that was opened last, so start collecting afresh.
        content.setLength(0);

        if (qName.equals("InvoiceOrder")) {
            invoiceOrder = new InvoiceOrder();
            String id = attributes.getValue("id");
            if (id != null) {
                invoiceOrder.setId(Integer.parseInt(id));
            }

            // Running count of parsed records.
            count++;
            System.out.println(count + "--- 次");
        }
    }

    /**
     * Called for every end tag. A field element stores its text on the current
     * record; the closing {@code </InvoiceOrder>} tag appends the finished record
     * to the result list.
     *
     * @param qName qualified name of the element that just ended
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        super.endElement(uri, localName, qName);

        if (invoiceOrder == null) {
            // Element outside any <InvoiceOrder> (e.g. the document root): nothing to fill.
            return;
        }

        String text = content.toString();
        switch (qName) {
            case "invoiceOrder":
                invoiceOrder.setInvoiceOrder(text);
                System.out.println("发票单号" + "---" + text);
                break;
            case "companyName":
                invoiceOrder.setCompanyName(text);
                System.out.println("公司名" + "---" + text);
                break;
            case "taxNumber":
                invoiceOrder.setTaxNumber(text);
                System.out.println("金额" + "---" + text);
                break;
            case "accountBank":
                invoiceOrder.setAccountBank(text);
                System.out.println("开户行" + "---" + text);
                break;
            case "companyAddress":
                invoiceOrder.setCompanyAddress(text);
                System.out.println("公司地址" + "---" + text);
                break;
            case "bankNumber":
                invoiceOrder.setBankNumber(text);
                System.out.println("账号" + "---" + text);
                break;
            case "companyTelephone":
                invoiceOrder.setCompanyTelephone(text);
                System.out.println("公司电话" + "---" + text);
                break;
            case "accountName":
                invoiceOrder.setAccountName(text);
                System.out.println("账户类型" + "---" + text);
                break;
            case "InvoiceOrder":
                // Record element closed (capital "I" — distinct from the
                // <invoiceOrder> field above): publish it and reset for the next one.
                list.add(invoiceOrder);
                invoiceOrder = null;
                break;
            default:
                break;
        }
    }

    /**
     * Receives character data. A SAX parser may deliver the text of a single element
     * in several chunks, so the chunks are appended rather than overwritten.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        super.characters(ch, start, length);
        content.append(ch, start, length);
    }

    /**
     * @return the records collected from the most recently parsed document
     */
    public List<InvoiceOrder> getInvoiceOrders() {
        return list;
    }

}

package com.example.poiutis.xml;

import com.example.poiutis.model.InvoiceOrder;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.util.List;


/**
 * @ClassName  SAX方式解析XML
 * @Description TODO
 * @Author wushaopei
 * @Date 2019/8/1 14:45
 * @Version 1.0
 */
public class ReadXmlBySAX {

    /**
     * Parses the given XML document with a JAXP SAX parser, using a
     * {@link SAXParseHandler} to collect the records.
     *
     * @param fileName location of the XML document, passed to
     *                 {@code SAXParser.parse(String, DefaultHandler)} as its URI argument
     * @return whatever the handler collected while parsing
     * @throws Exception if the parser cannot be created or the document cannot be parsed
     */
    public List<InvoiceOrder> getInvoiceOrders(String fileName) throws Exception {
        SAXParserFactory sParserFactory = SAXParserFactory.newInstance();
        SAXParser parser = sParserFactory.newSAXParser();

        SAXParseHandler handler = new SAXParseHandler();
        parser.parse(fileName, handler);

        return handler.getInvoiceOrders();
    }

    /** Demo entry point: parses the sample file and prints each record. */
    public static void main(String[] args) {
        try {
            List<InvoiceOrder> invoiceOrders =
                    new ReadXmlBySAX().getInvoiceOrders("src/main/resources/invoiceOrder.xml");
            for (InvoiceOrder invoiceOrder : invoiceOrders) {
                System.out.println(invoiceOrder);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

3、用JDOM方式读取xml文件

说明：

JDOM仅使用具体类而不使用接口。API大量使用了Collections类；JDOM自身不包含解析器。它通常使用SAX2解析器来解析和验证输入XML文档（尽管它还可以将以前构造的DOM表示作为输入）。它包含一些转换器以将JDOM表示输出成SAX2事件流、DOM模型或XML文本文档。JDOM是在Apache许可证变体下发布的开放源码。

案例代码：

package com.example.poiutis.xml;

import com.example.poiutis.model.InvoiceOrder;
import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;


/**
 * @ClassName 用JDOM方式读取xml文件
 * @Description TODO
 * @Author wushaopei
 * @Date 2019/8/1 15:14
 * @Version 1.0
 */
public class ReadXMLByJDom {

    /**
     * Reads the XML file with JDOM and converts every child element of the root
     * into an {@link InvoiceOrder}.
     *
     * <p>Reading is best-effort: if the file cannot be opened or parsed, the stack
     * trace is printed and an empty list is returned, so callers can always iterate
     * over the result.
     *
     * @param fileName path of the XML file to read
     * @return the parsed records in document order; empty (never {@code null}) on failure
     */
    public List<InvoiceOrder> getInvoiceOrders(String fileName) {
        // Per-call result, so a failed call can never hand back data from an earlier one.
        List<InvoiceOrder> invoiceOrders = new ArrayList<InvoiceOrder>();
        SAXBuilder saxBuilder = new SAXBuilder();

        // try-with-resources closes the file even when parsing fails.
        try (FileInputStream in = new FileInputStream(fileName)) {
            Document document = saxBuilder.build(in);
            Element rootElement = document.getRootElement();

            // Each child of the root is one record.
            List<Element> recordElements = rootElement.getChildren();
            for (Element recordElement : recordElements) {
                InvoiceOrder invoiceOrder = new InvoiceOrder();

                List<Attribute> recordAttributes = recordElement.getAttributes();
                for (Attribute attribute : recordAttributes) {
                    if (attribute.getName().equals("id")) {
                        invoiceOrder.setId(Integer.parseInt(attribute.getValue()));
                    }
                }

                List<Element> children = recordElement.getChildren();
                for (Element child : children) {
                    applyField(invoiceOrder, child.getName(), child.getValue());
                }

                invoiceOrders.add(invoiceOrder);
            }
        } catch (JDOMException | IOException e) {
            // IOException also covers FileNotFoundException.
            e.printStackTrace();
        }

        return invoiceOrders;
    }

    /** Stores {@code value} in the property of {@code order} named by {@code name}; unknown names are ignored. */
    private static void applyField(InvoiceOrder order, String name, String value) {
        switch (name) {
            case "invoiceOrder":
                order.setInvoiceOrder(value);
                break;
            case "companyName":
                order.setCompanyName(value);
                break;
            case "taxNumber":
                order.setTaxNumber(value);
                break;
            case "accountBank":
                order.setAccountBank(value);
                break;
            case "companyAddress":
                order.setCompanyAddress(value);
                break;
            case "bankNumber":
                order.setBankNumber(value);
                break;
            case "companyTelephone":
                order.setCompanyTelephone(value);
                break;
            case "accountName":
                order.setAccountName(value);
                break;
            default:
                break;
        }
    }

    /** Demo entry point: parses the sample file and prints each record. */
    public static void main(String[] args) {
        String fileName = "src/main/resources/invoiceOrder.xml";
        List<InvoiceOrder> invoiceOrders = new ReadXMLByJDom().getInvoiceOrders(fileName);
        for (InvoiceOrder invoiceOrder : invoiceOrders) {
            System.out.println(invoiceOrder);
        }
    }

}

4、用DOM4J方法读取xml文件

说明：

DOM4J使用接口和抽象基本类方法。DOM4J大量使用了API中的Collections类，但是在许多情况下，它还提供一些替代方法以允许更好的性能或更直接的编码方法。直接好处是，虽然DOM4J付出了更复杂的API的代价，但是它提供了比JDOM大得多的灵活性。 DOM4J性能最好，连Sun的JAXM也在用DOM4J.目前许多开源项目中大量采用DOM4J，例如大名鼎鼎的Hibernate也用DOM4J来读取XML配置文件。

案例代码：

package com.example.poiutis.xml;

import com.example.poiutis.model.InvoiceOrder;
import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import java.io.File;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * @ClassName 用DOM4J方法读取xml文件
 * @Description TODO
 * @Author wushaopei
 * @Date 2019/8/1 15:34
 * @Version 1.0
 */
public class ReadXMLByDom4j {

    /**
     * Reads the XML file with dom4j and converts every child element of the root
     * into an {@link InvoiceOrder}.
     *
     * <p>Reading is best-effort: if the document cannot be read, the stack trace is
     * printed and an empty list is returned, so callers can always iterate over the
     * result.
     *
     * @param file the XML file to read
     * @return the parsed records in document order; empty (never {@code null}) on failure
     */
    public List<InvoiceOrder> getInvoiceOrders(File file) {
        // Per-call result, so a failed call can never hand back data from an earlier one.
        List<InvoiceOrder> invoiceOrders = new ArrayList<InvoiceOrder>();

        SAXReader reader = new SAXReader();
        try {
            Document document = reader.read(file);
            Element root = document.getRootElement();

            // Each child element of the root is one record.
            for (Iterator<?> records = root.elementIterator(); records.hasNext();) {
                Element recordElement = (Element) records.next();
                InvoiceOrder invoiceOrder = new InvoiceOrder();

                List<Attribute> attributes = recordElement.attributes();
                for (Attribute attribute : attributes) {
                    if (attribute.getName().equals("id")) {
                        invoiceOrder.setId(Integer.parseInt(attribute.getValue()));
                    }
                }

                for (Iterator<?> fields = recordElement.elementIterator(); fields.hasNext();) {
                    Element child = (Element) fields.next();
                    applyField(invoiceOrder, child.getName(), child.getStringValue());
                }

                invoiceOrders.add(invoiceOrder);
            }
        } catch (DocumentException e) {
            e.printStackTrace();
        }

        return invoiceOrders;
    }

    /** Stores {@code value} in the property of {@code order} named by {@code name}; unknown names are ignored. */
    private static void applyField(InvoiceOrder order, String name, String value) {
        switch (name) {
            case "invoiceOrder":
                order.setInvoiceOrder(value);
                break;
            case "companyName":
                order.setCompanyName(value);
                break;
            case "taxNumber":
                order.setTaxNumber(value);
                break;
            case "accountBank":
                order.setAccountBank(value);
                break;
            case "companyAddress":
                order.setCompanyAddress(value);
                break;
            case "bankNumber":
                order.setBankNumber(value);
                break;
            case "companyTelephone":
                order.setCompanyTelephone(value);
                break;
            case "accountName":
                order.setAccountName(value);
                break;
            default:
                break;
        }
    }

    /** Demo entry point: parses the sample file and prints each record. */
    public static void main(String[] args) {
        File file = new File("src/main/resources/invoiceOrder.xml");
        List<InvoiceOrder> invoiceOrders = new ReadXMLByDom4j().getInvoiceOrders(file);
        for (InvoiceOrder invoiceOrder : invoiceOrders) {
            System.out.println(invoiceOrder.toString());
        }
    }
}

GitHub

相关阅读:
bzoj 2038
ACM训练联盟周赛 A. Teemo's bad day
The 2018 ACM-ICPC China JiangSu Provincial Programming Contest J. Set
惊艳，Dubbo域名已改，也不再局限于Java！
6月份值得一看的 Java 技术干货！
90 % Java 程序员被误导的一个性能优化策略
 Spring Cloud Finchley 正式发布，包含 4 个重大更新！
Java 11 快要来了，编译 & 运行一个命令搞定！
Spring Boot 单元测试详解+实战教程
 Java 10 实战第 1 篇：局部变量类型推断
原文地址：https://www.cnblogs.com/wushaopei/p/11979226.html