student.xml
<?xml version="1.0" encoding="UTF-8" ?> <!-- Sample roster: two student records, each with a number attribute and name/age/sex children; only the first student's name element carries an id attribute. --><students> <student number="heima_0001"> <name id="1">tom</name> <age>18</age> <sex>male</sex> </student> <student number="heima_0002"> <name>jack</name> <age>18</age> <sex>female</sex> </student> </students>
Jsoup案例
package cn.itcast.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.net.URL;

/**
 * Demonstrates the {@code Jsoup} entry points for obtaining a {@link Document}:
 * from a file, from an in-memory string, and from a URL.
 *
 * @author 旗木五五开
 * @create 2020-02-12 0:09
 */
public class JsoupDemo2 {

    /**
     * Parses the same kind of student data from three different sources and
     * prints each resulting document to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the file or the remote page cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Locate student.xml through the class loader.
        String path = cn.itcast.xml.jsoup.JsoupDemo1.class.getClassLoader().getResource("student.xml").getPath();

        // 1. parse(File in, String charsetName): parse a document from a file,
        //    load it into memory and obtain the DOM tree as a Document.
        Document document1 = Jsoup.parse(new File(path), "utf-8");
        System.out.println(document1);

        // 2. parse(String html): parse an HTML or XML string.
        //    The inner double quotes must be escaped for the literal to compile.
        String str = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?> "
                + " <students> "
                + " "
                + " <student number=\"heima_0001\"> "
                + " <name>tom</name> "
                + " <age>18</age> "
                + " <sex>male</sex> "
                + " </student> "
                + " "
                + " <student number=\"heima_0002\"> "
                + " <name>jack</name> "
                + " <age>18</age> "
                + " <sex>female</sex> "
                + " </student> "
                + " "
                + " </students>";
        Document document2 = Jsoup.parse(str);
        System.out.println(document2);

        // 3. parse(URL url, int timeoutMillis): fetch an HTML or XML document
        //    from a network location.
        URL url = new URL("https://baike.baidu.com/item/jsoup/9012509"); // a resource location on the network
        Document document3 = Jsoup.parse(url, 10000); // give up after a 10-second timeout
        System.out.println(document3);
    }
}
Document案例
package cn.itcast.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;

/**
 * Demonstrates the element-lookup methods shared by Document and Element.
 *
 * @author 旗木五五开
 * @create 2020-02-12 13:15
 */
public class JsoupDemo3 {

    /**
     * Parses student.xml and prints the result of four different lookups:
     * by tag name, by attribute name, by attribute value, and by id.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if student.xml cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Step 1: resolve the location of student.xml via the class loader.
        ClassLoader loader = JsoupDemo1.class.getClassLoader();
        String xmlPath = loader.getResource("student.xml").getPath();

        // Step 2: build the Document.
        File xmlFile = new File(xmlPath);
        Document document = Jsoup.parse(xmlFile, "utf-8");

        // Step 3: look up elements.
        // 3.1 every element whose tag is "student"
        Elements students = document.getElementsByTag("student");
        System.out.println(students);
        System.out.println("----------");

        // 3.2 every element that has an "id" attribute
        Elements havingId = document.getElementsByAttribute("id");
        System.out.println(havingId);
        System.out.println("-----------");

        // 3.3 every element whose "number" attribute equals "heima_0001"
        Elements matchingNumber = document.getElementsByAttributeValue("number", "heima_0001");
        System.out.println(matchingNumber);
        System.out.println("-------");

        // 3.4 the single element whose id is "1"
        Element withIdOne = document.getElementById("1");
        System.out.println(withIdOne);
    }
}
Element案例
package cn.itcast.xml.jsoup;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;

/**
 * Demonstrates Element features: scoped child lookup, attribute access,
 * and reading text/markup content.
 *
 * @author 旗木五五开
 * @create 2020-02-13 12:02
 */
public class JsoupDemo4 {

    /**
     * Parses student.xml, then contrasts a document-wide tag lookup with a
     * lookup scoped to the first student element, and prints that student's
     * number attribute plus the text and markup of its name element(s).
     *
     * @param args command-line arguments (unused)
     * @throws IOException if student.xml cannot be read
     */
    public static void main(String[] args) throws IOException {
        // Resolve student.xml via the class loader and parse it.
        ClassLoader loader = JsoupDemo1.class.getClassLoader();
        File xmlFile = new File(loader.getResource("student.xml").getPath());
        Document document = Jsoup.parse(xmlFile, "utf-8");

        // Document-wide lookup: every name tag in the file
        // (prints 2 for the sample student.xml).
        Elements allNames = document.getElementsByTag("name");
        System.out.println(allNames.size());
        System.out.println("--------------");

        // Scoped lookup: take the first student element, then search only
        // beneath it (prints 1 for the sample file, showing that the search
        // is limited to that element's subtree).
        Elements studentElements = document.getElementsByTag("student");
        Element firstStudent = studentElements.get(0);
        Elements namesOfFirst = firstStudent.getElementsByTag("name");
        System.out.println(namesOfFirst.size());
        System.out.println("--------------");

        // Attribute value of the first student element.
        System.out.println(firstStudent.attr("number"));
        System.out.println("--------------");

        // Content of the matched name element(s): text() first, then html().
        String nameText = namesOfFirst.text();
        String nameMarkup = namesOfFirst.html();
        System.out.println(nameText);
        System.out.println(nameMarkup);
    }
}