(1)doc.getElementsByTag(String tagName);
(2)doc.getElementById(String id);
(3)doc.getElementsByClass(String className);
(4)doc.getElementsByAttribute(String key);
// Look up every element that carries a "width" attribute, then print each one.
elements = document.getElementsByAttribute("width");
for (Element widthElement : elements) {
    System.out.println(widthElement.toString());
}
(5)doc.getElementsByAttributeValue(String key,String value);
示例:通过key-value查找src="/images/logo_small.gif"的元素
//根据key-value名称来查询DOM(查找src="") elements=document.getElementsByAttributeValue("src", "/images/logo_small.gif"); System.out.println(elements.get(0).toString());
示例:通过key-value查找target="_blank"的元素
// Find every element whose target attribute equals "_blank" and print each one.
elements = document.getElementsByAttributeValue("target", "_blank");
for (Element blankTargetElement : elements) {
    System.out.println(blankTargetElement.toString());
}
使用document.select();选择元素
通过class一级一级往下找
package com.oracle.zibo; import org.apache.http.HttpEntity; import org.apache.http.HttpHost; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class Demo2 { public static void main(String[] args) throws Exception { CloseableHttpClient closeableHttpClient=HttpClients.createDefault(); HttpGet httpGet=new HttpGet("http://www.bootcss.com/"); CloseableHttpResponse closeableHttpResponse=closeableHttpClient.execute(httpGet); HttpEntity httpEntity=closeableHttpResponse.getEntity(); //获取实体、网页内容 String str=EntityUtils.toString(httpEntity, "utf-8"); closeableHttpResponse.close(); closeableHttpClient.close(); Document document=Jsoup.parse(str); //解析网页 //查找bootstrap主页下的所有标题 Elements elements=document.select(".row .thumbnail .caption h3 a"); for(Element e:elements){ System.out.println(e.text()); } } }
使用"a[href]"
查找所有带href属性的a标签
//查找a[href] Elements elements=document.select("a[href]"); for(Element e:elements){ System.out.println(e.html()); }
使用"img[src$=.png]"
查找扩展名为.png的图片的元素
// Select <img> elements whose src attribute ends with ".png" and print each one.
Elements elements = document.select("img[src$=.png]");
for (Element pngImage : elements) {
    System.out.println(pngImage.toString());
}
取得我们需要的信息
// For each <img> whose src ends with ".png", print the pieces of information we need.
Elements elements = document.select("img[src$=.png]");
for (Element e : elements) {
    System.out.println(e.toString());
    // Text content of the tag.
    System.out.println(e.text());
    // HTML code inside the tag.
    System.out.println(e.html());
    // Value of a given attribute (here: src).
    System.out.println(e.attr("src"));
}
e.attr(属性),返回属性值
.first()取得第一个
.last()取得最后一个
// Take only the first <img> whose src ends with ".gif".
Element element = document.select("img[src$=.gif]").first();
// first() returns null when nothing matched, so guard before reading the attribute.
if (element != null) {
    // Value of the src attribute.
    System.out.println(element.attr("src"));
}