/**
 * Utility that turns off HTTPS certificate and hostname verification by installing a
 * trust-everything socket factory and an accept-everything hostname verifier as the
 * {@link HttpsURLConnection} defaults.
 *
 * <p><b>Security warning:</b> once {@link #doFiltra()} has run, every server certificate
 * and every host name is accepted, so HTTPS connections made through the defaults are
 * exposed to man-in-the-middle attacks. Use only against endpoints where that risk is
 * acceptable.
 *
 * @author edwin
 */
public class FiltratHttpsUtils {

    /** Hostname verifier that prints the requested host and the peer host, then accepts it. */
    static HostnameVerifier hv = new HostnameVerifier() {
        @Override
        public boolean verify(String urlHostName, SSLSession session) {
            System.out.println("Warning: URL Host: " + urlHostName + " vs. " + session.getPeerHost());
            return true;
        }
    };

    /**
     * Builds an SSL context whose only trust manager is {@link miTM} and installs its socket
     * factory as the default for {@code HttpsURLConnection}.
     *
     * @throws Exception if the context cannot be obtained or initialised
     */
    private static void trustAllHttpsCertificates() throws Exception {
        javax.net.ssl.TrustManager[] trustAllCerts = { new miTM() };
        // "TLS" is the standard protocol name; the original requested the legacy "SSL" name.
        javax.net.ssl.SSLContext sc = javax.net.ssl.SSLContext.getInstance("TLS");
        // null key managers / null SecureRandom: let the provider pick its defaults.
        sc.init(null, trustAllCerts, null);
        javax.net.ssl.HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
    }

    /** Trust manager that never rejects a certificate chain. */
    static class miTM implements javax.net.ssl.TrustManager, javax.net.ssl.X509TrustManager {

        /**
         * Returns an empty issuer list. The {@code X509TrustManager} contract requires a
         * non-null array, so an empty array is returned instead of {@code null}.
         */
        @Override
        public java.security.cert.X509Certificate[] getAcceptedIssuers() {
            return new java.security.cert.X509Certificate[0];
        }

        /** Always reports the given server chain as trusted. */
        public boolean isServerTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /** Always reports the given client chain as trusted. */
        public boolean isClientTrusted(java.security.cert.X509Certificate[] certs) {
            return true;
        }

        /** Accepts every server chain: returning without throwing means "trusted". */
        @Override
        public void checkServerTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Intentionally empty.
        }

        /** Accepts every client chain: returning without throwing means "trusted". */
        @Override
        public void checkClientTrusted(java.security.cert.X509Certificate[] certs, String authType)
                throws java.security.cert.CertificateException {
            // Intentionally empty.
        }
    }

    /**
     * Installs the trust-all socket factory and the accept-all hostname verifier {@link #hv}
     * as the {@code HttpsURLConnection} defaults. Call it before issuing HTTPS requests.
     *
     * @throws Exception if the SSL context cannot be created or initialised
     */
    public static void doFiltra() throws Exception {
        trustAllHttpsCertificates();
        HttpsURLConnection.setDefaultHostnameVerifier(hv);
    }
}
只需要在请求前调用:FiltratHttpsUtils.doFiltra() 即可。
package com.dao; import java.io.File; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.Connection.Method; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.dao.HttpsUrlValidator.FiltratHttpsUtils; public class PhotoVerification { public static void main(String[] args) throws Exception { try { FiltratHttpsUtils .doFiltra(); String url="https://inv-veri.chinatax.gov.cn/index.html"; // HttpsUrlValidator.retrieveResponseFromServer(url); // doc = Jsoup.connect(url).header("User-Agent",rand_agents).timeout(10000).get(); // body = doc.getElementsByTag("body").html(); // log.info(e.getMessage()); // //通过Jsoup的Connect方法获取document类 Document document = Jsoup.connect(url).get(); // System.out.println(document.title());//控制台打印网页标题 //根据class获取Elements类 // Element timeElement = document.getElementById("yzminfo");//文字 // Element titleElement = document.getElementById("yzm_img");//图片的base64码 // Element timeElement = document.getElementById("yzminfo");//文字 Element titleElement = document.getElementById("imgarea");//图片的base64码 System.out.println(timeElement); System.out.println("----------------------"); System.out.println(titleElement); //指定文件名及路径 // File file = new File("D:\title.txt"); // File contentFile = new File("D:\content.txt"); // if(!file.exists()){ // file.createNewFile(); // } // if(!contentFile.exists()){ // contentFile.createNewFile(); // } // //写入本地 // PrintWriter pw = new PrintWriter("D:\title.txt"); // PrintWriter contentPw = new PrintWriter("D:\content.txt"); // pw.close(); // // //// String href = titleElement.get(i).attr("href");//取出新闻标题的url // String schoolHref = "http://www.haie.edu.cn/"; //因为取出来的新闻url不规范,直接访问不了,需要将其拼接成正常的网页url // String contentHref = schoolHref+href; //重复第一步的内容,根据URL取Documet类 // 
Document contentDoc = Jsoup.connect(contentHref).get(); //继续观察网页,取出新闻详细页面的文字。 // Elements contentElement = contentDoc.getElementsByClass("contentstyle125127"); // Elements authorElement = contentDoc.getElementsByClass("authorstyle125127"); // String content = contentElement.text(); // String author = authorElement.text(); //打印出作者,新闻详细内容 // contentPw.println(author); // contentPw.println(content); // contentPw.println("---------------------------"); // // contentPw.close(); } catch (IOException e) { e.printStackTrace(); } } }