/** * @Title: webclientTest.java * @Package webclient * @Description: TODO(用一句话描述该文件做什么) * @author A18ccms A18ccms_gmail_com * @date 2016年8月30日 下午3:52:41 * @version V1.0 */ package webclient; import java.io.IOException; import java.net.MalformedURLException; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlButton; import com.gargoylesoftware.htmlunit.html.HtmlForm; import com.gargoylesoftware.htmlunit.html.HtmlPage; /** * @ClassName: webclientTest * @Description: Webclent基本操作 * @author zeze * @date 2016年8月30日 下午3:52:41 * */ public class webclientTest { /** * * @Title: main * @param @param * args 设定文件 * @return void 返回类型 */ public static void main(String[] args) { // TODO Auto-generated method stub WebClient webClient = new WebClient(BrowserVersion.CHROME); webClient.getOptions().setTimeout(20000); webClient.getCookieManager().setCookiesEnabled(true); webClient.getOptions().setThrowExceptionOnFailingStatusCode(false); webClient.getOptions().setThrowExceptionOnScriptError(false); webClient.getOptions().setCssEnabled(false); webClient.getOptions().setJavaScriptEnabled(false); webClient.addRequestHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/43.0.2357.134 Safari/537.36"); webClient.addRequestHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"); webClient.addRequestHeader("Accept-Encoding", "gzip, deflate, sdch"); webClient.addRequestHeader("Accept-Language", "zh-CN,zh;q=0.8"); webClient.addRequestHeader("Connection", "keep-alive"); webClient.addRequestHeader("Host", "www.cnblogs.com"); // webClient.addRequestHeader("", ""); HtmlPage page = null; try { String Url = "http://www.cnblogs.com/zeze"; page = webClient.getPage(Url); Document doc = Jsoup.parse(page.asXml()); System.out.println(doc); while (doc.select("title").text().equals("Robot Check")) { HtmlForm form = page.getForms().get(0); HtmlButton button = (HtmlButton) form.getElementsByTagName("button").get(0); form.getInputByName("field-keywords").setValueAttribute(""); doc = Jsoup.parse(page.asXml()); try { page = button.click(); } catch (IOException e1) { e1.printStackTrace(); } } } catch (FailingHttpStatusCodeException e) { } catch (MalformedURLException e) { } catch (IOException eq) { } } }