package org.spider.htmlunit;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.DefaultCredentialsProvider;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.Cookie;
import org.jsoup.helper.StringUtil;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
* @description:
* @author: long.li
* @date: 2019/2/20 14:40
public class HtmlUnitUtils {
public static void main(String[] args) throws Exception {
HtmlUnitBuilder builder = HtmlUnitBuilder.config()
.url("www.baidu.com")
.enableJS(true)
.enableCookie(true);
getPage(builder).asText();
public static HtmlPage getPage(HtmlUnitBuilder builder)throws Exception{
WebClient webClient = getWebClient(builder);
int count = -1;
while(true){
try {
count++;
return webClient.getPage(builder.url());
catch (Exception e){
if(e instanceof IOException && count < builder.retry()){
}else {
throw e;
public static WebClient getWebClient(HtmlUnitBuilder builder){
WebClient webClient = null;
if(isBlank(builder.proxyHost())){
webClient = new WebClient(BrowserVersion.CHROME);
}else{
webClient = new WebClient(BrowserVersion.CHROME,builder.proxyHost(),builder.proxyPort());
if(!isBlank(builder.username())){
((DefaultCredentialsProvider) webClient.getCredentialsProvider()).
addCredentials(builder.username(),builder.password());
webClient.getOptions().setThrowExceptionOnScriptError(false);
webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
webClient.getOptions().setCssEnabled(builder.enableCSS());
webClient.getOptions().setJavaScriptEnabled(builder.enableJS());
if(builder.enableAjax()) {
webClient.setAjaxController(new NicelyResynchronizingAjaxController());
if(builder.enableCookie()){
webClient.getCookieManager().setCookiesEnabled(true);
for(Map.Entry<String,String> pair:builder.cookies().entrySet()){
webClient.getCookieManager().addCookie(new Cookie("/",pair.getKey(),pair.getKey()));
webClient.waitForBackgroundJavaScript(builder.waitForBackgroundJavaScript());
Map<String,String> headers = builder.headers();
if(headers!=null&&headers.size()>0){
for(Map.Entry<String,String> header:headers.entrySet()){
webClient.addRequestHeader(header.getKey(),header.getValue());
return webClient;
* org.jsoup.helper.StringUtil
* @param string
* @return
private static boolean isBlank(String string) {
if (string == null || string.length() == 0)
return true;
int l = string.length();
for (int i = 0; i < l; i++) {
if (!StringUtil.isWhitespace(string.codePointAt(i)))
return false;
return true;
private static boolean isWhitespace(int c){
return c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '\r';
二. HtmlUnitBuilder
package org.spider.htmlunit;
import java.util.HashMap;
import java.util.Map;
/**
 * Fluent configuration holder for an HtmlUnit request: target URL, retry
 * count, proxy settings, feature switches, request headers and cookies.
 * Setters return {@code this} so calls can be chained; the same-named
 * no-argument methods read the values back.
 *
 * <p>Created 2019/2/20 14:40.
 *
 * @author long.li
 */
public class HtmlUnitBuilder {

    private String url;
    private String proxyHost;
    private int proxyPort;
    private String username;
    private String password;
    private boolean enableCSS = false;
    private boolean enableJS = false;
    private boolean enableAjax = false;
    private boolean enableCookie = false;
    private int waitForBackgroundJavaScript = 0;
    private int retry = 0;
    private Map<String, String> headers = new HashMap<>();
    private Map<String, String> cookies = new HashMap<>();

    /**
     * Creates a builder with all defaults: features off, no proxy, no retries,
     * empty headers and cookies.
     *
     * @return a new builder
     */
    public static HtmlUnitBuilder config() {
        return new HtmlUnitBuilder();
    }

    /**
     * Sets the URL to load.
     *
     * @param url the target URL
     * @return this builder
     */
    public HtmlUnitBuilder url(String url) {
        this.url = url;
        return this;
    }

    /**
     * Sets the retry count.
     *
     * @param retry the retry count
     * @return this builder
     */
    public HtmlUnitBuilder retry(int retry) {
        this.retry = retry;
        return this;
    }

    /**
     * Configures a proxy server that does not require authentication.
     *
     * @param proxyHost the proxy host
     * @param proxyPort the proxy port
     * @return this builder
     */
    public HtmlUnitBuilder proxy(String proxyHost, int proxyPort) {
        this.proxyHost = proxyHost;
        this.proxyPort = proxyPort;
        return this;
    }

    /**
     * Configures a proxy server that requires authentication.
     *
     * @param proxyHost the proxy host
     * @param proxyPort the proxy port
     * @param username  the user name
     * @param password  the password
     * @return this builder
     */
    public HtmlUnitBuilder proxy(String proxyHost, int proxyPort,
                                 String username, String password) {
        this.proxyHost = proxyHost;
        this.proxyPort = proxyPort;
        this.username = username;
        this.password = password;
        return this;
    }

    /**
     * Sets the CSS switch.
     *
     * @param enableCSS the new value
     * @return this builder
     */
    public HtmlUnitBuilder enableCSS(boolean enableCSS) {
        this.enableCSS = enableCSS;
        return this;
    }

    /**
     * Sets the JavaScript switch.
     *
     * @param enableJS the new value
     * @return this builder
     */
    public HtmlUnitBuilder enableJS(boolean enableJS) {
        this.enableJS = enableJS;
        return this;
    }

    /**
     * Sets the Ajax switch.
     *
     * @param enableAjax the new value
     * @return this builder
     */
    public HtmlUnitBuilder enableAjax(boolean enableAjax) {
        this.enableAjax = enableAjax;
        return this;
    }

    /**
     * Sets the cookie switch.
     *
     * @param enableCookie the new value
     * @return this builder
     */
    public HtmlUnitBuilder enableCookie(boolean enableCookie) {
        this.enableCookie = enableCookie;
        return this;
    }

    /**
     * Replaces the cookie map (name to value). The given map is stored by
     * reference; {@code null} is treated as "no cookies" so that
     * {@link #cookies()} never returns null.
     *
     * @param cookies the cookies to use, may be null
     * @return this builder
     */
    public HtmlUnitBuilder cookies(Map<String, String> cookies) {
        this.cookies = cookies != null ? cookies : new HashMap<String, String>();
        return this;
    }

    /**
     * Replaces the whole set of request headers. The given map is stored by
     * reference; {@code null} is treated as "no headers" so that a later
     * {@link #addHeader(String, String)} still works.
     *
     * @param headers the headers to use, may be null
     * @return this builder
     */
    public HtmlUnitBuilder headers(Map<String, String> headers) {
        this.headers = headers != null ? headers : new HashMap<String, String>();
        return this;
    }

    /**
     * Adds (or overwrites) a single request header.
     *
     * @param key   the header name
     * @param value the header value
     * @return this builder
     */
    public HtmlUnitBuilder addHeader(String key, String value) {
        headers.put(key, value);
        return this;
    }

    /**
     * Sets the background-JavaScript wait value.
     *
     * @param waitForBackgroundJavaScript the wait value
     * @return this builder
     */
    public HtmlUnitBuilder waitForBackgroundJavaScript(int waitForBackgroundJavaScript) {
        this.waitForBackgroundJavaScript = waitForBackgroundJavaScript;
        return this;
    }

    /** @return the target URL, or null if unset */
    public String url() {
        return url;
    }

    /** @return the retry count */
    public int retry() {
        return retry;
    }

    /** @return the proxy host, or null if no proxy was configured */
    public String proxyHost() {
        return proxyHost;
    }

    /** @return the proxy port */
    public int proxyPort() {
        return proxyPort;
    }

    /** @return the proxy user name, or null if unset */
    public String username() {
        return username;
    }

    /** @return the proxy password, or null if unset */
    public String password() {
        return password;
    }

    /** @return the CSS switch */
    public boolean enableCSS() {
        return enableCSS;
    }

    /** @return the JavaScript switch */
    public boolean enableJS() {
        return enableJS;
    }

    /** @return the Ajax switch */
    public boolean enableAjax() {
        return enableAjax;
    }

    /** @return the cookie switch */
    public boolean enableCookie() {
        return enableCookie;
    }

    /** @return the background-JavaScript wait value */
    public int waitForBackgroundJavaScript() {
        return waitForBackgroundJavaScript;
    }

    /** @return the live request-header map, never null */
    public Map<String, String> headers() {
        return headers;
    }

    /** @return the live cookie map, never null */
    public Map<String, String> cookies() {
        return cookies;
    }
}