一般的 HTTP 请求不会自动执行页面中的 JS,可以使用 HtmlUnit 的 WebClient 来等待异步(AJAX)内容加载,最终把数据加载完毕。
<!-- https://mvnrepository.com/artifact/net.sourceforge.htmlunit/htmlunit -->
<dependency>
<groupId>net.sourceforge.htmlunit</groupId>
<artifactId>htmlunit</artifactId>
<version>2.62.0</version>
</dependency>
异步加载方法
/**
 * Loads {@code url} in a headless HtmlUnit browser, gives the page's background
 * JavaScript (AJAX calls, timers) a short time to finish, and returns the resulting
 * DOM serialised as XML.
 *
 * @param url address of the page to load
 * @return the page rendered as XML, or {@code null} when the page could not be
 *         loaded because of an {@link IOException} (the failure is logged)
 */
public static String getPageWaitHtmlResults(String url) {
    // try-with-resources: the client is closed on every exit path, so its windows
    // and background JavaScript threads are released instead of leaking per call.
    try (WebClient webClient = new WebClient()) {
        // Enable the JavaScript interpreter (this is already the default).
        webClient.getOptions().setJavaScriptEnabled(true);
        // CSS is not needed to extract data, so skip it.
        webClient.getOptions().setCssEnabled(false);
        // Do not throw on non-2xx HTTP status codes.
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        // Do not throw when a script on the page fails.
        webClient.getOptions().setThrowExceptionOnScriptError(false);
        // Timeout in milliseconds (5 minutes).
        webClient.getOptions().setTimeout(300000);
        webClient.getOptions().setActiveXNative(false);
        webClient.getOptions().setUseInsecureSSL(false);
        // Important: re-synchronise AJAX calls so asynchronous content is loaded.
        webClient.setAjaxController(new NicelyResynchronizingAjaxController());
        webClient.addRequestHeader("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9");
        webClient.addRequestHeader("user-agent", "Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1");

        HtmlPage page = webClient.getPage(url);
        // Wait up to 3 seconds for background JavaScript started by the page.
        webClient.waitForBackgroundJavaScript(3 * 1000);
        // Return the response as XML text.
        return page.asXml();
    } catch (IOException e) {
        // Previously execution continued with a null page and failed with a
        // NullPointerException; report the failure and return null instead.
        log.error("数据异常 {}", e.getMessage(), e);
        return null;
    }
}
通过上面的方法,就可以把页面中异步加载的数据一并获取下来。