HttpClient
Get
Get(无参)
/**
 * Sends a GET request without query parameters and prints the response body
 * when the server answers with HTTP 200.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Step 1: describe the request (method + URL).
    org.apache.http.client.methods.HttpGet httpGet =
            new org.apache.http.client.methods.HttpGet("https://onlineweb.zhihuishu.com/onlinestuh5");
    // Step 2: create the client and execute the request.
    // try-with-resources closes the response first and then the client, and it
    // copes with execute() failing before a response exists (the previous
    // finally block called close() on a null response in that case).
    try (CloseableHttpClient httpClient = HttpClients.createDefault();
         CloseableHttpResponse response = httpClient.execute(httpGet)) {
        // Step 3: parse the response.
        if (response.getStatusLine().getStatusCode() == 200) {
            String body = EntityUtils.toString(response.getEntity(), "utf-8");
            System.out.println(body);
        }
    } catch (IOException ioException) {
        ioException.printStackTrace();
    }
}
Get(有参)
/**
 * Sends a GET request with a query parameter and prints the length of the
 * response body when the server answers with HTTP 200.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the base URI or the built URI is not syntactically valid
 */
public static void main(String[] args) throws Exception {
    // Build the URI and attach the query parameter.
    URIBuilder uriBuilder = new URIBuilder("https://t.bilibili.com/");
    uriBuilder.setParameter("spm_id_from", "333.1007.0.0");
    // Describe the GET request.
    HttpGet httpGet = new HttpGet(uriBuilder.build());
    System.out.println("访问的是:"+httpGet);
    // Execute the request. Both the client and the response are closed by
    // try-with-resources; previously neither was ever closed.
    try (CloseableHttpClient client = HttpClients.createDefault();
         CloseableHttpResponse response = client.execute(httpGet)) {
        if (response.getStatusLine().getStatusCode() == 200) {
            String body = EntityUtils.toString(response.getEntity(), "utf8");
            System.out.println(body.length());
        }
    } catch (IOException ioException) {
        ioException.printStackTrace();
    }
}
Post
Post无参
/**
 * Sends a POST request without a body and prints the response body when the
 * server answers with HTTP 200.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Step 1: describe the request (an HttpPost, not an HttpGet, with its URL).
    HttpPost httpPost = new HttpPost("https://www.bilibili.com/");
    // Step 2: create the client and execute the request.
    // try-with-resources closes the response and then the client, and it is
    // safe when execute() throws before any response was obtained (the old
    // finally block dereferenced a null response in that case).
    try (CloseableHttpClient httpClient = HttpClients.createDefault();
         CloseableHttpResponse response = httpClient.execute(httpPost)) {
        // Step 3: parse the response.
        if (response.getStatusLine().getStatusCode() == 200) {
            String body = EntityUtils.toString(response.getEntity(), "utf-8");
            System.out.println(body);
        }
    } catch (IOException ioException) {
        ioException.printStackTrace();
    }
}
Post有参
/**
 * Sends a POST request carrying one form parameter and prints the length of
 * the response body when the server answers with HTTP 200.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the form entity cannot be created with the requested charset
 */
public static void main(String[] args) throws Exception {
    // Collect the form fields.
    List<NameValuePair> list = new ArrayList<NameValuePair>();
    list.add(new BasicNameValuePair("spm_id_from","333.1007.0.0"));
    // Create the form entity. The charset is given explicitly so that
    // non-ASCII values would be encoded as UTF-8 rather than with the
    // library's default charset.
    UrlEncodedFormEntity formEntity = new UrlEncodedFormEntity(list, "utf8");
    // Describe the POST request and attach the form to it.
    HttpPost httpPost = new HttpPost("https://www.bilibili.com/");
    httpPost.setEntity(formEntity);
    // Create the client and execute the request. try-with-resources closes the
    // response and then the client, and it is safe when execute() throws
    // before a response exists (the old finally block dereferenced null).
    try (CloseableHttpClient httpClient = HttpClients.createDefault();
         CloseableHttpResponse response = httpClient.execute(httpPost)) {
        // Parse the response.
        if (response.getStatusLine().getStatusCode() == 200) {
            String body = EntityUtils.toString(response.getEntity(), "utf-8");
            System.out.println(body.length());
        }
    } catch (IOException ioException) {
        ioException.printStackTrace();
    }
}
HttpClient连接池
/**
 * Configures a pooling connection manager and hands it to {@code doGet}.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    PoolingHttpClientConnectionManager connectionManager =
            new PoolingHttpClientConnectionManager();

    // Upper bound on connections across the whole pool.
    connectionManager.setMaxTotal(100);

    // Default upper bound on connections per route, i.e. per target host.
    connectionManager.setDefaultMaxPerRoute(10);

    doGet(connectionManager);
}
/**
 * Issues a GET request through a client backed by the given connection pool
 * and prints the length of the response body when the server answers 200.
 *
 * @param cm the pooling connection manager the client must take its connections from
 */
private static void doGet(PoolingHttpClientConnectionManager cm)
{
    // Build the client on top of the supplied pool. HttpClients.createDefault()
    // would create its own connection manager and silently ignore cm.
    CloseableHttpClient client = HttpClients.custom().setConnectionManager(cm).build();
    HttpGet httpGet = new HttpGet("http://www.bilibili.com");
    // Only the response is closed here, which releases the connection.
    // The client is deliberately left open: closing it would also shut down
    // the connection manager that the caller owns.
    try (CloseableHttpResponse response = client.execute(httpGet)) {
        if (response.getStatusLine().getStatusCode() == 200) {
            String body = EntityUtils.toString(response.getEntity(), "utf8");
            System.out.println(body.length());
        }
    } catch (IOException ioException) {
        ioException.printStackTrace();
    }
}
JSoup
四个依赖(添加到pom.xml的dependencies节点中):
<!-- jsoup: HTML parser (see https://mvnrepository.com/artifact/org.jsoup/jsoup) -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.13.1</version>
</dependency>
<!-- JUnit 4: unit-testing framework -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
</dependency>
<!-- Apache Commons IO (see https://mvnrepository.com/artifact/commons-io/commons-io) -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
</dependency>
<!-- Apache Commons Lang 3 (see https://mvnrepository.com/artifact/org.apache.commons/commons-lang3) -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.12.0</version>
</dependency>
配置log4j.properties(最好在resources目录下)
# Root logger: level debug, attached to the "stdout" and "R" appenders defined below.
log4j.rootLogger=debug, stdout, R
# "stdout" appender: writes to the console using a pattern layout.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
# Pattern to output the caller's file name and line number.
log4j.appender.stdout.layout.ConversionPattern=%5p [%t] (%F:%L) - %m%n
# "R" appender: rolling file appender that writes to example.log.
log4j.appender.R=org.apache.log4j.RollingFileAppender
log4j.appender.R.File=example.log
# Size limit of the log file, set to 100KB.
log4j.appender.R.MaxFileSize=100KB
# Backup index limit is 5 (not one backup file).
log4j.appender.R.MaxBackupIndex=5
# Pattern for the file appender.
log4j.appender.R.layout=org.apache.log4j.PatternLayout
log4j.appender.R.layout.ConversionPattern=%p %t %c - %m%n
测试URL
/**
 * Fetches http://www.baidu.com with a 10000 ms timeout, parses it with jsoup
 * and prints the page title.
 *
 * @throws Exception if the URL is malformed or the page cannot be fetched
 */
@Test
public void testURl() throws Exception{
    Document document = Jsoup.parse(new URL("http://www.baidu.com"), 10000);
    // Document.title() yields an empty string when the page has no <title>,
    // whereas getElementsByTag("title").first() returns null in that case and
    // the following .text() call would throw a NullPointerException.
    String title = document.title();
    System.out.println(title);
}
选择器操作(部分) 和HTML挺像的