看了 Jsoup 教程之后,想试着爬取一些数据玩玩,就拿 B 站的排行榜试试水。
一、导入依赖
<!-- jsoup: HTML parser used to fetch and query the ranking page -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.11.3</version>
</dependency>
二、敲码
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
 * Fetches the bilibili "popular / rank / all" page and extracts one row per
 * element carrying the CSS class {@code rank-item}.
 *
 * <p>Each row is an insertion-ordered map with the keys 序号, 路径, 信息, 播放量,
 * 弹幕数量, 作者 and 曝光亮. Fields that cannot be found in the page are stored
 * as empty strings instead of causing an exception.
 *
 * @return the extracted rows in document order; empty when no {@code rank-item}
 *         element is present
 * @throws IOException if the page cannot be fetched or parsed
 */
public List<Map<String, Object>> main() throws IOException {
    String url = "https://www.bilibili.com/v/popular/rank/all";
    // Second argument is the fetch timeout in milliseconds.
    Document document = Jsoup.parse(new URL(url), 30000);
    Elements elements = document.getElementsByClass("rank-item");
    List<Map<String, Object>> dataPojo = new ArrayList<>();
    for (Element el : elements) {
        String num = el.getElementsByClass("num").text();
        String src = el.getElementsByTag("a").eq(0).attr("href");
        String info = el.getElementsByClass("title").eq(0).text();
        String detail = el.getElementsByClass("detail").eq(0).text();
        String upName = el.getElementsByClass("up-name").eq(0).text();
        String pts = el.getElementsByClass("pts").eq(0).text();

        // The detail field is not broken down further here (it actually holds
        // three values, so it is simply split on spaces).
        String[] split = detail.split(" ");
        // split(" ") always yields at least one element, but the second one is
        // missing when the detail text is empty or has no space.
        String playCount = split[0];
        String danmakuCount = split.length > 1 ? split[1] : "";

        // Drop the trailing 4 characters of the score text (presumably a fixed
        // label suffix -- TODO confirm against the page). Guard the length so a
        // missing or short score does not throw StringIndexOutOfBoundsException.
        if (pts.length() >= 4) {
            pts = pts.substring(0, pts.length() - 4);
        }

        // Only protocol-relative links ("//host/path") need the scheme added;
        // an empty or already-absolute href is kept unchanged.
        String link = src.startsWith("//") ? "https:" + src : src;

        Map<String, Object> map = new LinkedHashMap<>();
        map.put("序号", num);
        map.put("路径", link);
        map.put("信息", info);
        map.put("播放量", playCount);
        map.put("弹幕数量", danmakuCount);
        map.put("作者", upName);
        map.put("曝光亮", pts);
        dataPojo.add(map);
    }
    return dataPojo;
}
三、将数据存储至 Excel
<!-- Apache POI: all POI artifacts must share the same version; mixing
     versions (e.g. poi-ooxml 4.1.1 with poi-ooxml-schemas 3.17) is not
     supported and can fail at runtime. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.1</version>
</dependency>
<!-- hutool: provides the ExcelUtil / ExcelWriter helpers built on POI -->
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.0.7</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.1</version>
</dependency>
import cn.hutool.poi.excel.ExcelUtil;
import cn.hutool.poi.excel.ExcelWriter;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Scrapes the ranking rows through {@code HtmlJsoupData.main()} and writes them
 * to an Excel file at a fixed desktop path, then prints a success message.
 *
 * @param args unused
 * @throws IOException if fetching the ranking data fails
 */
public static void main(String[] args) throws IOException {
    // Fetch the data
    HtmlJsoupData jsoupData = new HtmlJsoupData();
    List<Map<String, Object>> list = jsoupData.main();

    // Column widths, indexed by column number.
    int[] columnWidths = {20, 40, 90, 30, 30, 30, 30, 20};

    // Create the writer via the utility class. try-with-resources guarantees
    // the writer is closed (and its resources released) even if a call fails.
    try (ExcelWriter writer = ExcelUtil.getWriter("C:/Users/2902404395/Desktop/data.xls")) {
        // Skip the current (first) row; optional, shown here for demonstration
        writer.passCurrentRow();
        // Set the column widths
        for (int col = 0; col < columnWidths.length; col++) {
            writer.setColumnWidth(col, columnWidths[col]);
        }
        writer.setDefaultRowHeight(17);
        // `true`: also write the map keys as a header row
        writer.write(list, true);
    }
    System.out.println("成功");
}
最后-效果图
源码