import requests
import parsel
import time
import os
# Listing-page URL template; {0} is replaced with the 1-based page number.
url = "https://fabiaoqing.com/biaoqing/lists/page/{0}.html"
# Browser-like User-Agent sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
}

IMAGE_DIR = "./image"   # directory the downloaded images are written to
FIRST_PAGE = 1          # first listing page to crawl (inclusive)
LAST_PAGE = 5           # last listing page to crawl (inclusive)
REQUEST_TIMEOUT = 10    # seconds before an HTTP request is abandoned
DOWNLOAD_DELAY = 0.5    # pause between image downloads, in seconds

# Characters that are not allowed in file names on common file systems.
_INVALID_FILENAME_CHARS = '\\/:*?"<>|'


def sanitize_filename(title, fallback="untitled"):
    """Return *title* made safe for use as a file name.

    Path separators, reserved punctuation and control characters are
    replaced with ``_``; surrounding spaces and dots are stripped.
    *fallback* is returned when *title* is ``None``, empty, or empty after
    cleaning.
    """
    if not title:
        return fallback
    cleaned = "".join(
        "_" if ch in _INVALID_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in title
    ).strip(" .")
    return cleaned or fallback


def image_suffix(img_url, default="jpg"):
    """Return the file extension (without the dot) of *img_url*.

    The query string and fragment are ignored, and only the last path
    segment is inspected. *default* is returned when that segment has no
    usable alphanumeric extension.
    """
    path = img_url.split("?", 1)[0].split("#", 1)[0]
    name = path.rsplit("/", 1)[-1]
    if "." not in name:
        return default
    suffix = name.rsplit(".", 1)[-1]
    return suffix if suffix.isalnum() else default


def build_image_path(title, img_url, directory=IMAGE_DIR):
    """Return the local path under *directory* for the image at *img_url*."""
    file_name = sanitize_filename(title) + "." + image_suffix(img_url)
    return os.path.join(directory, file_name)


def fetch(target_url):
    """GET *target_url* with the shared headers and return the response.

    Raises ``requests.RequestException`` on connection errors, timeouts and
    non-2xx status codes.
    """
    response = requests.get(
        url=target_url, headers=headers, timeout=REQUEST_TIMEOUT
    )
    response.raise_for_status()
    return response


def crawl_page(page, count):
    """Download every image listed on listing page *page*.

    *count* is the number of images saved so far; the updated total is
    returned. A failed page or image request is reported and skipped so a
    single error does not abort the whole crawl.
    """
    try:
        html = fetch(url.format(page)).text
    except requests.RequestException as e:
        print("请求第 {0} 页失败: {1}".format(page, e))
        return count

    for div in parsel.Selector(html).css(".tagbqppdiv"):
        title = div.css("img.ui.image.lazy::attr(title)").get()
        img_url = div.css("img.ui.image.lazy::attr(data-original)").get()
        if not img_url:
            # Entry without a lazy-loaded image URL: nothing to download.
            continue
        path = build_image_path(title, img_url)
        try:
            data = fetch(img_url).content
            with open(path, "wb") as f:
                f.write(data)
        except (requests.RequestException, OSError) as e:
            print(e)
        else:
            count += 1
            print("保存成功{0},爬取第 {1} 页,已爬取 {2} 张图片".format(title, page, count))
        # Throttle requests to avoid hammering the server.
        time.sleep(DOWNLOAD_DELAY)
    return count


def main():
    """Crawl the configured listing pages and print a summary."""
    start = time.time()
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(IMAGE_DIR, exist_ok=True)

    pages = range(FIRST_PAGE, LAST_PAGE + 1)
    count = 0
    for page in pages:
        count = crawl_page(page, count)

    end = time.time()
    # Report the number of pages requested, not the image count, as pages.
    print("爬取完毕,爬取{0}页,一共{1}张,用{2}秒".format(len(pages), count, end - start))


if __name__ == "__main__":
    main()
PyCharm 输出
学会了之后再也不用担心没有表情包了。
打卡第61天,对 Python 大数据感兴趣的朋友欢迎一起讨论、交流,请多指教!