requests用parsel解析爬取网络表情包-CFANZ编程社区

requests用parsel解析爬取网络表情包

import requests
import parsel
import time
import os



# Crawl the first five list pages of fabiaoqing.com and save every meme image
# found there into ./image, named after the image's title attribute.
start = time.time()        # Record the program start time.

# Create the output directory; exist_ok avoids a separate existence check.
os.makedirs('./image', exist_ok=True)

url = "https://fabiaoqing.com/biaoqing/lists/page/{0}.html"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36"
}
count = 0        # Number of images saved so far.
pages = 0        # Number of list pages fetched successfully.
# Characters that are invalid (or act as separators) in file names are
# replaced with "_" so a title can never break or escape the output path.
unsafe_chars = str.maketrans({c: "_" for c in '\\/:*?"<>|\r\n\t'})

for i in range(1, 6):
    url_i = url.format(i)        # URL of the i-th list page.
    try:
        # requests has no default timeout; without one a stalled server
        # would hang the script forever.
        page = requests.get(url=url_i, headers=headers, timeout=10)
        page.raise_for_status()
    except requests.RequestException as e:
        # A failed list page is reported and skipped, not fatal.
        print(e)
        continue
    pages += 1
    sel = parsel.Selector(page.text)       # Parse the HTML.
    divs = sel.css(".tagbqppdiv")          # One element per meme entry.
    for div in divs:
        title = div.css("img.ui.image.lazy::attr(title)").get()    # Used as file name.
        img_url = div.css("img.ui.image.lazy::attr(data-original)").get()    # Image URL.
        if not title or not img_url:
            # .get() returns None when the attribute is missing; nothing to save.
            continue
        suffix = img_url.split(".")[-1]       # File extension taken from the URL.
        file_name = (title + "." + suffix).translate(unsafe_chars)
        path = os.path.join("./image", file_name)
        try:
            # Download and write inside the same try so that one bad image
            # (network error, HTTP error, unwritable path) does not abort the crawl.
            img_response = requests.get(url=img_url, headers=headers, timeout=10)
            img_response.raise_for_status()
            with open(path, "wb") as f:    # The with-block closes the file.
                f.write(img_response.content)
            count += 1
            print("保存成功{0}，爬取第 {1} 页，已爬取 {2} 张图片".format(title, i, count))
        except (requests.RequestException, OSError) as e:
            print(e)
        time.sleep(0.5)        # Throttle requests to be polite to the server.
end = time.time()
# Report pages and images separately (the page slot used to receive the image count).
print("爬取完毕，爬取{0}页，一共{1}张，用{2}秒".format(pages, count, end-start))