0
点赞
收藏
分享

微信扫一扫

python异步协程实战:wallhaven壁纸网站

西街小学的王 2022-04-26 阅读 71
python安全

开发环境:Python 3.9、IDEA

相较于线程池方案,速度提升了约50%:在校园网100M带宽下,爬取一页(24张)大约需要60s

代码默认只下载第一页的图片;想多爬几页的话,在 main() 里对 url 中的 page 参数加个循环就好了

整套代码如下,仅供参考

import asyncio
import os
import time

import aiofiles
import aiohttp
import requests
from lxml import etree


async def aiodownload(li):
    """Download the full-size wallpaper referenced by one toplist ``<li>`` element.

    The detail page linked from ``./figure/a/@href`` is fetched, the image
    URL is read from it, and the image bytes are saved under ``img/`` using
    the last path segment of the image URL as the file name.

    Reads the module-level ``headers`` dict for every HTTP request.

    Args:
        li: An lxml element for one entry of the toplist page.
    """
    sub_url = "".join(li.xpath('./figure/a/@href'))
    if not sub_url:
        print("跳过: 未找到详情页链接")
        return

    async with aiohttp.ClientSession() as session:
        # Fetch the detail page through aiohttp so the event loop is never
        # blocked; a blocking requests.get here would serialize all tasks.
        page_text = ""
        for attempt in range(3):
            if attempt:
                # Back off before retrying a throttled request.
                await asyncio.sleep(3)
            async with session.get(sub_url, headers=headers) as sub_resp:
                status = sub_resp.status
                page_text = await sub_resp.text()
            # A 429 status or a suspiciously short body is treated as rate
            # limiting and the request is repeated.
            if status != 429 and len(page_text) >= 1000:
                break

        sub_html = etree.HTML(page_text) if page_text else None
        if sub_html is None:
            print("跳过: 详情页为空", sub_url)
            return
        # Direct download link of the image.
        img_url = "".join(sub_html.xpath('/html/body/main/section/div[1]/img/@src'))
        if not img_url:
            print("跳过: 未找到图片链接", sub_url)
            return

        async with session.get(img_url, headers=headers) as img_resp:
            if img_resp.status != 200:
                # Do not save an error page as if it were an image.
                print("下载失败", img_url, img_resp.status)
                return
            img_name = img_url.split('/')[-1]
            os.makedirs("img", exist_ok=True)
            async with aiofiles.open("img/" + img_name, mode="wb") as f:
                await f.write(await img_resp.content.read())
                print("下载完成", img_name)


async def main():
    """Download every wallpaper listed on page 1 of the wallhaven toplist.

    Fetches the listing page, creates one ``aiodownload`` task per ``<li>``
    entry and waits for all of them. A failing task does not cancel the
    others; its exception is printed once all tasks have finished.

    Reads the module-level ``headers`` dict for the listing request.
    """
    url = "https://wallhaven.cc/toplist?page=1"
    # A timeout keeps a stalled connection from hanging the whole run.
    resp = requests.get(url, headers=headers, timeout=30)
    html = etree.HTML(resp.text) if resp.text else None
    # Collect the list items, one per wallpaper thumbnail.
    lis = html.xpath('/html/body/main/div[1]/section[1]/ul/li') if html is not None else []
    if not lis:
        # asyncio.wait() raises ValueError on an empty collection, so an
        # empty listing is reported explicitly instead.
        print("未找到任何图片条目")
        return

    tasks = [asyncio.create_task(aiodownload(li)) for li in lis]
    # return_exceptions=True lets every download finish and hands back the
    # exceptions, which asyncio.wait() would have left unretrieved.
    results = await asyncio.gather(*tasks, return_exceptions=True)
    for result in results:
        if isinstance(result, BaseException):
            print("下载失败:", repr(result))


if __name__ == '__main__':
    # Module-level request headers read by main() and aiodownload().
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.44"
    }
    t1 = time.time()
    print("开始下载...")
    # asyncio.run creates and closes the event loop itself; calling
    # asyncio.get_event_loop() without a running loop is deprecated.
    asyncio.run(main())
    print("下载完成...")
    t2 = time.time()
    print('运行时间:', t2 - t1)

举报

相关推荐

0 条评论